1 | /* |
2 | * Copyright 1993-2020 NVIDIA Corporation. All rights reserved. |
3 | * |
4 | * NOTICE TO LICENSEE: |
5 | * |
6 | * This source code and/or documentation ("Licensed Deliverables") are |
7 | * subject to NVIDIA intellectual property rights under U.S. and |
8 | * international Copyright laws. |
9 | * |
10 | * These Licensed Deliverables contained herein is PROPRIETARY and |
11 | * CONFIDENTIAL to NVIDIA and is being provided under the terms and |
12 | * conditions of a form of NVIDIA software license agreement by and |
13 | * between NVIDIA and Licensee ("License Agreement") or electronically |
14 | * accepted by Licensee. Notwithstanding any terms or conditions to |
15 | * the contrary in the License Agreement, reproduction or disclosure |
16 | * of the Licensed Deliverables to any third party without the express |
17 | * written consent of NVIDIA is prohibited. |
18 | * |
19 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE |
20 | * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE |
21 | * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS |
22 | * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. |
23 | * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED |
24 | * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, |
25 | * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. |
26 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE |
27 | * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY |
28 | * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY |
29 | * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
30 | * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS |
31 | * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
32 | * OF THESE LICENSED DELIVERABLES. |
33 | * |
34 | * U.S. Government End Users. These Licensed Deliverables are a |
35 | * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT |
36 | * 1995), consisting of "commercial computer software" and "commercial |
37 | * computer software documentation" as such terms are used in 48 |
38 | * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government |
39 | * only as a commercial end item. Consistent with 48 C.F.R.12.212 and |
40 | * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all |
41 | * U.S. Government End Users acquire the Licensed Deliverables with |
42 | * only those rights set forth herein. |
43 | * |
44 | * Any use of the Licensed Deliverables in individual and commercial |
45 | * software must include, in the user documentation and internal |
46 | * comments to the code, the above Disclaimer and U.S. Government End |
47 | * Users Notice. |
48 | */ |
49 | |
50 | /* |
51 | * cudnn_cnn_infer : cuDNN's basic definitions and inference CNN functions. |
52 | */ |
53 | |
54 | #if !defined(CUDNN_CNN_INFER_H_) |
55 | #define CUDNN_CNN_INFER_H_ |
56 | |
57 | #pragma once |
58 | #include <cuda_runtime.h> |
59 | #include <stdint.h> |
60 | |
61 | #include "cudnn_version.h" |
62 | #include "cudnn_ops_infer.h" |
63 | |
64 | /* These version numbers are autogenerated, do not edit manually. */ |
65 | #define CUDNN_CNN_INFER_MAJOR 8 |
66 | #define CUDNN_CNN_INFER_MINOR 2 |
67 | #define CUDNN_CNN_INFER_PATCH 4 |
68 | |
69 | #if (CUDNN_CNN_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_INFER_MINOR != CUDNN_MINOR) || \ |
70 | (CUDNN_CNN_INFER_PATCH != CUDNN_PATCHLEVEL) |
71 | #error Version mismatch in cuDNN CNN INFER!!! |
72 | #endif |
73 | |
74 | #if defined(__cplusplus) |
75 | extern "C" { |
76 | #endif |
77 | |
/* Opaque handle to a convolution descriptor; create with cudnnCreateConvolutionDescriptor
   and destroy with cudnnDestroyConvolutionDescriptor (declared below). */
typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t;
79 | |
/*
 * convolution mode
 * CUDNN_CONVOLUTION applies the filter flipped (mathematical convolution);
 * CUDNN_CROSS_CORRELATION applies it unflipped (the usual deep-learning operation).
 */
typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
84 | |
/*
 * CUDNN Reorder
 * Controls whether filter/bias data may be kept in a library-internal layout
 * (see cudnnSetConvolutionReorderType and cudnnReorderFilterAndBias below).
 */
typedef enum {
    CUDNN_DEFAULT_REORDER = 0, /* library may reorder data as it sees fit */
    CUDNN_NO_REORDER = 1,      /* data is used in the caller-provided layout */
} cudnnReorderType_t;
92 | |
/* One ranked result produced by the forward-algorithm query/search APIs
   (cudnnGetConvolutionForwardAlgorithm_v7, cudnnFindConvolutionForwardAlgorithm[Ex]). */
typedef struct cudnnConvolutionFwdAlgoPerfStruct {
    cudnnConvolutionFwdAlgo_t algo; /* the algorithm this entry describes */
    cudnnStatus_t status;           /* status obtained while evaluating the algorithm */
    float time;                     /* measured execution time in ms (benchmarking Find* APIs only) */
    size_t memory;                  /* workspace size in bytes required by the algorithm */
    cudnnDeterminism_t determinism; /* whether the algorithm produces reproducible results */
    cudnnMathType_t mathType;       /* math mode (e.g. tensor ops) the algorithm would use */
    int reserved[3];                /* reserved for future use; do not interpret */
} cudnnConvolutionFwdAlgoPerf_t;
102 | |
/* Create an instance of convolution descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);

/* Destroy an instance of convolution descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);

/* Select the math mode (e.g. whether tensor ops are permitted) for convolutions using convDesc */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);

/* Query the math mode currently set on convDesc */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);

/* Set the number of groups for grouped convolution (1 = ordinary, ungrouped convolution) */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);

/* Query the group count currently set on convDesc */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);

/* Set whether filter/bias data may be reordered for convolutions using convDesc (see cudnnReorderType_t) */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);

/* Query the reorder type currently set on convDesc */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);
128 | |
/* Configure convDesc for 2D convolution: zero-padding, stride, dilation, mode and compute type */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
                                int pad_h, /* zero-padding height */
                                int pad_w, /* zero-padding width */
                                int u, /* vertical filter stride */
                                int v, /* horizontal filter stride */
                                int dilation_h, /* filter dilation in the vertical dimension */
                                int dilation_w, /* filter dilation in the horizontal dimension */
                                cudnnConvolutionMode_t mode,
                                cudnnDataType_t computeType);

/* Retrieve the 2D convolution parameters previously set on convDesc */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
                                int *pad_h, /* zero-padding height */
                                int *pad_w, /* zero-padding width */
                                int *u, /* vertical filter stride */
                                int *v, /* horizontal filter stride */
                                int *dilation_h, /* filter dilation in the vertical dimension */
                                int *dilation_w, /* filter dilation in the horizontal dimension */
                                cudnnConvolutionMode_t *mode,
                                cudnnDataType_t *computeType);

/* Configure convDesc for N-dimensional convolution; each array holds arrayLength entries,
   one per spatial dimension */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
                                int arrayLength, /* nbDims-2 size */
                                const int padA[],
                                const int filterStrideA[],
                                const int dilationA[],
                                cudnnConvolutionMode_t mode,
                                cudnnDataType_t computeType); /* convolution data type */
159 | |
/* Retrieve the Nd convolution parameters previously set on convDesc; at most
   arrayLengthRequested entries of each array are written, and *arrayLength
   receives the descriptor's actual dimension count.
   NOTE(review): the original comment here described an output-dimension helper,
   which this function is not. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
                                int arrayLengthRequested,
                                int *arrayLength,
                                int padA[],
                                int strideA[],
                                int dilationA[],
                                cudnnConvolutionMode_t *mode,
                                cudnnDataType_t *computeType); /* convolution data type */

/* Helper function to return the dimensions (n, c, h, w) of the output tensor of a 2D
   convolution, given the convolution descriptor, input tensor and filter */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
                                      const cudnnTensorDescriptor_t inputTensorDesc,
                                      const cudnnFilterDescriptor_t filterDesc,
                                      int *n,
                                      int *c,
                                      int *h,
                                      int *w);
179 | |
180 | /* Helper function to return the dimensions of the output tensor given a convolution descriptor */ |
181 | cudnnStatus_t CUDNNWINAPI |
182 | cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc, |
183 | const cudnnTensorDescriptor_t inputTensorDesc, |
184 | const cudnnFilterDescriptor_t filterDesc, |
185 | int nbDims, |
186 | int tensorOuputDimA[]); |
187 | |
/* helper function to provide the convolution forward algo that fit best the requirement */

/* Upper bound on how many entries the forward query/search APIs below can return;
   use it to size the perfResults array */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);

/* Heuristic (no benchmarking) ranking of forward algorithms for the given problem,
   written into perfResults in order of preference */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
                                       const cudnnTensorDescriptor_t srcDesc,
                                       const cudnnFilterDescriptor_t filterDesc,
                                       const cudnnConvolutionDescriptor_t convDesc,
                                       const cudnnTensorDescriptor_t destDesc,
                                       const int requestedAlgoCount,
                                       int *returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t *perfResults);

/* Benchmark the forward algorithms (the library allocates its own buffers) and
   rank them by measured time in perfResults */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
                                     const cudnnTensorDescriptor_t xDesc,
                                     const cudnnFilterDescriptor_t wDesc,
                                     const cudnnConvolutionDescriptor_t convDesc,
                                     const cudnnTensorDescriptor_t yDesc,
                                     const int requestedAlgoCount,
                                     int *returnedAlgoCount,
                                     cudnnConvolutionFwdAlgoPerf_t *perfResults);

/* As the Find variant above, but benchmarks on the caller-provided x/w/y buffers
   and workspace instead of library-allocated ones */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
                                       const cudnnTensorDescriptor_t xDesc,
                                       const void *x,
                                       const cudnnFilterDescriptor_t wDesc,
                                       const void *w,
                                       const cudnnConvolutionDescriptor_t convDesc,
                                       const cudnnTensorDescriptor_t yDesc,
                                       void *y,
                                       const int requestedAlgoCount,
                                       int *returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t *perfResults,
                                       void *workSpace,
                                       size_t workSpaceSizeInBytes);
226 | |
/* Expands the input image batch into the column buffer used by GEMM-style convolution
   (im2col); colBuffer layout is determined by xDesc, wDesc and convDesc */
cudnnStatus_t CUDNNWINAPI
cudnnIm2Col(cudnnHandle_t handle,
            const cudnnTensorDescriptor_t xDesc,
            const void *x,
            const cudnnFilterDescriptor_t wDesc,
            const cudnnConvolutionDescriptor_t convDesc,
            void *colBuffer);

/* Reorders filter data (and bias data when reorderBias is nonzero) into the layout
   selected by reorderType, writing to the reordered* output buffers */
cudnnStatus_t CUDNNWINAPI
cudnnReorderFilterAndBias(cudnnHandle_t handle,
                          const cudnnFilterDescriptor_t filterDesc,
                          cudnnReorderType_t reorderType,
                          const void *filterData,
                          void *reorderedFilterData,
                          int reorderBias, /* nonzero: also reorder biasData into reorderedBiasData */
                          const void *biasData,
                          void *reorderedBiasData);

/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
                                        const cudnnTensorDescriptor_t xDesc,
                                        const cudnnFilterDescriptor_t wDesc,
                                        const cudnnConvolutionDescriptor_t convDesc,
                                        const cudnnTensorDescriptor_t yDesc,
                                        cudnnConvolutionFwdAlgo_t algo,
                                        size_t *sizeInBytes);
254 | |
/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform the forward pass for batch convolution:
   y = alpha * conv(x, w) + beta * y.
   workSpace must be at least the size reported by cudnnGetConvolutionForwardWorkspaceSize
   for the chosen algo. */
cudnnStatus_t CUDNNWINAPI
cudnnConvolutionForward(cudnnHandle_t handle,
                        const void *alpha,
                        const cudnnTensorDescriptor_t xDesc,
                        const void *x,
                        const cudnnFilterDescriptor_t wDesc,
                        const void *w,
                        const cudnnConvolutionDescriptor_t convDesc,
                        cudnnConvolutionFwdAlgo_t algo,
                        void *workSpace,
                        size_t workSpaceSizeInBytes,
                        const void *beta,
                        const cudnnTensorDescriptor_t yDesc,
                        void *y);

/* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias ) */
cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
                                      const void *alpha1,
                                      const cudnnTensorDescriptor_t xDesc,
                                      const void *x,
                                      const cudnnFilterDescriptor_t wDesc,
                                      const void *w,
                                      const cudnnConvolutionDescriptor_t convDesc,
                                      cudnnConvolutionFwdAlgo_t algo,
                                      void *workSpace,
                                      size_t workSpaceSizeInBytes,
                                      const void *alpha2,
                                      const cudnnTensorDescriptor_t zDesc, /* residual input z */
                                      const void *z,
                                      const cudnnTensorDescriptor_t biasDesc,
                                      const void *bias,
                                      const cudnnActivationDescriptor_t activationDesc,
                                      const cudnnTensorDescriptor_t yDesc,
                                      void *y);
293 | |
/* helper function to provide the convolution backward data algo that fit best the requirement */

/* One ranked result produced by the backward-data algorithm query/search APIs
   (cudnnGetConvolutionBackwardDataAlgorithm_v7, cudnnFindConvolutionBackwardDataAlgorithm[Ex]). */
typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
    cudnnConvolutionBwdDataAlgo_t algo; /* the algorithm this entry describes */
    cudnnStatus_t status;               /* status obtained while evaluating the algorithm */
    float time;                         /* measured execution time in ms (benchmarking Find* APIs only) */
    size_t memory;                      /* workspace size in bytes required by the algorithm */
    cudnnDeterminism_t determinism;     /* whether the algorithm produces reproducible results */
    cudnnMathType_t mathType;           /* math mode (e.g. tensor ops) the algorithm would use */
    int reserved[3];                    /* reserved for future use; do not interpret */
} cudnnConvolutionBwdDataAlgoPerf_t;
305 | |
/* Upper bound on how many entries the backward-data query/search APIs below can return;
   use it to size the perfResults array */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);

/* Benchmark the backward-data algorithms (the library allocates its own buffers) and
   rank them by measured time in perfResults */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
                                          const cudnnFilterDescriptor_t wDesc,
                                          const cudnnTensorDescriptor_t dyDesc,
                                          const cudnnConvolutionDescriptor_t convDesc,
                                          const cudnnTensorDescriptor_t dxDesc,
                                          const int requestedAlgoCount,
                                          int *returnedAlgoCount,
                                          cudnnConvolutionBwdDataAlgoPerf_t *perfResults);

/* As the Find variant above, but benchmarks on the caller-provided w/dy/dx buffers
   and workspace instead of library-allocated ones */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
                                            const cudnnFilterDescriptor_t wDesc,
                                            const void *w,
                                            const cudnnTensorDescriptor_t dyDesc,
                                            const void *dy,
                                            const cudnnConvolutionDescriptor_t convDesc,
                                            const cudnnTensorDescriptor_t dxDesc,
                                            void *dx,
                                            const int requestedAlgoCount,
                                            int *returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
                                            void *workSpace,
                                            size_t workSpaceSizeInBytes);

/* Heuristic (no benchmarking) ranking of backward-data algorithms for the given problem,
   written into perfResults in order of preference */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
                                            const cudnnFilterDescriptor_t filterDesc,
                                            const cudnnTensorDescriptor_t diffDesc,
                                            const cudnnConvolutionDescriptor_t convDesc,
                                            const cudnnTensorDescriptor_t gradDesc,
                                            const int requestedAlgoCount,
                                            int *returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
343 | |
/*
 * convolution algorithm (which requires potentially some workspace)
 */

/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
                                             const cudnnFilterDescriptor_t wDesc,
                                             const cudnnTensorDescriptor_t dyDesc,
                                             const cudnnConvolutionDescriptor_t convDesc,
                                             const cudnnTensorDescriptor_t dxDesc,
                                             cudnnConvolutionBwdDataAlgo_t algo,
                                             size_t *sizeInBytes);

/* Data-gradient pass: dx = alpha * convBackwardData(w, dy) + beta * dx.
   workSpace must be at least the size reported by
   cudnnGetConvolutionBackwardDataWorkspaceSize for the chosen algo. */
cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBackwardData(cudnnHandle_t handle,
                             const void *alpha,
                             const cudnnFilterDescriptor_t wDesc,
                             const void *w,
                             const cudnnTensorDescriptor_t dyDesc,
                             const void *dy,
                             const cudnnConvolutionDescriptor_t convDesc,
                             cudnnConvolutionBwdDataAlgo_t algo,
                             void *workSpace,
                             size_t workSpaceSizeInBytes,
                             const void *beta,
                             const cudnnTensorDescriptor_t dxDesc,
                             void *dx);
372 | |
/* Helper function to calculate folding descriptors for dgrad.
   Given the original filter/diff/conv/grad descriptors and a target transform format,
   fills in the previously created folded descriptors plus the tensor-transform
   descriptors needed to fold inputs and unfold the resulting gradient. */
cudnnStatus_t CUDNNWINAPI
cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
                                          const cudnnFilterDescriptor_t filterDesc,
                                          const cudnnTensorDescriptor_t diffDesc,
                                          const cudnnConvolutionDescriptor_t convDesc,
                                          const cudnnTensorDescriptor_t gradDesc,
                                          const cudnnTensorFormat_t transformFormat,
                                          cudnnFilterDescriptor_t foldedFilterDesc,
                                          cudnnTensorDescriptor_t paddedDiffDesc,
                                          cudnnConvolutionDescriptor_t foldedConvDesc,
                                          cudnnTensorDescriptor_t foldedGradDesc,
                                          cudnnTensorTransformDescriptor_t filterFoldTransDesc,
                                          cudnnTensorTransformDescriptor_t diffPadTransDesc,
                                          cudnnTensorTransformDescriptor_t gradFoldTransDesc,
                                          cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
389 | |
/* cudnnFusedOps... : opaque handles and selectors for the fused-operation API */

/* Pack of constant (set-up time) parameters of a fused op — descriptors, modes and
   pointer placeholders (see cudnnFusedOpsConstParamLabel_t below) */
struct cudnnFusedOpsConstParamStruct;
typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t;

/* Pack of variant (execution time) parameters — device pointers and host scalars
   (see cudnnFusedOpsVariantParamLabel_t below) */
struct cudnnFusedOpsVariantParamStruct;
typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t;

/* Execution plan for a fused-op sequence */
struct cudnnFusedOpsPlanStruct;
typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t;

/* Selects which fused sequence of operations a plan executes */
typedef enum {
    /* each op in [ ] can be disabled by passing NULL ptr */
    /* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
    /* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
    /* utility for BN training in BN-conv fusion */
    /* computes the equivalent scale and bias from ySum ySqSum and learned scale, bias */
    /* optionally update running stats and generate saved stats */
    CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
    /* utility for BN inference in BN-conv fusion */
    /* computes the equivalent scale and bias from learned running stats and learned scale, bias */
    CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
    /* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
    CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
    /* reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask */
    CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
    /* reserved for future use */
    CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
} cudnnFusedOps_t;
420 | |
/* Labels addressing the entries of a const param pack. Each label's comment states the
   type expected by the corresponding Set/Get attribute call. "Placeholder" entries take a
   cudnnFusedOpsPointerPlaceHolder_t announcing how the matching device pointer will be
   supplied later through the variant param pack. */
typedef enum {
    /* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get XDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_XDESC = 0,
    /* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
    /* set/get BN_MODE: pass cudnnBatchNormMode_t* */
    CUDNN_PARAM_BN_MODE = 2,
    /* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
    /* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
    /* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
    /* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
    /* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
    CUDNN_PARAM_ACTIVATION_DESC = 6,
    /* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
    /* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
    CUDNN_PARAM_CONV_DESC = 7,
    /* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
    /* get WDESC: pass previously created cudnnFilterDescriptor_t */
    CUDNN_PARAM_WDESC = 8,
    /* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
    /* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
    /* get DWDESC: pass previously created cudnnFilterDescriptor_t */
    CUDNN_PARAM_DWDESC = 10,
    /* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
    /* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get YDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_YDESC = 12,
    /* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
    /* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DYDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DYDESC = 14,
    /* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
    /* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_YSTATS_DESC = 16,
    /* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
    /* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
    /* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
    /* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
    /* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
    /* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
    /* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
    /* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
    /* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,

    /* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get ZDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_ZDESC = 26,
    /* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
    /* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
    /* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
    /* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,

    /* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
    /* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,

    /* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DXDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DXDESC = 33,
    /* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
    /* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DZDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DZDESC = 35,
    /* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
    /* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
    /* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
} cudnnFusedOpsConstParamLabel_t;
519 | |
/* Declares, at plan-construction time, how a device pointer will later be supplied
   via the variant param pack: not at all (NULL, disabling the optional op), aligned
   to the element size, or aligned to a 16-byte boundary. */
typedef enum {
    CUDNN_PTR_NULL = 0,
    CUDNN_PTR_ELEM_ALIGNED = 1,
    CUDNN_PTR_16B_ALIGNED = 2,
} cudnnFusedOpsPointerPlaceHolder_t;

/* Labels addressing the entries of a variant param pack: CUDNN_PTR_* entries are device
   pointers, CUDNN_SCALAR_* entries are host scalars of the type named in their label. */
typedef enum {
    /* set: pass void* pointing to dev memory */
    /* get: pass void** pointing to host memory */
    CUDNN_PTR_XDATA = 0,
    CUDNN_PTR_BN_EQSCALE = 1,
    CUDNN_PTR_BN_EQBIAS = 2,
    CUDNN_PTR_WDATA = 3,
    CUDNN_PTR_DWDATA = 4,
    CUDNN_PTR_YDATA = 5,
    CUDNN_PTR_DYDATA = 6,
    CUDNN_PTR_YSUM = 7,
    CUDNN_PTR_YSQSUM = 8,
    CUDNN_PTR_WORKSPACE = 9,
    CUDNN_PTR_BN_SCALE = 10,
    CUDNN_PTR_BN_BIAS = 11,
    CUDNN_PTR_BN_SAVED_MEAN = 12,
    CUDNN_PTR_BN_SAVED_INVSTD = 13,
    CUDNN_PTR_BN_RUNNING_MEAN = 14,
    CUDNN_PTR_BN_RUNNING_VAR = 15,
    CUDNN_PTR_ZDATA = 16,
    CUDNN_PTR_BN_Z_EQSCALE = 17,
    CUDNN_PTR_BN_Z_EQBIAS = 18,
    CUDNN_PTR_ACTIVATION_BITMASK = 19,
    CUDNN_PTR_DXDATA = 20,
    CUDNN_PTR_DZDATA = 21,
    CUDNN_PTR_BN_DSCALE = 22,
    CUDNN_PTR_BN_DBIAS = 23,

    /* set/get: pass size_t* pointing to host memory */
    CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
    /* set/get: pass int64_t* pointing to host memory */
    CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
    /* set/get: pass double* pointing to host memory */
    CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
    /* set/get: pass double* pointing to host memory */
    CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
} cudnnFusedOpsVariantParamLabel_t;
563 | |
/* Runtime counterpart of the compile-time version check above: verifies that the loaded
   cudnn_cnn_infer sub-library matches the cuDNN core library version */
cudnnStatus_t CUDNNWINAPI
cudnnCnnInferVersionCheck(void);
566 | |
567 | #if defined(__cplusplus) |
568 | } |
569 | #endif |
570 | |
571 | #endif /* CUDNN_CNN_INFER_H_ */ |
572 | |