1/*
2 * Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
3 *
4 * NOTICE TO LICENSEE:
5 *
6 * This source code and/or documentation ("Licensed Deliverables") are
7 * subject to NVIDIA intellectual property rights under U.S. and
8 * international Copyright laws.
9 *
10 * These Licensed Deliverables contained herein is PROPRIETARY and
11 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12 * conditions of a form of NVIDIA software license agreement by and
13 * between NVIDIA and Licensee ("License Agreement") or electronically
14 * accepted by Licensee. Notwithstanding any terms or conditions to
15 * the contrary in the License Agreement, reproduction or disclosure
16 * of the Licensed Deliverables to any third party without the express
17 * written consent of NVIDIA is prohibited.
18 *
19 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32 * OF THESE LICENSED DELIVERABLES.
33 *
34 * U.S. Government End Users. These Licensed Deliverables are a
35 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36 * 1995), consisting of "commercial computer software" and "commercial
37 * computer software documentation" as such terms are used in 48
38 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41 * U.S. Government End Users acquire the Licensed Deliverables with
42 * only those rights set forth herein.
43 *
44 * Any use of the Licensed Deliverables in individual and commercial
45 * software must include, in the user documentation and internal
46 * comments to the code, the above Disclaimer and U.S. Government End
47 * Users Notice.
48 */
49
50/*
51 * cudnn_cnn_infer : cuDNN's basic definitions and inference CNN functions.
52 */
53
54#if !defined(CUDNN_CNN_INFER_H_)
55#define CUDNN_CNN_INFER_H_
56
57#pragma once
58#include <cuda_runtime.h>
59#include <stdint.h>
60
61#include "cudnn_version.h"
62#include "cudnn_ops_infer.h"
63
64/* These version numbers are autogenerated, do not edit manually. */
65#define CUDNN_CNN_INFER_MAJOR 8
66#define CUDNN_CNN_INFER_MINOR 2
67#define CUDNN_CNN_INFER_PATCH 4
68
69#if (CUDNN_CNN_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_INFER_MINOR != CUDNN_MINOR) || \
70 (CUDNN_CNN_INFER_PATCH != CUDNN_PATCHLEVEL)
71#error Version mismatch in cuDNN CNN INFER!!!
72#endif
73
74#if defined(__cplusplus)
75extern "C" {
76#endif
77
/* Opaque handle to a convolution descriptor; create with cudnnCreateConvolutionDescriptor()
 * and release with cudnnDestroyConvolutionDescriptor() (declared below). */
typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t;
79
/*
 * convolution mode
 *
 * CUDNN_CONVOLUTION selects true mathematical convolution, while
 * CUDNN_CROSS_CORRELATION skips the filter flip (see the cuDNN
 * documentation for the precise definitions).
 */
typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
84
/*
 * CUDNN Reorder
 *
 * Controls whether cuDNN may keep filter/bias data in a reordered internal
 * layout (see cudnnSetConvolutionReorderType and cudnnReorderFilterAndBias below).
 */
typedef enum {
    CUDNN_DEFAULT_REORDER = 0, /* cuDNN chooses the layout */
    CUDNN_NO_REORDER = 1,      /* data is used as provided, no reordering */
} cudnnReorderType_t;
92
/* Performance/suitability record for one forward-convolution algorithm, filled in by
 * cudnnFindConvolutionForwardAlgorithm[Ex] and cudnnGetConvolutionForwardAlgorithm_v7. */
typedef struct cudnnConvolutionFwdAlgoPerfStruct {
    cudnnConvolutionFwdAlgo_t algo; /* the algorithm this entry describes */
    cudnnStatus_t status;           /* status encountered while evaluating this algo */
    float time;                     /* measured execution time (presumably milliseconds -- confirm in cuDNN docs) */
    size_t memory;                  /* workspace needed by this algo, in bytes */
    cudnnDeterminism_t determinism;
    cudnnMathType_t mathType;
    int reserved[3];                /* reserved for future use */
} cudnnConvolutionFwdAlgoPerf_t;
102
/* Create an instance of convolution descriptor (pair with cudnnDestroyConvolutionDescriptor) */
cudnnStatus_t CUDNNWINAPI
cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);
106
/* Destroy an instance of convolution descriptor previously created above */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
110
/* Set / query the cudnnMathType_t used by convolutions executed with convDesc */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);

cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);
116
/* Set / query the group count attached to convDesc (for grouped convolutions) */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);

cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);
122
/* Set / query the cudnnReorderType_t attached to convDesc (see enum above) */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);

cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);
128
/* Initialize a previously created descriptor as a 2D convolution */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
                                int pad_h,      /* zero-padding height */
                                int pad_w,      /* zero-padding width */
                                int u,          /* vertical filter stride */
                                int v,          /* horizontal filter stride */
                                int dilation_h, /* filter dilation in the vertical dimension */
                                int dilation_w, /* filter dilation in the horizontal dimension */
                                cudnnConvolutionMode_t mode,
                                cudnnDataType_t computeType);
139
/* Retrieve the parameters of a previously initialized 2D convolution descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
                                int *pad_h,      /* zero-padding height */
                                int *pad_w,      /* zero-padding width */
                                int *u,          /* vertical filter stride */
                                int *v,          /* horizontal filter stride */
                                int *dilation_h, /* filter dilation in the vertical dimension */
                                int *dilation_w, /* filter dilation in the horizontal dimension */
                                cudnnConvolutionMode_t *mode,
                                cudnnDataType_t *computeType);
150
/* Initialize a previously created descriptor as an N-D convolution;
 * arrayLength is the number of spatial dimensions (tensor nbDims - 2). */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
                                int arrayLength, /* nbDims-2 size */
                                const int padA[],
                                const int filterStrideA[],
                                const int dilationA[],
                                cudnnConvolutionMode_t mode,
                                cudnnDataType_t computeType); /* convolution data type */
159
/* Retrieve the parameters of a previously initialized Nd convolution descriptor.
 * (NOTE(review): the original comment here described the output-dim helper below; this
 * function returns the descriptor's own pad/stride/dilation arrays, mode and compute type.)
 * arrayLengthRequested is presumably the capacity of the caller-supplied arrays -- confirm
 * against the cuDNN documentation. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
                                int arrayLengthRequested,
                                int *arrayLength,
                                int padA[],
                                int strideA[],
                                int dilationA[],
                                cudnnConvolutionMode_t *mode,
                                cudnnDataType_t *computeType); /* convolution data type */
170
/* Helper function to return the (n, c, h, w) dimensions of the output tensor of a
 * 2D convolution, given the convolution, input-tensor and filter descriptors. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
                                      const cudnnTensorDescriptor_t inputTensorDesc,
                                      const cudnnFilterDescriptor_t filterDesc,
                                      int *n,
                                      int *c,
                                      int *h,
                                      int *w);
179
180/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
181cudnnStatus_t CUDNNWINAPI
182cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
183 const cudnnTensorDescriptor_t inputTensorDesc,
184 const cudnnFilterDescriptor_t filterDesc,
185 int nbDims,
186 int tensorOuputDimA[]);
187
/* Helper functions to provide the convolution forward algo that best fits the requirements. */
/* Returns the maximum number of entries the algorithm-query functions below can report;
 * use this to size the perfResults array. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);
191
/* Heuristically ranks forward algorithms for the given descriptors; writes up to
 * requestedAlgoCount entries into perfResults and the actual number into
 * returnedAlgoCount (ordering per the cuDNN documentation). */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
                                       const cudnnTensorDescriptor_t srcDesc,
                                       const cudnnFilterDescriptor_t filterDesc,
                                       const cudnnConvolutionDescriptor_t convDesc,
                                       const cudnnTensorDescriptor_t destDesc,
                                       const int requestedAlgoCount,
                                       int *returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t *perfResults);
201
/* Measures available forward algorithms for the given descriptors and returns up to
 * requestedAlgoCount results in perfResults (no user data supplied -- see the Ex
 * variant below for caller-provided buffers). */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
                                     const cudnnTensorDescriptor_t xDesc,
                                     const cudnnFilterDescriptor_t wDesc,
                                     const cudnnConvolutionDescriptor_t convDesc,
                                     const cudnnTensorDescriptor_t yDesc,
                                     const int requestedAlgoCount,
                                     int *returnedAlgoCount,
                                     cudnnConvolutionFwdAlgoPerf_t *perfResults);
211
/* Same as cudnnFindConvolutionForwardAlgorithm, but runs on caller-supplied device
 * buffers (x, w, y) and a caller-supplied workspace of workSpaceSizeInBytes bytes. */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
                                       const cudnnTensorDescriptor_t xDesc,
                                       const void *x,
                                       const cudnnFilterDescriptor_t wDesc,
                                       const void *w,
                                       const cudnnConvolutionDescriptor_t convDesc,
                                       const cudnnTensorDescriptor_t yDesc,
                                       void *y, /* overwritten during the search */
                                       const int requestedAlgoCount,
                                       int *returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t *perfResults,
                                       void *workSpace,
                                       size_t workSpaceSizeInBytes);
226
/* Expands the input tensor x into column (im2col) form in colBuffer, according to the
 * filter and convolution descriptors. Caller allocates colBuffer (size per cuDNN docs). */
cudnnStatus_t CUDNNWINAPI
cudnnIm2Col(cudnnHandle_t handle,
            const cudnnTensorDescriptor_t xDesc,
            const void *x,
            const cudnnFilterDescriptor_t wDesc,
            const cudnnConvolutionDescriptor_t convDesc,
            void *colBuffer);
234
/* Reorders filterData into reorderedFilterData according to reorderType (see
 * cudnnReorderType_t above). When reorderBias is non-zero, biasData is presumably
 * reordered into reorderedBiasData as well -- confirm flag semantics in the cuDNN docs. */
cudnnStatus_t CUDNNWINAPI
cudnnReorderFilterAndBias(cudnnHandle_t handle,
                          const cudnnFilterDescriptor_t filterDesc,
                          cudnnReorderType_t reorderType,
                          const void *filterData,
                          void *reorderedFilterData,
                          int reorderBias,
                          const void *biasData,
                          void *reorderedBiasData);
244
/* Helper function to return the minimum size of the workspace to be passed to the
 * convolution given an algo (the caller allocates sizeInBytes bytes of device memory
 * and passes them to cudnnConvolutionForward). */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
                                        const cudnnTensorDescriptor_t xDesc,
                                        const cudnnFilterDescriptor_t wDesc,
                                        const cudnnConvolutionDescriptor_t convDesc,
                                        const cudnnTensorDescriptor_t yDesc,
                                        cudnnConvolutionFwdAlgo_t algo,
                                        size_t *sizeInBytes);
254
/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform the forward pass for batch convolution.
 * workSpace must provide at least the size reported by
 * cudnnGetConvolutionForwardWorkspaceSize for the chosen algo. */
cudnnStatus_t CUDNNWINAPI
cudnnConvolutionForward(cudnnHandle_t handle,
                        const void *alpha, /* scaling factor for Op(inputs), per the convention above */
                        const cudnnTensorDescriptor_t xDesc,
                        const void *x,
                        const cudnnFilterDescriptor_t wDesc,
                        const void *w,
                        const cudnnConvolutionDescriptor_t convDesc,
                        cudnnConvolutionFwdAlgo_t algo,
                        void *workSpace,
                        size_t workSpaceSizeInBytes,
                        const void *beta, /* scaling factor for the prior contents of y */
                        const cudnnTensorDescriptor_t yDesc,
                        void *y);
272
/* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias ) */
cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
                                      const void *alpha1, /* scales conv(x) in the formula above */
                                      const cudnnTensorDescriptor_t xDesc,
                                      const void *x,
                                      const cudnnFilterDescriptor_t wDesc,
                                      const void *w,
                                      const cudnnConvolutionDescriptor_t convDesc,
                                      cudnnConvolutionFwdAlgo_t algo,
                                      void *workSpace,
                                      size_t workSpaceSizeInBytes,
                                      const void *alpha2, /* scales the residual input z */
                                      const cudnnTensorDescriptor_t zDesc,
                                      const void *z,
                                      const cudnnTensorDescriptor_t biasDesc,
                                      const void *bias,
                                      const cudnnActivationDescriptor_t activationDesc,
                                      const cudnnTensorDescriptor_t yDesc,
                                      void *y);
293
/* Helper functions to provide the convolution backward-data algo that best fits the requirements */

/* Performance/suitability record for one backward-data algorithm, filled in by
 * cudnnFindConvolutionBackwardDataAlgorithm[Ex] and cudnnGetConvolutionBackwardDataAlgorithm_v7. */
typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
    cudnnConvolutionBwdDataAlgo_t algo; /* the algorithm this entry describes */
    cudnnStatus_t status;               /* status encountered while evaluating this algo */
    float time;                         /* measured execution time (presumably milliseconds -- confirm in cuDNN docs) */
    size_t memory;                      /* workspace needed by this algo, in bytes */
    cudnnDeterminism_t determinism;
    cudnnMathType_t mathType;
    int reserved[3];                    /* reserved for future use */
} cudnnConvolutionBwdDataAlgoPerf_t;
305
/* Returns the maximum number of entries the backward-data algorithm queries below can
 * report; use this to size the perfResults array. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);
308
/* Measures available backward-data algorithms and returns up to requestedAlgoCount
 * results in perfResults (no user data supplied -- see the Ex variant below). */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
                                          const cudnnFilterDescriptor_t wDesc,
                                          const cudnnTensorDescriptor_t dyDesc,
                                          const cudnnConvolutionDescriptor_t convDesc,
                                          const cudnnTensorDescriptor_t dxDesc,
                                          const int requestedAlgoCount,
                                          int *returnedAlgoCount,
                                          cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
318
/* Same as cudnnFindConvolutionBackwardDataAlgorithm, but runs on caller-supplied device
 * buffers (w, dy, dx) and a caller-supplied workspace of workSpaceSizeInBytes bytes. */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
                                            const cudnnFilterDescriptor_t wDesc,
                                            const void *w,
                                            const cudnnTensorDescriptor_t dyDesc,
                                            const void *dy,
                                            const cudnnConvolutionDescriptor_t convDesc,
                                            const cudnnTensorDescriptor_t dxDesc,
                                            void *dx, /* overwritten during the search */
                                            const int requestedAlgoCount,
                                            int *returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
                                            void *workSpace,
                                            size_t workSpaceSizeInBytes);
333
/* Heuristically ranks backward-data algorithms for the given descriptors; writes up to
 * requestedAlgoCount entries into perfResults and the actual number into returnedAlgoCount. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
                                            const cudnnFilterDescriptor_t filterDesc,
                                            const cudnnTensorDescriptor_t diffDesc,
                                            const cudnnConvolutionDescriptor_t convDesc,
                                            const cudnnTensorDescriptor_t gradDesc,
                                            const int requestedAlgoCount,
                                            int *returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
343
/*
 * Each backward-data convolution algorithm may require some device workspace.
 */

/* Helper function to return the minimum size of the workspace to be passed to the
 * backward-data convolution given an algo (pass sizeInBytes bytes of device memory
 * to cudnnConvolutionBackwardData). */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
                                             const cudnnFilterDescriptor_t wDesc,
                                             const cudnnTensorDescriptor_t dyDesc,
                                             const cudnnConvolutionDescriptor_t convDesc,
                                             const cudnnTensorDescriptor_t dxDesc,
                                             cudnnConvolutionBwdDataAlgo_t algo,
                                             size_t *sizeInBytes);
357
/* Computes the data gradient dx from the filter w and the output gradient dy, following
 * the alpha/beta scaling convention ("output = alpha * Op(inputs) + beta * output").
 * workSpace must provide at least the size reported by
 * cudnnGetConvolutionBackwardDataWorkspaceSize for the chosen algo. */
cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBackwardData(cudnnHandle_t handle,
                             const void *alpha,
                             const cudnnFilterDescriptor_t wDesc,
                             const void *w,
                             const cudnnTensorDescriptor_t dyDesc,
                             const void *dy,
                             const cudnnConvolutionDescriptor_t convDesc,
                             cudnnConvolutionBwdDataAlgo_t algo,
                             void *workSpace,
                             size_t workSpaceSizeInBytes,
                             const void *beta,
                             const cudnnTensorDescriptor_t dxDesc,
                             void *dx);
372
/* Helper function to calculate folding descriptors for dgrad.
 * Inputs: the original filter/diff/conv/grad descriptors and the desired transform format.
 * Outputs: the folded/padded descriptors plus the tensor-transform descriptors used to
 * move data between the original and folded layouts (exact folding semantics per the
 * cuDNN documentation). All output descriptors must be created by the caller beforehand
 * -- TODO confirm against cuDNN docs. */
cudnnStatus_t CUDNNWINAPI
cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
                                          const cudnnFilterDescriptor_t filterDesc,
                                          const cudnnTensorDescriptor_t diffDesc,
                                          const cudnnConvolutionDescriptor_t convDesc,
                                          const cudnnTensorDescriptor_t gradDesc,
                                          const cudnnTensorFormat_t transformFormat,
                                          cudnnFilterDescriptor_t foldedFilterDesc,
                                          cudnnTensorDescriptor_t paddedDiffDesc,
                                          cudnnConvolutionDescriptor_t foldedConvDesc,
                                          cudnnTensorDescriptor_t foldedGradDesc,
                                          cudnnTensorTransformDescriptor_t filterFoldTransDesc,
                                          cudnnTensorTransformDescriptor_t diffPadTransDesc,
                                          cudnnTensorTransformDescriptor_t gradFoldTransDesc,
                                          cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
389
/* cudnnFusedOps... -- opaque handles for the fused-ops API */

/* Pack of constant (set-once) parameters for a fused op, keyed by cudnnFusedOpsConstParamLabel_t */
struct cudnnFusedOpsConstParamStruct;
typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t;

/* Pack of variant (per-execution) parameters, keyed by cudnnFusedOpsVariantParamLabel_t */
struct cudnnFusedOpsVariantParamStruct;
typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t;

/* Execution plan for a fused operation sequence */
struct cudnnFusedOpsPlanStruct;
typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t;
399
/* Selects which fused operation sequence a fused-ops plan executes. */
typedef enum {
    /* each op in [ ] can be disabled by passing NULL ptr */
    /* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
    /* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
    /* utility for BN training in BN-conv fusion */
    /* computes the equivalent scale and bias from ySum ySqSum and learned scale, bias */
    /* optionally update running stats and generate saved stats */
    CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
    /* utility for BN inference in BN-conv fusion */
    /* computes the equivalent scale and bias from learned running stats and learned scale, bias */
    CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
    /* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
    CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
    /* reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask */
    CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
    /* reserved for future use */
    CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
} cudnnFusedOps_t;
420
/* Labels identifying each constant (set-once) parameter of a
 * cudnnFusedOpsConstParamPack_t. The per-label comments state the type to pass
 * when setting or getting that parameter. */
typedef enum {
    /* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get XDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_XDESC = 0,
    /* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
    /* set/get BN_MODE: pass cudnnBatchNormMode_t* */
    CUDNN_PARAM_BN_MODE = 2,
    /* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
    /* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
    /* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
    /* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
    /* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
    CUDNN_PARAM_ACTIVATION_DESC = 6,
    /* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
    /* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
    CUDNN_PARAM_CONV_DESC = 7,
    /* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
    /* get WDESC: pass previously created cudnnFilterDescriptor_t */
    CUDNN_PARAM_WDESC = 8,
    /* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
    /* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
    /* get DWDESC: pass previously created cudnnFilterDescriptor_t */
    CUDNN_PARAM_DWDESC = 10,
    /* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
    /* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get YDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_YDESC = 12,
    /* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
    /* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DYDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DYDESC = 14,
    /* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
    /* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_YSTATS_DESC = 16,
    /* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
    /* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
    /* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
    /* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
    /* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
    /* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
    /* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
    /* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
    /* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,

    /* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get ZDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_ZDESC = 26,
    /* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
    /* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
    /* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
    /* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,

    /* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
    /* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,

    /* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DXDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DXDESC = 33,
    /* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
    /* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DZDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DZDESC = 35,
    /* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
    /* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
    /* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
} cudnnFusedOpsConstParamLabel_t;
519
/* Declares, ahead of execution, what kind of pointer will later be bound to a data
 * placeholder: NULL, or a device pointer with element / 16-byte alignment
 * (alignment semantics per the cuDNN documentation). */
typedef enum {
    CUDNN_PTR_NULL = 0,
    CUDNN_PTR_ELEM_ALIGNED = 1,
    CUDNN_PTR_16B_ALIGNED = 2,
} cudnnFusedOpsPointerPlaceHolder_t;
525
/* Labels identifying each variant (per-execution) parameter of a
 * cudnnFusedOpsVariantParamPack_t. Values below 100 are device data pointers;
 * values from 100 up are host-side scalars (types noted per label). */
typedef enum {
    /* set: pass void* pointing to dev memory */
    /* get: pass void** pointing to host memory */
    CUDNN_PTR_XDATA = 0,
    CUDNN_PTR_BN_EQSCALE = 1,
    CUDNN_PTR_BN_EQBIAS = 2,
    CUDNN_PTR_WDATA = 3,
    CUDNN_PTR_DWDATA = 4,
    CUDNN_PTR_YDATA = 5,
    CUDNN_PTR_DYDATA = 6,
    CUDNN_PTR_YSUM = 7,
    CUDNN_PTR_YSQSUM = 8,
    CUDNN_PTR_WORKSPACE = 9,
    CUDNN_PTR_BN_SCALE = 10,
    CUDNN_PTR_BN_BIAS = 11,
    CUDNN_PTR_BN_SAVED_MEAN = 12,
    CUDNN_PTR_BN_SAVED_INVSTD = 13,
    CUDNN_PTR_BN_RUNNING_MEAN = 14,
    CUDNN_PTR_BN_RUNNING_VAR = 15,
    CUDNN_PTR_ZDATA = 16,
    CUDNN_PTR_BN_Z_EQSCALE = 17,
    CUDNN_PTR_BN_Z_EQBIAS = 18,
    CUDNN_PTR_ACTIVATION_BITMASK = 19,
    CUDNN_PTR_DXDATA = 20,
    CUDNN_PTR_DZDATA = 21,
    CUDNN_PTR_BN_DSCALE = 22,
    CUDNN_PTR_BN_DBIAS = 23,

    /* set/get: pass size_t* pointing to host memory */
    CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
    /* set/get: pass int64_t* pointing to host memory */
    CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
    /* set/get: pass double* pointing to host memory */
    CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
    /* set/get: pass double* pointing to host memory */
    CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
} cudnnFusedOpsVariantParamLabel_t;
563
/* Checks that the loaded cudnn_cnn_infer sub-library is consistent with these headers
 * (complements the compile-time version check at the top of this file). */
cudnnStatus_t CUDNNWINAPI
cudnnCnnInferVersionCheck(void);
566
567#if defined(__cplusplus)
568}
569#endif
570
571#endif /* CUDNN_CNN_INFER_H_ */
572