1/*
2 * Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
3 *
4 * NOTICE TO LICENSEE:
5 *
6 * This source code and/or documentation ("Licensed Deliverables") are
7 * subject to NVIDIA intellectual property rights under U.S. and
8 * international Copyright laws.
9 *
10 * These Licensed Deliverables contained herein is PROPRIETARY and
11 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12 * conditions of a form of NVIDIA software license agreement by and
13 * between NVIDIA and Licensee ("License Agreement") or electronically
14 * accepted by Licensee. Notwithstanding any terms or conditions to
15 * the contrary in the License Agreement, reproduction or disclosure
16 * of the Licensed Deliverables to any third party without the express
17 * written consent of NVIDIA is prohibited.
18 *
19 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32 * OF THESE LICENSED DELIVERABLES.
33 *
34 * U.S. Government End Users. These Licensed Deliverables are a
35 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36 * 1995), consisting of "commercial computer software" and "commercial
37 * computer software documentation" as such terms are used in 48
38 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41 * U.S. Government End Users acquire the Licensed Deliverables with
42 * only those rights set forth herein.
43 *
44 * Any use of the Licensed Deliverables in individual and commercial
45 * software must include, in the user documentation and internal
46 * comments to the code, the above Disclaimer and U.S. Government End
47 * Users Notice.
48 */
49
50/*
51 * cudnn_cnn_infer : cuDNN's basic definitions and inference CNN functions.
52 */
53
54#if !defined(CUDNN_CNN_INFER_H_)
55#define CUDNN_CNN_INFER_H_
56
57#pragma once
58#include <cuda_runtime.h>
59#include <stdint.h>
60
61#include "cudnn_version.h"
62#include "cudnn_ops_infer.h"
63
64/* These version numbers are autogenerated, do not edit manually. */
65#define CUDNN_CNN_INFER_MAJOR 8
66#define CUDNN_CNN_INFER_MINOR 2
67#define CUDNN_CNN_INFER_PATCH 4
68
69#if (CUDNN_CNN_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_INFER_MINOR != CUDNN_MINOR) || \
70 (CUDNN_CNN_INFER_PATCH != CUDNN_PATCHLEVEL)
71#error Version mismatch in cuDNN CNN INFER!!!
72#endif
73
74#if defined(__cplusplus)
75extern "C" {
76#endif
77
/* Opaque handle to a convolution descriptor; create with cudnnCreateConvolutionDescriptor()
 * and release with cudnnDestroyConvolutionDescriptor() (declared below). */
typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t;
79
/*
 * convolution mode
 *
 * CUDNN_CONVOLUTION selects true mathematical convolution, while
 * CUDNN_CROSS_CORRELATION skips the filter flip (see the cuDNN
 * documentation for the precise definitions).
 */
typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
84
/*
 * CUDNN Reorder
 *
 * Controls whether cuDNN may keep filter/bias data in a reordered internal
 * layout (see cudnnSetConvolutionReorderType and cudnnReorderFilterAndBias below).
 */
typedef enum {
    CUDNN_DEFAULT_REORDER = 0, /* cuDNN chooses the layout */
    CUDNN_NO_REORDER = 1,      /* data is used as provided, no reordering */
} cudnnReorderType_t;
92
/* Performance/suitability record for one forward-convolution algorithm, filled in by
 * cudnnFindConvolutionForwardAlgorithm[Ex] and cudnnGetConvolutionForwardAlgorithm_v7. */
typedef struct cudnnConvolutionFwdAlgoPerfStruct {
    cudnnConvolutionFwdAlgo_t algo; /* the algorithm this entry describes */
    cudnnStatus_t status;           /* status encountered while evaluating this algo */
    float time;                     /* measured execution time (presumably milliseconds -- confirm in cuDNN docs) */
    size_t memory;                  /* workspace needed by this algo, in bytes */
    cudnnDeterminism_t determinism;
    cudnnMathType_t mathType;
    int reserved[3];                /* reserved for future use */
} cudnnConvolutionFwdAlgoPerf_t;
102
/* Create an instance of convolution descriptor (pair with cudnnDestroyConvolutionDescriptor) */
cudnnStatus_t CUDNNWINAPI
cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);
106
/* Destroy an instance of convolution descriptor previously created above */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);
110
/* Set / query the cudnnMathType_t used by convolutions executed with convDesc */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);

cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);
116
/* Set / query the group count attached to convDesc (for grouped convolutions) */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);

cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);
122
/* Set / query the cudnnReorderType_t attached to convDesc (see enum above) */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);

cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);
128
/* Initialize a previously created descriptor as a 2D convolution */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
                                int pad_h,      /* zero-padding height */
                                int pad_w,      /* zero-padding width */
                                int u,          /* vertical filter stride */
                                int v,          /* horizontal filter stride */
                                int dilation_h, /* filter dilation in the vertical dimension */
                                int dilation_w, /* filter dilation in the horizontal dimension */
                                cudnnConvolutionMode_t mode,
                                cudnnDataType_t computeType);
139
/* Retrieve the parameters of a previously initialized 2D convolution descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
                                int *pad_h,      /* zero-padding height */
                                int *pad_w,      /* zero-padding width */
                                int *u,          /* vertical filter stride */
                                int *v,          /* horizontal filter stride */
                                int *dilation_h, /* filter dilation in the vertical dimension */
                                int *dilation_w, /* filter dilation in the horizontal dimension */
                                cudnnConvolutionMode_t *mode,
                                cudnnDataType_t *computeType);
150
/* Initialize a previously created descriptor as an N-D convolution;
 * arrayLength is the number of spatial dimensions (tensor nbDims - 2). */
cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
                                int arrayLength, /* nbDims-2 size */
                                const int padA[],
                                const int filterStrideA[],
                                const int dilationA[],
                                cudnnConvolutionMode_t mode,
                                cudnnDataType_t computeType); /* convolution data type */
159
/* Retrieve the parameters of a previously initialized Nd convolution descriptor.
 * (NOTE(review): the original comment here described the output-dim helper below; this
 * function returns the descriptor's own pad/stride/dilation arrays, mode and compute type.)
 * arrayLengthRequested is presumably the capacity of the caller-supplied arrays -- confirm
 * against the cuDNN documentation. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
                                int arrayLengthRequested,
                                int *arrayLength,
                                int padA[],
                                int strideA[],
                                int dilationA[],
                                cudnnConvolutionMode_t *mode,
                                cudnnDataType_t *computeType); /* convolution data type */
170
/* Helper function to return the (n, c, h, w) dimensions of the output tensor of a
 * 2D convolution, given the convolution, input-tensor and filter descriptors. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
                                      const cudnnTensorDescriptor_t inputTensorDesc,
                                      const cudnnFilterDescriptor_t filterDesc,
                                      int *n,
                                      int *c,
                                      int *h,
                                      int *w);
179
180/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
181cudnnStatus_t CUDNNWINAPI
182cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
183 const cudnnTensorDescriptor_t inputTensorDesc,
184 const cudnnFilterDescriptor_t filterDesc,
185 int nbDims,
186 int tensorOuputDimA[]);
187
/* Helper functions to provide the convolution forward algo that best fits the requirements. */
/* Returns the maximum number of entries the algorithm-query functions below can report;
 * use this to size the perfResults array. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);
191
/* Heuristically ranks forward algorithms for the given descriptors; writes up to
 * requestedAlgoCount entries into perfResults and the actual number into
 * returnedAlgoCount (ordering per the cuDNN documentation). */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
                                       const cudnnTensorDescriptor_t srcDesc,
                                       const cudnnFilterDescriptor_t filterDesc,
                                       const cudnnConvolutionDescriptor_t convDesc,
                                       const cudnnTensorDescriptor_t destDesc,
                                       const int requestedAlgoCount,
                                       int *returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t *perfResults);
201
/* Measures available forward algorithms for the given descriptors and returns up to
 * requestedAlgoCount results in perfResults (no user data supplied -- see the Ex
 * variant below for caller-provided buffers). */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
                                     const cudnnTensorDescriptor_t xDesc,
                                     const cudnnFilterDescriptor_t wDesc,
                                     const cudnnConvolutionDescriptor_t convDesc,
                                     const cudnnTensorDescriptor_t yDesc,
                                     const int requestedAlgoCount,
                                     int *returnedAlgoCount,
                                     cudnnConvolutionFwdAlgoPerf_t *perfResults);
211
/* Same as cudnnFindConvolutionForwardAlgorithm, but runs on caller-supplied device
 * buffers (x, w, y) and a caller-supplied workspace of workSpaceSizeInBytes bytes. */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
                                       const cudnnTensorDescriptor_t xDesc,
                                       const void *x,
                                       const cudnnFilterDescriptor_t wDesc,
                                       const void *w,
                                       const cudnnConvolutionDescriptor_t convDesc,
                                       const cudnnTensorDescriptor_t yDesc,
                                       void *y, /* overwritten during the search */
                                       const int requestedAlgoCount,
                                       int *returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t *perfResults,
                                       void *workSpace,
                                       size_t workSpaceSizeInBytes);
226
/* Expands the input tensor x into column (im2col) form in colBuffer, according to the
 * filter and convolution descriptors. Caller allocates colBuffer (size per cuDNN docs). */
cudnnStatus_t CUDNNWINAPI
cudnnIm2Col(cudnnHandle_t handle,
            const cudnnTensorDescriptor_t xDesc,
            const void *x,
            const cudnnFilterDescriptor_t wDesc,
            const cudnnConvolutionDescriptor_t convDesc,
            void *colBuffer);
234
/* Reorders filterData into reorderedFilterData according to reorderType (see
 * cudnnReorderType_t above). When reorderBias is non-zero, biasData is presumably
 * reordered into reorderedBiasData as well -- confirm flag semantics in the cuDNN docs. */
cudnnStatus_t CUDNNWINAPI
cudnnReorderFilterAndBias(cudnnHandle_t handle,
                          const cudnnFilterDescriptor_t filterDesc,
                          cudnnReorderType_t reorderType,
                          const void *filterData,
                          void *reorderedFilterData,
                          int reorderBias,
                          const void *biasData,
                          void *reorderedBiasData);
244
/* Helper function to return the minimum size of the workspace to be passed to the
 * convolution given an algo (the caller allocates sizeInBytes bytes of device memory
 * and passes them to cudnnConvolutionForward). */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
                                        const cudnnTensorDescriptor_t xDesc,
                                        const cudnnFilterDescriptor_t wDesc,
                                        const cudnnConvolutionDescriptor_t convDesc,
                                        const cudnnTensorDescriptor_t yDesc,
                                        cudnnConvolutionFwdAlgo_t algo,
                                        size_t *sizeInBytes);
254
/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform the forward pass for batch convolution.
 * workSpace must provide at least the size reported by
 * cudnnGetConvolutionForwardWorkspaceSize for the chosen algo. */
cudnnStatus_t CUDNNWINAPI
cudnnConvolutionForward(cudnnHandle_t handle,
                        const void *alpha, /* scaling factor for Op(inputs), per the convention above */
                        const cudnnTensorDescriptor_t xDesc,
                        const void *x,
                        const cudnnFilterDescriptor_t wDesc,
                        const void *w,
                        const cudnnConvolutionDescriptor_t convDesc,
                        cudnnConvolutionFwdAlgo_t algo,
                        void *workSpace,
                        size_t workSpaceSizeInBytes,
                        const void *beta, /* scaling factor for the prior contents of y */
                        const cudnnTensorDescriptor_t yDesc,
                        void *y);
272
/* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias ) */
cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
                                      const void *alpha1, /* scales conv(x) in the formula above */
                                      const cudnnTensorDescriptor_t xDesc,
                                      const void *x,
                                      const cudnnFilterDescriptor_t wDesc,
                                      const void *w,
                                      const cudnnConvolutionDescriptor_t convDesc,
                                      cudnnConvolutionFwdAlgo_t algo,
                                      void *workSpace,
                                      size_t workSpaceSizeInBytes,
                                      const void *alpha2, /* scales the residual input z */
                                      const cudnnTensorDescriptor_t zDesc,
                                      const void *z,
                                      const cudnnTensorDescriptor_t biasDesc,
                                      const void *bias,
                                      const cudnnActivationDescriptor_t activationDesc,
                                      const cudnnTensorDescriptor_t yDesc,
                                      void *y);
293
/* Helper functions to provide the convolution backward-data algo that best fits the requirements */

/* Performance/suitability record for one backward-data algorithm, filled in by
 * cudnnFindConvolutionBackwardDataAlgorithm[Ex] and cudnnGetConvolutionBackwardDataAlgorithm_v7. */
typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
    cudnnConvolutionBwdDataAlgo_t algo; /* the algorithm this entry describes */
    cudnnStatus_t status;               /* status encountered while evaluating this algo */
    float time;                         /* measured execution time (presumably milliseconds -- confirm in cuDNN docs) */
    size_t memory;                      /* workspace needed by this algo, in bytes */
    cudnnDeterminism_t determinism;
    cudnnMathType_t mathType;
    int reserved[3];                    /* reserved for future use */
} cudnnConvolutionBwdDataAlgoPerf_t;
305
/* Returns the maximum number of entries the backward-data algorithm queries below can
 * report; use this to size the perfResults array. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);
308
/* Measures available backward-data algorithms and returns up to requestedAlgoCount
 * results in perfResults (no user data supplied -- see the Ex variant below). */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
                                          const cudnnFilterDescriptor_t wDesc,
                                          const cudnnTensorDescriptor_t dyDesc,
                                          const cudnnConvolutionDescriptor_t convDesc,
                                          const cudnnTensorDescriptor_t dxDesc,
                                          const int requestedAlgoCount,
                                          int *returnedAlgoCount,
                                          cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
318
/* Same as cudnnFindConvolutionBackwardDataAlgorithm, but runs on caller-supplied device
 * buffers (w, dy, dx) and a caller-supplied workspace of workSpaceSizeInBytes bytes. */
cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
                                            const cudnnFilterDescriptor_t wDesc,
                                            const void *w,
                                            const cudnnTensorDescriptor_t dyDesc,
                                            const void *dy,
                                            const cudnnConvolutionDescriptor_t convDesc,
                                            const cudnnTensorDescriptor_t dxDesc,
                                            void *dx, /* overwritten during the search */
                                            const int requestedAlgoCount,
                                            int *returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
                                            void *workSpace,
                                            size_t workSpaceSizeInBytes);
333
/* Heuristically ranks backward-data algorithms for the given descriptors; writes up to
 * requestedAlgoCount entries into perfResults and the actual number into returnedAlgoCount. */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
                                            const cudnnFilterDescriptor_t filterDesc,
                                            const cudnnTensorDescriptor_t diffDesc,
                                            const cudnnConvolutionDescriptor_t convDesc,
                                            const cudnnTensorDescriptor_t gradDesc,
                                            const int requestedAlgoCount,
                                            int *returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults);
343
/*
 * Each backward-data convolution algorithm may require some device workspace.
 */

/* Helper function to return the minimum size of the workspace to be passed to the
 * backward-data convolution given an algo (pass sizeInBytes bytes of device memory
 * to cudnnConvolutionBackwardData). */
cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
                                             const cudnnFilterDescriptor_t wDesc,
                                             const cudnnTensorDescriptor_t dyDesc,
                                             const cudnnConvolutionDescriptor_t convDesc,
                                             const cudnnTensorDescriptor_t dxDesc,
                                             cudnnConvolutionBwdDataAlgo_t algo,
                                             size_t *sizeInBytes);
357
/* Computes the data gradient dx from the filter w and the output gradient dy, following
 * the alpha/beta scaling convention ("output = alpha * Op(inputs) + beta * output").
 * workSpace must provide at least the size reported by
 * cudnnGetConvolutionBackwardDataWorkspaceSize for the chosen algo. */
cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBackwardData(cudnnHandle_t handle,
                             const void *alpha,
                             const cudnnFilterDescriptor_t wDesc,
                             const void *w,
                             const cudnnTensorDescriptor_t dyDesc,
                             const void *dy,
                             const cudnnConvolutionDescriptor_t convDesc,
                             cudnnConvolutionBwdDataAlgo_t algo,
                             void *workSpace,
                             size_t workSpaceSizeInBytes,
                             const void *beta,
                             const cudnnTensorDescriptor_t dxDesc,
                             void *dx);
372
/* Helper function to calculate folding descriptors for dgrad.
 * Inputs: the original filter/diff/conv/grad descriptors and the desired transform format.
 * Outputs: the folded/padded descriptors plus the tensor-transform descriptors used to
 * move data between the original and folded layouts (exact folding semantics per the
 * cuDNN documentation). All output descriptors must be created by the caller beforehand
 * -- TODO confirm against cuDNN docs. */
cudnnStatus_t CUDNNWINAPI
cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
                                          const cudnnFilterDescriptor_t filterDesc,
                                          const cudnnTensorDescriptor_t diffDesc,
                                          const cudnnConvolutionDescriptor_t convDesc,
                                          const cudnnTensorDescriptor_t gradDesc,
                                          const cudnnTensorFormat_t transformFormat,
                                          cudnnFilterDescriptor_t foldedFilterDesc,
                                          cudnnTensorDescriptor_t paddedDiffDesc,
                                          cudnnConvolutionDescriptor_t foldedConvDesc,
                                          cudnnTensorDescriptor_t foldedGradDesc,
                                          cudnnTensorTransformDescriptor_t filterFoldTransDesc,
                                          cudnnTensorTransformDescriptor_t diffPadTransDesc,
                                          cudnnTensorTransformDescriptor_t gradFoldTransDesc,
                                          cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);
389
/* cudnnFusedOps... -- opaque handles for the fused-ops API */

/* Pack of constant (set-once) parameters for a fused op, keyed by cudnnFusedOpsConstParamLabel_t */
struct cudnnFusedOpsConstParamStruct;
typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t;

/* Pack of variant (per-execution) parameters, keyed by cudnnFusedOpsVariantParamLabel_t */
struct cudnnFusedOpsVariantParamStruct;
typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t;

/* Execution plan for a fused operation sequence */
struct cudnnFusedOpsPlanStruct;
typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t;
399
/* Selects which fused operation sequence a fused-ops plan executes. */
typedef enum {
    /* each op in [ ] can be disabled by passing NULL ptr */
    /* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
    /* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
    /* utility for BN training in BN-conv fusion */
    /* computes the equivalent scale and bias from ySum ySqSum and learned scale, bias */
    /* optionally update running stats and generate saved stats */
    CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
    /* utility for BN inference in BN-conv fusion */
    /* computes the equivalent scale and bias from learned running stats and learned scale, bias */
    CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
    /* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
    CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
    /* reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask */
    CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
    /* reserved for future use */
    CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
} cudnnFusedOps_t;
420
/* Labels identifying each constant (set-once) parameter of a
 * cudnnFusedOpsConstParamPack_t. The per-label comments state the type to pass
 * when setting or getting that parameter. */
typedef enum {
    /* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get XDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_XDESC = 0,
    /* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
    /* set/get BN_MODE: pass cudnnBatchNormMode_t* */
    CUDNN_PARAM_BN_MODE = 2,
    /* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
    /* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
    /* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
    /* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
    /* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
    CUDNN_PARAM_ACTIVATION_DESC = 6,
    /* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
    /* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
    CUDNN_PARAM_CONV_DESC = 7,
    /* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
    /* get WDESC: pass previously created cudnnFilterDescriptor_t */
    CUDNN_PARAM_WDESC = 8,
    /* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
    /* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
    /* get DWDESC: pass previously created cudnnFilterDescriptor_t */
    CUDNN_PARAM_DWDESC = 10,
    /* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
    /* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get YDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_YDESC = 12,
    /* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
    /* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DYDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DYDESC = 14,
    /* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
    /* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_YSTATS_DESC = 16,
    /* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
    /* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
    /* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
    /* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
    /* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
    /* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
    /* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
    /* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
    /* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,

    /* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get ZDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_ZDESC = 26,
    /* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
    /* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
    /* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
    /* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,

    /* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
    /* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,

    /* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DXDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DXDESC = 33,
    /* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
    /* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DZDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DZDESC = 35,
    /* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
    /* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
    /* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
} cudnnFusedOpsConstParamLabel_t;
519
/* Declares, ahead of execution, what kind of pointer will later be bound to a data
 * placeholder: NULL, or a device pointer with element / 16-byte alignment
 * (alignment semantics per the cuDNN documentation). */
typedef enum {
    CUDNN_PTR_NULL = 0,
    CUDNN_PTR_ELEM_ALIGNED = 1,
    CUDNN_PTR_16B_ALIGNED = 2,
} cudnnFusedOpsPointerPlaceHolder_t;
525
/* Labels identifying each variant (per-execution) parameter of a
 * cudnnFusedOpsVariantParamPack_t. Values below 100 are device data pointers;
 * values from 100 up are host-side scalars (types noted per label). */
typedef enum {
    /* set: pass void* pointing to dev memory */
    /* get: pass void** pointing to host memory */
    CUDNN_PTR_XDATA = 0,
    CUDNN_PTR_BN_EQSCALE = 1,
    CUDNN_PTR_BN_EQBIAS = 2,
    CUDNN_PTR_WDATA = 3,
    CUDNN_PTR_DWDATA = 4,
    CUDNN_PTR_YDATA = 5,
    CUDNN_PTR_DYDATA = 6,
    CUDNN_PTR_YSUM = 7,
    CUDNN_PTR_YSQSUM = 8,
    CUDNN_PTR_WORKSPACE = 9,
    CUDNN_PTR_BN_SCALE = 10,
    CUDNN_PTR_BN_BIAS = 11,
    CUDNN_PTR_BN_SAVED_MEAN = 12,
    CUDNN_PTR_BN_SAVED_INVSTD = 13,
    CUDNN_PTR_BN_RUNNING_MEAN = 14,
    CUDNN_PTR_BN_RUNNING_VAR = 15,
    CUDNN_PTR_ZDATA = 16,
    CUDNN_PTR_BN_Z_EQSCALE = 17,
    CUDNN_PTR_BN_Z_EQBIAS = 18,
    CUDNN_PTR_ACTIVATION_BITMASK = 19,
    CUDNN_PTR_DXDATA = 20,
    CUDNN_PTR_DZDATA = 21,
    CUDNN_PTR_BN_DSCALE = 22,
    CUDNN_PTR_BN_DBIAS = 23,

    /* set/get: pass size_t* pointing to host memory */
    CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
    /* set/get: pass int64_t* pointing to host memory */
    CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
    /* set/get: pass double* pointing to host memory */
    CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
    /* set/get: pass double* pointing to host memory */
    CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
} cudnnFusedOpsVariantParamLabel_t;
563
/* Checks that the loaded cudnn_cnn_infer sub-library is consistent with these headers
 * (complements the compile-time version check at the top of this file). */
cudnnStatus_t CUDNNWINAPI
cudnnCnnInferVersionCheck(void);
566
567#if defined(__cplusplus)
568}
569#endif
570
571#endif /* CUDNN_CNN_INFER_H_ */
572