/*
 * Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee. Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users. These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

/*
 * cudnn_ops_infer : cuDNN's basic definitions and inference operations.
 */

#if !defined(CUDNN_OPS_INFER_H_)
#define CUDNN_OPS_INFER_H_

#include <cuda_runtime.h>
#include <stdint.h>

#include "cudnn_version.h"

/* These version numbers are autogenerated, do not edit manually. */
#define CUDNN_OPS_INFER_MAJOR 8
#define CUDNN_OPS_INFER_MINOR 2
#define CUDNN_OPS_INFER_PATCH 4

#if (CUDNN_OPS_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_INFER_MINOR != CUDNN_MINOR) || \
    (CUDNN_OPS_INFER_PATCH != CUDNN_PATCHLEVEL)
#error Version mismatch in cuDNN OPS INFER!!!
#endif

#ifndef CUDNNWINAPI
#ifdef _WIN32
#define CUDNNWINAPI __stdcall
#else
#define CUDNNWINAPI
#endif
#endif

/* Warnings for deprecated APIs are enabled by defining the CUDNN_WARN_DEPRECATED macro */
#if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
/* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
#define CUDNN_DEPRECATED __attribute__((deprecated))
#elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
/* Microsoft Visual C++ */
#define CUDNN_DEPRECATED __declspec(deprecated)
#elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
/* C++14 compilers */
#define CUDNN_DEPRECATED [[deprecated]]
#else
/* No support for the deprecated attribute */
#define CUDNN_DEPRECATED
#endif

#if defined(__cplusplus)
extern "C" {
#endif

struct cudnnContext;
typedef struct cudnnContext *cudnnHandle_t;

size_t CUDNNWINAPI
cudnnGetVersion(void);

/* Returns the version of the CUDA Runtime that cuDNN was built against */
size_t CUDNNWINAPI
cudnnGetCudartVersion(void);

/*
 * CUDNN return codes
 */
typedef enum {
    CUDNN_STATUS_SUCCESS = 0,
    CUDNN_STATUS_NOT_INITIALIZED = 1,
    CUDNN_STATUS_ALLOC_FAILED = 2,
    CUDNN_STATUS_BAD_PARAM = 3,
    CUDNN_STATUS_INTERNAL_ERROR = 4,
    CUDNN_STATUS_INVALID_VALUE = 5,
    CUDNN_STATUS_ARCH_MISMATCH = 6,
    CUDNN_STATUS_MAPPING_ERROR = 7,
    CUDNN_STATUS_EXECUTION_FAILED = 8,
    CUDNN_STATUS_NOT_SUPPORTED = 9,
    CUDNN_STATUS_LICENSE_ERROR = 10,
    CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
    CUDNN_STATUS_RUNTIME_IN_PROGRESS = 12,
    CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 13,
    CUDNN_STATUS_VERSION_MISMATCH = 14,
} cudnnStatus_t;

/* human-readable error messages */
const char *CUDNNWINAPI
cudnnGetErrorString(cudnnStatus_t status);

/* Forward definition in this version only */
typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;

typedef enum {
    CUDNN_ERRQUERY_RAWCODE = 0,
    CUDNN_ERRQUERY_NONBLOCKING = 1,
    CUDNN_ERRQUERY_BLOCKING = 2,
} cudnnErrQueryMode_t;

cudnnStatus_t CUDNNWINAPI
cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);

#ifndef __LIBRARY_TYPES_H__

typedef enum libraryPropertyType_t { MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL } libraryPropertyType;

#endif

cudnnStatus_t CUDNNWINAPI
cudnnGetProperty(libraryPropertyType type, int *value);

cudnnStatus_t CUDNNWINAPI
cudnnCreate(cudnnHandle_t *handle);
cudnnStatus_t CUDNNWINAPI
cudnnDestroy(cudnnHandle_t handle);
cudnnStatus_t CUDNNWINAPI
cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
cudnnStatus_t CUDNNWINAPI
cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);

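/*
 * Illustrative sketch (not part of this header): typical lifetime of a cuDNN
 * handle bound to a user-created CUDA stream. Error checking is omitted for
 * brevity; every call below returns a cudnnStatus_t / cudaError_t that real
 * code should check.
 *
 *   cudnnHandle_t handle;
 *   cudaStream_t stream;
 *
 *   cudaStreamCreate(&stream);
 *   cudnnCreate(&handle);
 *   cudnnSetStream(handle, stream);  // subsequent cuDNN work is issued on 'stream'
 *
 *   // ... call cuDNN operations with 'handle' ...
 *
 *   cudnnDestroy(handle);
 *   cudaStreamDestroy(stream);
 */
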
/* Data structures to represent Image/Filter and the Neural Network Layer */
typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t;
typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t;
typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t;
typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t;
typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t;
typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t;
/*
 * CUDNN data type
 */
typedef enum {
    CUDNN_DATA_FLOAT = 0,
    CUDNN_DATA_DOUBLE = 1,
    CUDNN_DATA_HALF = 2,
    CUDNN_DATA_INT8 = 3,
    CUDNN_DATA_INT32 = 4,
    CUDNN_DATA_INT8x4 = 5,
    CUDNN_DATA_UINT8 = 6,
    CUDNN_DATA_UINT8x4 = 7,
    CUDNN_DATA_INT8x32 = 8,
    CUDNN_DATA_BFLOAT16 = 9,
    CUDNN_DATA_INT64 = 10,
} cudnnDataType_t;

/*
 * CUDNN math type
 */
typedef enum {
    CUDNN_DEFAULT_MATH = 0,
    CUDNN_TENSOR_OP_MATH = 1,
    CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
    CUDNN_FMA_MATH = 3,
} cudnnMathType_t;

/*
 * CUDNN propagate Nan
 */
typedef enum {
    CUDNN_NOT_PROPAGATE_NAN = 0,
    CUDNN_PROPAGATE_NAN = 1,
} cudnnNanPropagation_t;

/*
 * CUDNN Determinism
 */
typedef enum {
    CUDNN_NON_DETERMINISTIC = 0,
    CUDNN_DETERMINISTIC = 1,
} cudnnDeterminism_t;

/* Maximum supported number of tensor dimensions */
#define CUDNN_DIM_MAX 8

/* Create an instance of a generic Tensor descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);

typedef enum {
    CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */
    CUDNN_TENSOR_NHWC = 1, /* feature maps interleaved ( cStride = 1 ) */
    CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is a vector of C elements; the vector length is given by the data type */
} cudnnTensorFormat_t;

cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
                           cudnnTensorFormat_t format,
                           cudnnDataType_t dataType, /* image data type */
                           int n, /* number of inputs (batch size) */
                           int c, /* number of input feature maps */
                           int h, /* height of input section */
                           int w); /* width of input section */

cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
                             cudnnDataType_t dataType, /* image data type */
                             int n, /* number of inputs (batch size) */
                             int c, /* number of input feature maps */
                             int h, /* height of input section */
                             int w, /* width of input section */
                             int nStride,
                             int cStride,
                             int hStride,
                             int wStride);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
                           cudnnDataType_t *dataType, /* image data type */
                           int *n, /* number of inputs (batch size) */
                           int *c, /* number of input feature maps */
                           int *h, /* height of input section */
                           int *w, /* width of input section */
                           int *nStride,
                           int *cStride,
                           int *hStride,
                           int *wStride);

cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
                           cudnnDataType_t dataType,
                           int nbDims,
                           const int dimA[],
                           const int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
                             cudnnTensorFormat_t format,
                             cudnnDataType_t dataType,
                             int nbDims,
                             const int dimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
                           int nbDimsRequested,
                           cudnnDataType_t *dataType,
                           int *nbDims,
                           int dimA[],
                           int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size);

/* PixelOffset( n, c, h, w ) = n * input_stride + c * feature_stride + h * h_stride + w * w_stride

   1) Example of all images in row major order, one batch of features after the other (with optional padding on rows)
      input_stride   : c x h x h_stride
      feature_stride : h x h_stride
      h_stride       : >= w ( h_stride = w if no padding)
      w_stride       : 1

   2) Example of all images in row major order with feature maps interleaved
      input_stride   : c x h x h_stride
      feature_stride : 1
      h_stride       : w x c
      w_stride       : c

   3) Example of all images in column major order, one batch of features after the other (with optional padding on columns)
      input_stride   : c x w x w_stride
      feature_stride : w x w_stride
      h_stride       : 1
      w_stride       : >= h
*/
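
/*
 * Illustrative sketch (not part of this header): describing a fully packed
 * NCHW tensor with explicit strides, matching example 1) above with no row
 * padding. The same layout can also be requested with
 * cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, ...).
 *
 *   cudnnTensorDescriptor_t desc;
 *   int n = 32, c = 3, h = 224, w = 224;
 *
 *   cudnnCreateTensorDescriptor(&desc);
 *   cudnnSetTensor4dDescriptorEx(desc, CUDNN_DATA_FLOAT,
 *                                n, c, h, w,
 *                                c * h * w,  // nStride
 *                                h * w,      // cStride
 *                                w,          // hStride
 *                                1);         // wStride
 *
 *   // ... use 'desc' ...
 *   cudnnDestroyTensorDescriptor(desc);
 */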

/* Destroy an instance of Tensor4d descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);

/* Fold/unfold transforms */
typedef enum {
    CUDNN_TRANSFORM_FOLD = 0U,
    CUDNN_TRANSFORM_UNFOLD = 1U,
} cudnnFoldingDirection_t;

/** Create a destination descriptor for cudnnTransformTensor */
cudnnStatus_t CUDNNWINAPI
cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
                       const cudnnTensorDescriptor_t srcDesc,
                       cudnnTensorDescriptor_t destDesc,
                       size_t *destSizeInBytes);

/** Create an empty tensor transform descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);

/** Initialize a previously created tensor transform descriptor. */
cudnnStatus_t CUDNNWINAPI
cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                  const uint32_t nbDims,
                                  const cudnnTensorFormat_t destFormat,
                                  const int32_t padBeforeA[],
                                  const int32_t padAfterA[],
                                  const uint32_t foldA[],
                                  const cudnnFoldingDirection_t direction);

/**
 * Retrieves the values stored in a previously initialized tensor transform
 * descriptor.
 */
cudnnStatus_t CUDNNWINAPI
cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                  uint32_t nbDimsRequested,
                                  cudnnTensorFormat_t *destFormat,
                                  int32_t padBeforeA[],
                                  int32_t padAfterA[],
                                  uint32_t foldA[],
                                  cudnnFoldingDirection_t *direction);

/**
 * Destroys a previously created tensor transform descriptor.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);

/* Tensor layout conversion helper (y = alpha * x + beta * y) */
cudnnStatus_t CUDNNWINAPI
cudnnTransformTensor(cudnnHandle_t handle,
                     const void *alpha,
                     const cudnnTensorDescriptor_t xDesc,
                     const void *x,
                     const void *beta,
                     const cudnnTensorDescriptor_t yDesc,
                     void *y);

cudnnStatus_t CUDNNWINAPI
cudnnTransformTensorEx(cudnnHandle_t handle,
                       const cudnnTensorTransformDescriptor_t transDesc,
                       const void *alpha,
                       const cudnnTensorDescriptor_t srcDesc,
                       const void *srcData,
                       const void *beta,
                       const cudnnTensorDescriptor_t destDesc,
                       void *destData);

/* Tensor Bias addition : C = alpha * A + beta * C */
cudnnStatus_t CUDNNWINAPI
cudnnAddTensor(cudnnHandle_t handle,
               const void *alpha,
               const cudnnTensorDescriptor_t aDesc,
               const void *A,
               const void *beta,
               const cudnnTensorDescriptor_t cDesc,
               void *C);
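
/*
 * Illustrative sketch (not part of this header): adding a per-channel bias
 * (1 x C x 1 x 1) to an N x C x H x W activation tensor; the bias is broadcast
 * over N, H and W. 'handle', the device pointers 'bias' and 'data', and the
 * dimensions n, c, h, w are assumed to exist elsewhere.
 *
 *   float one = 1.0f;
 *   cudnnTensorDescriptor_t biasDesc, dataDesc;
 *
 *   cudnnCreateTensorDescriptor(&biasDesc);
 *   cudnnCreateTensorDescriptor(&dataDesc);
 *   cudnnSetTensor4dDescriptor(biasDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, c, 1, 1);
 *   cudnnSetTensor4dDescriptor(dataDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w);
 *
 *   cudnnAddTensor(handle, &one, biasDesc, bias, &one, dataDesc, data);  // data += bias
 */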

/*
 * CUDNN OpTensor op type
 */
typedef enum {
    CUDNN_OP_TENSOR_ADD = 0,
    CUDNN_OP_TENSOR_MUL = 1,
    CUDNN_OP_TENSOR_MIN = 2,
    CUDNN_OP_TENSOR_MAX = 3,
    CUDNN_OP_TENSOR_SQRT = 4,
    CUDNN_OP_TENSOR_NOT = 5,
} cudnnOpTensorOp_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
                           cudnnOpTensorOp_t opTensorOp,
                           cudnnDataType_t opTensorCompType,
                           cudnnNanPropagation_t opTensorNanOpt);

cudnnStatus_t CUDNNWINAPI
cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
                           cudnnOpTensorOp_t *opTensorOp,
                           cudnnDataType_t *opTensorCompType,
                           cudnnNanPropagation_t *opTensorNanOpt);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);

/* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
/* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
cudnnStatus_t CUDNNWINAPI
cudnnOpTensor(cudnnHandle_t handle,
              const cudnnOpTensorDescriptor_t opTensorDesc,
              const void *alpha1,
              const cudnnTensorDescriptor_t aDesc,
              const void *A,
              const void *alpha2,
              const cudnnTensorDescriptor_t bDesc,
              const void *B,
              const void *beta,
              const cudnnTensorDescriptor_t cDesc,
              void *C);
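
/*
 * Illustrative sketch (not part of this header): element-wise C = A * B with
 * CUDNN_OP_TENSOR_MUL (alpha1 = alpha2 = 1, beta = 0). 'handle', a shared float
 * tensor descriptor 'desc' and device pointers 'dA', 'dB', 'dC' are assumed to
 * exist elsewhere.
 *
 *   float alpha1 = 1.0f, alpha2 = 1.0f, beta = 0.0f;
 *   cudnnOpTensorDescriptor_t opDesc;
 *
 *   cudnnCreateOpTensorDescriptor(&opDesc);
 *   cudnnSetOpTensorDescriptor(opDesc, CUDNN_OP_TENSOR_MUL, CUDNN_DATA_FLOAT, CUDNN_PROPAGATE_NAN);
 *   cudnnOpTensor(handle, opDesc, &alpha1, desc, dA, &alpha2, desc, dB, &beta, desc, dC);
 *   cudnnDestroyOpTensorDescriptor(opDesc);
 */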

/*
 * CUDNN ReduceTensor op type
 */
typedef enum {
    CUDNN_REDUCE_TENSOR_ADD = 0,
    CUDNN_REDUCE_TENSOR_MUL = 1,
    CUDNN_REDUCE_TENSOR_MIN = 2,
    CUDNN_REDUCE_TENSOR_MAX = 3,
    CUDNN_REDUCE_TENSOR_AMAX = 4,
    CUDNN_REDUCE_TENSOR_AVG = 5,
    CUDNN_REDUCE_TENSOR_NORM1 = 6,
    CUDNN_REDUCE_TENSOR_NORM2 = 7,
    CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
} cudnnReduceTensorOp_t;

/*
 * CUDNN ReduceTensor indices type
 */
typedef enum {
    CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
    CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
} cudnnReduceTensorIndices_t;

/*
 * CUDNN tensor indices type size (all unsigned)
 * Currently not supported, default is 32 bit unsigned.
 */
typedef enum {
    CUDNN_32BIT_INDICES = 0,
    CUDNN_64BIT_INDICES = 1,
    CUDNN_16BIT_INDICES = 2,
    CUDNN_8BIT_INDICES = 3,
} cudnnIndicesType_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               cudnnReduceTensorOp_t reduceTensorOp,
                               cudnnDataType_t reduceTensorCompType,
                               cudnnNanPropagation_t reduceTensorNanOpt,
                               cudnnReduceTensorIndices_t reduceTensorIndices,
                               cudnnIndicesType_t reduceTensorIndicesType);

cudnnStatus_t CUDNNWINAPI
cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               cudnnReduceTensorOp_t *reduceTensorOp,
                               cudnnDataType_t *reduceTensorCompType,
                               cudnnNanPropagation_t *reduceTensorNanOpt,
                               cudnnReduceTensorIndices_t *reduceTensorIndices,
                               cudnnIndicesType_t *reduceTensorIndicesType);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);

/* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
 * output tensors */
cudnnStatus_t CUDNNWINAPI
cudnnGetReductionIndicesSize(cudnnHandle_t handle,
                             const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                             const cudnnTensorDescriptor_t aDesc,
                             const cudnnTensorDescriptor_t cDesc,
                             size_t *sizeInBytes);

/* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
 * tensors */
cudnnStatus_t CUDNNWINAPI
cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
                               const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               const cudnnTensorDescriptor_t aDesc,
                               const cudnnTensorDescriptor_t cDesc,
                               size_t *sizeInBytes);

/* Tensor operation : C = reduce op( alpha * A ) + beta * C */
/* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */
/* The indices space is ignored for reduce ops other than min or max. */
cudnnStatus_t CUDNNWINAPI
cudnnReduceTensor(cudnnHandle_t handle,
                  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                  void *indices,
                  size_t indicesSizeInBytes,
                  void *workspace,
                  size_t workspaceSizeInBytes,
                  const void *alpha,
                  const cudnnTensorDescriptor_t aDesc,
                  const void *A,
                  const void *beta,
                  const cudnnTensorDescriptor_t cDesc,
                  void *C);
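
/*
 * Illustrative sketch (not part of this header): averaging an N x C x H x W
 * tensor over H and W. Reduction happens over the dimensions whose size is 1
 * in the output descriptor, so 'cDesc' describes an N x C x 1 x 1 tensor while
 * 'aDesc' describes the input; both, together with 'handle' and the device
 * pointers 'dA', 'dC', 'indices' and 'workspace', are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   size_t indicesBytes = 0, workspaceBytes = 0;
 *   cudnnReduceTensorDescriptor_t reduceDesc;
 *
 *   cudnnCreateReduceTensorDescriptor(&reduceDesc);
 *   cudnnSetReduceTensorDescriptor(reduceDesc, CUDNN_REDUCE_TENSOR_AVG, CUDNN_DATA_FLOAT,
 *                                  CUDNN_PROPAGATE_NAN, CUDNN_REDUCE_TENSOR_NO_INDICES,
 *                                  CUDNN_32BIT_INDICES);
 *   cudnnGetReductionIndicesSize(handle, reduceDesc, aDesc, cDesc, &indicesBytes);
 *   cudnnGetReductionWorkspaceSize(handle, reduceDesc, aDesc, cDesc, &workspaceBytes);
 *   // allocate 'indices' and 'workspace' on the device (cudaMalloc), then:
 *   cudnnReduceTensor(handle, reduceDesc, indices, indicesBytes, workspace, workspaceBytes,
 *                     &alpha, aDesc, dA, &beta, cDesc, dC);
 *   cudnnDestroyReduceTensorDescriptor(reduceDesc);
 */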

/* Set all values of a tensor to a given value : y[i] = value[0] */
cudnnStatus_t CUDNNWINAPI
cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);

/* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
cudnnStatus_t CUDNNWINAPI
cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);

/* Create an instance of FilterStruct */
cudnnStatus_t CUDNNWINAPI
cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t dataType, /* image data type */
                           cudnnTensorFormat_t format,
                           int k, /* number of output feature maps */
                           int c, /* number of input feature maps */
                           int h, /* height of each input filter */
                           int w); /* width of each input filter */

cudnnStatus_t CUDNNWINAPI
cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t *dataType, /* image data type */
                           cudnnTensorFormat_t *format,
                           int *k, /* number of output feature maps */
                           int *c, /* number of input feature maps */
                           int *h, /* height of each input filter */
                           int *w); /* width of each input filter */

cudnnStatus_t CUDNNWINAPI
cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t dataType, /* image data type */
                           cudnnTensorFormat_t format,
                           int nbDims,
                           const int filterDimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
                           int nbDimsRequested,
                           cudnnDataType_t *dataType, /* image data type */
                           cudnnTensorFormat_t *format,
                           int *nbDims,
                           int filterDimA[]);
cudnnStatus_t CUDNNWINAPI
cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);

cudnnStatus_t CUDNNWINAPI
cudnnTransformFilter(cudnnHandle_t handle,
                     const cudnnTensorTransformDescriptor_t transDesc,
                     const void *alpha,
                     const cudnnFilterDescriptor_t srcDesc,
                     const void *srcData,
                     const void *beta,
                     const cudnnFilterDescriptor_t destDesc,
                     void *destData);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);

/*
 * softmax algorithm
 */
typedef enum {
    CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */
    CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
    CUDNN_SOFTMAX_LOG = 2
} cudnnSoftmaxAlgorithm_t;

typedef enum {
    CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
    CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */
} cudnnSoftmaxMode_t;

/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward softmax */
cudnnStatus_t CUDNNWINAPI
cudnnSoftmaxForward(cudnnHandle_t handle,
                    cudnnSoftmaxAlgorithm_t algo,
                    cudnnSoftmaxMode_t mode,
                    const void *alpha,
                    const cudnnTensorDescriptor_t xDesc,
                    const void *x,
                    const void *beta,
                    const cudnnTensorDescriptor_t yDesc,
                    void *y);
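
/*
 * Illustrative sketch (not part of this header): numerically stable softmax
 * over the channel dimension of a logits tensor. With alpha = 1 and beta = 0
 * the previous contents of y are simply overwritten. 'handle', 'xDesc', 'x',
 * 'yDesc' and 'y' are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *
 *   cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
 *                       &alpha, xDesc, x, &beta, yDesc, y);
 */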

/*
 * pooling mode
 */
typedef enum {
    CUDNN_POOLING_MAX = 0,
    CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
    CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
    CUDNN_POOLING_MAX_DETERMINISTIC = 3
} cudnnPoolingMode_t;

/* Create an instance of pooling descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                            cudnnPoolingMode_t mode,
                            cudnnNanPropagation_t maxpoolingNanOpt,
                            int windowHeight,
                            int windowWidth,
                            int verticalPadding,
                            int horizontalPadding,
                            int verticalStride,
                            int horizontalStride);

cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                            cudnnPoolingMode_t *mode,
                            cudnnNanPropagation_t *maxpoolingNanOpt,
                            int *windowHeight,
                            int *windowWidth,
                            int *verticalPadding,
                            int *horizontalPadding,
                            int *verticalStride,
                            int *horizontalStride);

cudnnStatus_t CUDNNWINAPI
cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                            const cudnnPoolingMode_t mode,
                            const cudnnNanPropagation_t maxpoolingNanOpt,
                            int nbDims,
                            const int windowDimA[],
                            const int paddingA[],
                            const int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                            int nbDimsRequested,
                            cudnnPoolingMode_t *mode,
                            cudnnNanPropagation_t *maxpoolingNanOpt,
                            int *nbDims,
                            int windowDimA[],
                            int paddingA[],
                            int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                  const cudnnTensorDescriptor_t inputTensorDesc,
                                  int nbDims,
                                  int outputTensorDimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                  const cudnnTensorDescriptor_t inputTensorDesc,
                                  int *n,
                                  int *c,
                                  int *h,
                                  int *w);

/* Destroy an instance of pooling descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);

/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward pooling */
cudnnStatus_t CUDNNWINAPI
cudnnPoolingForward(cudnnHandle_t handle,
                    const cudnnPoolingDescriptor_t poolingDesc,
                    const void *alpha,
                    const cudnnTensorDescriptor_t xDesc,
                    const void *x,
                    const void *beta,
                    const cudnnTensorDescriptor_t yDesc,
                    void *y);
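
/*
 * Illustrative sketch (not part of this header): 2x2 max pooling with stride 2
 * and no padding. The output shape is queried from the pooling descriptor so
 * that 'yDesc' can be set consistently before the forward call. 'handle',
 * 'xDesc', 'x', 'yDesc' and 'y' are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   int on, oc, oh, ow;
 *   cudnnPoolingDescriptor_t poolDesc;
 *
 *   cudnnCreatePoolingDescriptor(&poolDesc);
 *   cudnnSetPooling2dDescriptor(poolDesc, CUDNN_POOLING_MAX, CUDNN_NOT_PROPAGATE_NAN,
 *                               2, 2,   // window height, width
 *                               0, 0,   // vertical, horizontal padding
 *                               2, 2);  // vertical, horizontal stride
 *   cudnnGetPooling2dForwardOutputDim(poolDesc, xDesc, &on, &oc, &oh, &ow);
 *   cudnnSetTensor4dDescriptor(yDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, on, oc, oh, ow);
 *   cudnnPoolingForward(handle, poolDesc, &alpha, xDesc, x, &beta, yDesc, y);
 *   cudnnDestroyPoolingDescriptor(poolDesc);
 */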

/*
 * activation mode
 */
typedef enum {
    CUDNN_ACTIVATION_SIGMOID = 0,
    CUDNN_ACTIVATION_RELU = 1,
    CUDNN_ACTIVATION_TANH = 2,
    CUDNN_ACTIVATION_CLIPPED_RELU = 3,
    CUDNN_ACTIVATION_ELU = 4,
    CUDNN_ACTIVATION_IDENTITY = 5,
    CUDNN_ACTIVATION_SWISH = 6
} cudnnActivationMode_t;

/* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
cudnnStatus_t CUDNNWINAPI
cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
                             cudnnActivationMode_t mode,
                             cudnnNanPropagation_t reluNanOpt,
                             double coef); /* ceiling for clipped RELU, alpha for ELU */

cudnnStatus_t CUDNNWINAPI
cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
                             cudnnActivationMode_t *mode,
                             cudnnNanPropagation_t *reluNanOpt,
                             double *coef); /* ceiling for clipped RELU, alpha for ELU */

cudnnStatus_t CUDNNWINAPI
cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta);

cudnnStatus_t CUDNNWINAPI
cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);

/* Function to perform forward activation */
cudnnStatus_t CUDNNWINAPI
cudnnActivationForward(cudnnHandle_t handle,
                       cudnnActivationDescriptor_t activationDesc,
                       const void *alpha,
                       const cudnnTensorDescriptor_t xDesc,
                       const void *x,
                       const void *beta,
                       const cudnnTensorDescriptor_t yDesc,
                       void *y);
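
/*
 * Illustrative sketch (not part of this header): ReLU applied in place by
 * passing the same descriptor and pointer for x and y (assumed to be allowed
 * for activations). 'handle', 'xDesc' and 'x' are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   cudnnActivationDescriptor_t actDesc;
 *
 *   cudnnCreateActivationDescriptor(&actDesc);
 *   cudnnSetActivationDescriptor(actDesc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0);
 *   cudnnActivationForward(handle, actDesc, &alpha, xDesc, x, &beta, xDesc, x);
 *   cudnnDestroyActivationDescriptor(actDesc);
 */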

/*
 * Create an instance of LRN (Local Response Normalization) descriptor
 * Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper
 */
cudnnStatus_t CUDNNWINAPI
cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc);

#define CUDNN_LRN_MIN_N 1 /* minimum allowed lrnN */
#define CUDNN_LRN_MAX_N 16 /* maximum allowed lrnN */
#define CUDNN_LRN_MIN_K 1e-5 /* minimum allowed lrnK */
#define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */

/* LRN layer mode */
typedef enum {
    CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */
} cudnnLRNMode_t;

/*
 * Uses a window [center-lookBehind, center+lookAhead], where
 * lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
 * Values of double parameters cast to tensor data type.
 */
cudnnStatus_t CUDNNWINAPI
cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);
/*
 * Retrieve the settings currently stored in an LRN layer descriptor
 * Any of the provided pointers can be NULL (no corresponding value will be returned)
 */
cudnnStatus_t CUDNNWINAPI
cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK);

/* Destroy an instance of LRN descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);

/* LRN functions: output = alpha * normalize(x) + beta * old_y */

/* LRN cross-channel forward computation. Double parameters cast to tensor data type */
cudnnStatus_t CUDNNWINAPI
cudnnLRNCrossChannelForward(cudnnHandle_t handle,
                            cudnnLRNDescriptor_t normDesc,
                            cudnnLRNMode_t lrnMode,
                            const void *alpha,
                            const cudnnTensorDescriptor_t xDesc,
                            const void *x,
                            const void *beta,
                            const cudnnTensorDescriptor_t yDesc,
                            void *y);

typedef enum {
    CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
} cudnnDivNormMode_t;

/* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
cudnnStatus_t CUDNNWINAPI
cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
                                  cudnnLRNDescriptor_t normDesc,
                                  cudnnDivNormMode_t mode,
                                  const void *alpha,
                                  const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
                                  const void *x,
                                  const void *means, /* if NULL, means are assumed to be zero */
                                  void *temp,
                                  void *temp2,
                                  const void *beta,
                                  const cudnnTensorDescriptor_t yDesc,
                                  void *y);

typedef enum {
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_BATCHNORM_PER_ACTIVATION = 0,

    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_BATCHNORM_SPATIAL = 1,

    /*
     * bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
     * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
     */
    CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
} cudnnBatchNormMode_t;

#define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */

/*
 * Derives a tensor descriptor from layer data descriptor for BatchNormalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
                              const cudnnTensorDescriptor_t xDesc,
                              cudnnBatchNormMode_t mode);

typedef enum {
    CUDNN_BATCHNORM_OPS_BN = 0, /* do batch normalization only */
    CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, /* do batchNorm, then activation */
    CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
} cudnnBatchNormOps_t;

/*
 * Performs Batch Normalization during Inference:
 * y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
 * with bnScale, bnBias, runningMean, runningInvVariance tensors indexed
 * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
 * (declared in cudnn_ops_train.h) for notes on function arguments.
 */
cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
                                        cudnnBatchNormMode_t mode,
                                        const void *alpha, /* alpha[0] = result blend factor */
                                        const void *beta, /* beta[0] = dest layer blend factor */
                                        const cudnnTensorDescriptor_t xDesc,
                                        const void *x, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t yDesc,
                                        void *y, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
                                        const void *bnScale,
                                        const void *bnBias,
                                        const void *estimatedMean,
                                        const void *estimatedVariance,
                                        double epsilon);
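
/*
 * Illustrative sketch (not part of this header): spatial batch normalization
 * at inference time. The scale/bias descriptor is derived from the input
 * descriptor; 'bnScale', 'bnBias', 'estimatedMean' and 'estimatedVariance'
 * hold parameters and statistics saved from training. 'handle', 'xDesc', 'x',
 * 'yDesc' and 'y' are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   double epsilon = 1e-5;  // must be >= CUDNN_BN_MIN_EPSILON
 *   cudnnTensorDescriptor_t bnDesc;
 *
 *   cudnnCreateTensorDescriptor(&bnDesc);
 *   cudnnDeriveBNTensorDescriptor(bnDesc, xDesc, CUDNN_BATCHNORM_SPATIAL);
 *   cudnnBatchNormalizationForwardInference(handle, CUDNN_BATCHNORM_SPATIAL,
 *                                           &alpha, &beta,
 *                                           xDesc, x, yDesc, y,
 *                                           bnDesc, bnScale, bnBias,
 *                                           estimatedMean, estimatedVariance,
 *                                           epsilon);
 *   cudnnDestroyTensorDescriptor(bnDesc);
 */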

typedef enum {
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_NORM_PER_ACTIVATION = 0,

    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_NORM_PER_CHANNEL = 1,
} cudnnNormMode_t;

typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t;

/*
 * Derives a tensor descriptor from layer data descriptor for Normalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
                                cudnnTensorDescriptor_t derivedNormMeanVarDesc,
                                const cudnnTensorDescriptor_t xDesc,
                                cudnnNormMode_t mode,
                                int groupCnt); /* Placeholder for future work; should be set to 1 for now */

typedef enum {
    CUDNN_NORM_OPS_NORM = 0, /* do normalization only */
    CUDNN_NORM_OPS_NORM_ACTIVATION = 1, /* do Norm, then activation */
    CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
} cudnnNormOps_t;

/*
 * Performs Normalization during Inference:
 * y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
 * with normScale, normBias, runningMean, runningInvVariance tensors indexed
 * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
 * (declared in cudnn_ops_train.h) for notes on function arguments.
 */
cudnnStatus_t CUDNNWINAPI
cudnnNormalizationForwardInference(cudnnHandle_t handle,
                                   cudnnNormMode_t mode,
                                   cudnnNormOps_t normOps,
                                   cudnnNormAlgo_t algo,
                                   const void *alpha, /* alpha[0] = result blend factor */
                                   const void *beta, /* beta[0] = dest layer blend factor */
                                   const cudnnTensorDescriptor_t xDesc,
                                   const void *x, /* NxCxHxW */
                                   const cudnnTensorDescriptor_t normScaleBiasDesc,
                                   const void *normScale,
                                   const void *normBias,
                                   const cudnnTensorDescriptor_t normMeanVarDesc,
                                   const void *estimatedMean,
                                   const void *estimatedVariance,
                                   const cudnnTensorDescriptor_t zDesc,
                                   const void *z,
                                   cudnnActivationDescriptor_t activationDesc,
                                   const cudnnTensorDescriptor_t yDesc,
                                   void *y, /* NxCxHxW */
                                   double epsilon,
                                   int groupCnt); /* Placeholder for future work; should be set to 1 for now */

/* APIs for spatial transformer network */
typedef enum {
    CUDNN_SAMPLER_BILINEAR = 0,
} cudnnSamplerType_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
                                       cudnnSamplerType_t samplerType,
                                       cudnnDataType_t dataType,
                                       const int nbDims,
                                       const int dimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
                                   const cudnnSpatialTransformerDescriptor_t stDesc,
                                   const void *theta,
                                   void *grid);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
                             cudnnSpatialTransformerDescriptor_t stDesc,
                             const void *alpha,
                             const cudnnTensorDescriptor_t xDesc,
                             const void *x,
                             const void *grid,
                             const void *beta,
                             cudnnTensorDescriptor_t yDesc,
                             void *y);

typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);

/* Helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes);

/* Helper function to determine size of the reserve space to be passed to dropout forward/backward calls */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes);

cudnnStatus_t CUDNNWINAPI
cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float dropout,
                          void *states,
                          size_t stateSizeInBytes,
                          unsigned long long seed);

/* Restores the dropout descriptor to a previously saved-off state */
cudnnStatus_t CUDNNWINAPI
cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                              cudnnHandle_t handle,
                              float dropout,
                              void *states,
                              size_t stateSizeInBytes,
                              unsigned long long seed);

cudnnStatus_t CUDNNWINAPI
cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float *dropout,
                          void **states,
                          unsigned long long *seed);

cudnnStatus_t CUDNNWINAPI
cudnnDropoutForward(cudnnHandle_t handle,
                    const cudnnDropoutDescriptor_t dropoutDesc,
                    const cudnnTensorDescriptor_t xdesc,
                    const void *x,
                    const cudnnTensorDescriptor_t ydesc,
                    void *y,
                    void *reserveSpace,
                    size_t reserveSpaceSizeInBytes);
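
/*
 * Illustrative sketch (not part of this header): applying 50% dropout. The RNG
 * state buffer and the reserve space are sized with the helpers above and must
 * be allocated on the device (e.g. with cudaMalloc) before use; 'handle',
 * 'xDesc', 'x', 'yDesc', 'y', 'states' and 'reserve' are assumed to exist
 * elsewhere.
 *
 *   size_t stateBytes = 0, reserveBytes = 0;
 *   cudnnDropoutDescriptor_t dropDesc;
 *
 *   cudnnDropoutGetStatesSize(handle, &stateBytes);
 *   cudnnDropoutGetReserveSpaceSize(xDesc, &reserveBytes);
 *   // cudaMalloc(&states, stateBytes); cudaMalloc(&reserve, reserveBytes);
 *
 *   cudnnCreateDropoutDescriptor(&dropDesc);
 *   cudnnSetDropoutDescriptor(dropDesc, handle, 0.5f, states, stateBytes, 1234ULL);
 *   cudnnDropoutForward(handle, dropDesc, xDesc, x, yDesc, y, reserve, reserveBytes);
 *   cudnnDestroyDropoutDescriptor(dropDesc);
 */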

/* TODO: remove */

typedef struct cudnnAlgorithmStruct *cudnnAlgorithmDescriptor_t;
typedef struct cudnnAlgorithmPerformanceStruct *cudnnAlgorithmPerformance_t;

/* TODO: move these enums out to the appropriate submodule */
typedef enum {
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
    CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
    CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
    CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8
} cudnnConvolutionFwdAlgo_t;

typedef enum {
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7
} cudnnConvolutionBwdFilterAlgo_t;

typedef enum {
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6
} cudnnConvolutionBwdDataAlgo_t;

typedef enum {
    CUDNN_RNN_ALGO_STANDARD = 0,
    CUDNN_RNN_ALGO_PERSIST_STATIC = 1,
    CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
    CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3,
    CUDNN_RNN_ALGO_COUNT = 4,
} cudnnRNNAlgo_t;

typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t;

/* TODO: remove */
typedef struct cudnnAlgorithmUnionStruct {
    union Algorithm {
        cudnnConvolutionFwdAlgo_t convFwdAlgo;
        cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo;
        cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo;
        cudnnRNNAlgo_t RNNAlgo;
        cudnnCTCLossAlgo_t CTCLossAlgo;
    } algo;
} cudnnAlgorithm_t;

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf,
                             cudnnAlgorithmDescriptor_t algoDesc,
                             cudnnStatus_t status,
                             float time,
                             size_t memory);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf,
                             cudnnAlgorithmDescriptor_t *algoDesc,
                             cudnnStatus_t *status,
                             float *time,
                             size_t *memory);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSaveAlgorithm(cudnnHandle_t handle,
                   cudnnAlgorithmDescriptor_t algoDesc,
                   void *algoSpace,
                   size_t algoSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnRestoreAlgorithm(cudnnHandle_t handle,
                      void *algoSpace,
                      size_t algoSpaceSizeInBytes,
                      cudnnAlgorithmDescriptor_t algoDesc);

typedef enum {
    CUDNN_SEV_FATAL = 0,
    CUDNN_SEV_ERROR = 1,
    CUDNN_SEV_WARNING = 2,
    CUDNN_SEV_INFO = 3,
} cudnnSeverity_t;

/* Message masks to be used with cudnnSetCallback() */
#define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR)
#define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
#define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)

/* struct containing useful information for each API call */
typedef struct cudnnDebugStruct {
    unsigned cudnn_version;
    cudnnStatus_t cudnnStatus;
    unsigned time_sec; /* epoch time in seconds */
    unsigned time_usec; /* microseconds part of epoch time */
    unsigned time_delta; /* time since start in seconds */
    cudnnHandle_t handle; /* cudnn handle */
    cudaStream_t stream; /* cuda stream ID */
    unsigned long long pid; /* process ID */
    unsigned long long tid; /* thread ID */
    int cudaDeviceId; /* CUDA device ID */
    int reserved[15]; /* reserved for future use */
} cudnnDebug_t;

typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg);

cudnnStatus_t CUDNNWINAPI
cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr);

cudnnStatus_t CUDNNWINAPI
cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr);

/*
 * \brief Cross-library version checker.
 * This function is implemented differently in each sub-library. Each sublib
 * checks whether its own version matches that of its dependencies.
 * \returns CUDNN_STATUS_SUCCESS if the version check passes,
 *          CUDNN_STATUS_VERSION_MISMATCH if the versions are inconsistent.
 */
cudnnStatus_t CUDNNWINAPI
cudnnOpsInferVersionCheck(void);

#if defined(__cplusplus)
}
#endif

#endif /* CUDNN_OPS_INFER_H_ */