/*
 * Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee. Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users. These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

/*
 * cudnn_ops_infer : cuDNN's basic definitions and inference operations.
 */

#if !defined(CUDNN_OPS_INFER_H_)
#define CUDNN_OPS_INFER_H_

#include <cuda_runtime.h>
#include <stdint.h>

#include "cudnn_version.h"

/* These version numbers are autogenerated, do not edit manually. */
#define CUDNN_OPS_INFER_MAJOR 8
#define CUDNN_OPS_INFER_MINOR 2
#define CUDNN_OPS_INFER_PATCH 4

#if (CUDNN_OPS_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_INFER_MINOR != CUDNN_MINOR) || \
    (CUDNN_OPS_INFER_PATCH != CUDNN_PATCHLEVEL)
#error Version mismatch in cuDNN OPS INFER!!!
#endif

#ifndef CUDNNWINAPI
#ifdef _WIN32
#define CUDNNWINAPI __stdcall
#else
#define CUDNNWINAPI
#endif
#endif

/* Warnings for deprecated APIs are enabled by defining the CUDNN_WARN_DEPRECATED macro */
#if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
/* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
#define CUDNN_DEPRECATED __attribute__((deprecated))
#elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
/* Microsoft Visual C++ */
#define CUDNN_DEPRECATED __declspec(deprecated)
#elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
/* C++14 compilers */
#define CUDNN_DEPRECATED [[deprecated]]
#else
/* No support for the deprecated attribute */
#define CUDNN_DEPRECATED
#endif

#if defined(__cplusplus)
extern "C" {
#endif

struct cudnnContext;
typedef struct cudnnContext *cudnnHandle_t;

size_t CUDNNWINAPI
cudnnGetVersion(void);

/* Returns the version of the CUDA Runtime that cuDNN was built against */
size_t CUDNNWINAPI
cudnnGetCudartVersion(void);

/*
 * CUDNN return codes
 */
typedef enum {
    CUDNN_STATUS_SUCCESS = 0,
    CUDNN_STATUS_NOT_INITIALIZED = 1,
    CUDNN_STATUS_ALLOC_FAILED = 2,
    CUDNN_STATUS_BAD_PARAM = 3,
    CUDNN_STATUS_INTERNAL_ERROR = 4,
    CUDNN_STATUS_INVALID_VALUE = 5,
    CUDNN_STATUS_ARCH_MISMATCH = 6,
    CUDNN_STATUS_MAPPING_ERROR = 7,
    CUDNN_STATUS_EXECUTION_FAILED = 8,
    CUDNN_STATUS_NOT_SUPPORTED = 9,
    CUDNN_STATUS_LICENSE_ERROR = 10,
    CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
    CUDNN_STATUS_RUNTIME_IN_PROGRESS = 12,
    CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 13,
    CUDNN_STATUS_VERSION_MISMATCH = 14,
} cudnnStatus_t;

/* human-readable error messages */
const char *CUDNNWINAPI
cudnnGetErrorString(cudnnStatus_t status);

/* Forward definition in this version only */
typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;

typedef enum {
    CUDNN_ERRQUERY_RAWCODE = 0,
    CUDNN_ERRQUERY_NONBLOCKING = 1,
    CUDNN_ERRQUERY_BLOCKING = 2,
} cudnnErrQueryMode_t;

cudnnStatus_t CUDNNWINAPI
cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);

#ifndef __LIBRARY_TYPES_H__

typedef enum libraryPropertyType_t { MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL } libraryPropertyType;

#endif

cudnnStatus_t CUDNNWINAPI
cudnnGetProperty(libraryPropertyType type, int *value);

cudnnStatus_t CUDNNWINAPI
cudnnCreate(cudnnHandle_t *handle);
cudnnStatus_t CUDNNWINAPI
cudnnDestroy(cudnnHandle_t handle);
cudnnStatus_t CUDNNWINAPI
cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
cudnnStatus_t CUDNNWINAPI
cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);

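/*
 * Illustrative sketch (not part of this header): typical lifetime of a cuDNN
 * handle bound to a user-created CUDA stream. Error checking is omitted for
 * brevity; every call below returns a cudnnStatus_t / cudaError_t that real
 * code should check.
 *
 *   cudnnHandle_t handle;
 *   cudaStream_t stream;
 *
 *   cudaStreamCreate(&stream);
 *   cudnnCreate(&handle);
 *   cudnnSetStream(handle, stream);  // subsequent cuDNN work is issued on 'stream'
 *
 *   // ... call cuDNN operations with 'handle' ...
 *
 *   cudnnDestroy(handle);
 *   cudaStreamDestroy(stream);
 */
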
/* Data structures to represent Image/Filter and the Neural Network Layer */
typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t;
typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t;
typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t;
typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t;
typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t;
typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t;
/*
 * CUDNN data type
 */
typedef enum {
    CUDNN_DATA_FLOAT = 0,
    CUDNN_DATA_DOUBLE = 1,
    CUDNN_DATA_HALF = 2,
    CUDNN_DATA_INT8 = 3,
    CUDNN_DATA_INT32 = 4,
    CUDNN_DATA_INT8x4 = 5,
    CUDNN_DATA_UINT8 = 6,
    CUDNN_DATA_UINT8x4 = 7,
    CUDNN_DATA_INT8x32 = 8,
    CUDNN_DATA_BFLOAT16 = 9,
    CUDNN_DATA_INT64 = 10,
} cudnnDataType_t;

/*
 * CUDNN math type
 */
typedef enum {
    CUDNN_DEFAULT_MATH = 0,
    CUDNN_TENSOR_OP_MATH = 1,
    CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
    CUDNN_FMA_MATH = 3,
} cudnnMathType_t;

/*
 * CUDNN propagate Nan
 */
typedef enum {
    CUDNN_NOT_PROPAGATE_NAN = 0,
    CUDNN_PROPAGATE_NAN = 1,
} cudnnNanPropagation_t;

/*
 * CUDNN Determinism
 */
typedef enum {
    CUDNN_NON_DETERMINISTIC = 0,
    CUDNN_DETERMINISTIC = 1,
} cudnnDeterminism_t;

/* Maximum supported number of tensor dimensions */
#define CUDNN_DIM_MAX 8

/* Create an instance of a generic Tensor descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);

typedef enum {
    CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */
    CUDNN_TENSOR_NHWC = 1, /* feature maps interleaved ( cStride = 1 ) */
    CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is a vector of C elements; the vector length is given by the data type */
} cudnnTensorFormat_t;

cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
                           cudnnTensorFormat_t format,
                           cudnnDataType_t dataType, /* image data type */
                           int n, /* number of inputs (batch size) */
                           int c, /* number of input feature maps */
                           int h, /* height of input section */
                           int w); /* width of input section */

cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
                             cudnnDataType_t dataType, /* image data type */
                             int n, /* number of inputs (batch size) */
                             int c, /* number of input feature maps */
                             int h, /* height of input section */
                             int w, /* width of input section */
                             int nStride,
                             int cStride,
                             int hStride,
                             int wStride);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
                           cudnnDataType_t *dataType, /* image data type */
                           int *n, /* number of inputs (batch size) */
                           int *c, /* number of input feature maps */
                           int *h, /* height of input section */
                           int *w, /* width of input section */
                           int *nStride,
                           int *cStride,
                           int *hStride,
                           int *wStride);

cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
                           cudnnDataType_t dataType,
                           int nbDims,
                           const int dimA[],
                           const int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
                             cudnnTensorFormat_t format,
                             cudnnDataType_t dataType,
                             int nbDims,
                             const int dimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
                           int nbDimsRequested,
                           cudnnDataType_t *dataType,
                           int *nbDims,
                           int dimA[],
                           int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size);

/* PixelOffset( n, c, h, w ) = n * input_stride + c * feature_stride + h * h_stride + w * w_stride

   1) Example of all images in row major order, one batch of features after the other (with optional padding on rows)
      input_stride   : c x h x h_stride
      feature_stride : h x h_stride
      h_stride       : >= w ( h_stride = w if no padding)
      w_stride       : 1

   2) Example of all images in row major order with feature maps interleaved
      input_stride   : c x h x h_stride
      feature_stride : 1
      h_stride       : w x c
      w_stride       : c

   3) Example of all images in column major order, one batch of features after the other (with optional padding on columns)
      input_stride   : c x w x w_stride
      feature_stride : w x w_stride
      h_stride       : 1
      w_stride       : >= h
*/
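
/*
 * Illustrative sketch (not part of this header): describing a fully packed
 * NCHW tensor with explicit strides, matching example 1) above with no row
 * padding. The same layout can also be requested with
 * cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, ...).
 *
 *   cudnnTensorDescriptor_t desc;
 *   int n = 32, c = 3, h = 224, w = 224;
 *
 *   cudnnCreateTensorDescriptor(&desc);
 *   cudnnSetTensor4dDescriptorEx(desc, CUDNN_DATA_FLOAT,
 *                                n, c, h, w,
 *                                c * h * w,  // nStride
 *                                h * w,      // cStride
 *                                w,          // hStride
 *                                1);         // wStride
 *
 *   // ... use 'desc' ...
 *   cudnnDestroyTensorDescriptor(desc);
 */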

/* Destroy an instance of Tensor4d descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);

/* Fold/unfold transforms */
typedef enum {
    CUDNN_TRANSFORM_FOLD = 0U,
    CUDNN_TRANSFORM_UNFOLD = 1U,
} cudnnFoldingDirection_t;

/** Create a destination descriptor for cudnnTransformTensor */
cudnnStatus_t CUDNNWINAPI
cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
                       const cudnnTensorDescriptor_t srcDesc,
                       cudnnTensorDescriptor_t destDesc,
                       size_t *destSizeInBytes);

/** Create an empty tensor transform descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);

/** Initialize a previously created tensor transform descriptor. */
cudnnStatus_t CUDNNWINAPI
cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                  const uint32_t nbDims,
                                  const cudnnTensorFormat_t destFormat,
                                  const int32_t padBeforeA[],
                                  const int32_t padAfterA[],
                                  const uint32_t foldA[],
                                  const cudnnFoldingDirection_t direction);

/**
 * Retrieves the values stored in a previously initialized tensor transform
 * descriptor.
 */
cudnnStatus_t CUDNNWINAPI
cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                  uint32_t nbDimsRequested,
                                  cudnnTensorFormat_t *destFormat,
                                  int32_t padBeforeA[],
                                  int32_t padAfterA[],
                                  uint32_t foldA[],
                                  cudnnFoldingDirection_t *direction);

/**
 * Destroys a previously created tensor transform descriptor.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);

/* Tensor layout conversion helper (y = alpha * x + beta * y) */
cudnnStatus_t CUDNNWINAPI
cudnnTransformTensor(cudnnHandle_t handle,
                     const void *alpha,
                     const cudnnTensorDescriptor_t xDesc,
                     const void *x,
                     const void *beta,
                     const cudnnTensorDescriptor_t yDesc,
                     void *y);

cudnnStatus_t CUDNNWINAPI
cudnnTransformTensorEx(cudnnHandle_t handle,
                       const cudnnTensorTransformDescriptor_t transDesc,
                       const void *alpha,
                       const cudnnTensorDescriptor_t srcDesc,
                       const void *srcData,
                       const void *beta,
                       const cudnnTensorDescriptor_t destDesc,
                       void *destData);

/* Tensor Bias addition : C = alpha * A + beta * C */
cudnnStatus_t CUDNNWINAPI
cudnnAddTensor(cudnnHandle_t handle,
               const void *alpha,
               const cudnnTensorDescriptor_t aDesc,
               const void *A,
               const void *beta,
               const cudnnTensorDescriptor_t cDesc,
               void *C);
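
/*
 * Illustrative sketch (not part of this header): adding a per-channel bias
 * (1 x C x 1 x 1) to an N x C x H x W activation tensor; the bias is broadcast
 * over N, H and W. 'handle', the device pointers 'bias' and 'data', and the
 * dimensions n, c, h, w are assumed to exist elsewhere.
 *
 *   float one = 1.0f;
 *   cudnnTensorDescriptor_t biasDesc, dataDesc;
 *
 *   cudnnCreateTensorDescriptor(&biasDesc);
 *   cudnnCreateTensorDescriptor(&dataDesc);
 *   cudnnSetTensor4dDescriptor(biasDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, c, 1, 1);
 *   cudnnSetTensor4dDescriptor(dataDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w);
 *
 *   cudnnAddTensor(handle, &one, biasDesc, bias, &one, dataDesc, data);  // data += bias
 */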

/*
 * CUDNN OpTensor op type
 */
typedef enum {
    CUDNN_OP_TENSOR_ADD = 0,
    CUDNN_OP_TENSOR_MUL = 1,
    CUDNN_OP_TENSOR_MIN = 2,
    CUDNN_OP_TENSOR_MAX = 3,
    CUDNN_OP_TENSOR_SQRT = 4,
    CUDNN_OP_TENSOR_NOT = 5,
} cudnnOpTensorOp_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
                           cudnnOpTensorOp_t opTensorOp,
                           cudnnDataType_t opTensorCompType,
                           cudnnNanPropagation_t opTensorNanOpt);

cudnnStatus_t CUDNNWINAPI
cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
                           cudnnOpTensorOp_t *opTensorOp,
                           cudnnDataType_t *opTensorCompType,
                           cudnnNanPropagation_t *opTensorNanOpt);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);

/* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
/* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
cudnnStatus_t CUDNNWINAPI
cudnnOpTensor(cudnnHandle_t handle,
              const cudnnOpTensorDescriptor_t opTensorDesc,
              const void *alpha1,
              const cudnnTensorDescriptor_t aDesc,
              const void *A,
              const void *alpha2,
              const cudnnTensorDescriptor_t bDesc,
              const void *B,
              const void *beta,
              const cudnnTensorDescriptor_t cDesc,
              void *C);
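
/*
 * Illustrative sketch (not part of this header): element-wise C = A * B with
 * CUDNN_OP_TENSOR_MUL (alpha1 = alpha2 = 1, beta = 0). 'handle', a shared float
 * tensor descriptor 'desc' and device pointers 'dA', 'dB', 'dC' are assumed to
 * exist elsewhere.
 *
 *   float alpha1 = 1.0f, alpha2 = 1.0f, beta = 0.0f;
 *   cudnnOpTensorDescriptor_t opDesc;
 *
 *   cudnnCreateOpTensorDescriptor(&opDesc);
 *   cudnnSetOpTensorDescriptor(opDesc, CUDNN_OP_TENSOR_MUL, CUDNN_DATA_FLOAT, CUDNN_PROPAGATE_NAN);
 *   cudnnOpTensor(handle, opDesc, &alpha1, desc, dA, &alpha2, desc, dB, &beta, desc, dC);
 *   cudnnDestroyOpTensorDescriptor(opDesc);
 */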

/*
 * CUDNN ReduceTensor op type
 */
typedef enum {
    CUDNN_REDUCE_TENSOR_ADD = 0,
    CUDNN_REDUCE_TENSOR_MUL = 1,
    CUDNN_REDUCE_TENSOR_MIN = 2,
    CUDNN_REDUCE_TENSOR_MAX = 3,
    CUDNN_REDUCE_TENSOR_AMAX = 4,
    CUDNN_REDUCE_TENSOR_AVG = 5,
    CUDNN_REDUCE_TENSOR_NORM1 = 6,
    CUDNN_REDUCE_TENSOR_NORM2 = 7,
    CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
} cudnnReduceTensorOp_t;

/*
 * CUDNN ReduceTensor indices type
 */
typedef enum {
    CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
    CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
} cudnnReduceTensorIndices_t;

/*
 * CUDNN tensor indices type size (all unsigned)
 * Currently not supported, default is 32 bit unsigned.
 */
typedef enum {
    CUDNN_32BIT_INDICES = 0,
    CUDNN_64BIT_INDICES = 1,
    CUDNN_16BIT_INDICES = 2,
    CUDNN_8BIT_INDICES = 3,
} cudnnIndicesType_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               cudnnReduceTensorOp_t reduceTensorOp,
                               cudnnDataType_t reduceTensorCompType,
                               cudnnNanPropagation_t reduceTensorNanOpt,
                               cudnnReduceTensorIndices_t reduceTensorIndices,
                               cudnnIndicesType_t reduceTensorIndicesType);

cudnnStatus_t CUDNNWINAPI
cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               cudnnReduceTensorOp_t *reduceTensorOp,
                               cudnnDataType_t *reduceTensorCompType,
                               cudnnNanPropagation_t *reduceTensorNanOpt,
                               cudnnReduceTensorIndices_t *reduceTensorIndices,
                               cudnnIndicesType_t *reduceTensorIndicesType);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);

/* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
 * output tensors */
cudnnStatus_t CUDNNWINAPI
cudnnGetReductionIndicesSize(cudnnHandle_t handle,
                             const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                             const cudnnTensorDescriptor_t aDesc,
                             const cudnnTensorDescriptor_t cDesc,
                             size_t *sizeInBytes);

/* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
 * tensors */
cudnnStatus_t CUDNNWINAPI
cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
                               const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               const cudnnTensorDescriptor_t aDesc,
                               const cudnnTensorDescriptor_t cDesc,
                               size_t *sizeInBytes);

/* Tensor operation : C = reduce op( alpha * A ) + beta * C */
/* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */
/* The indices space is ignored for reduce ops other than min or max. */
cudnnStatus_t CUDNNWINAPI
cudnnReduceTensor(cudnnHandle_t handle,
                  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                  void *indices,
                  size_t indicesSizeInBytes,
                  void *workspace,
                  size_t workspaceSizeInBytes,
                  const void *alpha,
                  const cudnnTensorDescriptor_t aDesc,
                  const void *A,
                  const void *beta,
                  const cudnnTensorDescriptor_t cDesc,
                  void *C);
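
/*
 * Illustrative sketch (not part of this header): averaging an N x C x H x W
 * tensor over H and W. Reduction happens over the dimensions whose size is 1
 * in the output descriptor, so 'cDesc' describes an N x C x 1 x 1 tensor while
 * 'aDesc' describes the input; both, together with 'handle' and the device
 * pointers 'dA', 'dC', 'indices' and 'workspace', are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   size_t indicesBytes = 0, workspaceBytes = 0;
 *   cudnnReduceTensorDescriptor_t reduceDesc;
 *
 *   cudnnCreateReduceTensorDescriptor(&reduceDesc);
 *   cudnnSetReduceTensorDescriptor(reduceDesc, CUDNN_REDUCE_TENSOR_AVG, CUDNN_DATA_FLOAT,
 *                                  CUDNN_PROPAGATE_NAN, CUDNN_REDUCE_TENSOR_NO_INDICES,
 *                                  CUDNN_32BIT_INDICES);
 *   cudnnGetReductionIndicesSize(handle, reduceDesc, aDesc, cDesc, &indicesBytes);
 *   cudnnGetReductionWorkspaceSize(handle, reduceDesc, aDesc, cDesc, &workspaceBytes);
 *   // allocate 'indices' and 'workspace' on the device (cudaMalloc), then:
 *   cudnnReduceTensor(handle, reduceDesc, indices, indicesBytes, workspace, workspaceBytes,
 *                     &alpha, aDesc, dA, &beta, cDesc, dC);
 *   cudnnDestroyReduceTensorDescriptor(reduceDesc);
 */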

/* Set all values of a tensor to a given value : y[i] = value[0] */
cudnnStatus_t CUDNNWINAPI
cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);

/* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
cudnnStatus_t CUDNNWINAPI
cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);

/* Create an instance of FilterStruct */
cudnnStatus_t CUDNNWINAPI
cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t dataType, /* image data type */
                           cudnnTensorFormat_t format,
                           int k, /* number of output feature maps */
                           int c, /* number of input feature maps */
                           int h, /* height of each input filter */
                           int w); /* width of each input filter */

cudnnStatus_t CUDNNWINAPI
cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t *dataType, /* image data type */
                           cudnnTensorFormat_t *format,
                           int *k, /* number of output feature maps */
                           int *c, /* number of input feature maps */
                           int *h, /* height of each input filter */
                           int *w); /* width of each input filter */

cudnnStatus_t CUDNNWINAPI
cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t dataType, /* image data type */
                           cudnnTensorFormat_t format,
                           int nbDims,
                           const int filterDimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
                           int nbDimsRequested,
                           cudnnDataType_t *dataType, /* image data type */
                           cudnnTensorFormat_t *format,
                           int *nbDims,
                           int filterDimA[]);
cudnnStatus_t CUDNNWINAPI
cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);

cudnnStatus_t CUDNNWINAPI
cudnnTransformFilter(cudnnHandle_t handle,
                     const cudnnTensorTransformDescriptor_t transDesc,
                     const void *alpha,
                     const cudnnFilterDescriptor_t srcDesc,
                     const void *srcData,
                     const void *beta,
                     const cudnnFilterDescriptor_t destDesc,
                     void *destData);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);

/*
 * softmax algorithm
 */
typedef enum {
    CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */
    CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
    CUDNN_SOFTMAX_LOG = 2
} cudnnSoftmaxAlgorithm_t;

typedef enum {
    CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
    CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */
} cudnnSoftmaxMode_t;

/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward softmax */
cudnnStatus_t CUDNNWINAPI
cudnnSoftmaxForward(cudnnHandle_t handle,
                    cudnnSoftmaxAlgorithm_t algo,
                    cudnnSoftmaxMode_t mode,
                    const void *alpha,
                    const cudnnTensorDescriptor_t xDesc,
                    const void *x,
                    const void *beta,
                    const cudnnTensorDescriptor_t yDesc,
                    void *y);
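
/*
 * Illustrative sketch (not part of this header): numerically stable softmax
 * over the channel dimension of a logits tensor. With alpha = 1 and beta = 0
 * the previous contents of y are simply overwritten. 'handle', 'xDesc', 'x',
 * 'yDesc' and 'y' are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *
 *   cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
 *                       &alpha, xDesc, x, &beta, yDesc, y);
 */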

/*
 * pooling mode
 */
typedef enum {
    CUDNN_POOLING_MAX = 0,
    CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
    CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
    CUDNN_POOLING_MAX_DETERMINISTIC = 3
} cudnnPoolingMode_t;

/* Create an instance of pooling descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                            cudnnPoolingMode_t mode,
                            cudnnNanPropagation_t maxpoolingNanOpt,
                            int windowHeight,
                            int windowWidth,
                            int verticalPadding,
                            int horizontalPadding,
                            int verticalStride,
                            int horizontalStride);

cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                            cudnnPoolingMode_t *mode,
                            cudnnNanPropagation_t *maxpoolingNanOpt,
                            int *windowHeight,
                            int *windowWidth,
                            int *verticalPadding,
                            int *horizontalPadding,
                            int *verticalStride,
                            int *horizontalStride);

cudnnStatus_t CUDNNWINAPI
cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                            const cudnnPoolingMode_t mode,
                            const cudnnNanPropagation_t maxpoolingNanOpt,
                            int nbDims,
                            const int windowDimA[],
                            const int paddingA[],
                            const int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                            int nbDimsRequested,
                            cudnnPoolingMode_t *mode,
                            cudnnNanPropagation_t *maxpoolingNanOpt,
                            int *nbDims,
                            int windowDimA[],
                            int paddingA[],
                            int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                  const cudnnTensorDescriptor_t inputTensorDesc,
                                  int nbDims,
                                  int outputTensorDimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                  const cudnnTensorDescriptor_t inputTensorDesc,
                                  int *n,
                                  int *c,
                                  int *h,
                                  int *w);

/* Destroy an instance of pooling descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);

/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward pooling */
cudnnStatus_t CUDNNWINAPI
cudnnPoolingForward(cudnnHandle_t handle,
                    const cudnnPoolingDescriptor_t poolingDesc,
                    const void *alpha,
                    const cudnnTensorDescriptor_t xDesc,
                    const void *x,
                    const void *beta,
                    const cudnnTensorDescriptor_t yDesc,
                    void *y);
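
/*
 * Illustrative sketch (not part of this header): 2x2 max pooling with stride 2
 * and no padding. The output shape is queried from the pooling descriptor so
 * that 'yDesc' can be set consistently before the forward call. 'handle',
 * 'xDesc', 'x', 'yDesc' and 'y' are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   int on, oc, oh, ow;
 *   cudnnPoolingDescriptor_t poolDesc;
 *
 *   cudnnCreatePoolingDescriptor(&poolDesc);
 *   cudnnSetPooling2dDescriptor(poolDesc, CUDNN_POOLING_MAX, CUDNN_NOT_PROPAGATE_NAN,
 *                               2, 2,   // window height, width
 *                               0, 0,   // vertical, horizontal padding
 *                               2, 2);  // vertical, horizontal stride
 *   cudnnGetPooling2dForwardOutputDim(poolDesc, xDesc, &on, &oc, &oh, &ow);
 *   cudnnSetTensor4dDescriptor(yDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, on, oc, oh, ow);
 *   cudnnPoolingForward(handle, poolDesc, &alpha, xDesc, x, &beta, yDesc, y);
 *   cudnnDestroyPoolingDescriptor(poolDesc);
 */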

/*
 * activation mode
 */
typedef enum {
    CUDNN_ACTIVATION_SIGMOID = 0,
    CUDNN_ACTIVATION_RELU = 1,
    CUDNN_ACTIVATION_TANH = 2,
    CUDNN_ACTIVATION_CLIPPED_RELU = 3,
    CUDNN_ACTIVATION_ELU = 4,
    CUDNN_ACTIVATION_IDENTITY = 5,
    CUDNN_ACTIVATION_SWISH = 6
} cudnnActivationMode_t;

/* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
cudnnStatus_t CUDNNWINAPI
cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
                             cudnnActivationMode_t mode,
                             cudnnNanPropagation_t reluNanOpt,
                             double coef); /* ceiling for clipped RELU, alpha for ELU */

cudnnStatus_t CUDNNWINAPI
cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
                             cudnnActivationMode_t *mode,
                             cudnnNanPropagation_t *reluNanOpt,
                             double *coef); /* ceiling for clipped RELU, alpha for ELU */

cudnnStatus_t CUDNNWINAPI
cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta);

cudnnStatus_t CUDNNWINAPI
cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);

/* Function to perform forward activation */
cudnnStatus_t CUDNNWINAPI
cudnnActivationForward(cudnnHandle_t handle,
                       cudnnActivationDescriptor_t activationDesc,
                       const void *alpha,
                       const cudnnTensorDescriptor_t xDesc,
                       const void *x,
                       const void *beta,
                       const cudnnTensorDescriptor_t yDesc,
                       void *y);
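
/*
 * Illustrative sketch (not part of this header): ReLU applied in place by
 * passing the same descriptor and pointer for x and y (assumed to be allowed
 * for activations). 'handle', 'xDesc' and 'x' are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   cudnnActivationDescriptor_t actDesc;
 *
 *   cudnnCreateActivationDescriptor(&actDesc);
 *   cudnnSetActivationDescriptor(actDesc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.0);
 *   cudnnActivationForward(handle, actDesc, &alpha, xDesc, x, &beta, xDesc, x);
 *   cudnnDestroyActivationDescriptor(actDesc);
 */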

/*
 * Create an instance of LRN (Local Response Normalization) descriptor
 * Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper
 */
cudnnStatus_t CUDNNWINAPI
cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc);

#define CUDNN_LRN_MIN_N 1 /* minimum allowed lrnN */
#define CUDNN_LRN_MAX_N 16 /* maximum allowed lrnN */
#define CUDNN_LRN_MIN_K 1e-5 /* minimum allowed lrnK */
#define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */

/* LRN layer mode */
typedef enum {
    CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */
} cudnnLRNMode_t;

/*
 * Uses a window [center-lookBehind, center+lookAhead], where
 * lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
 * Values of double parameters cast to tensor data type.
 */
cudnnStatus_t CUDNNWINAPI
cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);
/*
 * Retrieve the settings currently stored in an LRN layer descriptor
 * Any of the provided pointers can be NULL (no corresponding value will be returned)
 */
cudnnStatus_t CUDNNWINAPI
cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK);

/* Destroy an instance of LRN descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);

/* LRN functions: output = alpha * normalize(x) + beta * old_y */

/* LRN cross-channel forward computation. Double parameters cast to tensor data type */
cudnnStatus_t CUDNNWINAPI
cudnnLRNCrossChannelForward(cudnnHandle_t handle,
                            cudnnLRNDescriptor_t normDesc,
                            cudnnLRNMode_t lrnMode,
                            const void *alpha,
                            const cudnnTensorDescriptor_t xDesc,
                            const void *x,
                            const void *beta,
                            const cudnnTensorDescriptor_t yDesc,
                            void *y);

typedef enum {
    CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
} cudnnDivNormMode_t;

/* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
cudnnStatus_t CUDNNWINAPI
cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
                                  cudnnLRNDescriptor_t normDesc,
                                  cudnnDivNormMode_t mode,
                                  const void *alpha,
                                  const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
                                  const void *x,
                                  const void *means, /* if NULL, means are assumed to be zero */
                                  void *temp,
                                  void *temp2,
                                  const void *beta,
                                  const cudnnTensorDescriptor_t yDesc,
                                  void *y);

typedef enum {
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_BATCHNORM_PER_ACTIVATION = 0,

    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_BATCHNORM_SPATIAL = 1,

    /*
     * bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
     * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
     */
    CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
} cudnnBatchNormMode_t;

#define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */

/*
 * Derives a tensor descriptor from layer data descriptor for BatchNormalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
                              const cudnnTensorDescriptor_t xDesc,
                              cudnnBatchNormMode_t mode);

typedef enum {
    CUDNN_BATCHNORM_OPS_BN = 0, /* do batch normalization only */
    CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, /* do batchNorm, then activation */
    CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
} cudnnBatchNormOps_t;

/*
 * Performs Batch Normalization during Inference:
 * y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
 * with bnScale, bnBias, runningMean, runningInvVariance tensors indexed
 * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
 * (declared in cudnn_ops_train.h) for notes on function arguments.
 */
cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
                                        cudnnBatchNormMode_t mode,
                                        const void *alpha, /* alpha[0] = result blend factor */
                                        const void *beta, /* beta[0] = dest layer blend factor */
                                        const cudnnTensorDescriptor_t xDesc,
                                        const void *x, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t yDesc,
                                        void *y, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
                                        const void *bnScale,
                                        const void *bnBias,
                                        const void *estimatedMean,
                                        const void *estimatedVariance,
                                        double epsilon);
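
/*
 * Illustrative sketch (not part of this header): spatial batch normalization
 * at inference time. The scale/bias descriptor is derived from the input
 * descriptor; 'bnScale', 'bnBias', 'estimatedMean' and 'estimatedVariance'
 * hold parameters and statistics saved from training. 'handle', 'xDesc', 'x',
 * 'yDesc' and 'y' are assumed to exist elsewhere.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   double epsilon = 1e-5;  // must be >= CUDNN_BN_MIN_EPSILON
 *   cudnnTensorDescriptor_t bnDesc;
 *
 *   cudnnCreateTensorDescriptor(&bnDesc);
 *   cudnnDeriveBNTensorDescriptor(bnDesc, xDesc, CUDNN_BATCHNORM_SPATIAL);
 *   cudnnBatchNormalizationForwardInference(handle, CUDNN_BATCHNORM_SPATIAL,
 *                                           &alpha, &beta,
 *                                           xDesc, x, yDesc, y,
 *                                           bnDesc, bnScale, bnBias,
 *                                           estimatedMean, estimatedVariance,
 *                                           epsilon);
 *   cudnnDestroyTensorDescriptor(bnDesc);
 */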

typedef enum {
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_NORM_PER_ACTIVATION = 0,

    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_NORM_PER_CHANNEL = 1,
} cudnnNormMode_t;

typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t;

/*
 * Derives a tensor descriptor from layer data descriptor for Normalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
                                cudnnTensorDescriptor_t derivedNormMeanVarDesc,
                                const cudnnTensorDescriptor_t xDesc,
                                cudnnNormMode_t mode,
                                int groupCnt); /* Placeholder for future work; should be set to 1 for now */

typedef enum {
    CUDNN_NORM_OPS_NORM = 0, /* do normalization only */
    CUDNN_NORM_OPS_NORM_ACTIVATION = 1, /* do Norm, then activation */
    CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
} cudnnNormOps_t;

/*
 * Performs Normalization during Inference:
 * y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
 * with normScale, normBias, runningMean, runningInvVariance tensors indexed
 * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
 * (declared in cudnn_ops_train.h) for notes on function arguments.
 */
cudnnStatus_t CUDNNWINAPI
cudnnNormalizationForwardInference(cudnnHandle_t handle,
                                   cudnnNormMode_t mode,
                                   cudnnNormOps_t normOps,
                                   cudnnNormAlgo_t algo,
                                   const void *alpha, /* alpha[0] = result blend factor */
                                   const void *beta, /* beta[0] = dest layer blend factor */
                                   const cudnnTensorDescriptor_t xDesc,
                                   const void *x, /* NxCxHxW */
                                   const cudnnTensorDescriptor_t normScaleBiasDesc,
                                   const void *normScale,
                                   const void *normBias,
                                   const cudnnTensorDescriptor_t normMeanVarDesc,
                                   const void *estimatedMean,
                                   const void *estimatedVariance,
                                   const cudnnTensorDescriptor_t zDesc,
                                   const void *z,
                                   cudnnActivationDescriptor_t activationDesc,
                                   const cudnnTensorDescriptor_t yDesc,
                                   void *y, /* NxCxHxW */
                                   double epsilon,
                                   int groupCnt); /* Placeholder for future work; should be set to 1 for now */

/* APIs for spatial transformer network */
typedef enum {
    CUDNN_SAMPLER_BILINEAR = 0,
} cudnnSamplerType_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
                                       cudnnSamplerType_t samplerType,
                                       cudnnDataType_t dataType,
                                       const int nbDims,
                                       const int dimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
                                   const cudnnSpatialTransformerDescriptor_t stDesc,
                                   const void *theta,
                                   void *grid);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
                             cudnnSpatialTransformerDescriptor_t stDesc,
                             const void *alpha,
                             const cudnnTensorDescriptor_t xDesc,
                             const void *x,
                             const void *grid,
                             const void *beta,
                             cudnnTensorDescriptor_t yDesc,
                             void *y);

typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);

/* Helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes);

/* Helper function to determine size of the reserve space to be passed to dropout forward/backward calls */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes);

cudnnStatus_t CUDNNWINAPI
cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float dropout,
                          void *states,
                          size_t stateSizeInBytes,
                          unsigned long long seed);

/* Restores the dropout descriptor to a previously saved-off state */
cudnnStatus_t CUDNNWINAPI
cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                              cudnnHandle_t handle,
                              float dropout,
                              void *states,
                              size_t stateSizeInBytes,
                              unsigned long long seed);

cudnnStatus_t CUDNNWINAPI
cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float *dropout,
                          void **states,
                          unsigned long long *seed);

cudnnStatus_t CUDNNWINAPI
cudnnDropoutForward(cudnnHandle_t handle,
                    const cudnnDropoutDescriptor_t dropoutDesc,
                    const cudnnTensorDescriptor_t xdesc,
                    const void *x,
                    const cudnnTensorDescriptor_t ydesc,
                    void *y,
                    void *reserveSpace,
                    size_t reserveSpaceSizeInBytes);
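
/*
 * Illustrative sketch (not part of this header): applying 50% dropout. The RNG
 * state buffer and the reserve space are sized with the helpers above and must
 * be allocated on the device (e.g. with cudaMalloc) before use; 'handle',
 * 'xDesc', 'x', 'yDesc', 'y', 'states' and 'reserve' are assumed to exist
 * elsewhere.
 *
 *   size_t stateBytes = 0, reserveBytes = 0;
 *   cudnnDropoutDescriptor_t dropDesc;
 *
 *   cudnnDropoutGetStatesSize(handle, &stateBytes);
 *   cudnnDropoutGetReserveSpaceSize(xDesc, &reserveBytes);
 *   // cudaMalloc(&states, stateBytes); cudaMalloc(&reserve, reserveBytes);
 *
 *   cudnnCreateDropoutDescriptor(&dropDesc);
 *   cudnnSetDropoutDescriptor(dropDesc, handle, 0.5f, states, stateBytes, 1234ULL);
 *   cudnnDropoutForward(handle, dropDesc, xDesc, x, yDesc, y, reserve, reserveBytes);
 *   cudnnDestroyDropoutDescriptor(dropDesc);
 */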

/* TODO: remove */

typedef struct cudnnAlgorithmStruct *cudnnAlgorithmDescriptor_t;
typedef struct cudnnAlgorithmPerformanceStruct *cudnnAlgorithmPerformance_t;

/* TODO: move these enums out to the appropriate submodule */
typedef enum {
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
    CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
    CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
    CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8
} cudnnConvolutionFwdAlgo_t;

typedef enum {
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7
} cudnnConvolutionBwdFilterAlgo_t;

typedef enum {
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6
} cudnnConvolutionBwdDataAlgo_t;

typedef enum {
    CUDNN_RNN_ALGO_STANDARD = 0,
    CUDNN_RNN_ALGO_PERSIST_STATIC = 1,
    CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
    CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3,
    CUDNN_RNN_ALGO_COUNT = 4,
} cudnnRNNAlgo_t;

typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t;

/* TODO: remove */
typedef struct cudnnAlgorithmUnionStruct {
    union Algorithm {
        cudnnConvolutionFwdAlgo_t convFwdAlgo;
        cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo;
        cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo;
        cudnnRNNAlgo_t RNNAlgo;
        cudnnCTCLossAlgo_t CTCLossAlgo;
    } algo;
} cudnnAlgorithm_t;

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf,
                             cudnnAlgorithmDescriptor_t algoDesc,
                             cudnnStatus_t status,
                             float time,
                             size_t memory);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf,
                             cudnnAlgorithmDescriptor_t *algoDesc,
                             cudnnStatus_t *status,
                             float *time,
                             size_t *memory);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSaveAlgorithm(cudnnHandle_t handle,
                   cudnnAlgorithmDescriptor_t algoDesc,
                   void *algoSpace,
                   size_t algoSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnRestoreAlgorithm(cudnnHandle_t handle,
                      void *algoSpace,
                      size_t algoSpaceSizeInBytes,
                      cudnnAlgorithmDescriptor_t algoDesc);

typedef enum {
    CUDNN_SEV_FATAL = 0,
    CUDNN_SEV_ERROR = 1,
    CUDNN_SEV_WARNING = 2,
    CUDNN_SEV_INFO = 3,
} cudnnSeverity_t;

/* Message masks to be used with cudnnSetCallback() */
#define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR)
#define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
#define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)

/* struct containing useful information for each API call */
typedef struct cudnnDebugStruct {
    unsigned cudnn_version;
    cudnnStatus_t cudnnStatus;
    unsigned time_sec; /* epoch time in seconds */
    unsigned time_usec; /* microseconds part of epoch time */
    unsigned time_delta; /* time since start in seconds */
    cudnnHandle_t handle; /* cudnn handle */
    cudaStream_t stream; /* cuda stream ID */
    unsigned long long pid; /* process ID */
    unsigned long long tid; /* thread ID */
    int cudaDeviceId; /* CUDA device ID */
    int reserved[15]; /* reserved for future use */
} cudnnDebug_t;

typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg);

cudnnStatus_t CUDNNWINAPI
cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr);

cudnnStatus_t CUDNNWINAPI
cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr);

/*
 * \brief Cross-library version checker.
 * This function is implemented differently in each sub-library. Each sublib
 * checks whether its own version matches that of its dependencies.
 * \returns CUDNN_STATUS_SUCCESS if the version check passes,
 *          CUDNN_STATUS_VERSION_MISMATCH if the versions are inconsistent.
 */
cudnnStatus_t CUDNNWINAPI
cudnnOpsInferVersionCheck(void);

#if defined(__cplusplus)
}
#endif

#endif /* CUDNN_OPS_INFER_H_ */