1 | /* |
2 | * Copyright 1993-2020 NVIDIA Corporation. All rights reserved. |
3 | * |
4 | * NOTICE TO LICENSEE: |
5 | * |
6 | * This source code and/or documentation ("Licensed Deliverables") are |
7 | * subject to NVIDIA intellectual property rights under U.S. and |
8 | * international Copyright laws. |
9 | * |
 * The Licensed Deliverables contained herein are PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and are being provided under the terms and
12 | * conditions of a form of NVIDIA software license agreement by and |
13 | * between NVIDIA and Licensee ("License Agreement") or electronically |
14 | * accepted by Licensee. Notwithstanding any terms or conditions to |
15 | * the contrary in the License Agreement, reproduction or disclosure |
16 | * of the Licensed Deliverables to any third party without the express |
17 | * written consent of NVIDIA is prohibited. |
18 | * |
19 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE |
20 | * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE |
21 | * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS |
22 | * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. |
23 | * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED |
24 | * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, |
25 | * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. |
26 | * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE |
27 | * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY |
28 | * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY |
29 | * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
30 | * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS |
31 | * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE |
32 | * OF THESE LICENSED DELIVERABLES. |
33 | * |
34 | * U.S. Government End Users. These Licensed Deliverables are a |
35 | * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT |
36 | * 1995), consisting of "commercial computer software" and "commercial |
37 | * computer software documentation" as such terms are used in 48 |
 * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R. 12.212 and
40 | * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all |
41 | * U.S. Government End Users acquire the Licensed Deliverables with |
42 | * only those rights set forth herein. |
43 | * |
44 | * Any use of the Licensed Deliverables in individual and commercial |
45 | * software must include, in the user documentation and internal |
46 | * comments to the code, the above Disclaimer and U.S. Government End |
47 | * Users Notice. |
48 | */ |
49 | |
50 | /* |
51 | * cudnn_ops_infer : cuDNN's basic definitions and inference operations. |
52 | */ |
53 | |
54 | #if !defined(CUDNN_OPS_INFER_H_) |
55 | #define CUDNN_OPS_INFER_H_ |
56 | |
57 | #include <cuda_runtime.h> |
58 | #include <stdint.h> |
59 | |
60 | #include "cudnn_version.h" |
61 | |
62 | /* These version numbers are autogenerated, do not edit manually. */ |
63 | #define CUDNN_OPS_INFER_MAJOR 8 |
64 | #define CUDNN_OPS_INFER_MINOR 2 |
65 | #define CUDNN_OPS_INFER_PATCH 4 |
66 | |
67 | #if (CUDNN_OPS_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_INFER_MINOR != CUDNN_MINOR) || \ |
68 | (CUDNN_OPS_INFER_PATCH != CUDNN_PATCHLEVEL) |
69 | #error Version mismatch in cuDNN OPS INFER!!! |
70 | #endif |
71 | |
72 | #ifndef CUDNNWINAPI |
73 | #ifdef _WIN32 |
74 | #define CUDNNWINAPI __stdcall |
75 | #else |
76 | #define CUDNNWINAPI |
77 | #endif |
78 | #endif |
79 | |
/* Warnings for deprecated APIs are enabled by defining the CUDNN_WARN_DEPRECATED macro */
81 | #if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__)) |
/* GCC, Clang, and other compilers that define __GNUC__ (Intel, Cray, IBM XL C/C++) */
83 | #define CUDNN_DEPRECATED __attribute__((deprecated)) |
84 | #elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER) |
85 | /* Microsoft Visual C++ */ |
86 | #define CUDNN_DEPRECATED __declspec(deprecated) |
87 | #elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L) |
88 | /* C++14 compilers */ |
89 | #define CUDNN_DEPRECATED [[deprecated]] |
90 | #else |
91 | /* No support for the deprecated attribute */ |
92 | #define CUDNN_DEPRECATED |
93 | #endif |
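
/*
 * Example (illustrative, not part of the API): deprecation warnings are
 * opt-in. Assuming a GCC or Clang toolchain, a build line such as
 *
 *     cc -DCUDNN_WARN_DEPRECATED -c app.c -I/path/to/cudnn/include
 *
 * makes every call to a CUDNN_DEPRECATED function emit a compiler warning.
 */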
94 | |
95 | #if defined(__cplusplus) |
96 | extern "C" { |
97 | #endif |
98 | |
99 | struct cudnnContext; |
100 | typedef struct cudnnContext *cudnnHandle_t; |
101 | |
102 | size_t CUDNNWINAPI |
103 | cudnnGetVersion(void); |
104 | |
/* Returns the version of the CUDA Runtime that cuDNN was statically linked against */
106 | size_t CUDNNWINAPI |
107 | cudnnGetCudartVersion(void); |
108 | |
109 | /* |
110 | * CUDNN return codes |
111 | */ |
112 | typedef enum { |
113 | CUDNN_STATUS_SUCCESS = 0, |
114 | CUDNN_STATUS_NOT_INITIALIZED = 1, |
115 | CUDNN_STATUS_ALLOC_FAILED = 2, |
116 | CUDNN_STATUS_BAD_PARAM = 3, |
117 | CUDNN_STATUS_INTERNAL_ERROR = 4, |
118 | CUDNN_STATUS_INVALID_VALUE = 5, |
119 | CUDNN_STATUS_ARCH_MISMATCH = 6, |
120 | CUDNN_STATUS_MAPPING_ERROR = 7, |
121 | CUDNN_STATUS_EXECUTION_FAILED = 8, |
122 | CUDNN_STATUS_NOT_SUPPORTED = 9, |
123 | CUDNN_STATUS_LICENSE_ERROR = 10, |
124 | CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11, |
125 | CUDNN_STATUS_RUNTIME_IN_PROGRESS = 12, |
126 | CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 13, |
127 | CUDNN_STATUS_VERSION_MISMATCH = 14, |
128 | } cudnnStatus_t; |
129 | |
/* Returns a human-readable message for the given status code */
131 | const char *CUDNNWINAPI |
132 | cudnnGetErrorString(cudnnStatus_t status); |
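
/*
 * Example (illustrative sketch, not part of the API): a minimal status-checking
 * macro built on cudnnGetErrorString. The name CHECK_CUDNN is an assumption
 * for illustration.
 *
 *     #include <stdio.h>
 *     #include <stdlib.h>
 *
 *     #define CHECK_CUDNN(call)                                          \
 *         do {                                                           \
 *             cudnnStatus_t s_ = (call);                                 \
 *             if (s_ != CUDNN_STATUS_SUCCESS) {                          \
 *                 fprintf(stderr, "cuDNN error: %s at %s:%d\n",          \
 *                         cudnnGetErrorString(s_), __FILE__, __LINE__);  \
 *                 exit(EXIT_FAILURE);                                    \
 *             }                                                          \
 *         } while (0)
 */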
133 | |
/* Forward declaration in this version only */
135 | typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t; |
136 | |
137 | typedef enum { |
138 | CUDNN_ERRQUERY_RAWCODE = 0, |
139 | CUDNN_ERRQUERY_NONBLOCKING = 1, |
140 | CUDNN_ERRQUERY_BLOCKING = 2, |
141 | } cudnnErrQueryMode_t; |
142 | |
143 | cudnnStatus_t CUDNNWINAPI |
144 | cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag); |
145 | |
146 | #ifndef __LIBRARY_TYPES_H__ |
147 | |
148 | typedef enum libraryPropertyType_t { MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL } libraryPropertyType; |
149 | |
150 | #endif |
151 | |
152 | cudnnStatus_t CUDNNWINAPI |
153 | cudnnGetProperty(libraryPropertyType type, int *value); |
154 | |
155 | cudnnStatus_t CUDNNWINAPI |
156 | cudnnCreate(cudnnHandle_t *handle); |
157 | cudnnStatus_t CUDNNWINAPI |
158 | cudnnDestroy(cudnnHandle_t handle); |
159 | cudnnStatus_t CUDNNWINAPI |
160 | cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId); |
161 | cudnnStatus_t CUDNNWINAPI |
162 | cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId); |
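
/*
 * Example (illustrative sketch): typical handle lifecycle. Error checking is
 * elided for brevity; real code should test every returned status.
 *
 *     cudnnHandle_t handle;
 *     cudaStream_t stream;
 *     cudnnCreate(&handle);
 *     cudaStreamCreate(&stream);
 *     cudnnSetStream(handle, stream); // subsequent cuDNN work runs on 'stream'
 *     // ... launch work ...
 *     cudnnDestroy(handle);
 *     cudaStreamDestroy(stream);
 */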
163 | |
164 | /* Data structures to represent Image/Filter and the Neural Network Layer */ |
165 | typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t; |
166 | typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t; |
167 | typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t; |
168 | typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t; |
169 | typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t; |
170 | typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t; |
171 | typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t; |
172 | typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t; |
173 | typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t; |
174 | typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t; |
175 | /* |
176 | * CUDNN data type |
177 | */ |
178 | typedef enum { |
179 | CUDNN_DATA_FLOAT = 0, |
180 | CUDNN_DATA_DOUBLE = 1, |
181 | CUDNN_DATA_HALF = 2, |
182 | CUDNN_DATA_INT8 = 3, |
183 | CUDNN_DATA_INT32 = 4, |
184 | CUDNN_DATA_INT8x4 = 5, |
185 | CUDNN_DATA_UINT8 = 6, |
186 | CUDNN_DATA_UINT8x4 = 7, |
187 | CUDNN_DATA_INT8x32 = 8, |
188 | CUDNN_DATA_BFLOAT16 = 9, |
189 | CUDNN_DATA_INT64 = 10, |
190 | } cudnnDataType_t; |
191 | |
192 | /* |
193 | * CUDNN math type |
194 | */ |
195 | typedef enum { |
196 | CUDNN_DEFAULT_MATH = 0, |
197 | CUDNN_TENSOR_OP_MATH = 1, |
198 | CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2, |
199 | CUDNN_FMA_MATH = 3, |
200 | } cudnnMathType_t; |
201 | |
202 | /* |
203 | * CUDNN propagate Nan |
204 | */ |
205 | typedef enum { |
206 | CUDNN_NOT_PROPAGATE_NAN = 0, |
207 | CUDNN_PROPAGATE_NAN = 1, |
208 | } cudnnNanPropagation_t; |
209 | |
210 | /* |
211 | * CUDNN Determinism |
212 | */ |
213 | typedef enum { |
214 | CUDNN_NON_DETERMINISTIC = 0, |
215 | CUDNN_DETERMINISTIC = 1, |
216 | } cudnnDeterminism_t; |
217 | |
218 | /* Maximum supported number of tensor dimensions */ |
219 | #define CUDNN_DIM_MAX 8 |
220 | |
221 | /* Create an instance of a generic Tensor descriptor */ |
222 | cudnnStatus_t CUDNNWINAPI |
223 | cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc); |
224 | |
225 | typedef enum { |
    CUDNN_TENSOR_NCHW        = 0, /* row major (wStride = 1, hStride = w) */
    CUDNN_TENSOR_NHWC        = 1, /* feature maps interleaved (cStride = 1) */
    CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is a vector of C elements; the vector length is implied by the data type */
229 | } cudnnTensorFormat_t; |
230 | |
231 | cudnnStatus_t CUDNNWINAPI |
232 | cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc, |
233 | cudnnTensorFormat_t format, |
234 | cudnnDataType_t dataType, /* image data type */ |
235 | int n, /* number of inputs (batch size) */ |
236 | int c, /* number of input feature maps */ |
237 | int h, /* height of input section */ |
238 | int w); /* width of input section */ |
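
/*
 * Example (illustrative sketch): describing a batch of 32 single-precision
 * RGB images of size 224x224 in NCHW layout. Error checking is elided.
 *
 *     cudnnTensorDescriptor_t xDesc;
 *     cudnnCreateTensorDescriptor(&xDesc);
 *     cudnnSetTensor4dDescriptor(xDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
 *                                32, 3, 224, 224);
 *     // ... use xDesc ...
 *     cudnnDestroyTensorDescriptor(xDesc);
 */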
239 | |
240 | cudnnStatus_t CUDNNWINAPI |
241 | cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc, |
242 | cudnnDataType_t dataType, /* image data type */ |
243 | int n, /* number of inputs (batch size) */ |
244 | int c, /* number of input feature maps */ |
245 | int h, /* height of input section */ |
246 | int w, /* width of input section */ |
247 | int nStride, |
248 | int cStride, |
249 | int hStride, |
250 | int wStride); |
251 | |
252 | cudnnStatus_t CUDNNWINAPI |
253 | cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc, |
254 | cudnnDataType_t *dataType, /* image data type */ |
255 | int *n, /* number of inputs (batch size) */ |
256 | int *c, /* number of input feature maps */ |
257 | int *h, /* height of input section */ |
258 | int *w, /* width of input section */ |
259 | int *nStride, |
260 | int *cStride, |
261 | int *hStride, |
262 | int *wStride); |
263 | |
264 | cudnnStatus_t CUDNNWINAPI |
265 | cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc, |
266 | cudnnDataType_t dataType, |
267 | int nbDims, |
268 | const int dimA[], |
269 | const int strideA[]); |
270 | |
271 | cudnnStatus_t CUDNNWINAPI |
272 | cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc, |
273 | cudnnTensorFormat_t format, |
274 | cudnnDataType_t dataType, |
275 | int nbDims, |
276 | const int dimA[]); |
277 | |
278 | cudnnStatus_t CUDNNWINAPI |
279 | cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc, |
280 | int nbDimsRequested, |
281 | cudnnDataType_t *dataType, |
282 | int *nbDims, |
283 | int dimA[], |
284 | int strideA[]); |
285 | |
286 | cudnnStatus_t CUDNNWINAPI |
287 | cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size); |
288 | |
/* PixelOffset( n, c, h, w ) = n * input_stride + c * feature_stride + h * h_stride + w * w_stride
290 | |
   1) Example: all images in row-major order, one batch of feature maps after the other (with optional row padding)
292 | input_stride : c x h x h_stride |
293 | feature_stride : h x h_stride |
294 | h_stride : >= w ( h_stride = w if no padding) |
295 | w_stride : 1 |
296 | |
297 | |
   2) Example: all images in row-major order with feature maps interleaved
299 | input_stride : c x h x h_stride |
300 | feature_stride : 1 |
301 | h_stride : w x c |
302 | w_stride : c |
303 | |
   3) Example: all images in column-major order, one batch of feature maps after the other (with optional column padding)
305 | input_stride : c x w x w_stride |
306 | feature_stride : w x w_stride |
307 | h_stride : 1 |
308 | w_stride : >= h |
309 | |
310 | */ |
311 | |
/* Destroy an instance of a tensor descriptor */
313 | cudnnStatus_t CUDNNWINAPI |
314 | cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc); |
315 | |
316 | /* Fold/unfold transforms */ |
317 | typedef enum { |
318 | CUDNN_TRANSFORM_FOLD = 0U, |
319 | CUDNN_TRANSFORM_UNFOLD = 1U, |
320 | } cudnnFoldingDirection_t; |
321 | |
322 | /** Create a destination descriptor for cudnnTransformTensor */ |
323 | cudnnStatus_t CUDNNWINAPI |
324 | cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc, |
325 | const cudnnTensorDescriptor_t srcDesc, |
326 | cudnnTensorDescriptor_t destDesc, |
327 | size_t *destSizeInBytes); |
328 | |
329 | /** Create an empty tensor transform descriptor */ |
330 | cudnnStatus_t CUDNNWINAPI |
331 | cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc); |
332 | |
333 | /** Initialize a previously created tensor transform descriptor. */ |
334 | cudnnStatus_t CUDNNWINAPI |
335 | cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc, |
336 | const uint32_t nbDims, |
337 | const cudnnTensorFormat_t destFormat, |
338 | const int32_t padBeforeA[], |
339 | const int32_t padAfterA[], |
340 | const uint32_t foldA[], |
341 | const cudnnFoldingDirection_t direction); |
342 | |
343 | /** |
344 | * Retrieves the values stored in a previously initialized tensor transform |
345 | * descriptor. |
346 | */ |
347 | cudnnStatus_t CUDNNWINAPI |
348 | cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc, |
349 | uint32_t nbDimsRequested, |
350 | cudnnTensorFormat_t *destFormat, |
351 | int32_t padBeforeA[], |
352 | int32_t padAfterA[], |
353 | uint32_t foldA[], |
354 | cudnnFoldingDirection_t *direction); |
355 | |
356 | /** |
357 | * Destroys a previously created tensor transform descriptor. |
358 | */ |
359 | cudnnStatus_t CUDNNWINAPI |
360 | cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc); |
361 | |
362 | /* Tensor layout conversion helper (y = alpha * x + beta * y) */ |
363 | cudnnStatus_t CUDNNWINAPI |
364 | cudnnTransformTensor(cudnnHandle_t handle, |
365 | const void *alpha, |
366 | const cudnnTensorDescriptor_t xDesc, |
367 | const void *x, |
368 | const void *beta, |
369 | const cudnnTensorDescriptor_t yDesc, |
370 | void *y); |
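
/*
 * Example (illustrative sketch): converting an NCHW tensor to NHWC with
 * y = 1 * x + 0 * y. xDesc and yDesc describe the same n, c, h, w but
 * different layouts; handle, x, and y are assumed to be set up elsewhere.
 *
 *     float alpha = 1.0f, beta = 0.0f;
 *     cudnnTransformTensor(handle, &alpha, xDesc, x, &beta, yDesc, y);
 */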
371 | |
372 | cudnnStatus_t CUDNNWINAPI |
373 | cudnnTransformTensorEx(cudnnHandle_t handle, |
374 | const cudnnTensorTransformDescriptor_t transDesc, |
375 | const void *alpha, |
376 | const cudnnTensorDescriptor_t srcDesc, |
377 | const void *srcData, |
378 | const void *beta, |
379 | const cudnnTensorDescriptor_t destDesc, |
380 | void *destData); |
381 | |
382 | /* Tensor Bias addition : C = alpha * A + beta * C */ |
383 | cudnnStatus_t CUDNNWINAPI |
384 | cudnnAddTensor(cudnnHandle_t handle, |
385 | const void *alpha, |
386 | const cudnnTensorDescriptor_t aDesc, |
387 | const void *A, |
388 | const void *beta, |
389 | const cudnnTensorDescriptor_t cDesc, |
390 | void *C); |
391 | |
392 | /* |
393 | * CUDNN OpTensor op type |
394 | */ |
395 | typedef enum { |
396 | CUDNN_OP_TENSOR_ADD = 0, |
397 | CUDNN_OP_TENSOR_MUL = 1, |
398 | CUDNN_OP_TENSOR_MIN = 2, |
399 | CUDNN_OP_TENSOR_MAX = 3, |
400 | CUDNN_OP_TENSOR_SQRT = 4, |
401 | CUDNN_OP_TENSOR_NOT = 5, |
402 | } cudnnOpTensorOp_t; |
403 | |
404 | cudnnStatus_t CUDNNWINAPI |
405 | cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc); |
406 | |
407 | cudnnStatus_t CUDNNWINAPI |
408 | cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc, |
409 | cudnnOpTensorOp_t opTensorOp, |
410 | cudnnDataType_t opTensorCompType, |
411 | cudnnNanPropagation_t opTensorNanOpt); |
412 | |
413 | cudnnStatus_t CUDNNWINAPI |
414 | cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc, |
415 | cudnnOpTensorOp_t *opTensorOp, |
416 | cudnnDataType_t *opTensorCompType, |
417 | cudnnNanPropagation_t *opTensorNanOpt); |
418 | |
419 | cudnnStatus_t CUDNNWINAPI |
420 | cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc); |
421 | |
422 | /* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */ |
423 | /* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */ |
424 | cudnnStatus_t CUDNNWINAPI |
425 | cudnnOpTensor(cudnnHandle_t handle, |
426 | const cudnnOpTensorDescriptor_t opTensorDesc, |
427 | const void *alpha1, |
428 | const cudnnTensorDescriptor_t aDesc, |
429 | const void *A, |
430 | const void *alpha2, |
431 | const cudnnTensorDescriptor_t bDesc, |
432 | const void *B, |
433 | const void *beta, |
434 | const cudnnTensorDescriptor_t cDesc, |
435 | void *C); |
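
/*
 * Example (illustrative sketch): elementwise C = A + B in single precision.
 * The descriptors and device pointers A, B, C are assumed to be set up
 * elsewhere; error checking is elided.
 *
 *     cudnnOpTensorDescriptor_t opDesc;
 *     float alpha1 = 1.0f, alpha2 = 1.0f, beta = 0.0f;
 *     cudnnCreateOpTensorDescriptor(&opDesc);
 *     cudnnSetOpTensorDescriptor(opDesc, CUDNN_OP_TENSOR_ADD,
 *                                CUDNN_DATA_FLOAT, CUDNN_PROPAGATE_NAN);
 *     cudnnOpTensor(handle, opDesc, &alpha1, aDesc, A, &alpha2, bDesc, B,
 *                   &beta, cDesc, C);
 *     cudnnDestroyOpTensorDescriptor(opDesc);
 */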
436 | |
437 | /* |
438 | * CUDNN ReduceTensor op type |
439 | */ |
440 | typedef enum { |
441 | CUDNN_REDUCE_TENSOR_ADD = 0, |
442 | CUDNN_REDUCE_TENSOR_MUL = 1, |
443 | CUDNN_REDUCE_TENSOR_MIN = 2, |
444 | CUDNN_REDUCE_TENSOR_MAX = 3, |
445 | CUDNN_REDUCE_TENSOR_AMAX = 4, |
446 | CUDNN_REDUCE_TENSOR_AVG = 5, |
447 | CUDNN_REDUCE_TENSOR_NORM1 = 6, |
448 | CUDNN_REDUCE_TENSOR_NORM2 = 7, |
449 | CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8, |
450 | } cudnnReduceTensorOp_t; |
451 | |
452 | /* |
453 | * CUDNN ReduceTensor indices type |
454 | */ |
455 | typedef enum { |
456 | CUDNN_REDUCE_TENSOR_NO_INDICES = 0, |
457 | CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1, |
458 | } cudnnReduceTensorIndices_t; |
459 | |
460 | /* |
461 | * CUDNN tensor indices type size (all unsigned) |
 * Selecting the type is currently not supported; the default is 32-bit unsigned.
463 | */ |
464 | typedef enum { |
465 | CUDNN_32BIT_INDICES = 0, |
466 | CUDNN_64BIT_INDICES = 1, |
467 | CUDNN_16BIT_INDICES = 2, |
468 | CUDNN_8BIT_INDICES = 3, |
469 | } cudnnIndicesType_t; |
470 | |
471 | cudnnStatus_t CUDNNWINAPI |
472 | cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc); |
473 | |
474 | cudnnStatus_t CUDNNWINAPI |
475 | cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc, |
476 | cudnnReduceTensorOp_t reduceTensorOp, |
477 | cudnnDataType_t reduceTensorCompType, |
478 | cudnnNanPropagation_t reduceTensorNanOpt, |
479 | cudnnReduceTensorIndices_t reduceTensorIndices, |
480 | cudnnIndicesType_t reduceTensorIndicesType); |
481 | |
482 | cudnnStatus_t CUDNNWINAPI |
483 | cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc, |
484 | cudnnReduceTensorOp_t *reduceTensorOp, |
485 | cudnnDataType_t *reduceTensorCompType, |
486 | cudnnNanPropagation_t *reduceTensorNanOpt, |
487 | cudnnReduceTensorIndices_t *reduceTensorIndices, |
488 | cudnnIndicesType_t *reduceTensorIndicesType); |
489 | |
490 | cudnnStatus_t CUDNNWINAPI |
491 | cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc); |
492 | |
493 | /* Helper function to return the minimum size of the index space to be passed to the reduction given the input and |
494 | * output tensors */ |
495 | cudnnStatus_t CUDNNWINAPI |
496 | cudnnGetReductionIndicesSize(cudnnHandle_t handle, |
497 | const cudnnReduceTensorDescriptor_t reduceTensorDesc, |
498 | const cudnnTensorDescriptor_t aDesc, |
499 | const cudnnTensorDescriptor_t cDesc, |
500 | size_t *sizeInBytes); |
501 | |
502 | /* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output |
503 | * tensors */ |
504 | cudnnStatus_t CUDNNWINAPI |
505 | cudnnGetReductionWorkspaceSize(cudnnHandle_t handle, |
506 | const cudnnReduceTensorDescriptor_t reduceTensorDesc, |
507 | const cudnnTensorDescriptor_t aDesc, |
508 | const cudnnTensorDescriptor_t cDesc, |
509 | size_t *sizeInBytes); |
510 | |
511 | /* Tensor operation : C = reduce op( alpha * A ) + beta * C */ |
/* The NaN propagation enum applies only to the min and max reduce ops; the other reduce ops propagate NaN as usual. */
513 | /* The indices space is ignored for reduce ops other than min or max. */ |
514 | cudnnStatus_t CUDNNWINAPI |
515 | cudnnReduceTensor(cudnnHandle_t handle, |
516 | const cudnnReduceTensorDescriptor_t reduceTensorDesc, |
517 | void *indices, |
518 | size_t indicesSizeInBytes, |
519 | void *workspace, |
520 | size_t workspaceSizeInBytes, |
521 | const void *alpha, |
522 | const cudnnTensorDescriptor_t aDesc, |
523 | const void *A, |
524 | const void *beta, |
525 | const cudnnTensorDescriptor_t cDesc, |
526 | void *C); |
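
/*
 * Example (illustrative sketch): averaging an NxCxHxW tensor down to 1xCx1x1
 * (cDesc has the reduced dimensions set to 1). Indices are unused for
 * CUDNN_REDUCE_TENSOR_AVG; error checking is elided.
 *
 *     cudnnReduceTensorDescriptor_t rDesc;
 *     size_t wsBytes = 0;
 *     void *ws = NULL;
 *     float alpha = 1.0f, beta = 0.0f;
 *     cudnnCreateReduceTensorDescriptor(&rDesc);
 *     cudnnSetReduceTensorDescriptor(rDesc, CUDNN_REDUCE_TENSOR_AVG,
 *                                    CUDNN_DATA_FLOAT, CUDNN_PROPAGATE_NAN,
 *                                    CUDNN_REDUCE_TENSOR_NO_INDICES,
 *                                    CUDNN_32BIT_INDICES);
 *     cudnnGetReductionWorkspaceSize(handle, rDesc, aDesc, cDesc, &wsBytes);
 *     cudaMalloc(&ws, wsBytes);
 *     cudnnReduceTensor(handle, rDesc, NULL, 0, ws, wsBytes,
 *                       &alpha, aDesc, A, &beta, cDesc, C);
 *     cudaFree(ws);
 *     cudnnDestroyReduceTensorDescriptor(rDesc);
 */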
527 | |
528 | /* Set all values of a tensor to a given value : y[i] = value[0] */ |
529 | cudnnStatus_t CUDNNWINAPI |
530 | cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr); |
531 | |
532 | /* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */ |
533 | cudnnStatus_t CUDNNWINAPI |
534 | cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha); |
535 | |
/* Create an instance of a filter descriptor */
537 | cudnnStatus_t CUDNNWINAPI |
538 | cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc); |
539 | |
540 | cudnnStatus_t CUDNNWINAPI |
541 | cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc, |
542 | cudnnDataType_t dataType, /* image data type */ |
543 | cudnnTensorFormat_t format, |
544 | int k, /* number of output feature maps */ |
545 | int c, /* number of input feature maps */ |
546 | int h, /* height of each input filter */ |
547 | int w); /* width of each input filter */ |
548 | |
549 | cudnnStatus_t CUDNNWINAPI |
550 | cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc, |
551 | cudnnDataType_t *dataType, /* image data type */ |
552 | cudnnTensorFormat_t *format, |
553 | int *k, /* number of output feature maps */ |
554 | int *c, /* number of input feature maps */ |
555 | int *h, /* height of each input filter */ |
556 | int *w); /* width of each input filter */ |
557 | |
558 | cudnnStatus_t CUDNNWINAPI |
559 | cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc, |
560 | cudnnDataType_t dataType, /* image data type */ |
561 | cudnnTensorFormat_t format, |
562 | int nbDims, |
563 | const int filterDimA[]); |
564 | |
565 | cudnnStatus_t CUDNNWINAPI |
566 | cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc, |
567 | int nbDimsRequested, |
568 | cudnnDataType_t *dataType, /* image data type */ |
569 | cudnnTensorFormat_t *format, |
570 | int *nbDims, |
571 | int filterDimA[]); |
572 | cudnnStatus_t CUDNNWINAPI |
573 | cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size); |
574 | |
575 | cudnnStatus_t CUDNNWINAPI |
576 | cudnnTransformFilter(cudnnHandle_t handle, |
577 | const cudnnTensorTransformDescriptor_t transDesc, |
578 | const void *alpha, |
579 | const cudnnFilterDescriptor_t srcDesc, |
580 | const void *srcData, |
581 | const void *beta, |
582 | const cudnnFilterDescriptor_t destDesc, |
583 | void *destData); |
584 | |
585 | cudnnStatus_t CUDNNWINAPI |
586 | cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc); |
587 | |
588 | /* |
589 | * softmax algorithm |
590 | */ |
591 | typedef enum { |
592 | CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */ |
593 | CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */ |
594 | CUDNN_SOFTMAX_LOG = 2 |
595 | } cudnnSoftmaxAlgorithm_t; |
596 | |
597 | typedef enum { |
598 | CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */ |
599 | CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */ |
600 | } cudnnSoftmaxMode_t; |
601 | |
602 | /* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */ |
603 | |
604 | /* Function to perform forward softmax */ |
605 | cudnnStatus_t CUDNNWINAPI |
606 | cudnnSoftmaxForward(cudnnHandle_t handle, |
607 | cudnnSoftmaxAlgorithm_t algo, |
608 | cudnnSoftmaxMode_t mode, |
609 | const void *alpha, |
610 | const cudnnTensorDescriptor_t xDesc, |
611 | const void *x, |
612 | const void *beta, |
613 | const cudnnTensorDescriptor_t yDesc, |
614 | void *y); |
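
/*
 * Example (illustrative sketch): accurate softmax over the channel dimension,
 * y = softmax(x). xDesc and yDesc describe the same NxCxHxW shape.
 *
 *     float alpha = 1.0f, beta = 0.0f;
 *     cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE,
 *                         CUDNN_SOFTMAX_MODE_CHANNEL,
 *                         &alpha, xDesc, x, &beta, yDesc, y);
 */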
615 | |
616 | /* |
617 | * pooling mode |
618 | */ |
619 | typedef enum { |
620 | CUDNN_POOLING_MAX = 0, |
621 | CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */ |
622 | CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */ |
623 | CUDNN_POOLING_MAX_DETERMINISTIC = 3 |
624 | } cudnnPoolingMode_t; |
625 | |
626 | /* Create an instance of pooling descriptor */ |
627 | cudnnStatus_t CUDNNWINAPI |
628 | cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc); |
629 | |
630 | cudnnStatus_t CUDNNWINAPI |
631 | cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc, |
632 | cudnnPoolingMode_t mode, |
633 | cudnnNanPropagation_t maxpoolingNanOpt, |
634 | int windowHeight, |
635 | int windowWidth, |
636 | int verticalPadding, |
637 | int horizontalPadding, |
638 | int verticalStride, |
639 | int horizontalStride); |
640 | |
641 | cudnnStatus_t CUDNNWINAPI |
642 | cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc, |
643 | cudnnPoolingMode_t *mode, |
644 | cudnnNanPropagation_t *maxpoolingNanOpt, |
645 | int *windowHeight, |
646 | int *windowWidth, |
647 | int *verticalPadding, |
648 | int *horizontalPadding, |
649 | int *verticalStride, |
650 | int *horizontalStride); |
651 | |
652 | cudnnStatus_t CUDNNWINAPI |
653 | cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc, |
654 | const cudnnPoolingMode_t mode, |
655 | const cudnnNanPropagation_t maxpoolingNanOpt, |
656 | int nbDims, |
657 | const int windowDimA[], |
658 | const int paddingA[], |
659 | const int strideA[]); |
660 | |
661 | cudnnStatus_t CUDNNWINAPI |
662 | cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc, |
663 | int nbDimsRequested, |
664 | cudnnPoolingMode_t *mode, |
665 | cudnnNanPropagation_t *maxpoolingNanOpt, |
666 | int *nbDims, |
667 | int windowDimA[], |
668 | int paddingA[], |
669 | int strideA[]); |
670 | |
671 | cudnnStatus_t CUDNNWINAPI |
672 | cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, |
673 | const cudnnTensorDescriptor_t inputTensorDesc, |
674 | int nbDims, |
675 | int outputTensorDimA[]); |
676 | |
677 | cudnnStatus_t CUDNNWINAPI |
678 | cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, |
679 | const cudnnTensorDescriptor_t inputTensorDesc, |
680 | int *n, |
681 | int *c, |
682 | int *h, |
683 | int *w); |
684 | |
685 | /* Destroy an instance of pooling descriptor */ |
686 | cudnnStatus_t CUDNNWINAPI |
687 | cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc); |
688 | |
689 | /* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */ |
690 | |
691 | /* Function to perform forward pooling */ |
692 | cudnnStatus_t CUDNNWINAPI |
693 | cudnnPoolingForward(cudnnHandle_t handle, |
694 | const cudnnPoolingDescriptor_t poolingDesc, |
695 | const void *alpha, |
696 | const cudnnTensorDescriptor_t xDesc, |
697 | const void *x, |
698 | const void *beta, |
699 | const cudnnTensorDescriptor_t yDesc, |
700 | void *y); |
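
/*
 * Example (illustrative sketch): 2x2 max pooling with stride 2 and no padding.
 * The output descriptor is sized from the queried output dimensions; error
 * checking is elided.
 *
 *     cudnnPoolingDescriptor_t poolDesc;
 *     int n, c, h, w;
 *     float alpha = 1.0f, beta = 0.0f;
 *     cudnnCreatePoolingDescriptor(&poolDesc);
 *     cudnnSetPooling2dDescriptor(poolDesc, CUDNN_POOLING_MAX,
 *                                 CUDNN_NOT_PROPAGATE_NAN, 2, 2, 0, 0, 2, 2);
 *     cudnnGetPooling2dForwardOutputDim(poolDesc, xDesc, &n, &c, &h, &w);
 *     cudnnSetTensor4dDescriptor(yDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
 *                                n, c, h, w);
 *     cudnnPoolingForward(handle, poolDesc, &alpha, xDesc, x, &beta, yDesc, y);
 *     cudnnDestroyPoolingDescriptor(poolDesc);
 */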
701 | |
702 | /* |
703 | * activation mode |
704 | */ |
705 | typedef enum { |
706 | CUDNN_ACTIVATION_SIGMOID = 0, |
707 | CUDNN_ACTIVATION_RELU = 1, |
708 | CUDNN_ACTIVATION_TANH = 2, |
709 | CUDNN_ACTIVATION_CLIPPED_RELU = 3, |
710 | CUDNN_ACTIVATION_ELU = 4, |
711 | CUDNN_ACTIVATION_IDENTITY = 5, |
712 | CUDNN_ACTIVATION_SWISH = 6 |
713 | } cudnnActivationMode_t; |
714 | |
715 | /* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */ |
716 | cudnnStatus_t CUDNNWINAPI |
717 | cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc); |
718 | |
719 | cudnnStatus_t CUDNNWINAPI |
720 | cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc, |
721 | cudnnActivationMode_t mode, |
722 | cudnnNanPropagation_t reluNanOpt, |
723 | double coef); /* ceiling for clipped RELU, alpha for ELU */ |
724 | |
725 | cudnnStatus_t CUDNNWINAPI |
726 | cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, |
727 | cudnnActivationMode_t *mode, |
728 | cudnnNanPropagation_t *reluNanOpt, |
729 | double *coef); /* ceiling for clipped RELU, alpha for ELU */ |
730 | |
731 | cudnnStatus_t CUDNNWINAPI |
732 | cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta); |
733 | |
734 | cudnnStatus_t CUDNNWINAPI |
735 | cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta); |
736 | |
737 | cudnnStatus_t CUDNNWINAPI |
738 | cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc); |
739 | |
740 | /* Function to perform forward activation */ |
741 | cudnnStatus_t CUDNNWINAPI |
742 | cudnnActivationForward(cudnnHandle_t handle, |
743 | cudnnActivationDescriptor_t activationDesc, |
744 | const void *alpha, |
745 | const cudnnTensorDescriptor_t xDesc, |
746 | const void *x, |
747 | const void *beta, |
748 | const cudnnTensorDescriptor_t yDesc, |
749 | void *y); |
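
/*
 * Example (illustrative sketch): in-place ReLU, computed as
 * x = 1 * relu(x) + 0 * x by passing the same descriptor and pointer for
 * input and output.
 *
 *     cudnnActivationDescriptor_t actDesc;
 *     float alpha = 1.0f, beta = 0.0f;
 *     cudnnCreateActivationDescriptor(&actDesc);
 *     cudnnSetActivationDescriptor(actDesc, CUDNN_ACTIVATION_RELU,
 *                                  CUDNN_NOT_PROPAGATE_NAN, 0.0);
 *     cudnnActivationForward(handle, actDesc, &alpha, xDesc, x,
 *                            &beta, xDesc, x); // y aliases x
 *     cudnnDestroyActivationDescriptor(actDesc);
 */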
750 | |
751 | /* |
752 | * Create an instance of LRN (Local Response Normalization) descriptor |
 * Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults, following the Krizhevsky'12 ImageNet paper
754 | */ |
755 | cudnnStatus_t CUDNNWINAPI |
756 | cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc); |
757 | |
758 | #define CUDNN_LRN_MIN_N 1 /* minimum allowed lrnN */ |
759 | #define CUDNN_LRN_MAX_N 16 /* maximum allowed lrnN */ |
760 | #define CUDNN_LRN_MIN_K 1e-5 /* minimum allowed lrnK */ |
761 | #define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */ |
762 | |
763 | /* LRN layer mode */ |
764 | typedef enum { |
765 | CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */ |
766 | } cudnnLRNMode_t; |
767 | |
768 | /* |
769 | * Uses a window [center-lookBehind, center+lookAhead], where |
770 | * lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1. |
 * Values of double parameters are cast to the tensor data type.
772 | */ |
773 | cudnnStatus_t CUDNNWINAPI |
774 | cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK); |
775 | /* |
776 | * Retrieve the settings currently stored in an LRN layer descriptor |
777 | * Any of the provided pointers can be NULL (no corresponding value will be returned) |
778 | */ |
779 | cudnnStatus_t CUDNNWINAPI |
780 | cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK); |
781 | |
782 | /* Destroy an instance of LRN descriptor */ |
783 | cudnnStatus_t CUDNNWINAPI |
784 | cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc); |
785 | |
786 | /* LRN functions: output = alpha * normalize(x) + beta * old_y */ |
787 | |
788 | /* LRN cross-channel forward computation. Double parameters cast to tensor data type */ |
789 | cudnnStatus_t CUDNNWINAPI |
790 | cudnnLRNCrossChannelForward(cudnnHandle_t handle, |
791 | cudnnLRNDescriptor_t normDesc, |
792 | cudnnLRNMode_t lrnMode, |
793 | const void *alpha, |
794 | const cudnnTensorDescriptor_t xDesc, |
795 | const void *x, |
796 | const void *beta, |
797 | const cudnnTensorDescriptor_t yDesc, |
798 | void *y); |
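
/*
 * Example (illustrative sketch): cross-channel LRN with the Krizhevsky'12
 * defaults listed above. handle, xDesc/x, and yDesc/y are assumed set up.
 *
 *     cudnnLRNDescriptor_t lrnDesc;
 *     float alpha = 1.0f, beta = 0.0f;
 *     cudnnCreateLRNDescriptor(&lrnDesc);
 *     cudnnSetLRNDescriptor(lrnDesc, 5, 1e-4, 0.75, 2.0);
 *     cudnnLRNCrossChannelForward(handle, lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
 *                                 &alpha, xDesc, x, &beta, yDesc, y);
 *     cudnnDestroyLRNDescriptor(lrnDesc);
 */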
799 | |
800 | typedef enum { |
801 | CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0, |
802 | } cudnnDivNormMode_t; |
803 | |
804 | /* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */ |
805 | cudnnStatus_t CUDNNWINAPI |
806 | cudnnDivisiveNormalizationForward(cudnnHandle_t handle, |
807 | cudnnLRNDescriptor_t normDesc, |
808 | cudnnDivNormMode_t mode, |
809 | const void *alpha, |
810 | const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ |
811 | const void *x, |
812 | const void *means, /* if NULL, means are assumed to be zero */ |
813 | void *temp, |
814 | void *temp2, |
815 | const void *beta, |
816 | const cudnnTensorDescriptor_t yDesc, |
817 | void *y); |
818 | |
819 | typedef enum { |
820 | /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */ |
821 | CUDNN_BATCHNORM_PER_ACTIVATION = 0, |
822 | |
823 | /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */ |
824 | CUDNN_BATCHNORM_SPATIAL = 1, |
825 | |
826 | /* |
827 | * bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors). |
828 | * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values |
829 | */ |
830 | CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2, |
831 | } cudnnBatchNormMode_t; |
832 | |
833 | #define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */ |
834 | |
835 | /* |
 * Derives a tensor descriptor from a layer data descriptor for the Batch Normalization
 * scale, invVariance, bnBias, and bnScale tensors. Use this tensor descriptor for
 * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in the Batch Normalization forward and backward functions.
839 | */ |
840 | cudnnStatus_t CUDNNWINAPI |
841 | cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc, |
842 | const cudnnTensorDescriptor_t xDesc, |
843 | cudnnBatchNormMode_t mode); |
844 | |
845 | typedef enum { |
846 | CUDNN_BATCHNORM_OPS_BN = 0, /* do batch normalization only */ |
847 | CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, /* do batchNorm, then activation */ |
848 | CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */ |
849 | } cudnnBatchNormOps_t; |
850 | |
851 | /* |
852 | * Performs Batch Normalization during Inference: |
853 | * y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k] |
854 | * with bnScale, bnBias, runningMean, runningInvVariance tensors indexed |
 * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
 * (declared in cudnn_ops_train.h) for notes on function arguments.
857 | */ |
858 | cudnnStatus_t CUDNNWINAPI |
859 | cudnnBatchNormalizationForwardInference(cudnnHandle_t handle, |
860 | cudnnBatchNormMode_t mode, |
861 | const void *alpha, /* alpha[0] = result blend factor */ |
862 | const void *beta, /* beta[0] = dest layer blend factor */ |
863 | const cudnnTensorDescriptor_t xDesc, |
864 | const void *x, /* NxCxHxW */ |
865 | const cudnnTensorDescriptor_t yDesc, |
866 | void *y, /* NxCxHxW */ |
867 | const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, |
868 | const void *bnScale, |
869 | const void *bnBias, |
870 | const void *estimatedMean, |
871 | const void *estimatedVariance, |
872 | double epsilon); |
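
/*
 * Example (illustrative sketch): spatial batch-norm inference. bnDesc is
 * derived from xDesc (dims 1xCx1x1); bnScale, bnBias, estimatedMean, and
 * estimatedVariance are device pointers holding trained parameters
 * (assumptions for illustration).
 *
 *     cudnnTensorDescriptor_t bnDesc;
 *     float alpha = 1.0f, beta = 0.0f;
 *     cudnnCreateTensorDescriptor(&bnDesc);
 *     cudnnDeriveBNTensorDescriptor(bnDesc, xDesc, CUDNN_BATCHNORM_SPATIAL);
 *     cudnnBatchNormalizationForwardInference(handle, CUDNN_BATCHNORM_SPATIAL,
 *                                             &alpha, &beta, xDesc, x, yDesc, y,
 *                                             bnDesc, bnScale, bnBias,
 *                                             estimatedMean, estimatedVariance,
 *                                             1e-5);
 *     cudnnDestroyTensorDescriptor(bnDesc);
 */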
873 | |
874 | typedef enum { |
875 | /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */ |
876 | CUDNN_NORM_PER_ACTIVATION = 0, |
877 | |
878 | /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */ |
879 | CUDNN_NORM_PER_CHANNEL = 1, |
880 | } cudnnNormMode_t; |
881 | |
882 | typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t; |
883 | |
884 | /* |
 * Derives tensor descriptors from a layer data descriptor for the Normalization
 * scale, invVariance, bnBias, and bnScale tensors. Use these tensor descriptors for
 * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in the Normalization forward and backward functions.
888 | */ |
889 | cudnnStatus_t CUDNNWINAPI |
890 | cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc, |
891 | cudnnTensorDescriptor_t derivedNormMeanVarDesc, |
892 | const cudnnTensorDescriptor_t xDesc, |
893 | cudnnNormMode_t mode, |
                                int groupCnt); /* Placeholder for future work; must be set to 1 for now */
895 | |
896 | typedef enum { |
897 | CUDNN_NORM_OPS_NORM = 0, /* do normalization only */ |
898 | CUDNN_NORM_OPS_NORM_ACTIVATION = 1, /* do Norm, then activation */ |
899 | CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */ |
900 | } cudnnNormOps_t; |
901 | |
902 | /* |
903 | * Performs Normalization during Inference: |
904 | * y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k] |
905 | * with normScale, normBias, runningMean, runningInvVariance tensors indexed |
 * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
 * (declared in cudnn_ops_train.h) for notes on function arguments.
908 | */ |
909 | cudnnStatus_t CUDNNWINAPI |
910 | cudnnNormalizationForwardInference(cudnnHandle_t handle, |
911 | cudnnNormMode_t mode, |
912 | cudnnNormOps_t normOps, |
913 | cudnnNormAlgo_t algo, |
914 | const void *alpha, /* alpha[0] = result blend factor */ |
915 | const void *beta, /* beta[0] = dest layer blend factor */ |
916 | const cudnnTensorDescriptor_t xDesc, |
917 | const void *x, /* NxCxHxW */ |
918 | const cudnnTensorDescriptor_t normScaleBiasDesc, |
919 | const void *normScale, |
920 | const void *normBias, |
921 | const cudnnTensorDescriptor_t normMeanVarDesc, |
922 | const void *estimatedMean, |
923 | const void *estimatedVariance, |
924 | const cudnnTensorDescriptor_t zDesc, |
925 | const void *z, |
926 | cudnnActivationDescriptor_t activationDesc, |
927 | const cudnnTensorDescriptor_t yDesc, |
928 | void *y, /* NxCxHxW */ |
929 | double epsilon, |
                                   int groupCnt); /* Placeholder for future work; must be set to 1 for now */
931 | |
/* APIs for spatial transformer networks */
933 | typedef enum { |
934 | CUDNN_SAMPLER_BILINEAR = 0, |
935 | } cudnnSamplerType_t; |
936 | |
937 | cudnnStatus_t CUDNNWINAPI |
938 | cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc); |
939 | |
940 | cudnnStatus_t CUDNNWINAPI |
941 | cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc, |
942 | cudnnSamplerType_t samplerType, |
943 | cudnnDataType_t dataType, |
944 | const int nbDims, |
945 | const int dimA[]); |
946 | |
947 | cudnnStatus_t CUDNNWINAPI |
948 | cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc); |
949 | |
950 | cudnnStatus_t CUDNNWINAPI |
951 | cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle, |
952 | const cudnnSpatialTransformerDescriptor_t stDesc, |
953 | const void *theta, |
954 | void *grid); |
955 | |
956 | cudnnStatus_t CUDNNWINAPI |
957 | cudnnSpatialTfSamplerForward(cudnnHandle_t handle, |
958 | cudnnSpatialTransformerDescriptor_t stDesc, |
959 | const void *alpha, |
960 | const cudnnTensorDescriptor_t xDesc, |
961 | const void *x, |
962 | const void *grid, |
963 | const void *beta, |
964 | cudnnTensorDescriptor_t yDesc, |
965 | void *y); |
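
/*
 * Example (illustrative sketch): bilinear sampling with an affine grid.
 * theta holds n 2x3 affine matrices and grid has room for n*h*w*2
 * coordinates; both device pointers, and dimA = {n, c, h, w}, are
 * assumptions for illustration.
 *
 *     cudnnSpatialTransformerDescriptor_t stDesc;
 *     float alpha = 1.0f, beta = 0.0f;
 *     cudnnCreateSpatialTransformerDescriptor(&stDesc);
 *     cudnnSetSpatialTransformerNdDescriptor(stDesc, CUDNN_SAMPLER_BILINEAR,
 *                                            CUDNN_DATA_FLOAT, 4, dimA);
 *     cudnnSpatialTfGridGeneratorForward(handle, stDesc, theta, grid);
 *     cudnnSpatialTfSamplerForward(handle, stDesc, &alpha, xDesc, x, grid,
 *                                  &beta, yDesc, y);
 *     cudnnDestroySpatialTransformerDescriptor(stDesc);
 */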
966 | |
967 | typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t; |
968 | |
969 | cudnnStatus_t CUDNNWINAPI |
970 | cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc); |
971 | |
972 | cudnnStatus_t CUDNNWINAPI |
973 | cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc); |
974 | |
/* Helper function to determine the size of the states buffer to be passed to cudnnSetDropoutDescriptor */
976 | cudnnStatus_t CUDNNWINAPI |
977 | cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes); |
978 | |
/* Helper function to determine the size of the reserve space to be passed to dropout forward/backward calls */
980 | cudnnStatus_t CUDNNWINAPI |
981 | cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes); |
982 | |
983 | cudnnStatus_t CUDNNWINAPI |
984 | cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, |
985 | cudnnHandle_t handle, |
986 | float dropout, |
987 | void *states, |
988 | size_t stateSizeInBytes, |
989 | unsigned long long seed); |
990 | |
/* Restores a dropout descriptor to a previously saved state */
992 | cudnnStatus_t CUDNNWINAPI |
993 | cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, |
994 | cudnnHandle_t handle, |
995 | float dropout, |
996 | void *states, |
997 | size_t stateSizeInBytes, |
998 | unsigned long long seed); |
999 | |
1000 | cudnnStatus_t CUDNNWINAPI |
1001 | cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc, |
1002 | cudnnHandle_t handle, |
1003 | float *dropout, |
1004 | void **states, |
1005 | unsigned long long *seed); |
1006 | |
1007 | cudnnStatus_t CUDNNWINAPI |
1008 | cudnnDropoutForward(cudnnHandle_t handle, |
1009 | const cudnnDropoutDescriptor_t dropoutDesc, |
1010 | const cudnnTensorDescriptor_t xdesc, |
1011 | const void *x, |
1012 | const cudnnTensorDescriptor_t ydesc, |
1013 | void *y, |
1014 | void *reserveSpace, |
1015 | size_t reserveSpaceSizeInBytes); |
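
/*
 * Example (illustrative sketch): 50% dropout. The RNG state buffer and the
 * per-call reserve space are allocated from the queried sizes; error checking
 * and deallocation are elided.
 *
 *     cudnnDropoutDescriptor_t dropDesc;
 *     size_t stateBytes = 0, reserveBytes = 0;
 *     void *states = NULL, *reserve = NULL;
 *     cudnnCreateDropoutDescriptor(&dropDesc);
 *     cudnnDropoutGetStatesSize(handle, &stateBytes);
 *     cudaMalloc(&states, stateBytes);
 *     cudnnSetDropoutDescriptor(dropDesc, handle, 0.5f,
 *                               states, stateBytes, 1234ULL);
 *     cudnnDropoutGetReserveSpaceSize(xDesc, &reserveBytes);
 *     cudaMalloc(&reserve, reserveBytes);
 *     cudnnDropoutForward(handle, dropDesc, xDesc, x, yDesc, y,
 *                         reserve, reserveBytes);
 */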
1016 | |
1017 | /* TODO: remove */ |
1018 | |
1019 | typedef struct cudnnAlgorithmStruct *cudnnAlgorithmDescriptor_t; |
1020 | typedef struct cudnnAlgorithmPerformanceStruct *cudnnAlgorithmPerformance_t; |
1021 | |
1022 | /* TODO: move these enums out to the appropriate submodule */ |
1023 | typedef enum { |
1024 | CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0, |
1025 | CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1, |
1026 | CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2, |
1027 | CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3, |
1028 | CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4, |
1029 | CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5, |
1030 | CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6, |
1031 | CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7, |
1032 | CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8 |
1033 | } cudnnConvolutionFwdAlgo_t; |
1034 | |
1035 | typedef enum { |
1036 | CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */ |
1037 | CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1, |
1038 | CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2, |
1039 | CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */ |
1040 | CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */ |
1041 | CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5, |
1042 | CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6, |
1043 | CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7 |
1044 | } cudnnConvolutionBwdFilterAlgo_t; |
1045 | |
1046 | typedef enum { |
1047 | CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */ |
1048 | CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1, |
1049 | CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2, |
1050 | CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3, |
1051 | CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4, |
1052 | CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5, |
1053 | CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6 |
1054 | } cudnnConvolutionBwdDataAlgo_t; |
1055 | |
1056 | typedef enum { |
1057 | CUDNN_RNN_ALGO_STANDARD = 0, |
1058 | CUDNN_RNN_ALGO_PERSIST_STATIC = 1, |
1059 | CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2, |
1060 | CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3, |
1061 | CUDNN_RNN_ALGO_COUNT = 4, |
1062 | } cudnnRNNAlgo_t; |
1063 | |
1064 | typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t; |
1065 | |
1066 | /* TODO: remove */ |
1067 | typedef struct cudnnAlgorithmUnionStruct { |
1068 | union Algorithm { |
1069 | cudnnConvolutionFwdAlgo_t convFwdAlgo; |
1070 | cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo; |
1071 | cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo; |
1072 | cudnnRNNAlgo_t RNNAlgo; |
1073 | cudnnCTCLossAlgo_t CTCLossAlgo; |
1074 | } algo; |
1075 | } cudnnAlgorithm_t; |
1076 | |
1077 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1078 | cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc); |
1079 | |
1080 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1081 | cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm); |
1082 | |
1083 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1084 | cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm); |
1085 | |
1086 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1087 | cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest); |
1088 | |
1089 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1090 | cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc); |
1091 | |
1092 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1093 | cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate); |
1094 | |
1095 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1096 | cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf, |
1097 | cudnnAlgorithmDescriptor_t algoDesc, |
1098 | cudnnStatus_t status, |
1099 | float time, |
1100 | size_t memory); |
1101 | |
1102 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1103 | cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf, |
1104 | cudnnAlgorithmDescriptor_t *algoDesc, |
1105 | cudnnStatus_t *status, |
1106 | float *time, |
1107 | size_t *memory); |
1108 | |
1109 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1110 | cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy); |
1111 | |
1112 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1113 | cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes); |
1114 | |
1115 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1116 | cudnnSaveAlgorithm(cudnnHandle_t handle, |
1117 | cudnnAlgorithmDescriptor_t algoDesc, |
1118 | void *algoSpace, |
1119 | size_t algoSpaceSizeInBytes); |
1120 | |
1121 | CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI |
1122 | cudnnRestoreAlgorithm(cudnnHandle_t handle, |
1123 | void *algoSpace, |
1124 | size_t algoSpaceSizeInBytes, |
1125 | cudnnAlgorithmDescriptor_t algoDesc); |
1126 | |
1127 | typedef enum { |
1128 | CUDNN_SEV_FATAL = 0, |
1129 | CUDNN_SEV_ERROR = 1, |
1130 | CUDNN_SEV_WARNING = 2, |
1131 | CUDNN_SEV_INFO = 3, |
1132 | } cudnnSeverity_t; |
1133 | |
1134 | /* Message masks to be used with cudnnSetCallback() */ |
1135 | #define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR) |
1136 | #define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING) |
1137 | #define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO) |
1138 | |
/* Struct containing useful information about each API call */
1140 | typedef struct cudnnDebugStruct { |
1141 | unsigned cudnn_version; |
1142 | cudnnStatus_t cudnnStatus; |
1143 | unsigned time_sec; /* epoch time in seconds */ |
1144 | unsigned time_usec; /* microseconds part of epoch time */ |
1145 | unsigned time_delta; /* time since start in seconds */ |
1146 | cudnnHandle_t handle; /* cudnn handle */ |
1147 | cudaStream_t stream; /* cuda stream ID */ |
1148 | unsigned long long pid; /* process ID */ |
1149 | unsigned long long tid; /* thread ID */ |
1150 | int cudaDeviceId; /* CUDA device ID */ |
1151 | int reserved[15]; /* reserved for future use */ |
1152 | } cudnnDebug_t; |
1153 | |
1154 | typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg); |
1155 | |
1156 | cudnnStatus_t CUDNNWINAPI |
1157 | cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr); |
1158 | |
1159 | cudnnStatus_t CUDNNWINAPI |
1160 | cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr); |
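
/*
 * Example (illustrative sketch): installing a callback that logs warnings and
 * errors to stderr. The function name logCallback is an assumption.
 *
 *     static void logCallback(cudnnSeverity_t sev, void *udata,
 *                             const cudnnDebug_t *dbg, const char *msg) {
 *         fprintf(stderr, "[cuDNN sev=%d] %s\n", (int)sev, msg);
 *     }
 *
 *     cudnnSetCallback(CUDNN_SEV_ERROR_EN | CUDNN_SEV_WARNING_EN,
 *                      NULL, logCallback);
 */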
1161 | |
1162 | /* |
1163 | * \brief Cross-library version checker. |
 * This function is implemented differently in each sub-library. Each sub-library
 * checks whether its own version matches that of its dependencies.
1166 | * \returns CUDNN_STATUS_SUCCESS if the version check passes, |
1167 | * CUDNN_STATUS_VERSION_MISMATCH if the versions are inconsistent. |
1168 | */ |
1169 | cudnnStatus_t CUDNNWINAPI |
1170 | cudnnOpsInferVersionCheck(void); |
1171 | |
1172 | #if defined(__cplusplus) |
1173 | } |
1174 | #endif |
1175 | |
1176 | #endif /* CUDNN_OPS_INFER_H_ */ |
1177 | |