1 | #pragma once |
2 | |
3 | #include <torch/csrc/autograd/variable.h> |
4 | |
5 | namespace torch { |
6 | namespace autograd { |
7 | |
8 | /// Computes the sum of gradients of given tensors with respect to graph leaves. |
9 | /// |
/// The graph is differentiated using the chain rule. If any of ``tensors``
/// are non-scalar (i.e. their data has more than one element) and require
/// gradient, then the Jacobian-vector product is computed. In this case the
/// function additionally requires specifying ``grad_tensors``: a sequence of
/// matching length that contains the "vector" in the Jacobian-vector product,
/// usually the gradient of the differentiated function w.r.t. the
/// corresponding tensors (``torch::Tensor()`` is an acceptable value for all
/// tensors that don't require gradient tensors).
19 | /// |
20 | /// This function accumulates gradients in the leaves - you might need to zero |
21 | /// them before calling it. |
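///
/// A minimal usage sketch (the tensor shapes and values are illustrative):
/// \code
/// auto x = torch::ones({2, 2}, torch::requires_grad());
/// auto y = x * x + 2;
/// // `y` is non-scalar, so the "vector" for the Jacobian-vector product
/// // must be supplied via `grad_tensors`.
/// torch::autograd::backward({y}, {torch::ones_like(y)});
/// // x.grad() now holds d(y.sum())/dx, i.e. 2 * x.
/// \endcode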
22 | /// |
23 | /// \param tensors Tensors of which the derivative will be computed. |
/// \param grad_tensors The "vector" in the Jacobian-vector product, usually
///     gradients w.r.t. each element of corresponding tensors.
///     `torch::Tensor()` values can be specified for scalar Tensors or ones
///     that don't require grad. If a `torch::Tensor()` value would be
///     acceptable for all grad_tensors, then this argument is optional.
/// \param retain_graph If `false`, the graph used to compute the grad will be
///     freed. Note that in nearly all cases setting this option to `true` is
///     not needed and often can be worked around in a much more efficient
///     way. Defaults to the value of `create_graph`.
/// \param create_graph If `true`, the graph of the derivative will be
///     constructed, allowing higher order derivative products to be
///     computed. Defaults to `false`.
/// \param inputs Inputs w.r.t. which the gradient will be accumulated into
///     `at::Tensor::grad`. All other Tensors will be ignored. If not
///     provided, the gradient is accumulated into all the leaf Tensors that
///     were used to compute ``tensors``.
42 | // When inputs are provided and a given input is not a leaf, |
43 | // the current implementation will call its grad_fn (even though it is not |
// strictly needed to get these gradients). It is an implementation detail
45 | // on which the user should not rely. See |
46 | // https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for |
47 | // more details. |
48 | TORCH_API void backward( |
49 | const variable_list& tensors, |
50 | const variable_list& grad_tensors = {}, |
51 | c10::optional<bool> retain_graph = c10::nullopt, |
52 | bool create_graph = false, |
53 | const variable_list& inputs = {}); |
54 | |
55 | /// Computes and returns the sum of gradients of outputs with respect to the |
56 | /// inputs. |
57 | /// |
/// ``grad_outputs`` should be a sequence of length matching ``outputs``,
/// containing the "vector" in the Jacobian-vector product, usually the
/// pre-computed gradients w.r.t. each of the outputs. If an output doesn't
/// require_grad, then the gradient can be ``torch::Tensor()``.
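///
/// A minimal usage sketch (the tensor shapes and values are illustrative):
/// \code
/// auto x = torch::randn({3}, torch::requires_grad());
/// auto y = x * 2;
/// auto grads = torch::autograd::grad(
///     {y}, {x}, /*grad_outputs=*/{torch::ones_like(y)});
/// // grads[0] holds dy/dx (a tensor of 2s); x.grad() is not modified.
/// \endcode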
62 | /// |
/// \param outputs Outputs of the differentiated function.
64 | /// \param inputs Inputs w.r.t. which the gradient will be |
65 | /// returned (and not accumulated into ``at::Tensor::grad``). |
/// \param grad_outputs The "vector" in the Jacobian-vector product.
///     Usually gradients w.r.t. each output. `torch::Tensor()` values can be
///     specified for scalar Tensors or ones that don't require grad. If a
///     `torch::Tensor()` value would be acceptable for all grad_outputs,
///     then this argument is optional. Default: `{}`.
71 | /// \param retain_graph If ``false``, the graph used to compute the grad |
72 | /// will be freed. Note that in nearly all cases setting this option to |
73 | /// ``true`` is not needed and often can be worked around in a much more |
74 | /// efficient way. Defaults to the value of ``create_graph``. |
/// \param create_graph If ``true``, the graph of the derivative will
///     be constructed, allowing higher order derivative products to be
///     computed. Default: ``false``.
78 | /// \param allow_unused If ``false``, specifying inputs that were not |
79 | /// used when computing outputs (and therefore their grad is always zero) |
80 | /// is an error. Defaults to ``false``. |
81 | TORCH_API variable_list grad( |
82 | const variable_list& outputs, |
83 | const variable_list& inputs, |
84 | const variable_list& grad_outputs = {}, |
85 | c10::optional<bool> retain_graph = c10::nullopt, |
86 | bool create_graph = false, |
87 | bool allow_unused = false); |
88 | |
89 | namespace forward_ad { |
90 | |
91 | /// Creates a new dual level and returns its index. This level index should then |
92 | /// be used to call into the other functions below. This API supports entering a |
93 | /// new level before the previous one is exited. We call them nested forward AD |
94 | /// levels. These can be used to compute higher order derivatives. |
95 | TORCH_API uint64_t enter_dual_level(); |
96 | |
/// Exits the given level. This will clear all the gradients from this level
98 | /// and all dual Tensors that had gradients for this level will become regular |
99 | /// Tensors again. This function can only be used to exit the innermost nesting |
100 | /// level and so exiting must happen in reverse order compared to the entering |
101 | /// that was done with the function above. |
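///
/// A minimal sketch of the required pairing (levels must be exited
/// innermost-first):
/// \code
/// auto outer = torch::autograd::forward_ad::enter_dual_level();
/// auto inner = torch::autograd::forward_ad::enter_dual_level();
/// // ... forward-mode computation using the inner level ...
/// torch::autograd::forward_ad::exit_dual_level(inner);
/// torch::autograd::forward_ad::exit_dual_level(outer);
/// \endcode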
102 | TORCH_API void exit_dual_level(uint64_t level); |
103 | |
104 | } // namespace forward_ad |
105 | } // namespace autograd |
106 | } // namespace torch |
107 | |