#pragma once

#include <torch/csrc/autograd/variable.h>

namespace torch {
namespace autograd {

/// Computes the sum of gradients of given tensors with respect to graph
/// leaves.
///
/// The graph is differentiated using the chain rule. If any of ``tensors``
/// are non-scalar (i.e. their data has more than one element) and require
/// gradient, then the Jacobian-vector product will be computed; in that case
/// the function additionally requires specifying `grad_tensors`. It should be
/// a sequence of matching length that contains the "vector" in the
/// Jacobian-vector product, usually the gradient of the differentiated
/// function w.r.t. the corresponding tensors (`torch::Tensor()` is an
/// acceptable value for all tensors that don't need gradient tensors).
///
/// This function accumulates gradients in the leaves; you might need to zero
/// them before calling it.
///
/// \param tensors Tensors of which the derivative will be computed.
/// \param grad_tensors The "vector" in the Jacobian-vector product, usually
///     gradients w.r.t. each element of corresponding tensors.
///     `torch::Tensor()` values can be specified for scalar Tensors or ones
///     that don't require grad. If a `torch::Tensor()` value would be
///     acceptable for all grad_tensors, then this argument is optional.
/// \param retain_graph If `false`, the graph used to compute the grad will be
///     freed. Note that in nearly all cases setting this option to `true` is
///     not needed and often can be worked around in a much more efficient
///     way. Defaults to the value of `create_graph`.
/// \param create_graph If `true`, the graph of the derivative will be
///     constructed, allowing higher order derivative products to be computed.
///     Defaults to `false`.
/// \param inputs Inputs w.r.t. which the gradient will be accumulated into
///     `at::Tensor::grad`. All other Tensors will be ignored. If not
///     provided, the gradient is accumulated into all the leaf Tensors that
///     were used to compute the param `tensors`.
// When inputs are provided and a given input is not a leaf,
// the current implementation will call its grad_fn (even though it is not
// strictly needed to get these gradients). It is an implementation detail
// on which the user should not rely. See
// https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for
// more details.
TORCH_API void backward(
    const variable_list& tensors,
    const variable_list& grad_tensors = {},
    c10::optional<bool> retain_graph = c10::nullopt,
    bool create_graph = false,
    const variable_list& inputs = {});
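
// A minimal usage sketch (illustrative only, not part of the original
// documentation; the tensor names, shapes, and values below are assumptions):
//
//   auto x = torch::randn({2, 2}, torch::requires_grad());
//   auto y = x * x;
//   // `y` is non-scalar, so a matching "vector" must be passed as
//   // `grad_tensors`.
//   torch::autograd::backward({y}, {torch::ones_like(y)});
//   // The gradient 2 * x has now been accumulated into x.grad().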

/// Computes and returns the sum of gradients of outputs with respect to the
/// inputs.
///
/// ``grad_outputs`` should be a sequence of length matching ``outputs``,
/// containing the "vector" in the Jacobian-vector product, usually the
/// pre-computed gradients w.r.t. each of the outputs. If an output doesn't
/// require_grad, then the gradient can be ``torch::Tensor()``.
///
/// \param outputs Outputs of the differentiated function.
/// \param inputs Inputs w.r.t. which the gradient will be returned (and not
///     accumulated into ``at::Tensor::grad``).
/// \param grad_outputs The "vector" in the Jacobian-vector product, usually
///     gradients w.r.t. each output. `torch::Tensor()` values can be
///     specified for scalar Tensors or ones that don't require grad. If a
///     `torch::Tensor()` value would be acceptable for all grad_outputs, then
///     this argument is optional. Default: `{}`.
/// \param retain_graph If ``false``, the graph used to compute the grad
///     will be freed. Note that in nearly all cases setting this option to
///     ``true`` is not needed and often can be worked around in a much more
///     efficient way. Defaults to the value of ``create_graph``.
/// \param create_graph If ``true``, the graph of the derivative will be
///     constructed, allowing higher order derivative products to be computed.
///     Default: ``false``.
/// \param allow_unused If ``false``, specifying inputs that were not
///     used when computing outputs (and therefore their grad is always zero)
///     is an error. Defaults to ``false``.
TORCH_API variable_list grad(
    const variable_list& outputs,
    const variable_list& inputs,
    const variable_list& grad_outputs = {},
    c10::optional<bool> retain_graph = c10::nullopt,
    bool create_graph = false,
    bool allow_unused = false);
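
// A minimal usage sketch (illustrative only; the tensor names and shapes are
// assumptions, not part of the original documentation):
//
//   auto x = torch::randn({3}, torch::requires_grad());
//   auto y = (x * x).sum();
//   // `y` is a scalar, so `grad_outputs` can be left empty.
//   auto grads = torch::autograd::grad({y}, {x});
//   // grads[0] equals 2 * x; x.grad() is not modified.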

namespace forward_ad {

/// Creates a new dual level and returns its index. This level index should then
/// be used to call into the other functions below. This API supports entering a
/// new level before the previous one is exited. We call them nested forward AD
/// levels. These can be used to compute higher order derivatives.
TORCH_API uint64_t enter_dual_level();

/// Exits the given level. This will clear up all the gradients from this level
/// and all dual Tensors that had gradients for this level will become regular
/// Tensors again. This function can only be used to exit the innermost nesting
/// level and so exiting must happen in reverse order compared to the entering
/// that was done with the function above.
TORCH_API void exit_dual_level(uint64_t level);
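
// A minimal usage sketch (illustrative only; it shows just the enter/exit
// pairing of nested levels, and the variable names are assumptions):
//
//   auto outer = torch::autograd::forward_ad::enter_dual_level();
//   auto inner = torch::autograd::forward_ad::enter_dual_level();  // nested
//   // ... create dual Tensors and run forward-mode computations here ...
//   torch::autograd::forward_ad::exit_dual_level(inner);  // innermost first
//   torch::autograd::forward_ad::exit_dual_level(outer);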

} // namespace forward_ad
} // namespace autograd
} // namespace torch