1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_PLATFORM_RETRYING_UTILS_H_ |
17 | #define TENSORFLOW_CORE_PLATFORM_RETRYING_UTILS_H_ |
18 | |
19 | #include <functional> |
20 | |
21 | #include "tensorflow/core/platform/status.h" |
22 | |
23 | namespace tensorflow { |
24 | |
// Backoff parameters for a retried call (taken by RetryingUtils).
//
// Default time before reporting failure: ~100 seconds.
struct RetryConfig {
  // Builds a configuration from the initial backoff, the backoff cap (both in
  // microseconds) and the retry budget. Every argument is optional.
  //
  // The constructor is intentionally left non-explicit so that existing
  // callers relying on implicit or braced construction keep compiling.
  RetryConfig(int64_t init_delay_time_us = 100 * 1000,
              int64_t max_delay_time_us = 32 * 1000 * 1000,
              int max_retries = 10)
      // Initializers are listed in member declaration order; inside each
      // initializer expression the name refers to the constructor parameter.
      : max_retries(max_retries),
        init_delay_time_us(init_delay_time_us),
        max_delay_time_us(max_delay_time_us) {}

  // In case of failure, every call will be retried max_retries times.
  int max_retries;

  // Initial backoff time in microseconds.
  int64_t init_delay_time_us;

  // Maximum backoff time in microseconds.
  int64_t max_delay_time_us;
};
44 | |
45 | class RetryingUtils { |
46 | public: |
47 | /// \brief Retries the function in case of failure with exponential backoff. |
48 | /// |
49 | /// The provided callback is retried with an exponential backoff until it |
50 | /// returns OK or a non-retriable error status. |
51 | /// If initial_delay_microseconds is zero, no delays will be made between |
52 | /// retries. |
53 | /// If all retries failed, returns the last error status. |
54 | static Status CallWithRetries(const std::function<Status()>& f, |
55 | const RetryConfig& config); |
56 | |
57 | /// sleep_usec is a function that sleeps for the given number of microseconds. |
58 | static Status CallWithRetries(const std::function<Status()>& f, |
59 | const std::function<void(int64_t)>& sleep_usec, |
60 | const RetryConfig& config); |
61 | /// \brief A retrying wrapper for a function that deletes a resource. |
62 | /// |
63 | /// The function takes care of the scenario when a delete operation |
64 | /// returns a failure but succeeds under the hood: if a retry returns |
65 | /// NOT_FOUND, the whole operation is considered a success. |
66 | static Status DeleteWithRetries(const std::function<Status()>& delete_func, |
67 | const RetryConfig& config); |
68 | }; |
69 | |
70 | } // namespace tensorflow |
71 | |
72 | #endif // TENSORFLOW_CORE_PLATFORM_RETRYING_UTILS_H_ |
73 | |