1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #include "tensorflow/core/platform/retrying_utils.h" |
17 | |
18 | #include "tensorflow/core/lib/random/random.h" |
19 | #include "tensorflow/core/platform/env.h" |
20 | #include "tensorflow/core/platform/errors.h" |
21 | #include "tensorflow/core/platform/file_system.h" |
22 | |
23 | namespace tensorflow { |
24 | |
25 | namespace { |
26 | |
27 | bool IsRetriable(error::Code code) { |
28 | switch (code) { |
29 | case error::UNAVAILABLE: |
30 | case error::DEADLINE_EXCEEDED: |
31 | case error::UNKNOWN: |
32 | return true; |
33 | default: |
34 | // OK also falls here. |
35 | return false; |
36 | } |
37 | } |
38 | |
39 | } // namespace |
40 | |
41 | Status RetryingUtils::CallWithRetries(const std::function<Status()>& f, |
42 | const RetryConfig& config) { |
43 | return CallWithRetries( |
44 | f, |
45 | [](int64_t micros) { |
46 | return Env::Default()->SleepForMicroseconds(micros); |
47 | }, |
48 | config); |
49 | } |
50 | |
51 | Status RetryingUtils::CallWithRetries( |
52 | const std::function<Status()>& f, |
53 | const std::function<void(int64_t)>& sleep_usec, const RetryConfig& config) { |
54 | int retries = 0; |
55 | while (true) { |
56 | auto status = f(); |
57 | if (!IsRetriable(status.code())) { |
58 | return status; |
59 | } |
60 | if (retries >= config.max_retries) { |
61 | // Return AbortedError, so that it doesn't get retried again somewhere |
62 | // at a higher level. |
63 | return Status( |
64 | error::ABORTED, |
65 | strings::StrCat("All " , config.max_retries, |
66 | " retry attempts failed. The last failure: " , |
67 | status.error_message())); |
68 | } |
69 | int64_t delay_micros = 0; |
70 | if (config.init_delay_time_us > 0) { |
71 | const int64_t random_micros = random::New64() % 1000000; |
72 | delay_micros = std::min(config.init_delay_time_us << retries, |
73 | config.max_delay_time_us) + |
74 | random_micros; |
75 | } |
76 | VLOG(1) << "The operation failed and will be automatically retried in " |
77 | << (delay_micros / 1000000.0) << " seconds (attempt " |
78 | << (retries + 1) << " out of " << config.max_retries |
79 | << "), caused by: " << status.ToString(); |
80 | sleep_usec(delay_micros); |
81 | retries++; |
82 | } |
83 | } |
84 | |
85 | Status RetryingUtils::DeleteWithRetries( |
86 | const std::function<Status()>& delete_func, const RetryConfig& config) { |
87 | bool is_retried = false; |
88 | return RetryingUtils::CallWithRetries( |
89 | [delete_func, &is_retried]() { |
90 | const Status status = delete_func(); |
91 | if (is_retried && status.code() == error::NOT_FOUND) { |
92 | return OkStatus(); |
93 | } |
94 | is_retried = true; |
95 | return status; |
96 | }, |
97 | config); |
98 | } |
99 | |
100 | } // namespace tensorflow |
101 | |