1 | /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | #include "tensorflow/core/summary/summary_converter.h" |
16 | |
17 | #include "tensorflow/core/framework/register_types.h" |
18 | #include "tensorflow/core/framework/summary.pb.h" |
19 | #include "tensorflow/core/framework/types.h" |
20 | #include "tensorflow/core/framework/types.pb.h" |
21 | #include "tensorflow/core/lib/histogram/histogram.h" |
22 | #include "tensorflow/core/lib/io/path.h" |
23 | #include "tensorflow/core/lib/png/png_io.h" |
24 | #include "tensorflow/core/lib/wav/wav_io.h" |
25 | |
26 | namespace tensorflow { |
27 | namespace { |
28 | |
29 | template <typename T> |
30 | Status TensorValueAt(Tensor t, int64_t i, T* out) { |
31 | #define CASE(I) \ |
32 | case DataTypeToEnum<I>::value: \ |
33 | *out = static_cast<T>(t.flat<I>()(i)); \ |
34 | break; |
35 | #define COMPLEX_CASE(I) \ |
36 | case DataTypeToEnum<I>::value: \ |
37 | *out = static_cast<T>(t.flat<I>()(i).real()); \ |
38 | break; |
39 | // clang-format off |
40 | switch (t.dtype()) { |
41 | TF_CALL_bool(CASE) |
42 | TF_CALL_half(CASE) |
43 | TF_CALL_float(CASE) |
44 | TF_CALL_double(CASE) |
45 | TF_CALL_int8(CASE) |
46 | TF_CALL_int16(CASE) |
47 | TF_CALL_int32(CASE) |
48 | TF_CALL_int64(CASE) |
49 | TF_CALL_uint8(CASE) |
50 | TF_CALL_uint16(CASE) |
51 | TF_CALL_uint32(CASE) |
52 | TF_CALL_uint64(CASE) |
53 | TF_CALL_complex64(COMPLEX_CASE) |
54 | TF_CALL_complex128(COMPLEX_CASE) |
55 | default: |
56 | return errors::Unimplemented("SummaryFileWriter " , |
57 | DataTypeString(t.dtype()), |
58 | " not supported." ); |
59 | } |
60 | // clang-format on |
61 | return OkStatus(); |
62 | #undef CASE |
63 | #undef COMPLEX_CASE |
64 | } |
65 | |
66 | typedef Eigen::Tensor<uint8, 2, Eigen::RowMajor> Uint8Image; |
67 | |
68 | // Add the sequence of images specified by ith_image to the summary. |
69 | // |
70 | // Factoring this loop out into a helper function lets ith_image behave |
71 | // differently in the float and uint8 cases: the float case needs a temporary |
72 | // buffer which can be shared across calls to ith_image, but the uint8 case |
73 | // does not. |
74 | Status AddImages(const string& tag, int max_images, int batch_size, int w, |
75 | int h, int depth, |
76 | const std::function<Uint8Image(int)>& ith_image, Summary* s) { |
77 | const int N = std::min<int>(max_images, batch_size); |
78 | for (int i = 0; i < N; ++i) { |
79 | Summary::Value* v = s->add_value(); |
80 | // The tag depends on the number of requested images (not the number |
81 | // produced.) |
82 | // |
83 | // Note that later on avisu uses "/" to figure out a consistent naming |
84 | // convention for display, so we append "/image" to guarantee that the |
85 | // image(s) won't be displayed in the global scope with no name. |
86 | if (max_images > 1) { |
87 | v->set_tag(strings::StrCat(tag, "/image/" , i)); |
88 | } else { |
89 | v->set_tag(strings::StrCat(tag, "/image" )); |
90 | } |
91 | |
92 | const auto image = ith_image(i); |
93 | Summary::Image* si = v->mutable_image(); |
94 | si->set_height(h); |
95 | si->set_width(w); |
96 | si->set_colorspace(depth); |
97 | const int channel_bits = 8; |
98 | const int compression = -1; // Use zlib default |
99 | if (!png::WriteImageToBuffer(image.data(), w, h, w * depth, depth, |
100 | channel_bits, compression, |
101 | si->mutable_encoded_image_string(), nullptr)) { |
102 | return errors::Internal("PNG encoding failed" ); |
103 | } |
104 | } |
105 | return OkStatus(); |
106 | } |
107 | |
108 | template <class T> |
109 | void NormalizeFloatImage(int hw, int depth, |
110 | typename TTypes<T>::ConstMatrix values, |
111 | typename TTypes<uint8>::ConstVec bad_color, |
112 | Uint8Image* image) { |
113 | if (!image->size()) return; // Nothing to do for empty images |
114 | |
115 | // Rescale the image to uint8 range. |
116 | // |
117 | // We are trying to generate an RGB image from a float/half tensor. We do |
118 | // not have any info about the expected range of values in the tensor |
119 | // but the generated image needs to have all RGB values within [0, 255]. |
120 | // |
121 | // We use two different algorithms to generate these values. If the |
122 | // tensor has only positive values we scale them all by 255/max(values). |
123 | // If the tensor has both negative and positive values we scale them by |
124 | // the max of their absolute values and center them around 127. |
125 | // |
126 | // This works for most cases, but does not respect the relative dynamic |
127 | // range across different instances of the tensor. |
128 | |
129 | // Compute min and max ignoring nonfinite pixels |
130 | float image_min = std::numeric_limits<float>::infinity(); |
131 | float image_max = -image_min; |
132 | for (int i = 0; i < hw; i++) { |
133 | bool finite = true; |
134 | for (int j = 0; j < depth; j++) { |
135 | if (!Eigen::numext::isfinite(values(i, j))) { |
136 | finite = false; |
137 | break; |
138 | } |
139 | } |
140 | if (finite) { |
141 | for (int j = 0; j < depth; j++) { |
142 | float value(values(i, j)); |
143 | image_min = std::min(image_min, value); |
144 | image_max = std::max(image_max, value); |
145 | } |
146 | } |
147 | } |
148 | |
149 | // Pick an affine transform into uint8 |
150 | const float kZeroThreshold = 1e-6; |
151 | T scale, offset; |
152 | if (image_min < 0) { |
153 | const float max_val = std::max(std::abs(image_min), std::abs(image_max)); |
154 | scale = T(max_val < kZeroThreshold ? 0.0f : 127.0f / max_val); |
155 | offset = T(128.0f); |
156 | } else { |
157 | scale = T(image_max < kZeroThreshold ? 0.0f : 255.0f / image_max); |
158 | offset = T(0.0f); |
159 | } |
160 | |
161 | // Transform image, turning nonfinite values to bad_color |
162 | for (int i = 0; i < hw; i++) { |
163 | bool finite = true; |
164 | for (int j = 0; j < depth; j++) { |
165 | if (!Eigen::numext::isfinite(values(i, j))) { |
166 | finite = false; |
167 | break; |
168 | } |
169 | } |
170 | if (finite) { |
171 | image->chip<0>(i) = |
172 | (values.template chip<0>(i) * scale + offset).template cast<uint8>(); |
173 | } else { |
174 | image->chip<0>(i) = bad_color; |
175 | } |
176 | } |
177 | } |
178 | |
179 | template <class T> |
180 | Status NormalizeAndAddImages(const Tensor& tensor, int max_images, int h, int w, |
181 | int hw, int depth, int batch_size, |
182 | const string& base_tag, Tensor bad_color_tensor, |
183 | Summary* s) { |
184 | // For float and half images, nans and infs are replaced with bad_color. |
185 | if (bad_color_tensor.dim_size(0) < depth) { |
186 | return errors::InvalidArgument( |
187 | "expected depth <= bad_color.size, got depth = " , depth, |
188 | ", bad_color.size = " , bad_color_tensor.dim_size(0)); |
189 | } |
190 | auto bad_color_full = bad_color_tensor.vec<uint8>(); |
191 | typename TTypes<uint8>::ConstVec bad_color(bad_color_full.data(), depth); |
192 | |
193 | // Float images must be scaled and translated. |
194 | Uint8Image image(hw, depth); |
195 | auto ith_image = [&tensor, &image, bad_color, batch_size, hw, depth](int i) { |
196 | auto tensor_eigen = tensor.template shaped<T, 3>({batch_size, hw, depth}); |
197 | typename TTypes<T>::ConstMatrix values( |
198 | &tensor_eigen(i, 0, 0), Eigen::DSizes<Eigen::DenseIndex, 2>(hw, depth)); |
199 | NormalizeFloatImage<T>(hw, depth, values, bad_color, &image); |
200 | return image; |
201 | }; |
202 | return AddImages(base_tag, max_images, batch_size, w, h, depth, ith_image, s); |
203 | } |
204 | |
205 | } // namespace |
206 | |
207 | Status AddTensorAsScalarToSummary(const Tensor& t, const string& tag, |
208 | Summary* s) { |
209 | Summary::Value* v = s->add_value(); |
210 | v->set_tag(tag); |
211 | float value; |
212 | TF_RETURN_IF_ERROR(TensorValueAt<float>(t, 0, &value)); |
213 | v->set_simple_value(value); |
214 | return OkStatus(); |
215 | } |
216 | |
217 | Status AddTensorAsHistogramToSummary(const Tensor& t, const string& tag, |
218 | Summary* s) { |
219 | Summary::Value* v = s->add_value(); |
220 | v->set_tag(tag); |
221 | histogram::Histogram histo; |
222 | for (int64_t i = 0; i < t.NumElements(); i++) { |
223 | double double_val; |
224 | TF_RETURN_IF_ERROR(TensorValueAt<double>(t, i, &double_val)); |
225 | if (Eigen::numext::isnan(double_val)) { |
226 | return errors::InvalidArgument("Nan in summary histogram for: " , tag); |
227 | } else if (Eigen::numext::isinf(double_val)) { |
228 | return errors::InvalidArgument("Infinity in summary histogram for: " , |
229 | tag); |
230 | } |
231 | histo.Add(double_val); |
232 | } |
233 | histo.EncodeToProto(v->mutable_histo(), false /* Drop zero buckets */); |
234 | return OkStatus(); |
235 | } |
236 | |
237 | Status AddTensorAsImageToSummary(const Tensor& tensor, const string& tag, |
238 | int max_images, const Tensor& bad_color, |
239 | Summary* s) { |
240 | if (!(tensor.dims() == 4 && |
241 | (tensor.dim_size(3) == 1 || tensor.dim_size(3) == 3 || |
242 | tensor.dim_size(3) == 4))) { |
243 | return errors::InvalidArgument( |
244 | "Tensor must be 4-D with last dim 1, 3, or 4, not " , |
245 | tensor.shape().DebugString()); |
246 | } |
247 | if (!(tensor.dim_size(0) < (1LL << 31) && tensor.dim_size(1) < (1LL << 31) && |
248 | tensor.dim_size(2) < (1LL << 31) && |
249 | (tensor.dim_size(1) * tensor.dim_size(2)) < (1LL << 29))) { |
250 | return errors::InvalidArgument("Tensor too large for summary " , |
251 | tensor.shape().DebugString()); |
252 | } |
253 | // The casts and h * w cannot overflow because of the limits above. |
254 | const int batch_size = static_cast<int>(tensor.dim_size(0)); |
255 | const int h = static_cast<int>(tensor.dim_size(1)); |
256 | const int w = static_cast<int>(tensor.dim_size(2)); |
257 | const int hw = h * w; // Compact these two dims for simplicity |
258 | const int depth = static_cast<int>(tensor.dim_size(3)); |
259 | if (tensor.dtype() == DT_UINT8) { |
260 | // For uint8 input, no normalization is necessary |
261 | auto ith_image = [&tensor, batch_size, hw, depth](int i) { |
262 | auto values = tensor.shaped<uint8, 3>({batch_size, hw, depth}); |
263 | return typename TTypes<uint8>::ConstMatrix( |
264 | &values(i, 0, 0), Eigen::DSizes<Eigen::DenseIndex, 2>(hw, depth)); |
265 | }; |
266 | TF_RETURN_IF_ERROR( |
267 | AddImages(tag, max_images, batch_size, w, h, depth, ith_image, s)); |
268 | } else if (tensor.dtype() == DT_HALF) { |
269 | TF_RETURN_IF_ERROR(NormalizeAndAddImages<Eigen::half>( |
270 | tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, s)); |
271 | } else if (tensor.dtype() == DT_FLOAT) { |
272 | TF_RETURN_IF_ERROR(NormalizeAndAddImages<float>( |
273 | tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, s)); |
274 | } else if (tensor.dtype() == DT_DOUBLE) { |
275 | TF_RETURN_IF_ERROR(NormalizeAndAddImages<double>( |
276 | tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, s)); |
277 | } else { |
278 | return errors::InvalidArgument( |
279 | "Only DT_INT8, DT_HALF, DT_DOUBLE, and DT_FLOAT images are supported. " |
280 | "Got " , |
281 | DataTypeString(tensor.dtype())); |
282 | } |
283 | return OkStatus(); |
284 | } |
285 | |
286 | Status AddTensorAsAudioToSummary(const Tensor& tensor, const string& tag, |
287 | int max_outputs, float sample_rate, |
288 | Summary* s) { |
289 | if (sample_rate <= 0.0f) { |
290 | return errors::InvalidArgument("sample_rate must be > 0" ); |
291 | } |
292 | const int batch_size = tensor.dim_size(0); |
293 | const int64_t length_frames = tensor.dim_size(1); |
294 | const int64_t num_channels = |
295 | tensor.dims() == 2 ? 1 : tensor.dim_size(tensor.dims() - 1); |
296 | const int N = std::min<int>(max_outputs, batch_size); |
297 | for (int i = 0; i < N; ++i) { |
298 | Summary::Value* v = s->add_value(); |
299 | if (max_outputs > 1) { |
300 | v->set_tag(strings::StrCat(tag, "/audio/" , i)); |
301 | } else { |
302 | v->set_tag(strings::StrCat(tag, "/audio" )); |
303 | } |
304 | |
305 | Summary::Audio* sa = v->mutable_audio(); |
306 | sa->set_sample_rate(sample_rate); |
307 | sa->set_num_channels(num_channels); |
308 | sa->set_length_frames(length_frames); |
309 | sa->set_content_type("audio/wav" ); |
310 | |
311 | auto values = |
312 | tensor.shaped<float, 3>({batch_size, length_frames, num_channels}); |
313 | auto channels_by_frames = typename TTypes<float>::ConstMatrix( |
314 | &values(i, 0, 0), |
315 | Eigen::DSizes<Eigen::DenseIndex, 2>(length_frames, num_channels)); |
316 | size_t sample_rate_truncated = lrintf(sample_rate); |
317 | if (sample_rate_truncated == 0) { |
318 | sample_rate_truncated = 1; |
319 | } |
320 | TF_RETURN_IF_ERROR(wav::EncodeAudioAsS16LEWav( |
321 | channels_by_frames.data(), sample_rate_truncated, num_channels, |
322 | length_frames, sa->mutable_encoded_audio_string())); |
323 | } |
324 | return OkStatus(); |
325 | } |
326 | |
327 | } // namespace tensorflow |
328 | |