1/**
2 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16#ifndef GLOW_BASE_IMAGE_H
17#define GLOW_BASE_IMAGE_H
18
19#include "glow/Base/Tensor.h"
20#include "glow/Base/Type.h"
21#include "glow/Support/Support.h"
22
23#include "llvm/ADT/ArrayRef.h"
24#include "llvm/ADT/StringRef.h"
25
26#include <tuple>
27
28namespace glow {
29
/// Numeric range of the raw pixel values found in the input image data.
/// NOTE(review): the enumerator names suggest signed/unsigned 8-bit and 16-bit
/// ranges; the exact limits come from getPixelValMin()/getPixelValMax() -
/// confirm against their definitions.
enum class ImgDataRange {
  S8,  ///< Presumably signed 8-bit values.
  U8,  ///< Presumably unsigned 8-bit values.
  S16, ///< Presumably signed 16-bit values.
  U16, ///< Presumably unsigned 16-bit values.
};
37
/// Normalization modes: the value range that image data is normalized to.
enum class ImageNormalizationMode {
  PassThrough,  ///< Values are not modified.
  kneg1to1,     ///< Values are in the range: -1 .. 1
  k0to1,        ///< Values are in the range: 0 .. 1
  k0to255,      ///< Values are in the range: 0 .. 255
  kneg128to127, ///< Values are in the range: -128 .. 127
  U16,          ///< Values are in the range: 0 .. 65535
  S16,          ///< Values are in the range: -32768 .. 32767
};
48
/// Order of the image dimensions (batch, channels, height, width).
enum class ImageLayout {
  /// Images without a layout. They have a single stddev/mean value and an
  /// arbitrary shape. Used with NUMPY files.
  Unspecified,
  /// Batch, channels, height, width.
  NCHW,
  /// Batch, height, width, channels.
  NHWC,
};
56
/// Order in which the color channels (red, green, blue) are stored.
enum class ImageChannelOrder {
  Unspecified, ///< No channel order; used by numpy files.
  BGR,         ///< Blue, green, red.
  RGB,         ///< Red, green, blue.
};
63
/// All the image options are given as vectors, containing one element per model
/// input. An element at position i refers to input i, and input i refers to the
/// model input name given at the ith position of the -model-input-name list.
67
/// NOTE: The LLVM cmd parser made its subclasses final in 3.7, yet the only cmd
/// line manual still refers to the old design, and it is not clear why the
/// change was made. Assigning callbacks is not possible, and subclassing
/// basic_parser is open to future errors. Thus, reliance on the LLVM parser is
/// minimized - we just obtain strings and process the options ourselves. Since
/// Glow lacks an Image class/struct, most of the APIs continue to work with
/// different APIs directly affecting the global cmd line arguments.
75
76/// -image-mode flag.
77extern std::vector<ImageNormalizationMode> imageNormMode;
78
79/// -image-channel-order flag.
80extern std::vector<ImageChannelOrder> imageChannelOrderOpt;
81
82/// --input-values-range.
83extern std::vector<ImgDataRange> imageDataRangeOpt;
84
85/// -image-layout flag.
86extern std::vector<ImageLayout> imageLayoutOpt;
87
88/// -input-layout flag
89extern ImageLayout inputLayout;
90
91/// -input-layout flag
92extern ImageLayout inputLayout;
93
94/// -use-imagenet-normalization flag.
95extern bool useImagenetNormalization;
96
97/// -preprocessing parameters
98extern VecVec<float> meanValuesOpt;
99extern VecVec<float> stddevValuesOpt;
100
/// Standard per-channel normalization factors for imagenet, as seen at:
/// https://github.com/pytorch/examples/blob/master/imagenet/main.py
/// The mean values are multiplied by 255 so that they apply to values in the
/// 0to255 range instead of 0to1.
/// NOTE(review): the stddev values are listed unscaled (0to1 units) here;
/// presumably the matching scale is applied where they are used - confirm.
static const float imagenetNormMean[] = {
    0.485 * 255.0,
    0.456 * 255.0,
    0.406 * 255.0,
};
static const float imagenetNormStd[] = {
    0.229,
    0.224,
    0.225,
};
107
/// Processes the special command line arguments of the Image module.
/// NOTE(review): \p numInputs is presumably the number of model inputs that
/// the per-input option vectors are matched against - confirm.
void processImageCmdArgVars(size_t numInputs);
/// Clears the external storage of the command line arguments that are defined
/// in Image.
void initImageCmdArgVars();
112
113/// Default values for mean and stddev.
114static const std::vector<float> zeroMean(max_tensor_dimensions, 0.f);
115static const std::vector<float> oneStd(max_tensor_dimensions, 1.f);
116
117/// \returns the floating-point range corresponding to enum value \p mode.
118std::pair<float, float> normModeToRange(ImageNormalizationMode mode,
119 ImgDataRange range);
120
/// \returns the floating-point range for the input image, based on the
/// specified options. \p idx defaults to 0.
/// NOTE(review): \p idx presumably selects the model input - confirm.
std::pair<float, float> getImageRange(size_t idx = 0);
123
124/// Get min and max values in the input image data range.
125float getPixelValMin(ImgDataRange ImgDataRange);
126float getPixelValMax(ImgDataRange ImgDataRange);
127
128/// \returns mean for input image based on specified options.
129llvm::ArrayRef<float> getImageMean(size_t idx, size_t numChannels = 0);
130
131/// \returns stddev for input image based on specified options.
132llvm::ArrayRef<float> getImageStdDev(size_t idx, size_t numChannels = 0);
133
134/// Reads a png image header from png file \p filename and \returns a tuple
135/// containing height, width, and a bool if it is grayscale or not.
136std::tuple<dim_t, dim_t, bool> getPngInfo(const char *filename);
137
138/// Reads a PPM image header from PPM file descriptor \p fp and \returns a tuple
139/// containing height, width, and a bool if it is grayscale or not. \p filename
140/// is passed only as a context to provide more detailed error reporting.
141std::tuple<dim_t, dim_t, bool> getPpmInfo(FILE *fp, const char *filename);
142
143/// Reads a PPM image header from PPM file \p filename and \returns a tuple
144/// containing height, width, and a bool if it is grayscale or not.
145std::tuple<dim_t, dim_t, bool> getPpmInfo(const char *filename);
146
/// \returns whether the file \p filename is in png format.
bool isPngFormat(const std::string &filename);
149
/// \returns whether the file \p filename is in PPM format.
bool isPpmFormat(const std::string &filename);
152
153/// Reads a png image. \returns True if an error occurred. The values of the
154/// image are in the range \p range.
155bool readPngImage(Tensor *T, const char *filename,
156 std::pair<float, float> range,
157 llvm::ArrayRef<float> mean = zeroMean,
158 llvm::ArrayRef<float> stddev = oneStd);
159
160/// Writes a png image. \returns True if an error occurred. The values of the
161/// image are in the range \p range.
162bool writePngImage(Tensor *T, const char *filename,
163 std::pair<float, float> range,
164 llvm::ArrayRef<float> mean = zeroMean,
165 llvm::ArrayRef<float> stddev = oneStd);
166
167/// Reads a PPM image. \returns True if an error occurred. The values of the
168/// image are in the range \p range. Performs pre-processing using \p mean and
169/// \p stddev.
170bool readPpmImage(Tensor *T, const char *filename,
171 std::pair<float, float> range,
172 llvm::ArrayRef<float> mean = zeroMean,
173 llvm::ArrayRef<float> stddev = oneStd);
174
175/// Read a PNG/PPM image and preprocess it according to several parameters.
176/// Create a tensor and store the preprocessed image data into this tensor.
177/// \param filename the PNG/PPM file to read.
178/// \param imageNormMode normalize values to this range.
179/// \param imageChannelOrder the order of color channels.
180/// \param imageLayout the order of dimensions (channel, height, and width).
181/// \param mean use special mean to normalize.
182/// \param stdev use special stddev to normalize.
183Tensor readPngPpmImageAndPreprocess(llvm::StringRef filename,
184 ImageNormalizationMode imageNormMode,
185 ImageChannelOrder imageChannelOrder,
186 ImageLayout imageLayout,
187 llvm::ArrayRef<float> mean = zeroMean,
188 llvm::ArrayRef<float> stddev = oneStd);
189
190/// Read a PNG/PPM image and preprocess it according to several parameters. Take
191/// a tensor as a parameter and store the preprocessed image data into this
192/// tensor.
193/// \param imageData the tensor into which the preprocessed image data
194/// will be stored.
195/// \param filename the PNG/PPM file to read.
196/// \param imageNormMode normalize values to this range.
197/// \param imageChannelOrder the order of color channels.
198/// \param imageLayout the order of dimensions (channel, height, and width).
199/// \param mean use special mean to normalize.
200/// \param stdev use special stddev to normalize.
201void readPngPpmImageAndPreprocess(Tensor &imageData, llvm::StringRef filename,
202 ImageNormalizationMode imageNormMode,
203 ImageChannelOrder imageChannelOrder,
204 ImageLayout imageLayout,
205 llvm::ArrayRef<float> mean = zeroMean,
206 llvm::ArrayRef<float> stddev = oneStd);
207
208/// \param mean use special mean to normalize.
209/// \param stdev use special stddev to normalize.
210void readPngPpmImagesAndPreprocess(Tensor &inputImageData,
211 const llvm::ArrayRef<std::string> &filenames,
212 ImageNormalizationMode imageNormMode,
213 ImageChannelOrder imageChannelOrder,
214 ImageLayout imageLayout,
215 llvm::ArrayRef<float> mean,
216 llvm::ArrayRef<float> stddev);
217
/// \returns whether the file \p filename is in Numpy .npy format.
bool isNumpyNpyFormat(const std::string &filename);
220
221/// Load & normalize tensors from multiple npy files given by \p filenames into
222/// \p inputData tensor. Npy tensors must be 4D or 3D (in this case they are
223/// expanded with the batch dimension) and are concatanted along the batch.
224/// Also, tensors are transposed from \p inputLayout to \p imageLayout. Tensor
225/// values are expected to be in 0-255 range. \param filenames list of filenames
226/// to read. \param inputData Tensor to save the resulting output. \param
227/// imageNormMode normalize values to this range. \param imageLayout the order
228/// of dimensions (channel, height, and width). \param inputLayout the order of
229/// dimensions (channel, height, and width) in the dumps. \param mean use
230/// special mean to normalize. \param stdev use special stddev to normalize.
231void loadNumpyImagesAndPreprocess(const llvm::ArrayRef<std::string> &filenames,
232 Tensor &inputData,
233 ImageNormalizationMode imageNormMode,
234 ImageLayout imageLayout,
235 ImageLayout inputLayout,
236 llvm::ArrayRef<float> mean = {},
237 llvm::ArrayRef<float> stddev = {});
238
239/// Loads either PNGs or NUMPY images/tensors into the model input tensors.
240/// \param filenamesList list of lists (for each input) of filenames to read.
241/// \param inputImageDataList list of Tensors (for each input) that will
242/// contain loaded and preprocessed images.
243/// \param normMode normalize values to this range (not applicable to
244/// NUMPY).
245/// \param channelOrder the order of color channels (not applicable
246/// to NUMPY).
247/// \param imageLayout the order of dimensions (channel, height, and
248/// width).
249/// \param inputLayout the order of dimensions (channel, height, and
250/// width) in the image file. Will be used only if the image format
251/// doesn't provide the layout (e.g. PNG uses RGB thus the option is ignored).
252/// \param mean use
253/// special mean to normalize.
254/// \param stdev use special stddev to normalize.
255/// NOTE: Last 6 arguments are setting the global options - same ones the
256/// command line arguments set. Thus, the function call alters the global state.
257void loadImagesAndPreprocess(
258 VecVecRef<std::string> filenamesList,
259 llvm::ArrayRef<Tensor *> inputImageDataList,
260 llvm::ArrayRef<ImageNormalizationMode> normMode = {},
261 llvm::ArrayRef<ImageChannelOrder> channelOrder = {},
262 llvm::ArrayRef<ImageLayout> imageLayout = {},
263 llvm::ArrayRef<ImageLayout> inputLayout = {}, VecVecRef<float> mean = {},
264 VecVecRef<float> stddev = {});
265
266/// Load & normalize tensors from multiple npy files given by \p filenames into
267/// \p inputData tensor. Npy tensors must be 4D or 3D (in this case they are
268/// expanded with the batch dimension) and are concatanted along the batch.
269/// Also, tensors are transposed from \p inputLayout to \p imageLayout.
270/// Tensor values are expected to be in 0-255 range. \param filenames list of
271/// filenames to read. \param inputData Tensor to save the resulting output.
272/// \param imageNormMode normalize values to this range. \param imageLayout
273/// the order of dimensions (channel, height, and width). \param inputLayout the
274/// order of dimensions (channel, height, and width) in the dumps. \param mean
275/// use special mean to normalize. \param stdev use special stddev to normalize.
276void loadNumpyImagesAndPreprocess(
277 const llvm::ArrayRef<std::string> &filenames, Tensor &inputData,
278 ImageNormalizationMode imageNormMode, ImageChannelOrder &imageChannelOrder,
279 ImageLayout imageLayout, ImageLayout inputLayout,
280 llvm::ArrayRef<float> mean, llvm::ArrayRef<float> stddev,
281 ImgDataRange &range);
282
283} // namespace glow
284
285#endif // GLOW_BASE_IMAGE_H
286