1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #ifndef GLOW_BASE_IMAGE_H |
17 | #define GLOW_BASE_IMAGE_H |
18 | |
19 | #include "glow/Base/Tensor.h" |
20 | #include "glow/Base/Type.h" |
21 | #include "glow/Support/Support.h" |
22 | |
23 | #include "llvm/ADT/ArrayRef.h" |
24 | #include "llvm/ADT/StringRef.h" |
25 | |
26 | #include <tuple> |
27 | |
28 | namespace glow { |
29 | |
/// Pixel value ranges of the input image data. The numeric bounds of each
/// range are reported by getPixelValMin() and getPixelValMax().
/// NOTE(review): the enumerator names suggest signed/unsigned 8-bit and 16-bit
/// data -- confirm against the implementation.
enum class ImgDataRange { S8, U8, S16, U16 };
37 | |
/// Image normalization modes: the value range that pixel values are normalized
/// to when an image is loaded.
enum class ImageNormalizationMode {
  /// Values are not modified.
  PassThrough,
  /// Values are in the range: -1 and 1.
  kneg1to1,
  /// Values are in the range: 0 and 1.
  k0to1,
  /// Values are in the range: 0 and 255.
  k0to255,
  /// Values are in the range: -128 .. 127
  kneg128to127,
  /// Values are in the range: 0 .. 65535
  U16,
  /// Values are in the range: -32768 .. 32767
  S16,
};
48 | |
/// Layout of image dimensions (batch, channels, height, width).
enum class ImageLayout {
  /// Images without layout. Have a single stddev/mean value and an arbitrary
  /// shape. Used with NUMPY files.
  Unspecified,
  /// Dimension order: batch, channels, height, width.
  NCHW,
  /// Dimension order: batch, height, width, channels.
  NHWC,
};
56 | |
/// Order of color channels (red, green, blue).
enum class ImageChannelOrder {
  /// No channel order; used by numpy files.
  Unspecified,
  /// Channels are ordered blue, green, red.
  BGR,
  /// Channels are ordered red, green, blue.
  RGB,
};
63 | |
/// All the image options are given as vectors, containing one element per model
/// input. An element at position i refers to input i, and input i refers to the
/// model input name given at the ith position of the -model-input-name list.
67 | |
/// NOTE: The LLVM cmd parser made its subclasses final in 3.7, yet the only cmd
/// line manual still refers to the old data, and it is not clear why the change
/// was made. Assigning callbacks is not possible, and subclassing basic_parser
/// is open to future errors. Thus, reliance on the LLVM parser is minimized -
/// we will just obtain strings and process options. With the lack of an Image
/// class/struct in Glow, most of the APIs will continue working with different
/// APIs directly affecting global cmd line arguments.
75 | |
76 | /// -image-mode flag. |
77 | extern std::vector<ImageNormalizationMode> imageNormMode; |
78 | |
79 | /// -image-channel-order flag. |
80 | extern std::vector<ImageChannelOrder> imageChannelOrderOpt; |
81 | |
82 | /// --input-values-range. |
83 | extern std::vector<ImgDataRange> imageDataRangeOpt; |
84 | |
85 | /// -image-layout flag. |
86 | extern std::vector<ImageLayout> imageLayoutOpt; |
87 | |
88 | /// -input-layout flag |
89 | extern ImageLayout inputLayout; |
90 | |
91 | /// -input-layout flag |
92 | extern ImageLayout inputLayout; |
93 | |
94 | /// -use-imagenet-normalization flag. |
95 | extern bool useImagenetNormalization; |
96 | |
97 | /// -preprocessing parameters |
98 | extern VecVec<float> meanValuesOpt; |
99 | extern VecVec<float> stddevValuesOpt; |
100 | |
/// Standard imagenet normalization factors, adjusted for normalizing values in
/// the 0to255 range instead of 0to1, as seen at:
/// https://github.com/pytorch/examples/blob/master/imagenet/main.py
/// NOTE(review): only the mean values carry the 255 scale factor here; the
/// stddev values are stored unscaled -- confirm this is what the users expect.
static constexpr float imagenetNormMean[3] = {
    255.0 * 0.485,
    255.0 * 0.456,
    255.0 * 0.406,
};
static constexpr float imagenetNormStd[3] = {
    0.229,
    0.224,
    0.225,
};
107 | |
/// Processes the special command line args of the Image module.
/// NOTE(review): \p numInputs is presumably the number of model inputs the
/// per-input option vectors are checked against -- confirm.
void processImageCmdArgVars(size_t numInputs);

/// Clears the external storage of the cmd args defined in Image.
void initImageCmdArgVars();
112 | |
113 | /// Default values for mean and stddev. |
114 | static const std::vector<float> zeroMean(max_tensor_dimensions, 0.f); |
115 | static const std::vector<float> oneStd(max_tensor_dimensions, 1.f); |
116 | |
117 | /// \returns the floating-point range corresponding to enum value \p mode. |
118 | std::pair<float, float> normModeToRange(ImageNormalizationMode mode, |
119 | ImgDataRange range); |
120 | |
/// \returns the floating-point range for the input image, based on the
/// specified options.
/// NOTE(review): \p idx presumably selects the model input whose options are
/// used; it defaults to 0 -- confirm.
std::pair<float, float> getImageRange(size_t idx = 0);
123 | |
124 | /// Get min and max values in the input image data range. |
125 | float getPixelValMin(ImgDataRange ImgDataRange); |
126 | float getPixelValMax(ImgDataRange ImgDataRange); |
127 | |
128 | /// \returns mean for input image based on specified options. |
129 | llvm::ArrayRef<float> getImageMean(size_t idx, size_t numChannels = 0); |
130 | |
131 | /// \returns stddev for input image based on specified options. |
132 | llvm::ArrayRef<float> getImageStdDev(size_t idx, size_t numChannels = 0); |
133 | |
134 | /// Reads a png image header from png file \p filename and \returns a tuple |
135 | /// containing height, width, and a bool if it is grayscale or not. |
136 | std::tuple<dim_t, dim_t, bool> getPngInfo(const char *filename); |
137 | |
138 | /// Reads a PPM image header from PPM file descriptor \p fp and \returns a tuple |
139 | /// containing height, width, and a bool if it is grayscale or not. \p filename |
140 | /// is passed only as a context to provide more detailed error reporting. |
141 | std::tuple<dim_t, dim_t, bool> getPpmInfo(FILE *fp, const char *filename); |
142 | |
143 | /// Reads a PPM image header from PPM file \p filename and \returns a tuple |
144 | /// containing height, width, and a bool if it is grayscale or not. |
145 | std::tuple<dim_t, dim_t, bool> getPpmInfo(const char *filename); |
146 | |
/// \returns whether the file \p filename is in png format.
bool isPngFormat(const std::string &filename);

/// \returns whether the file \p filename is in PPM format.
bool isPpmFormat(const std::string &filename);
152 | |
153 | /// Reads a png image. \returns True if an error occurred. The values of the |
154 | /// image are in the range \p range. |
155 | bool readPngImage(Tensor *T, const char *filename, |
156 | std::pair<float, float> range, |
157 | llvm::ArrayRef<float> mean = zeroMean, |
158 | llvm::ArrayRef<float> stddev = oneStd); |
159 | |
160 | /// Writes a png image. \returns True if an error occurred. The values of the |
161 | /// image are in the range \p range. |
162 | bool writePngImage(Tensor *T, const char *filename, |
163 | std::pair<float, float> range, |
164 | llvm::ArrayRef<float> mean = zeroMean, |
165 | llvm::ArrayRef<float> stddev = oneStd); |
166 | |
167 | /// Reads a PPM image. \returns True if an error occurred. The values of the |
168 | /// image are in the range \p range. Performs pre-processing using \p mean and |
169 | /// \p stddev. |
170 | bool readPpmImage(Tensor *T, const char *filename, |
171 | std::pair<float, float> range, |
172 | llvm::ArrayRef<float> mean = zeroMean, |
173 | llvm::ArrayRef<float> stddev = oneStd); |
174 | |
175 | /// Read a PNG/PPM image and preprocess it according to several parameters. |
176 | /// Create a tensor and store the preprocessed image data into this tensor. |
177 | /// \param filename the PNG/PPM file to read. |
178 | /// \param imageNormMode normalize values to this range. |
179 | /// \param imageChannelOrder the order of color channels. |
180 | /// \param imageLayout the order of dimensions (channel, height, and width). |
181 | /// \param mean use special mean to normalize. |
182 | /// \param stdev use special stddev to normalize. |
183 | Tensor readPngPpmImageAndPreprocess(llvm::StringRef filename, |
184 | ImageNormalizationMode imageNormMode, |
185 | ImageChannelOrder imageChannelOrder, |
186 | ImageLayout imageLayout, |
187 | llvm::ArrayRef<float> mean = zeroMean, |
188 | llvm::ArrayRef<float> stddev = oneStd); |
189 | |
190 | /// Read a PNG/PPM image and preprocess it according to several parameters. Take |
191 | /// a tensor as a parameter and store the preprocessed image data into this |
192 | /// tensor. |
193 | /// \param imageData the tensor into which the preprocessed image data |
194 | /// will be stored. |
195 | /// \param filename the PNG/PPM file to read. |
196 | /// \param imageNormMode normalize values to this range. |
197 | /// \param imageChannelOrder the order of color channels. |
198 | /// \param imageLayout the order of dimensions (channel, height, and width). |
199 | /// \param mean use special mean to normalize. |
200 | /// \param stdev use special stddev to normalize. |
201 | void readPngPpmImageAndPreprocess(Tensor &imageData, llvm::StringRef filename, |
202 | ImageNormalizationMode imageNormMode, |
203 | ImageChannelOrder imageChannelOrder, |
204 | ImageLayout imageLayout, |
205 | llvm::ArrayRef<float> mean = zeroMean, |
206 | llvm::ArrayRef<float> stddev = oneStd); |
207 | |
208 | /// \param mean use special mean to normalize. |
209 | /// \param stdev use special stddev to normalize. |
210 | void readPngPpmImagesAndPreprocess(Tensor &inputImageData, |
211 | const llvm::ArrayRef<std::string> &filenames, |
212 | ImageNormalizationMode imageNormMode, |
213 | ImageChannelOrder imageChannelOrder, |
214 | ImageLayout imageLayout, |
215 | llvm::ArrayRef<float> mean, |
216 | llvm::ArrayRef<float> stddev); |
217 | |
/// \returns whether the file \p filename is in Numpy .npy format.
bool isNumpyNpyFormat(const std::string &filename);
220 | |
221 | /// Load & normalize tensors from multiple npy files given by \p filenames into |
222 | /// \p inputData tensor. Npy tensors must be 4D or 3D (in this case they are |
223 | /// expanded with the batch dimension) and are concatanted along the batch. |
224 | /// Also, tensors are transposed from \p inputLayout to \p imageLayout. Tensor |
225 | /// values are expected to be in 0-255 range. \param filenames list of filenames |
226 | /// to read. \param inputData Tensor to save the resulting output. \param |
227 | /// imageNormMode normalize values to this range. \param imageLayout the order |
228 | /// of dimensions (channel, height, and width). \param inputLayout the order of |
229 | /// dimensions (channel, height, and width) in the dumps. \param mean use |
230 | /// special mean to normalize. \param stdev use special stddev to normalize. |
231 | void loadNumpyImagesAndPreprocess(const llvm::ArrayRef<std::string> &filenames, |
232 | Tensor &inputData, |
233 | ImageNormalizationMode imageNormMode, |
234 | ImageLayout imageLayout, |
235 | ImageLayout inputLayout, |
236 | llvm::ArrayRef<float> mean = {}, |
237 | llvm::ArrayRef<float> stddev = {}); |
238 | |
239 | /// Loads either PNGs or NUMPY images/tensors into the model input tensors. |
240 | /// \param filenamesList list of lists (for each input) of filenames to read. |
241 | /// \param inputImageDataList list of Tensors (for each input) that will |
242 | /// contain loaded and preprocessed images. |
243 | /// \param normMode normalize values to this range (not applicable to |
244 | /// NUMPY). |
245 | /// \param channelOrder the order of color channels (not applicable |
246 | /// to NUMPY). |
247 | /// \param imageLayout the order of dimensions (channel, height, and |
248 | /// width). |
249 | /// \param inputLayout the order of dimensions (channel, height, and |
250 | /// width) in the image file. Will be used only if the image format |
251 | /// doesn't provide the layout (e.g. PNG uses RGB thus the option is ignored). |
252 | /// \param mean use |
253 | /// special mean to normalize. |
254 | /// \param stdev use special stddev to normalize. |
255 | /// NOTE: Last 6 arguments are setting the global options - same ones the |
256 | /// command line arguments set. Thus, the function call alters the global state. |
257 | void loadImagesAndPreprocess( |
258 | VecVecRef<std::string> filenamesList, |
259 | llvm::ArrayRef<Tensor *> inputImageDataList, |
260 | llvm::ArrayRef<ImageNormalizationMode> normMode = {}, |
261 | llvm::ArrayRef<ImageChannelOrder> channelOrder = {}, |
262 | llvm::ArrayRef<ImageLayout> imageLayout = {}, |
263 | llvm::ArrayRef<ImageLayout> inputLayout = {}, VecVecRef<float> mean = {}, |
264 | VecVecRef<float> stddev = {}); |
265 | |
266 | /// Load & normalize tensors from multiple npy files given by \p filenames into |
267 | /// \p inputData tensor. Npy tensors must be 4D or 3D (in this case they are |
268 | /// expanded with the batch dimension) and are concatanted along the batch. |
269 | /// Also, tensors are transposed from \p inputLayout to \p imageLayout. |
270 | /// Tensor values are expected to be in 0-255 range. \param filenames list of |
271 | /// filenames to read. \param inputData Tensor to save the resulting output. |
272 | /// \param imageNormMode normalize values to this range. \param imageLayout |
273 | /// the order of dimensions (channel, height, and width). \param inputLayout the |
274 | /// order of dimensions (channel, height, and width) in the dumps. \param mean |
275 | /// use special mean to normalize. \param stdev use special stddev to normalize. |
276 | void loadNumpyImagesAndPreprocess( |
277 | const llvm::ArrayRef<std::string> &filenames, Tensor &inputData, |
278 | ImageNormalizationMode imageNormMode, ImageChannelOrder &imageChannelOrder, |
279 | ImageLayout imageLayout, ImageLayout inputLayout, |
280 | llvm::ArrayRef<float> mean, llvm::ArrayRef<float> stddev, |
281 | ImgDataRange &range); |
282 | |
283 | } // namespace glow |
284 | |
285 | #endif // GLOW_BASE_IMAGE_H |
286 | |