Image.h source code [glow/include/glow/Base/Image.h]

1	/**
2	* Copyright (c) Glow Contributors. See CONTRIBUTORS file.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16	#ifndef GLOW_BASE_IMAGE_H
17	#define GLOW_BASE_IMAGE_H
18
19	#include "glow/Base/Tensor.h"
20	#include "glow/Base/Type.h"
21	#include "glow/Support/Support.h"
22
23	#include "llvm/ADT/ArrayRef.h"
24	#include "llvm/ADT/StringRef.h"
25
26	#include <tuple>
27
28	namespace glow {
29
/// Pixel value ranges of the input image data.
/// NOTE(review): the enumerator names suggest signed/unsigned 8- and 16-bit
/// ranges; the actual bounds come from getPixelValMin()/getPixelValMax() --
/// confirm against the implementation.
enum class ImgDataRange {
  S8,
  U8,
  S16,
  U16,
};
37
/// Normalization modes for image values. Each mode names the range the
/// values are in.
enum class ImageNormalizationMode {
  /// Values are not modified.
  PassThrough,
  /// Values are in the range: -1 and 1.
  kneg1to1,
  /// Values are in the range: 0 and 1.
  k0to1,
  /// Values are in the range: 0 and 255.
  k0to255,
  /// Values are in the range: -128 .. 127
  kneg128to127,
  /// Values are in the range: 0 .. 65535
  U16,
  /// Values are in the range: -32768 .. 32767
  S16,
};
48
/// Layout of image dimensions (batch, channels, height, width).
enum class ImageLayout {
  /// Images without layout. They have a single stddev/mean value and an
  /// arbitrary shape. Used with NUMPY files.
  Unspecified,
  /// Batch, channels, height, width.
  NCHW,
  /// Batch, height, width, channels.
  NHWC,
};
56
/// Order of color channels (red, green, blue).
enum class ImageChannelOrder {
  /// No channel order; used by numpy files.
  Unspecified,
  /// Blue, green, red.
  BGR,
  /// Red, green, blue.
  RGB,
};
63
/// All the image options are given as vectors, containing one element per model
/// input. An element at position i refers to input i, and input i refers to the
/// model input name given at the ith position of the -model-input-name list.
67
/// NOTE: LLVM cmd parser made subclasses final in 3.7, yet the only cmd line
/// manual still refers to the old data and it is not clear why the change was
/// made. Assigning callbacks is not possible, and subclassing basic_parser is
/// open to future errors. Thus, reliance on the LLVM parser is minimized - we
/// just obtain strings and process the options ourselves. Because Glow lacks an
/// Image class/struct, most of the APIs continue to work with different APIs
/// directly affecting global cmd line arguments.
75
76	/// -image-mode flag.
77	extern std::vector<ImageNormalizationMode> imageNormMode;
78
79	/// -image-channel-order flag.
80	extern std::vector<ImageChannelOrder> imageChannelOrderOpt;
81
82	/// --input-values-range.
83	extern std::vector<ImgDataRange> imageDataRangeOpt;
84
85	/// -image-layout flag.
86	extern std::vector<ImageLayout> imageLayoutOpt;
87
88	/// -input-layout flag
89	extern ImageLayout inputLayout;
90
91	/// -input-layout flag
92	extern ImageLayout inputLayout;
93
/// Storage for the -use-imagenet-normalization flag.
extern bool useImagenetNormalization;
96
97	/// -preprocessing parameters
98	extern VecVec<float> meanValuesOpt;
99	extern VecVec<float> stddevValuesOpt;
100
/// These are standard normalization factors for imagenet, as seen at:
/// https://github.com/pytorch/examples/blob/master/imagenet/main.py
/// The mean values are multiplied by 255 so that they apply to values in the
/// 0to255 range instead of 0to1; the stddev values are listed unscaled.
static const float imagenetNormMean[] = {0.485 * 255.0, 0.456 * 255.0,
                                         0.406 * 255.0};
static const float imagenetNormStd[] = {0.229, 0.224, 0.225};
107
/// Processes the special command line arguments of the Image module.
/// \param numInputs presumably the number of model inputs -- TODO confirm
/// against the implementation.
void processImageCmdArgVars(size_t numInputs);
/// Clears the external storage for the cmd args defined in Image.
void initImageCmdArgVars();
112
113	/// Default values for mean and stddev.
114	static const std::vector<float> zeroMean(max_tensor_dimensions, `0.f`);
115	static const std::vector<float> oneStd(max_tensor_dimensions, `1.f`);
116
117	/// \returns the floating-point range corresponding to enum value \p mode.
118	std::pair<float, float> normModeToRange(ImageNormalizationMode mode,
119	ImgDataRange range);
120
/// \returns floating-point range for input image based on specified options.
/// \param idx index used to select the options; defaults to 0. Presumably the
/// index of the model input -- TODO confirm.
std::pair<float, float> getImageRange(size_t idx = 0);
123
124	/// Get min and max values in the input image data range.
125	float getPixelValMin(ImgDataRange ImgDataRange);
126	float getPixelValMax(ImgDataRange ImgDataRange);
127
128	/// \returns mean for input image based on specified options.
129	llvm::ArrayRef<float> getImageMean(size_t idx, size_t numChannels = `0`);
130
131	/// \returns stddev for input image based on specified options.
132	llvm::ArrayRef<float> getImageStdDev(size_t idx, size_t numChannels = `0`);
133
134	/// Reads a png image header from png file \p filename and \returns a tuple
135	/// containing height, width, and a bool if it is grayscale or not.
136	std::tuple<dim_t, dim_t, bool> getPngInfo(const char *filename);
137
138	/// Reads a PPM image header from PPM file descriptor \p fp and \returns a tuple
139	/// containing height, width, and a bool if it is grayscale or not. \p filename
140	/// is passed only as a context to provide more detailed error reporting.
141	std::tuple<dim_t, dim_t, bool> getPpmInfo(FILE fp, const* char *filename);
142
143	/// Reads a PPM image header from PPM file \p filename and \returns a tuple
144	/// containing height, width, and a bool if it is grayscale or not.
145	std::tuple<dim_t, dim_t, bool> getPpmInfo(const char *filename);
146
/// \returns whether file \p filename is in png format.
bool isPngFormat(const std::string &filename);
149
/// \returns whether file \p filename is in PPM format.
bool isPpmFormat(const std::string &filename);
152
153	/// Reads a png image. \returns True if an error occurred. The values of the
154	/// image are in the range \p range.
155	bool readPngImage(Tensor T, const* char *filename,
156	std::pair<float, float> range,
157	llvm::ArrayRef<float> mean = zeroMean,
158	llvm::ArrayRef<float> stddev = oneStd);
159
160	/// Writes a png image. \returns True if an error occurred. The values of the
161	/// image are in the range \p range.
162	bool writePngImage(Tensor T, const* char *filename,
163	std::pair<float, float> range,
164	llvm::ArrayRef<float> mean = zeroMean,
165	llvm::ArrayRef<float> stddev = oneStd);
166
167	/// Reads a PPM image. \returns True if an error occurred. The values of the
168	/// image are in the range \p range. Performs pre-processing using \p mean and
169	/// \p stddev.
170	bool readPpmImage(Tensor T, const* char *filename,
171	std::pair<float, float> range,
172	llvm::ArrayRef<float> mean = zeroMean,
173	llvm::ArrayRef<float> stddev = oneStd);
174
175	/// Read a PNG/PPM image and preprocess it according to several parameters.
176	/// Create a tensor and store the preprocessed image data into this tensor.
177	/// \param filename the PNG/PPM file to read.
178	/// \param imageNormMode normalize values to this range.
179	/// \param imageChannelOrder the order of color channels.
180	/// \param imageLayout the order of dimensions (channel, height, and width).
181	/// \param mean use special mean to normalize.
182	/// \param stdev use special stddev to normalize.
183	Tensor readPngPpmImageAndPreprocess(llvm::StringRef filename,
184	ImageNormalizationMode imageNormMode,
185	ImageChannelOrder imageChannelOrder,
186	ImageLayout imageLayout,
187	llvm::ArrayRef<float> mean = zeroMean,
188	llvm::ArrayRef<float> stddev = oneStd);
189
190	/// Read a PNG/PPM image and preprocess it according to several parameters. Take
191	/// a tensor as a parameter and store the preprocessed image data into this
192	/// tensor.
193	/// \param imageData the tensor into which the preprocessed image data
194	/// will be stored.
195	/// \param filename the PNG/PPM file to read.
196	/// \param imageNormMode normalize values to this range.
197	/// \param imageChannelOrder the order of color channels.
198	/// \param imageLayout the order of dimensions (channel, height, and width).
199	/// \param mean use special mean to normalize.
200	/// \param stdev use special stddev to normalize.
201	void readPngPpmImageAndPreprocess(Tensor &imageData, llvm::StringRef filename,
202	ImageNormalizationMode imageNormMode,
203	ImageChannelOrder imageChannelOrder,
204	ImageLayout imageLayout,
205	llvm::ArrayRef<float> mean = zeroMean,
206	llvm::ArrayRef<float> stddev = oneStd);
207
208	/// \param mean use special mean to normalize.
209	/// \param stdev use special stddev to normalize.
210	void readPngPpmImagesAndPreprocess(Tensor &inputImageData,
211	const llvm::ArrayRef<std::string> &filenames,
212	ImageNormalizationMode imageNormMode,
213	ImageChannelOrder imageChannelOrder,
214	ImageLayout imageLayout,
215	llvm::ArrayRef<float> mean,
216	llvm::ArrayRef<float> stddev);
217
/// \returns whether file \p filename is in Numpy .npy format.
bool isNumpyNpyFormat(const std::string &filename);
220
221	/// Load & normalize tensors from multiple npy files given by \p filenames into
222	/// \p inputData tensor. Npy tensors must be 4D or 3D (in this case they are
223	/// expanded with the batch dimension) and are concatanted along the batch.
224	/// Also, tensors are transposed from \p inputLayout to \p imageLayout. Tensor
225	/// values are expected to be in 0-255 range. \param filenames list of filenames
226	/// to read. \param inputData Tensor to save the resulting output. \param
227	/// imageNormMode normalize values to this range. \param imageLayout the order
228	/// of dimensions (channel, height, and width). \param inputLayout the order of
229	/// dimensions (channel, height, and width) in the dumps. \param mean use
230	/// special mean to normalize. \param stdev use special stddev to normalize.
231	void loadNumpyImagesAndPreprocess(const llvm::ArrayRef<std::string> &filenames,
232	Tensor &inputData,
233	ImageNormalizationMode imageNormMode,
234	ImageLayout imageLayout,
235	ImageLayout inputLayout,
236	llvm::ArrayRef<float> mean = {},
237	llvm::ArrayRef<float> stddev = {});
238
239	/// Loads either PNGs or NUMPY images/tensors into the model input tensors.
240	/// \param filenamesList list of lists (for each input) of filenames to read.
241	/// \param inputImageDataList list of Tensors (for each input) that will
242	/// contain loaded and preprocessed images.
243	/// \param normMode normalize values to this range (not applicable to
244	/// NUMPY).
245	/// \param channelOrder the order of color channels (not applicable
246	/// to NUMPY).
247	/// \param imageLayout the order of dimensions (channel, height, and
248	/// width).
249	/// \param inputLayout the order of dimensions (channel, height, and
250	/// width) in the image file. Will be used only if the image format
251	/// doesn't provide the layout (e.g. PNG uses RGB thus the option is ignored).
252	/// \param mean use
253	/// special mean to normalize.
254	/// \param stdev use special stddev to normalize.
255	/// NOTE: Last 6 arguments are setting the global options - same ones the
256	/// command line arguments set. Thus, the function call alters the global state.
257	void loadImagesAndPreprocess(
258	VecVecRef<std::string> filenamesList,
259	llvm::ArrayRef<Tensor *> inputImageDataList,
260	llvm::ArrayRef<ImageNormalizationMode> normMode = {},
261	llvm::ArrayRef<ImageChannelOrder> channelOrder = {},
262	llvm::ArrayRef<ImageLayout> imageLayout = {},
263	llvm::ArrayRef<ImageLayout> inputLayout = {}, VecVecRef<float> mean = {},
264	VecVecRef<float> stddev = {});
265
266	/// Load & normalize tensors from multiple npy files given by \p filenames into
267	/// \p inputData tensor. Npy tensors must be 4D or 3D (in this case they are
268	/// expanded with the batch dimension) and are concatanted along the batch.
269	/// Also, tensors are transposed from \p inputLayout to \p imageLayout.
270	/// Tensor values are expected to be in 0-255 range. \param filenames list of
271	/// filenames to read. \param inputData Tensor to save the resulting output.
272	/// \param imageNormMode normalize values to this range. \param imageLayout
273	/// the order of dimensions (channel, height, and width). \param inputLayout the
274	/// order of dimensions (channel, height, and width) in the dumps. \param mean
275	/// use special mean to normalize. \param stdev use special stddev to normalize.
276	void loadNumpyImagesAndPreprocess(
277	const llvm::ArrayRef<std::string> &filenames, Tensor &inputData,
278	ImageNormalizationMode imageNormMode, ImageChannelOrder &imageChannelOrder,
279	ImageLayout imageLayout, ImageLayout inputLayout,
280	llvm::ArrayRef<float> mean, llvm::ArrayRef<float> stddev,
281	ImgDataRange &range);
282
283	} // namespace glow
284
285	#endif // GLOW_BASE_IMAGE_H
286

Browse the source code of glow/include/glow/Base/Image.h