/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Loader.h"

#include "glow/Base/TensorSerialization.h"
#include "glow/Importer/Caffe2ModelLoader.h"

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

#include <fstream>
#include <iostream>
#include <sstream>

using namespace glow;

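// Example invocation (illustrative; the model directory name and flag values
// here are assumptions, not defaults enforced by this tool):
//
//   ./bin/text-translator -m en2gr -max-input-len=10 -max-output-len=14 \
//       -beam-size=6 -input-text-file=en.txt
//
// where -m comes from the shared Loader options, and the remaining flags are
// defined below; they must match the values the input model was exported with.
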
namespace {
llvm::cl::OptionCategory textTranslatorCat("Text Translator Options");

llvm::cl::opt<unsigned>
    maxInputLenOpt("max-input-len",
                   llvm::cl::desc("Maximum allowed length of the input "
                                  "sentence. Specified by the input model."),
                   llvm::cl::Optional, llvm::cl::init(10),
                   llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<unsigned>
    maxOutputLenOpt("max-output-len",
                    llvm::cl::desc("Maximum allowed length of the output "
                                   "sentence. Specified by the input model."),
                    llvm::cl::Optional, llvm::cl::init(14),
                    llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<unsigned> beamSizeOpt(
    "beam-size", llvm::cl::desc("Beam size used by the input model."),
    llvm::cl::Optional, llvm::cl::init(6), llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<double>
    lengthPenaltyOpt("length-penalty",
                     llvm::cl::desc("Length penalty to use when determining "
                                    "highest likelihood output sentence."),
                     llvm::cl::Optional, llvm::cl::init(0.0),
                     llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<std::string> inputSentencesFile(
    "input-text-file",
    llvm::cl::desc(
        "Name of the file containing list of sentences (one per line)"),
    llvm::cl::value_desc("string_name"), llvm::cl::Optional,
    llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<std::string> expectedResultSentencesFile(
    "expected-output-text-file",
    llvm::cl::desc("Name of the file containing list of sentences (one per "
                   "line) corresponding to expected translations provided via "
                   "-input-text-file."),
    llvm::cl::value_desc("string_name"), llvm::cl::Optional,
    llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<bool> dumpBinaryResults(
    "dump-binary-results",
    llvm::cl::desc("Dump raw binary Tensor results after execution."),
    llvm::cl::init(false), llvm::cl::cat(textTranslatorCat));
} // namespace

/// These should be kept in sync with pytorch_translate/vocab_constants.py
constexpr size_t reservedOffset = 100;
constexpr size_t padIdx = 0;
constexpr size_t eosIdx = 2;
constexpr size_t unkIdx = 3;

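// How the offset is used below: the first reservedOffset (100) token ids are
// reserved for special tokens such as <PAD>, <EOS>, and <UNK>, so a word at
// index i of a dictionary file maps to token id i + reservedOffset (e.g. the
// word on the first line of the file gets token id 100).
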
/// Stores dictionary of a language. Contains mapping from word to index and
/// vice versa.
struct Dictionary {
private:
  std::vector<std::string> index2word_;
  std::unordered_map<std::string, size_t> word2index_;

public:
  /// Add a word from the \p line to the dictionary.
  void addWord(llvm::StringRef line) {
    // Lines generally should be formatted like "the 9876543", where the
    // trailing number is not relevant for inference.
    auto spaceIdx = line.find(" ");
    DCHECK(spaceIdx != llvm::StringRef::npos)
        << "Unexpected format for dict file.";

    auto word = std::string(line.take_front(spaceIdx));
    DCHECK_GT(word.size(), 0) << "Did not find word correctly.";

    word2index_[word] = index2word_.size();
    index2word_.push_back(word);
  }

  Dictionary() = default;

  /// Load a dictionary from text file \p filename, adding each word from each
  /// line of the file.
  void loadDictionaryFromFile(llvm::StringRef filename) {
    std::ifstream file(filename.str());
    std::string word;
    while (getline(file, word)) {
      addWord(word);
    }
  }

  /// Get the index for the input \p word from the dictionary.
  size_t getIdxFromWord(llvm::StringRef word) {
    auto iter = word2index_.find(word.str());
    // If unknown word, return the index for unknown.
    if (iter == word2index_.end()) {
      return unkIdx;
    }
    return iter->second + reservedOffset;
  }

  /// Get the word for the input \p idx from the dictionary.
  std::string getWordFromIdx(size_t idx) {
    if (idx < reservedOffset) {
      if (idx == eosIdx) {
        return "<EOS>";
      }
      if (idx == padIdx) {
        return "<PAD>";
      }
      return "<UNK>";
    }

    return index2word_[idx - reservedOffset];
  }
};
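
// Round-trip example: if the first line of src_dictionary.txt is
// "the 9876543", then after loading, srcVocab.getIdxFromWord("the") returns
// 100 (0 + reservedOffset); symmetrically, dstVocab.getWordFromIdx(100)
// returns the first word of dst_dictionary.txt.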

Dictionary srcVocab, dstVocab;

/// Break the input \p sentence up by spaces, and then encode the words as
/// indices from the input dictionary, placing them in \p encoderInputs. Note
/// that the model expects sentences to be in reverse order.
static void encodeString(const llvm::StringRef sentence,
                         Tensor *encoderInputs) {
  auto IH = encoderInputs->getHandle<int32_t>();

  std::vector<int32_t> encodedWords;
  encodedWords.reserve(maxInputLenOpt);

  // Get each word from the sentence and encode it.
  std::istringstream iss(sentence.str());
  std::string word;
  while (iss >> word) {
    auto idx = srcVocab.getIdxFromWord(word);
    encodedWords.push_back(idx);
  }
  encodedWords.push_back(eosIdx);

  CHECK_LE(encodedWords.size(), maxInputLenOpt)
      << "Sentence length exceeds maxInputLen.";

  // Pad the rest of the input.
  while (encodedWords.size() != maxInputLenOpt) {
    encodedWords.push_back(padIdx);
  }

  // Note: the model expects the input sentence to be in reverse order.
  dim_t i = 0;
  for (auto it = encodedWords.rbegin(); it != encodedWords.rend(); it++, i++) {
    // The batch size is 1 for inference models.
    IH.at({i, /* batchSize */ 0}) = *it;
  }
}
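
// A worked example of encodeString(), assuming max-input-len=10 and a source
// dictionary in which "hello" maps to token 100 and "world" to token 101:
//   "hello world" -> [100, 101, EOS(2), then 7 x PAD(0)]
// which is written into encoderInputs in reverse order:
//   [0, 0, 0, 0, 0, 0, 0, 2, 101, 100]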

/// Load the next sentence to translate from \p file if it is open, otherwise
/// prompt for and read one from std::cin. Encode it and place the result in
/// \p encoderInputs. \returns false if the line read was empty.
static bool loadNextInputTranslationText(Tensor *encoderInputs,
                                         std::ifstream &file) {
  std::string sentence;
  if (file.is_open()) {
    getline(file, sentence);
  } else {
    llvm::outs() << "Enter a sentence in English to translate to German: ";
    getline(std::cin, sentence);
  }

  if (sentence.empty()) {
    return false;
  }

  encodeString(sentence, encoderInputs);

  return true;
}

/// Find and return a vector of the best translation given the outputs from the
/// model \p outputTokenBeamList, \p outputScoreBeamList, and \p
/// outputPrevIndexBeamList. A translation is made up of a vector of tokens
/// which must be converted back to words via the destination dictionary.
static std::vector<dim_t> getBestTranslation(Tensor *outputTokenBeamList,
                                             Tensor *outputScoreBeamList,
                                             Tensor *outputPrevIndexBeamList) {
  // Get handles to all the outputs from the model run.
  auto tokenBeamListH = outputTokenBeamList->getHandle<int32_t>();
  auto scoreBeamListH = outputScoreBeamList->getHandle<float>();
  auto prevIndexBeamListH = outputPrevIndexBeamList->getHandle<int32_t>();

  // This pair represents the ending position of a translation in the beam
  // search grid. The first index corresponds to the length (column index), the
  // second index corresponds to the position in the beam (row index).
  std::pair<dim_t, dim_t> bestPosition = std::make_pair(0, 0);
  float bestScore = std::numeric_limits<float>::lowest();

  // Keep track of whether the current hypothesis of best translation has
  // already ended.
  std::vector<bool> prevHypoIsFinished(beamSizeOpt, false);
  std::vector<bool> currentHypoIsFinished(beamSizeOpt, false);
  for (dim_t lengthIndex = 0; lengthIndex < maxOutputLenOpt; ++lengthIndex) {
    for (dim_t hypoIndex = 0; hypoIndex < beamSizeOpt; ++hypoIndex) {
      // If the current hypothesis was already scored and compared to the best,
      // we can skip it and move on to the next one.
      dim_t prevIndex = prevIndexBeamListH.at({lengthIndex, hypoIndex});
      currentHypoIsFinished[hypoIndex] = prevHypoIsFinished[prevIndex];
      if (currentHypoIsFinished[hypoIndex]) {
        continue;
      }

      // If the current token is not the end of sentence, and we haven't
      // reached the max output length, then we cannot yet score/compare it, so
      // keep going until we reach the end.
      if (tokenBeamListH.at({lengthIndex, hypoIndex}) != eosIdx &&
          lengthIndex + 1 != maxOutputLenOpt) {
        continue;
      }

      // At this point we must have reached the end of a hypothesis sentence
      // which has not yet been scored and compared. Set this as finished as we
      // will now score and compare it against the current best score.
      currentHypoIsFinished[hypoIndex] = true;

      // Calculate the current score with length penalty.
      float currScore = scoreBeamListH.at({lengthIndex, hypoIndex}) /
                        pow(lengthIndex + 1, lengthPenaltyOpt);

      // If this translation has a better score, replace the current best.
      if (currScore > bestScore) {
        bestPosition = std::make_pair(lengthIndex, hypoIndex);
        bestScore = currScore;
      }
    }

    // Moving on to the next length step, so swap the current finished flags
    // into the previous ones, and reset the current flags to all false.
    prevHypoIsFinished.swap(currentHypoIsFinished);
    currentHypoIsFinished.assign(beamSizeOpt, false);
  }
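
  // A small numeric illustration (hypothetical scores): with length-penalty=1,
  // a hypothesis ending at lengthIndex=3 with raw score -8.0 is normalized to
  // -8.0 / pow(4, 1) = -2.0, so it beats a hypothesis ending at lengthIndex=1
  // with raw score -5.0 (normalized to -5.0 / 2 = -2.5).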

  // Generate the best translation given the end state. Use the previous index
  // beam list to find the next word to add to the translation.
  std::vector<dim_t> output;
  dim_t lengthIndex = bestPosition.first;
  dim_t hypoIndex = bestPosition.second;
  while (lengthIndex > 0) {
    output.emplace_back(tokenBeamListH.at({lengthIndex, hypoIndex}));
    hypoIndex = prevIndexBeamListH.at({lengthIndex, hypoIndex});
    lengthIndex--;
  }

  // Reverse the output order of the translated sentence.
  std::reverse(output.begin(), output.end());

  // Find the EOS token and cut off the rest of the output.
  auto findEos = std::find(output.begin(), output.end(), eosIdx);
  auto findEosIndex = findEos - output.begin();
  output.resize(findEosIndex);

  return output;
}

/// Queries getBestTranslation() for the best translation via the outputs from
/// the model, \p outputTokenBeamList, \p outputScoreBeamList, and
/// \p outputPrevIndexBeamList. Then converts each of the tokens from the
/// returned best translation into words from the dest dictionary, and prints
/// it. If \p expectedResultFile is open, the translation is also checked
/// against the next expected result read from it. \returns whether the
/// translation matched the expected result, or true if no expected results
/// were provided.
static bool processAndPrintDecodedTranslation(
    Tensor *outputTokenBeamList, Tensor *outputScoreBeamList,
    Tensor *outputPrevIndexBeamList, std::ifstream &expectedResultFile) {
  std::vector<dim_t> translationTokens = getBestTranslation(
      outputTokenBeamList, outputScoreBeamList, outputPrevIndexBeamList);

  // Use the dest dictionary to convert tokens to words, and print it.
  std::string result;
  for (size_t i = 0; i < translationTokens.size(); i++) {
    auto wordIdx = translationTokens[i];
    auto word = dstVocab.getWordFromIdx(wordIdx);

    // Check if the word has suffix "@@". This means the current word should be
    // appended to the next word, so remove the "@@" and do not output a space.
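    // For example, the BPE-style subword pair ("new@@", "er") is emitted as
    // "newer".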
    auto wordLength = word.length();
    if (wordLength > 1 && word.substr(wordLength - 2) == "@@") {
      word = word.substr(0, wordLength - 2);
    } else if (i != translationTokens.size() - 1) {
      word = word + " ";
    }
    result += word;
  }
  llvm::outs() << result;
  bool correctTranslation = true;
  if (expectedResultFile.is_open()) {
    std::string expectedResult;
    CHECK(getline(expectedResultFile, expectedResult))
        << "Did not find expected translation.";
    correctTranslation = expectedResult == result;
    if (!correctTranslation) {
      llvm::outs() << "\n PREVIOUS TRANSLATION INCORRECT; EXPECTED: "
                   << expectedResult;
    }
  }
  llvm::outs() << "\n\n";
  return correctTranslation;
}

int main(int argc, char **argv) {
  PlaceholderBindings bindings;

  // Verify/initialize command line parameters, and then loader initializes
  // the ExecutionEngine and Function.
  parseCommandLine(argc, argv);
  Loader loader;

  // Load the source and dest dictionaries.
  auto modelDir = loader.getModelOptDir();
  srcVocab.loadDictionaryFromFile(modelDir.str() + "/src_dictionary.txt");
  dstVocab.loadDictionaryFromFile(modelDir.str() + "/dst_dictionary.txt");

  // Encoded input sentence. Note that the batch size is 1 for inference
  // models.
  Tensor encoderInputs(ElemKind::Int32ITy,
                       {maxInputLenOpt, /* batchSize */ 1});

  // Inputs other than tokenized input. These should all be initialized to
  // zero. Note, the init_net already defines these tensors solely as
  // placeholders (with incorrect shapes/element types/data). Glow uses these
  // tensors in their place.
  Tensor attnWeights(ElemKind::FloatTy, {maxInputLenOpt});
  Tensor prevHyposIndices(ElemKind::Int32ITy, {beamSizeOpt});
  Tensor prevScores(ElemKind::FloatTy, {1});
  Tensor prevToken(ElemKind::Int32ITy, {1});

  DCHECK(!loader.getCaffe2NetDescFilename().empty())
      << "Only supporting Caffe2 currently.";

  constexpr char const *inputNames[5] = {"encoder_inputs", "attn_weights",
                                         "prev_hypos_indices", "prev_scores",
                                         "prev_token"};
  std::vector<TypeRef> inputTensors = {
      &encoderInputs.getType(), &attnWeights.getType(),
      &prevHyposIndices.getType(), &prevScores.getType(), &prevToken.getType()};

  Caffe2ModelLoader LD(loader.getCaffe2NetDescFilename().str(),
                       loader.getCaffe2NetWeightFilename().str(), inputNames,
                       inputTensors, *loader.getFunction());
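
  // Note: these five names must match inputs defined by the model's
  // predict_net; the Types passed alongside them are what Glow uses in place
  // of the incorrect placeholder shapes from the init_net (see above).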

  // Allocate tensors to back all inputs and outputs.
  bindings.allocate(loader.getModule()->getPlaceholders());

  // Get all bound tensors and zero them.
  for (auto &pair : bindings.pairs()) {
    pair.second.zero();
  }

  Placeholder *encoderInputsVar = llvm::cast<Placeholder>(
      EXIT_ON_ERR(LD.getNodeValueByName("encoder_inputs")));

  // Compile the model, and perform quantization/emit a bundle/dump debug info
  // if requested from command line.
  loader.compile(bindings);

  DCHECK(!emittingBundle()) << "Bundle mode has not been tested.";

  Placeholder *outputTokenBeamList =
      EXIT_ON_ERR(LD.getOutputByName("output_token_beam_list"));
  Placeholder *outputScoreBeamList =
      EXIT_ON_ERR(LD.getOutputByName("output_score_beam_list"));
  Placeholder *outputPrevIndexBeamList =
      EXIT_ON_ERR(LD.getOutputByName("output_prev_index_beam_list"));

  std::ifstream inFile, expectedResultFile;
  if (!inputSentencesFile.empty()) {
    inFile.open(inputSentencesFile, std::ifstream::in);
  }
  if (!expectedResultSentencesFile.empty()) {
    expectedResultFile.open(expectedResultSentencesFile, std::ifstream::in);
  }

  // We reuse the same output Tensors for every inference, so it's safe to get
  // them ahead of the inference loop.
  Tensor *outputTokenBeamListT = bindings.get(outputTokenBeamList);
  Tensor *outputScoreBeamListT = bindings.get(outputScoreBeamList);
  Tensor *outputPrevIndexBeamListT = bindings.get(outputPrevIndexBeamList);

  // Store copies of results of each inference for dumping after the inference
  // loop if requested.
  std::vector<Tensor> outputTokenBeamListResults;
  std::vector<Tensor> outputScoreBeamListResults;
  std::vector<Tensor> outputPrevIndexBeamListResults;

  int incorrectTranslationCount = 0;
  while (loadNextInputTranslationText(&encoderInputs, inFile)) {
    // Update the inputs.
    updateInputPlaceholders(bindings, {encoderInputsVar}, {&encoderInputs});

    // Run actual translation.
    loader.runInference(bindings);

    // Keep around copies of all results to dump after inference loop.
    if (dumpBinaryResults) {
      outputTokenBeamListResults.emplace_back(outputTokenBeamListT->clone());
      outputScoreBeamListResults.emplace_back(outputScoreBeamListT->clone());
      outputPrevIndexBeamListResults.emplace_back(
          outputPrevIndexBeamListT->clone());
    }

    // Process the outputs to determine the highest likelihood sentence, and
    // print out the decoded translation using the dest dictionary.
    if (!processAndPrintDecodedTranslation(
            outputTokenBeamListT, outputScoreBeamListT,
            outputPrevIndexBeamListT, expectedResultFile)) {
      incorrectTranslationCount += 1;
    }
  }

  if (dumpBinaryResults) {
    TensorSerializationOptions opts;
    opts.withType = false;
    auto dumpRes = [&](const std::string &name,
                       const std::vector<Tensor> &vecT) {
      std::ofstream fs;
      llvm::SmallString<64> path;
      auto tempFileRes = llvm::sys::fs::createTemporaryFile(name, "bin", path);
      CHECK_EQ(tempFileRes.value(), 0)
          << "Failed to create temp file to write into.";
      fs.open(path.data(), std::ios::binary);
      CHECK(fs.is_open()) << "Error opening file '" << path.data() << "'!";
      std::cout << "Dumping binary results of " << name << " to "
                << path.data() << std::endl;
      for (const Tensor &T : vecT) {
        dumpTensorToBinaryFile(T, fs, opts);
      }
      fs.close();
    };
    dumpRes("outputTokenBeamList", outputTokenBeamListResults);
    dumpRes("outputScoreBeamList", outputScoreBeamListResults);
    dumpRes("outputPrevIndexBeamList", outputPrevIndexBeamListResults);
  }
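
  // Each dump file concatenates the raw tensor data from all inferences in
  // order; no type/shape header is written (opts.withType is false above), so
  // a reader must already know each result's shape and element type.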

  // If profiling, generate and serialize the profiling infos now that we
  // have run inference to gather the profile.
  if (profilingGraph()) {
    loader.generateAndSerializeProfilingInfos(bindings);
  }

  return incorrectTranslationCount;
}