/**
 * Copyright (c) Glow Contributors. See CONTRIBUTORS file.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "Loader.h"

#include "glow/Base/TensorSerialization.h"
#include "glow/Importer/Caffe2ModelLoader.h"

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

#include <fstream>
#include <iostream>
#include <sstream>

using namespace glow;

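// Example invocation (illustrative; the model directory name and flag values
// here are assumptions, not defaults enforced by this tool):
//
//   ./bin/text-translator -m en2gr -max-input-len=10 -max-output-len=14 \
//       -beam-size=6 -input-text-file=en.txt
//
// where -m comes from the shared Loader options, and the remaining flags are
// defined below; they must match the values the input model was exported with.
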
namespace {
llvm::cl::OptionCategory textTranslatorCat("Text Translator Options");

llvm::cl::opt<unsigned>
    maxInputLenOpt("max-input-len",
                   llvm::cl::desc("Maximum allowed length of the input "
                                  "sentence. Specified by the input model."),
                   llvm::cl::Optional, llvm::cl::init(10),
                   llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<unsigned>
    maxOutputLenOpt("max-output-len",
                    llvm::cl::desc("Maximum allowed length of the output "
                                   "sentence. Specified by the input model."),
                    llvm::cl::Optional, llvm::cl::init(14),
                    llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<unsigned> beamSizeOpt(
    "beam-size", llvm::cl::desc("Beam size used by the input model."),
    llvm::cl::Optional, llvm::cl::init(6), llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<double>
    lengthPenaltyOpt("length-penalty",
                     llvm::cl::desc("Length penalty to use when determining "
                                    "highest likelihood output sentence."),
                     llvm::cl::Optional, llvm::cl::init(0.0),
                     llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<std::string> inputSentencesFile(
    "input-text-file",
    llvm::cl::desc(
        "Name of the file containing list of sentences (one per line)"),
    llvm::cl::value_desc("string_name"), llvm::cl::Optional,
    llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<std::string> expectedResultSentencesFile(
    "expected-output-text-file",
    llvm::cl::desc("Name of the file containing list of sentences (one per "
                   "line) corresponding to expected translations provided via "
                   "-input-text-file."),
    llvm::cl::value_desc("string_name"), llvm::cl::Optional,
    llvm::cl::cat(textTranslatorCat));

llvm::cl::opt<bool> dumpBinaryResults(
    "dump-binary-results",
    llvm::cl::desc("Dump raw binary Tensor results after execution."),
    llvm::cl::init(false), llvm::cl::cat(textTranslatorCat));
} // namespace

/// These should be kept in sync with pytorch_translate/vocab_constants.py
constexpr size_t reservedOffset = 100;
constexpr size_t padIdx = 0;
constexpr size_t eosIdx = 2;
constexpr size_t unkIdx = 3;

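// How the offset is used below: the first reservedOffset (100) token ids are
// reserved for special tokens such as <PAD>, <EOS>, and <UNK>, so a word at
// index i of a dictionary file maps to token id i + reservedOffset (e.g. the
// word on the first line of the file gets token id 100).
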
/// Stores dictionary of a language. Contains mapping from word to index and
/// vice versa.
struct Dictionary {
private:
  std::vector<std::string> index2word_;
  std::unordered_map<std::string, size_t> word2index_;

public:
  /// Add a word from the \p line to the dictionary.
  void addWord(llvm::StringRef line) {
    // Lines generally should be formatted like "the 9876543", where the
    // trailing number is not relevant for inference.
    auto spaceIdx = line.find(" ");
    DCHECK(spaceIdx != llvm::StringRef::npos)
        << "Unexpected format for dict file.";

    auto word = std::string(line.take_front(spaceIdx));
    DCHECK_GT(word.size(), 0) << "Did not find word correctly.";

    word2index_[word] = index2word_.size();
    index2word_.push_back(word);
  }

  Dictionary() = default;

  /// Load a dictionary from text file \p filename, adding each word from each
  /// line of the file.
  void loadDictionaryFromFile(llvm::StringRef filename) {
    std::ifstream file(filename.str());
    std::string word;
    while (getline(file, word)) {
      addWord(word);
    }
  }

  /// Get the index for the input \p word from the dictionary.
  size_t getIdxFromWord(llvm::StringRef word) {
    auto iter = word2index_.find(word.str());
    // If unknown word, return the index for unknown.
    if (iter == word2index_.end()) {
      return unkIdx;
    }
    return iter->second + reservedOffset;
  }

  /// Get the word for the input \p idx from the dictionary.
  std::string getWordFromIdx(size_t idx) {
    if (idx < reservedOffset) {
      if (idx == eosIdx) {
        return "<EOS>";
      }
      if (idx == padIdx) {
        return "<PAD>";
      }
      return "<UNK>";
    }

    return index2word_[idx - reservedOffset];
  }
};
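
// Round-trip example: if the first line of src_dictionary.txt is
// "the 9876543", then after loading, srcVocab.getIdxFromWord("the") returns
// 100 (0 + reservedOffset); symmetrically, dstVocab.getWordFromIdx(100)
// returns the first word of dst_dictionary.txt.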

Dictionary srcVocab, dstVocab;

/// Break the input \p sentence up by spaces, and then encode the words as
/// indices from the input dictionary, placing them in \p encoderInputs. Note
/// that the model expects sentences to be in reverse order.
static void encodeString(const llvm::StringRef sentence,
                         Tensor *encoderInputs) {
  auto IH = encoderInputs->getHandle<int32_t>();

  std::vector<int32_t> encodedWords;
  encodedWords.reserve(maxInputLenOpt);

  // Get each word from the sentence and encode it.
  std::istringstream iss(sentence.str());
  std::string word;
  while (iss >> word) {
    auto idx = srcVocab.getIdxFromWord(word);
    encodedWords.push_back(idx);
  }
  encodedWords.push_back(eosIdx);

  CHECK_LE(encodedWords.size(), maxInputLenOpt)
      << "Sentence length exceeds maxInputLen.";

  // Pad the rest of the input.
  while (encodedWords.size() != maxInputLenOpt) {
    encodedWords.push_back(padIdx);
  }

  // Note: the model expects the input sentence to be in reverse order.
  dim_t i = 0;
  for (auto it = encodedWords.rbegin(); it != encodedWords.rend(); it++, i++) {
    // The batch size is 1 for inference models.
    IH.at({i, /* batchSize */ 0}) = *it;
  }
}
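
// A worked example of encodeString(), assuming max-input-len=10 and a source
// dictionary in which "hello" maps to token 100 and "world" to token 101:
//   "hello world" -> [100, 101, EOS(2), then 7 x PAD(0)]
// which is written into encoderInputs in reverse order:
//   [0, 0, 0, 0, 0, 0, 0, 2, 101, 100]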

/// Load the next sentence to translate from \p file if it is open, otherwise
/// prompt for and read one from std::cin. Encode it and place the result in
/// \p encoderInputs. \returns false if the line read was empty.
static bool loadNextInputTranslationText(Tensor *encoderInputs,
                                         std::ifstream &file) {
  std::string sentence;
  if (file.is_open()) {
    getline(file, sentence);
  } else {
    llvm::outs() << "Enter a sentence in English to translate to German: ";
    getline(std::cin, sentence);
  }

  if (sentence.empty()) {
    return false;
  }

  encodeString(sentence, encoderInputs);

  return true;
}

/// Find and return a vector of the best translation given the outputs from the
/// model \p outputTokenBeamList, \p outputScoreBeamList, and \p
/// outputPrevIndexBeamList. A translation is made up of a vector of tokens
/// which must be converted back to words via the destination dictionary.
static std::vector<dim_t> getBestTranslation(Tensor *outputTokenBeamList,
                                             Tensor *outputScoreBeamList,
                                             Tensor *outputPrevIndexBeamList) {
  // Get handles to all the outputs from the model run.
  auto tokenBeamListH = outputTokenBeamList->getHandle<int32_t>();
  auto scoreBeamListH = outputScoreBeamList->getHandle<float>();
  auto prevIndexBeamListH = outputPrevIndexBeamList->getHandle<int32_t>();

  // This pair represents the ending position of a translation in the beam
  // search grid. The first index corresponds to the length (column index), the
  // second index corresponds to the position in the beam (row index).
  std::pair<dim_t, dim_t> bestPosition = std::make_pair(0, 0);
  float bestScore = std::numeric_limits<float>::lowest();

  // Keep track of whether the current hypothesis of best translation has
  // already ended.
  std::vector<bool> prevHypoIsFinished(beamSizeOpt, false);
  std::vector<bool> currentHypoIsFinished(beamSizeOpt, false);
  for (dim_t lengthIndex = 0; lengthIndex < maxOutputLenOpt; ++lengthIndex) {
    for (dim_t hypoIndex = 0; hypoIndex < beamSizeOpt; ++hypoIndex) {
      // If the current hypothesis was already scored and compared to the best,
      // we can skip it and move on to the next one.
      dim_t prevIndex = prevIndexBeamListH.at({lengthIndex, hypoIndex});
      currentHypoIsFinished[hypoIndex] = prevHypoIsFinished[prevIndex];
      if (currentHypoIsFinished[hypoIndex]) {
        continue;
      }

      // If the current token is not the end of sentence, and we haven't
      // reached the max output length, then we cannot yet score/compare it, so
      // keep going until we reach the end.
      if (tokenBeamListH.at({lengthIndex, hypoIndex}) != eosIdx &&
          lengthIndex + 1 != maxOutputLenOpt) {
        continue;
      }

      // At this point we must have reached the end of a hypothesis sentence
      // which has not yet been scored and compared. Set this as finished as we
      // will now score and compare it against the current best score.
      currentHypoIsFinished[hypoIndex] = true;

      // Calculate the current score with length penalty.
      float currScore = scoreBeamListH.at({lengthIndex, hypoIndex}) /
                        pow(lengthIndex + 1, lengthPenaltyOpt);

      // If this translation has a better score, replace the current best.
      if (currScore > bestScore) {
        bestPosition = std::make_pair(lengthIndex, hypoIndex);
        bestScore = currScore;
      }
    }

    // Moving on to the next length step, so swap the current finished flags
    // into the previous ones, and reset the current flags to all false.
    prevHypoIsFinished.swap(currentHypoIsFinished);
    currentHypoIsFinished.assign(beamSizeOpt, false);
  }
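
  // A small numeric illustration (hypothetical scores): with length-penalty=1,
  // a hypothesis ending at lengthIndex=3 with raw score -8.0 is normalized to
  // -8.0 / pow(4, 1) = -2.0, so it beats a hypothesis ending at lengthIndex=1
  // with raw score -5.0 (normalized to -5.0 / 2 = -2.5).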

  // Generate the best translation given the end state. Use the previous index
  // beam list to find the next word to add to the translation.
  std::vector<dim_t> output;
  dim_t lengthIndex = bestPosition.first;
  dim_t hypoIndex = bestPosition.second;
  while (lengthIndex > 0) {
    output.emplace_back(tokenBeamListH.at({lengthIndex, hypoIndex}));
    hypoIndex = prevIndexBeamListH.at({lengthIndex, hypoIndex});
    lengthIndex--;
  }

  // Reverse the output order of the translated sentence.
  std::reverse(output.begin(), output.end());

  // Find the EOS token and cut off the rest of the output.
  auto findEos = std::find(output.begin(), output.end(), eosIdx);
  auto findEosIndex = findEos - output.begin();
  output.resize(findEosIndex);

  return output;
}

/// Queries getBestTranslation() for the best translation via the outputs from
/// the model, \p outputTokenBeamList, \p outputScoreBeamList, and
/// \p outputPrevIndexBeamList. Then converts each of the tokens from the
/// returned best translation into words from the dest dictionary, and prints
/// it. If \p expectedResultFile is open, the translation is also checked
/// against the next expected result read from it. \returns whether the
/// translation matched the expected result, or true if no expected results
/// were provided.
static bool processAndPrintDecodedTranslation(
    Tensor *outputTokenBeamList, Tensor *outputScoreBeamList,
    Tensor *outputPrevIndexBeamList, std::ifstream &expectedResultFile) {
  std::vector<dim_t> translationTokens = getBestTranslation(
      outputTokenBeamList, outputScoreBeamList, outputPrevIndexBeamList);

  // Use the dest dictionary to convert tokens to words, and print it.
  std::string result;
  for (size_t i = 0; i < translationTokens.size(); i++) {
    auto wordIdx = translationTokens[i];
    auto word = dstVocab.getWordFromIdx(wordIdx);

    // Check if the word has suffix "@@". This means the current word should be
    // appended to the next word, so remove the "@@" and do not output a space.
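    // For example, the BPE-style subword pair ("new@@", "er") is emitted as
    // "newer".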
    auto wordLength = word.length();
    if (wordLength > 1 && word.substr(wordLength - 2) == "@@") {
      word = word.substr(0, wordLength - 2);
    } else if (i != translationTokens.size() - 1) {
      word = word + " ";
    }
    result += word;
  }
  llvm::outs() << result;
  bool correctTranslation = true;
  if (expectedResultFile.is_open()) {
    std::string expectedResult;
    CHECK(getline(expectedResultFile, expectedResult))
        << "Did not find expected translation.";
    correctTranslation = expectedResult == result;
    if (!correctTranslation) {
      llvm::outs() << "\n PREVIOUS TRANSLATION INCORRECT; EXPECTED: "
                   << expectedResult;
    }
  }
  llvm::outs() << "\n\n";
  return correctTranslation;
}

int main(int argc, char **argv) {
  PlaceholderBindings bindings;

  // Verify/initialize command line parameters, and then loader initializes
  // the ExecutionEngine and Function.
  parseCommandLine(argc, argv);
  Loader loader;

  // Load the source and dest dictionaries.
  auto modelDir = loader.getModelOptDir();
  srcVocab.loadDictionaryFromFile(modelDir.str() + "/src_dictionary.txt");
  dstVocab.loadDictionaryFromFile(modelDir.str() + "/dst_dictionary.txt");

  // Encoded input sentence. Note that the batch size is 1 for inference
  // models.
  Tensor encoderInputs(ElemKind::Int32ITy,
                       {maxInputLenOpt, /* batchSize */ 1});

  // Inputs other than tokenized input. These should all be initialized to
  // zero. Note, the init_net already defines these tensors solely as
  // placeholders (with incorrect shapes/element types/data). Glow uses these
  // tensors in their place.
  Tensor attnWeights(ElemKind::FloatTy, {maxInputLenOpt});
  Tensor prevHyposIndices(ElemKind::Int32ITy, {beamSizeOpt});
  Tensor prevScores(ElemKind::FloatTy, {1});
  Tensor prevToken(ElemKind::Int32ITy, {1});

  DCHECK(!loader.getCaffe2NetDescFilename().empty())
      << "Only supporting Caffe2 currently.";

  constexpr char const *inputNames[5] = {"encoder_inputs", "attn_weights",
                                         "prev_hypos_indices", "prev_scores",
                                         "prev_token"};
  std::vector<TypeRef> inputTensors = {
      &encoderInputs.getType(), &attnWeights.getType(),
      &prevHyposIndices.getType(), &prevScores.getType(), &prevToken.getType()};

  Caffe2ModelLoader LD(loader.getCaffe2NetDescFilename().str(),
                       loader.getCaffe2NetWeightFilename().str(), inputNames,
                       inputTensors, *loader.getFunction());
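
  // Note: these five names must match inputs defined by the model's
  // predict_net; the Types passed alongside them are what Glow uses in place
  // of the incorrect placeholder shapes from the init_net (see above).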

  // Allocate tensors to back all inputs and outputs.
  bindings.allocate(loader.getModule()->getPlaceholders());

  // Get all bound tensors and zero them.
  for (auto &pair : bindings.pairs()) {
    pair.second.zero();
  }

  Placeholder *encoderInputsVar = llvm::cast<Placeholder>(
      EXIT_ON_ERR(LD.getNodeValueByName("encoder_inputs")));

  // Compile the model, and perform quantization/emit a bundle/dump debug info
  // if requested from command line.
  loader.compile(bindings);

  DCHECK(!emittingBundle()) << "Bundle mode has not been tested.";

  Placeholder *outputTokenBeamList =
      EXIT_ON_ERR(LD.getOutputByName("output_token_beam_list"));
  Placeholder *outputScoreBeamList =
      EXIT_ON_ERR(LD.getOutputByName("output_score_beam_list"));
  Placeholder *outputPrevIndexBeamList =
      EXIT_ON_ERR(LD.getOutputByName("output_prev_index_beam_list"));

  std::ifstream inFile, expectedResultFile;
  if (!inputSentencesFile.empty()) {
    inFile.open(inputSentencesFile, std::ifstream::in);
  }
  if (!expectedResultSentencesFile.empty()) {
    expectedResultFile.open(expectedResultSentencesFile, std::ifstream::in);
  }

  // We reuse the same output Tensors for every inference, so it's safe to get
  // them ahead of the inference loop.
  Tensor *outputTokenBeamListT = bindings.get(outputTokenBeamList);
  Tensor *outputScoreBeamListT = bindings.get(outputScoreBeamList);
  Tensor *outputPrevIndexBeamListT = bindings.get(outputPrevIndexBeamList);

  // Store copies of results of each inference for dumping after the inference
  // loop if requested.
  std::vector<Tensor> outputTokenBeamListResults;
  std::vector<Tensor> outputScoreBeamListResults;
  std::vector<Tensor> outputPrevIndexBeamListResults;

  int incorrectTranslationCount = 0;
  while (loadNextInputTranslationText(&encoderInputs, inFile)) {
    // Update the inputs.
    updateInputPlaceholders(bindings, {encoderInputsVar}, {&encoderInputs});

    // Run actual translation.
    loader.runInference(bindings);

    // Keep around copies of all results to dump after inference loop.
    if (dumpBinaryResults) {
      outputTokenBeamListResults.emplace_back(outputTokenBeamListT->clone());
      outputScoreBeamListResults.emplace_back(outputScoreBeamListT->clone());
      outputPrevIndexBeamListResults.emplace_back(
          outputPrevIndexBeamListT->clone());
    }

    // Process the outputs to determine the highest likelihood sentence, and
    // print out the decoded translation using the dest dictionary.
    if (!processAndPrintDecodedTranslation(
            outputTokenBeamListT, outputScoreBeamListT,
            outputPrevIndexBeamListT, expectedResultFile)) {
      incorrectTranslationCount += 1;
    }
  }

  if (dumpBinaryResults) {
    TensorSerializationOptions opts;
    opts.withType = false;
    auto dumpRes = [&](const std::string &name,
                       const std::vector<Tensor> &vecT) {
      std::ofstream fs;
      llvm::SmallString<64> path;
      auto tempFileRes = llvm::sys::fs::createTemporaryFile(name, "bin", path);
      CHECK_EQ(tempFileRes.value(), 0)
          << "Failed to create temp file to write into.";
      fs.open(path.data(), std::ios::binary);
      CHECK(fs.is_open()) << "Error opening file '" << path.data() << "'!";
      std::cout << "Dumping binary results of " << name << " to "
                << path.data() << std::endl;
      for (const Tensor &T : vecT) {
        dumpTensorToBinaryFile(T, fs, opts);
      }
      fs.close();
    };
    dumpRes("outputTokenBeamList", outputTokenBeamListResults);
    dumpRes("outputScoreBeamList", outputScoreBeamListResults);
    dumpRes("outputPrevIndexBeamList", outputPrevIndexBeamListResults);
  }
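
  // Each dump file concatenates the raw tensor data from all inferences in
  // order; no type/shape header is written (opts.withType is false above), so
  // a reader must already know each result's shape and element type.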

  // If profiling, generate and serialize the profiling infos now that we
  // have run inference to gather the profile.
  if (profilingGraph()) {
    loader.generateAndSerializeProfilingInfos(bindings);
  }

  return incorrectTranslationCount;
}