1 | /** |
2 | * Copyright (c) Glow Contributors. See CONTRIBUTORS file. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include "Loader.h" |
18 | |
19 | #include "glow/Base/TensorSerialization.h" |
20 | #include "glow/Importer/Caffe2ModelLoader.h" |
21 | |
22 | #include "llvm/Support/CommandLine.h" |
23 | #include "llvm/Support/FileSystem.h" |
24 | #include "llvm/Support/raw_ostream.h" |
25 | |
26 | #include <fstream> |
27 | #include <iostream> |
28 | #include <sstream> |
29 | |
30 | using namespace glow; |
31 | |
namespace {
/// Category under which all of this tool's command-line options are listed.
llvm::cl::OptionCategory textTranslatorCat("Text Translator Options");

/// Fixed token length of the encoded input sentence (including EOS and
/// padding); specified by the input model.
llvm::cl::opt<unsigned>
    maxInputLenOpt("max-input-len",
                   llvm::cl::desc("Maximum allowed length of the input "
                                  "sentence. Specified by the input model."),
                   llvm::cl::Optional, llvm::cl::init(10),
                   llvm::cl::cat(textTranslatorCat));

/// Fixed token length of the decoded output sentence; specified by the input
/// model.
llvm::cl::opt<unsigned>
    maxOutputLenOpt("max-output-len",
                    llvm::cl::desc("Maximum allowed length of the output "
                                   "sentence. Specified by the input model."),
                    llvm::cl::Optional, llvm::cl::init(14),
                    llvm::cl::cat(textTranslatorCat));

/// Number of hypotheses kept per step of the model's beam search; specified by
/// the input model.
llvm::cl::opt<unsigned> beamSizeOpt(
    "beam-size", llvm::cl::desc("Beam size used by the input model."),
    llvm::cl::Optional, llvm::cl::init(6), llvm::cl::cat(textTranslatorCat));

/// Exponent applied to sentence length when normalizing hypothesis scores in
/// getBestTranslation(); 0.0 disables length normalization.
llvm::cl::opt<double>
    lengthPenaltyOpt("length-penalty",
                     llvm::cl::desc("Length penalty to use when determining "
                                    "highest likelihood output sentence."),
                     llvm::cl::Optional, llvm::cl::init(0.0f),
                     llvm::cl::cat(textTranslatorCat));

/// Optional file of input sentences (one per line); if unset, sentences are
/// read interactively from stdin.
llvm::cl::opt<std::string> inputSentencesFile(
    "input-text-file",
    llvm::cl::desc(
        "Name of the file containing list of sentences (one per line)"),
    llvm::cl::value_desc("string_name"), llvm::cl::Optional,
    llvm::cl::cat(textTranslatorCat));

/// Optional file of expected translations, line-aligned with
/// -input-text-file; when set, each translation is checked against it.
llvm::cl::opt<std::string> expectedResultSentencesFile(
    "expected-output-text-file",
    llvm::cl::desc("Name of the file containing list of sentences (one per "
                   "line) corresponding to expected translations provided via "
                   "-input-text-file."),
    llvm::cl::value_desc("string_name"), llvm::cl::Optional,
    llvm::cl::cat(textTranslatorCat));

/// When set, raw result Tensors from every inference are serialized to temp
/// files after the inference loop.
llvm::cl::opt<bool> dumpBinaryResults(
    "dump-binary-results",
    llvm::cl::desc("Dump raw binary Tensor results after execution."),
    llvm::cl::init(false), llvm::cl::cat(textTranslatorCat));
} // namespace
80 | |
/// These should be kept in sync with pytorch_translate/vocab_constants.py
constexpr size_t reservedOffset = 100; // First non-special dictionary index.
constexpr size_t padIdx = 0;           // Padding token.
constexpr size_t eosIdx = 2;           // End-of-sentence token.
constexpr size_t unkIdx = 3;           // Unknown-word token.
86 | |
87 | /// Stores dictionary of a language. Contains mapping from word to index and |
88 | /// vice versa. |
89 | struct Dictionary { |
90 | private: |
91 | std::vector<std::string> index2word_; |
92 | std::unordered_map<std::string, size_t> word2index_; |
93 | |
94 | public: |
95 | /// Add a word from the \p line to the dictionary. |
96 | void addWord(llvm::StringRef line) { |
97 | // Lines generally should be formatted like "the 9876543", where the |
98 | // trailing number is not relevant for inference. |
99 | auto spaceIdx = line.find(" " ); |
100 | DCHECK(spaceIdx != llvm::StringRef::npos) |
101 | << "Unexpected format for dict file." ; |
102 | |
103 | auto word = std::string(line.take_front(spaceIdx)); |
104 | DCHECK_GT(word.size(), 0) << "Did not find word correctly." ; |
105 | |
106 | word2index_[word] = index2word_.size(); |
107 | index2word_.push_back(word); |
108 | } |
109 | |
110 | Dictionary() = default; |
111 | |
112 | /// Load a dictionary from text file \p filename, adding each word from each |
113 | /// line of the file. |
114 | void loadDictionaryFromFile(llvm::StringRef filename) { |
115 | std::ifstream file(filename.str()); |
116 | std::string word; |
117 | while (getline(file, word)) { |
118 | addWord(word); |
119 | } |
120 | } |
121 | |
122 | /// Get the index for the input \p word from the dictionary. |
123 | size_t getIdxFromWord(llvm::StringRef word) { |
124 | auto iter = word2index_.find(word.str()); |
125 | // If unknown word, return the index for unknown. |
126 | if (iter == word2index_.end()) { |
127 | return unkIdx; |
128 | } |
129 | return iter->second + reservedOffset; |
130 | } |
131 | |
132 | /// Get the word for the input \p index from the dictionary. |
133 | std::string getWordFromIdx(size_t idx) { |
134 | if (idx < reservedOffset) { |
135 | if (idx == eosIdx) { |
136 | return "<EOS>" ; |
137 | } |
138 | if (idx == padIdx) { |
139 | return "<PAD>" ; |
140 | } |
141 | return "<UNK>" ; |
142 | } |
143 | |
144 | return index2word_[idx - reservedOffset]; |
145 | } |
146 | }; |
147 | |
/// Source- and destination-language vocabularies; loaded in main() from the
/// model directory before inference begins.
Dictionary srcVocab, dstVocab;
149 | |
150 | /// Break the input \p sentence up by spaces, and then encode the words as |
151 | /// indices from the input dictionary, placing them in \p encoderInputs. Note |
152 | /// that the model expects sentences to be in reverse order. |
153 | static void encodeString(const llvm::StringRef sentence, |
154 | Tensor *encoderInputs) { |
155 | auto IH = encoderInputs->getHandle<int32_t>(); |
156 | |
157 | std::vector<int32_t> encodedWords; |
158 | encodedWords.reserve(maxInputLenOpt); |
159 | |
160 | // Get each word from the sentence and encode it. |
161 | std::istringstream iss(sentence.str()); |
162 | std::string word; |
163 | while (iss >> word) { |
164 | auto idx = srcVocab.getIdxFromWord(word); |
165 | encodedWords.push_back(idx); |
166 | } |
167 | encodedWords.push_back(eosIdx); |
168 | |
169 | CHECK_LE(encodedWords.size(), maxInputLenOpt) |
170 | << "Sentence length exceeds maxInputLen." ; |
171 | |
172 | // Pad the rest of the input. |
173 | while (encodedWords.size() != maxInputLenOpt) { |
174 | encodedWords.push_back(padIdx); |
175 | } |
176 | |
177 | // Note: the model expects the input sentence to be in reverse order. |
178 | dim_t i = 0; |
179 | for (auto it = encodedWords.rbegin(); it != encodedWords.rend(); it++, i++) { |
180 | // The batch size is 1 for inference models. |
181 | IH.at({i, /* batchSize */ 0}) = *it; |
182 | } |
183 | } |
184 | |
185 | /// Load a sentence from std::cin for processing, or from \p file if it is open, |
186 | /// and place the encoded inputs in \p encoderInputs. \returns false if the |
187 | /// passed in line was empty. |
188 | static bool loadNextInputTranslationText(Tensor *encoderInputs, |
189 | std::ifstream &file) { |
190 | std::string sentence; |
191 | if (file.is_open()) { |
192 | getline(file, sentence); |
193 | } else { |
194 | llvm::outs() << "Enter a sentence in English to translate to German: " ; |
195 | getline(std::cin, sentence); |
196 | } |
197 | |
198 | if (sentence.empty()) { |
199 | return false; |
200 | } |
201 | |
202 | encodeString(sentence, encoderInputs); |
203 | |
204 | return true; |
205 | } |
206 | |
/// Find and return a vector of the best translation given the outputs from the
/// model \p outputTokenBeamList, \p outputScoreBeamList, and \p
/// outputPrevIndexBeamList. A translation is made up of a vector of tokens
/// which must be converted back to words from via the destination dictionary.
static std::vector<dim_t> getBestTranslation(Tensor *outputTokenBeamList,
                                             Tensor *outputScoreBeamList,
                                             Tensor *outputPrevIndexBeamList) {
  // Get handles to all the outputs from the model run. All are indexed by
  // {lengthIndex, hypoIndex}, i.e. (step in output sentence, beam row).
  auto tokenBeamListH = outputTokenBeamList->getHandle<int32_t>();
  auto scoreBeamListH = outputScoreBeamList->getHandle<float>();
  auto prevIndexBeamListH = outputPrevIndexBeamList->getHandle<int32_t>();

  // This pair represents the ending position of a translation in the beam
  // search grid. The first index corresponds to the length (column index), the
  // second index corresponds to the position in the beam (row index).
  std::pair<dim_t, dim_t> bestPosition = std::make_pair(0, 0);
  // NOTE: bestScore holds the NEGATION of the best (highest) penalized score
  // seen so far; it starts at +max so that -bestScore is "worse than any
  // finite score" and the first finished hypothesis always wins.
  float bestScore = std::numeric_limits<float>::max();

  // Keep track of whether the current hypothesis of best translation has
  // already ended.
  std::vector<bool> prevHypoIsFinished(beamSizeOpt, false);
  std::vector<bool> currentHypoIsFinished(beamSizeOpt, false);
  for (dim_t lengthIndex = 0; lengthIndex < maxOutputLenOpt; ++lengthIndex) {
    for (dim_t hypoIndex = 0; hypoIndex < beamSizeOpt; ++hypoIndex) {
      // If the current hypothesis was already scored and compared to the best,
      // we can skip it and move onto the next one. "Finished" propagates along
      // the backpointer (prevIndex) chain from the previous step.
      dim_t prevIndex = prevIndexBeamListH.at({lengthIndex, hypoIndex});
      currentHypoIsFinished[hypoIndex] = prevHypoIsFinished[prevIndex];
      if (currentHypoIsFinished[hypoIndex]) {
        continue;
      }

      // If the current token is not the end of sentence, and we haven't reached
      // the max output length, then we cannot yet score/compare it, so keep
      // going until we reach the end.
      if (tokenBeamListH.at({lengthIndex, hypoIndex}) != eosIdx &&
          lengthIndex + 1 != maxOutputLenOpt) {
        continue;
      }

      // At this point we must have reached the end of a hypothesis sentence
      // which has not yet been scored and compared. Set this as finished as we
      // will now score and compare it against the current best score.
      currentHypoIsFinished[hypoIndex] = true;

      // Calculate the current score with length penalty (dividing by
      // length^lengthPenaltyOpt; a penalty of 0 leaves scores unchanged).
      float currScore = scoreBeamListH.at({lengthIndex, hypoIndex}) /
                        pow(lengthIndex + 1, lengthPenaltyOpt);

      // If this translation has a better score, replace the current one.
      // (Remember bestScore is negated, so -bestScore is the best real score.)
      if (currScore > -bestScore) {
        bestPosition = std::make_pair(lengthIndex, hypoIndex);
        bestScore = -currScore;
      }
    }

    // Moving onto the next length step, so swap current finished bools into
    // previous, and reset current to all false.
    prevHypoIsFinished.swap(currentHypoIsFinished);
    currentHypoIsFinished.assign(beamSizeOpt, false);
  }

  // Generate the best translation given the end state. Walk backward through
  // the grid, using the previous index beam list to find which beam row each
  // earlier token came from.
  std::vector<dim_t> output;
  dim_t lengthIndex = bestPosition.first;
  dim_t hypoIndex = bestPosition.second;
  while (lengthIndex > 0) {
    output.emplace_back(tokenBeamListH.at({lengthIndex, hypoIndex}));
    hypoIndex = prevIndexBeamListH.at({lengthIndex, hypoIndex});
    lengthIndex--;
  }

  // Reverse the output order of the translated sentence (it was collected
  // back-to-front above).
  std::reverse(output.begin(), output.end());

  // Find the EOS token and cut off the rest of the output.
  auto findEos = std::find(output.begin(), output.end(), eosIdx);
  auto findEosIndex = findEos - output.begin();
  output.resize(findEosIndex);

  return output;
}
290 | |
291 | /// Queries getBestTranslation() for the best translation via the outputs from |
292 | /// the model, \p outputTokenBeamList, \p outputScoreBeamList, and |
293 | /// \p outputPrevIndexBeamList. Then converts each of the tokens from the |
294 | /// returned best translation into words from the dest dictionary, and prints |
295 | /// it. \p expectedResultFile is a stream of expected results; if open the |
296 | /// translation is checked against the expected result, and will \returns |
297 | /// whether it does. Otherwise returns true. |
298 | static bool processAndPrintDecodedTranslation( |
299 | Tensor *outputTokenBeamList, Tensor *outputScoreBeamList, |
300 | Tensor *outputPrevIndexBeamList, std::ifstream &expectedResultFile) { |
301 | std::vector<dim_t> translationTokens = getBestTranslation( |
302 | outputTokenBeamList, outputScoreBeamList, outputPrevIndexBeamList); |
303 | |
304 | // Use the dest dictionary to convert tokens to words, and print it. |
305 | std::string result; |
306 | for (size_t i = 0; i < translationTokens.size(); i++) { |
307 | auto wordIdx = translationTokens[i]; |
308 | auto word = dstVocab.getWordFromIdx(wordIdx); |
309 | |
310 | // Check if the word has suffix "@@". This means the current word should be |
311 | // appended to the next word, so remove the "@@" and do not output a space. |
312 | auto wordLength = word.length(); |
313 | if (wordLength > 1 && word.substr(wordLength - 2) == "@@" ) { |
314 | word = word.substr(0, wordLength - 2); |
315 | } else if (i != translationTokens.size() - 1) { |
316 | word = word + " " ; |
317 | } |
318 | result += word; |
319 | } |
320 | llvm::outs() << result; |
321 | bool correctTranslation = true; |
322 | if (expectedResultFile.is_open()) { |
323 | std::string expectedResult; |
324 | CHECK(getline(expectedResultFile, expectedResult)) |
325 | << "Did not find expected translation." ; |
326 | correctTranslation = expectedResult == result; |
327 | if (!correctTranslation) { |
328 | llvm::outs() << "\n PREVIOUS TRANSLATION INCORRECT; EXPECTED: " |
329 | << expectedResult; |
330 | } |
331 | } |
332 | llvm::outs() << "\n\n" ; |
333 | return correctTranslation; |
334 | } |
335 | |
/// Entry point: loads the Caffe2 translation model and its dictionaries, then
/// repeatedly reads a sentence (from -input-text-file or stdin), runs
/// inference, and prints the decoded best translation. \returns the number of
/// translations that did not match -expected-output-text-file (0 when that
/// option is unset).
int main(int argc, char **argv) {
  PlaceholderBindings bindings;

  // Verify/initialize command line parameters, and then loader initializes
  // the ExecutionEngine and Function.
  parseCommandLine(argc, argv);
  Loader loader;

  // Load the source and dest dictionaries, expected to sit next to the model.
  auto modelDir = loader.getModelOptDir();
  srcVocab.loadDictionaryFromFile(modelDir.str() + "/src_dictionary.txt");
  dstVocab.loadDictionaryFromFile(modelDir.str() + "/dst_dictionary.txt");

  // Encoded input sentence. Note that the batch size is 1 for inference models.
  Tensor encoderInputs(ElemKind::Int32ITy, {maxInputLenOpt, /* batchSize */ 1});

  // Inputs other than tokenized input. These should all be initialized to zero.
  // Note, the init_net already defines these tensors solely as placeholders
  // (with incorrect shapes/elementtypes/data). Glow uses these tensors in their
  // place.
  Tensor attnWeights(ElemKind::FloatTy, {maxInputLenOpt});
  Tensor prevHyposIndices(ElemKind::Int32ITy, {beamSizeOpt});
  Tensor prevScores(ElemKind::FloatTy, {1});
  Tensor prevToken(ElemKind::Int32ITy, {1});

  DCHECK(!loader.getCaffe2NetDescFilename().empty())
      << "Only supporting Caffe2 currently.";

  // Input names/types must line up pairwise for the Caffe2 loader below.
  constexpr char const *inputNames[5] = {"encoder_inputs", "attn_weights",
                                         "prev_hypos_indices", "prev_scores",
                                         "prev_token"};
  std::vector<TypeRef> inputTensors = {
      &encoderInputs.getType(), &attnWeights.getType(),
      &prevHyposIndices.getType(), &prevScores.getType(), &prevToken.getType()};

  Caffe2ModelLoader LD(loader.getCaffe2NetDescFilename().str(),
                       loader.getCaffe2NetWeightFilename().str(), inputNames,
                       inputTensors, *loader.getFunction());

  // Allocate tensors to back all inputs and outputs.
  bindings.allocate(loader.getModule()->getPlaceholders());

  // Get all bound tensors and zero them.
  for (auto &pair : bindings.pairs()) {
    pair.second.zero();
  }

  Placeholder *encoderInputsVar = llvm::cast<Placeholder>(
      EXIT_ON_ERR(LD.getNodeValueByName("encoder_inputs")));

  // Compile the model, and perform quantization/emit a bundle/dump debug info
  // if requested from command line.
  loader.compile(bindings);

  DCHECK(!emittingBundle()) << "Bundle mode has not been tested.";

  // Output placeholders consumed by getBestTranslation() each iteration.
  Placeholder *outputTokenBeamList =
      EXIT_ON_ERR(LD.getOutputByName("output_token_beam_list"));
  Placeholder *outputScoreBeamList =
      EXIT_ON_ERR(LD.getOutputByName("output_score_beam_list"));
  Placeholder *outputPrevIndexBeamList =
      EXIT_ON_ERR(LD.getOutputByName("output_prev_index_beam_list"));

  // Optional input sentences and expected translations; when closed, input is
  // read interactively and no checking is performed.
  std::ifstream inFile, expectedResultFile;
  if (!inputSentencesFile.empty()) {
    inFile.open(inputSentencesFile, std::ifstream::in);
  }
  if (!expectedResultSentencesFile.empty()) {
    expectedResultFile.open(expectedResultSentencesFile, std::ifstream::in);
  }

  // We reuse the same output Tensors for every inference, so it's safe to get
  // them ahead of the inference loop.
  Tensor *outputTokenBeamListT = bindings.get(outputTokenBeamList);
  Tensor *outputScoreBeamListT = bindings.get(outputScoreBeamList);
  Tensor *outputPrevIndexBeamListT = bindings.get(outputPrevIndexBeamList);

  // Store copies of results of each inference for dumping after the inference
  // loop if requested.
  std::vector<Tensor> outputTokenBeamListResults;
  std::vector<Tensor> outputScoreBeamListResults;
  std::vector<Tensor> outputPrevIndexBeamListResults;

  int incorrectTranslationCount = 0;
  while (loadNextInputTranslationText(&encoderInputs, inFile)) {
    // Update the inputs.
    updateInputPlaceholders(bindings, {encoderInputsVar}, {&encoderInputs});

    // Run actual translation.
    loader.runInference(bindings);

    // Keep around copies of all results to dump after inference loop. Clones
    // are needed because the bound output Tensors are overwritten each run.
    if (dumpBinaryResults) {
      outputTokenBeamListResults.emplace_back(outputTokenBeamListT->clone());
      outputScoreBeamListResults.emplace_back(outputScoreBeamListT->clone());
      outputPrevIndexBeamListResults.emplace_back(
          outputPrevIndexBeamListT->clone());
    }

    // Process the outputs to determine the highest likelihood sentence, and
    // print out the decoded translation using the dest dictionary.
    if (!processAndPrintDecodedTranslation(
            outputTokenBeamListT, outputScoreBeamListT,
            outputPrevIndexBeamListT, expectedResultFile)) {
      incorrectTranslationCount += 1;
    }
  }

  // Serialize all collected result Tensors, one temp file per output name.
  if (dumpBinaryResults) {
    TensorSerializationOptions opts;
    opts.withType = false;
    auto dumpRes = [&](const std::string &name,
                       const std::vector<Tensor> &vecT) {
      std::ofstream fs;
      llvm::SmallString<64> path;
      auto tempFileRes = llvm::sys::fs::createTemporaryFile(name, "bin", path);
      CHECK_EQ(tempFileRes.value(), 0)
          << "Failed to create temp file to write into.";
      fs.open(path.data(), std::ios::binary);
      CHECK(fs.is_open()) << "Error opening file '" << path.data() << "'!";
      std::cout << "Dumping binary results of " << name << " to " << path.data()
                << std::endl;
      for (const Tensor &T : vecT) {
        dumpTensorToBinaryFile(T, fs, opts);
      }
      fs.close();
    };
    dumpRes("outputTokenBeamList", outputTokenBeamListResults);
    dumpRes("outputScoreBeamList", outputScoreBeamListResults);
    dumpRes("outputPrevIndexBeamList", outputPrevIndexBeamListResults);
  }

  // If profiling, generate and serialize the profiling infos now that we
  // have run inference to gather the profile.
  if (profilingGraph()) {
    loader.generateAndSerializeProfilingInfos(bindings);
  }

  // Exit code doubles as the count of mismatched translations.
  return incorrectTranslationCount;
}
476 | |