1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file token.h |
22 | * \brief The definition of tokens for the TVM parser. |
23 | */ |
24 | |
25 | #ifndef TVM_RELAY_PARSER_TOKEN_H_ |
26 | #define TVM_RELAY_PARSER_TOKEN_H_ |
27 | |
28 | #include <tvm/ir/expr.h> |
29 | #include <tvm/ir/source_map.h> |
30 | #include <tvm/runtime/object.h> |
31 | |
32 | #include <fstream> |
33 | #include <string> |
34 | #include <utility> |
35 | |
36 | namespace tvm { |
37 | namespace relay { |
38 | |
39 | using namespace runtime; |
40 | |
/*!
 * \brief The kinds of token handled by the parser.
 *
 * ToString() gives the enumerator name of each kind and Pretty() gives the
 * human-readable spelling used when describing a token.
 *
 * The four comment-related enumerators come first, in the same order in which
 * ToString() and Pretty() enumerate them.
 */
enum class TokenType {
  kCommentStart,
  kCommentEnd,
  kLineComment,
  kComment,
  kWhitespace,
  kNewline,
  kStringLiteral,
  kIdentifier,
  kLocal,
  kGlobal,
  kOp,
  kGraph,
  kOpenParen,
  kCloseParen,
  kAtSymbol,
  kPercent,
  kComma,
  kPeriod,
  kEqual,
  kSemicolon,
  kColon,
  kInteger,
  kFloat,
  kDivision,
  kBoolean,
  kPlus,
  kStar,
  kMinus,
  kRAngle,
  kLAngle,
  kRCurly,
  kLCurly,
  kRSquare,
  kLSquare,
  kBang,
  kAt,
  kQuestion,
  kIf,
  kElse,
  kUnderscore,
  kLet,
  kFn,
  kDefn,
  kTypeDef,
  kExtern,
  kMatch,
  kPartialMatch,
  kMetadata,
  kMetaReference,
  kFreeVar,
  kRef,
  kRefRead,
  kRefWrite,
  kVersion,
  kUnknown,
  kEndOfFile,
  kNull,
};
100 | |
101 | inline std::string ToString(const TokenType& token_type) { |
102 | switch (token_type) { |
103 | case TokenType::kCommentStart: |
104 | return "CommentStart" ; |
105 | case TokenType::kCommentEnd: |
106 | return "CommentEnd" ; |
107 | case TokenType::kLineComment: |
108 | return "LineComment" ; |
109 | case TokenType::kComment: |
110 | return "Comment" ; |
111 | case TokenType::kWhitespace: |
112 | return "WhiteSpace" ; |
113 | case TokenType::kNewline: |
114 | return "Newline" ; |
115 | case TokenType::kStringLiteral: |
116 | return "StringLiteral" ; |
117 | case TokenType::kIdentifier: |
118 | return "Identifier" ; |
119 | case TokenType::kLocal: |
120 | return "Local" ; |
121 | case TokenType::kGlobal: |
122 | return "Global" ; |
123 | case TokenType::kGraph: |
124 | return "Graph" ; |
125 | case TokenType::kOp: |
126 | return "Op" ; |
127 | case TokenType::kOpenParen: |
128 | return "OpenParen" ; |
129 | case TokenType::kCloseParen: |
130 | return "CloseParen" ; |
131 | case TokenType::kAtSymbol: |
132 | return "AtSymbol" ; |
133 | case TokenType::kPercent: |
134 | return "Percent" ; |
135 | case TokenType::kComma: |
136 | return "Comma" ; |
137 | case TokenType::kColon: |
138 | return "Colon" ; |
139 | case TokenType::kSemicolon: |
140 | return "Semicolon" ; |
141 | case TokenType::kPeriod: |
142 | return "Period" ; |
143 | case TokenType::kEqual: |
144 | return "Equal" ; |
145 | case TokenType::kInteger: |
146 | return "Integer" ; |
147 | case TokenType::kFloat: |
148 | return "Float" ; |
149 | case TokenType::kPlus: |
150 | return "Plus" ; |
151 | case TokenType::kStar: |
152 | return "Star" ; |
153 | case TokenType::kMinus: |
154 | return "Minus" ; |
155 | case TokenType::kDivision: |
156 | return "Division" ; |
157 | case TokenType::kRAngle: |
158 | return "RAngle" ; |
159 | case TokenType::kLAngle: |
160 | return "LAngle" ; |
161 | case TokenType::kRCurly: |
162 | return "RCurly" ; |
163 | case TokenType::kLCurly: |
164 | return "LCurly" ; |
165 | case TokenType::kRSquare: |
166 | return "RSquare" ; |
167 | case TokenType::kLSquare: |
168 | return "LSquare" ; |
169 | case TokenType::kBang: |
170 | return "Bang" ; |
171 | case TokenType::kUnderscore: |
172 | return "Underscore" ; |
173 | case TokenType::kAt: |
174 | return "At" ; |
175 | case TokenType::kLet: |
176 | return "Let" ; |
177 | case TokenType::kIf: |
178 | return "If" ; |
179 | case TokenType::kElse: |
180 | return "Else" ; |
181 | case TokenType::kFn: |
182 | return "Fn" ; |
183 | case TokenType::kDefn: |
184 | return "Defn" ; |
185 | case TokenType::kTypeDef: |
186 | return "TypeDef" ; |
187 | case TokenType::kExtern: |
188 | return "Extern" ; |
189 | case TokenType::kMatch: |
190 | return "Match" ; |
191 | case TokenType::kPartialMatch: |
192 | return "PartialMatch" ; |
193 | case TokenType::kQuestion: |
194 | return "Question" ; |
195 | case TokenType::kBoolean: |
196 | return "Boolean" ; |
197 | case TokenType::kMetadata: |
198 | return "Metadata" ; |
199 | case TokenType::kMetaReference: |
200 | return "MetaReference" ; |
201 | case TokenType::kFreeVar: |
202 | return "FreeVar" ; |
203 | case TokenType::kVersion: |
204 | return "Version" ; |
205 | case TokenType::kRef: |
206 | return "Ref" ; |
207 | case TokenType::kRefRead: |
208 | return "RefRead" ; |
209 | case TokenType::kRefWrite: |
210 | return "RefWrite" ; |
211 | case TokenType::kUnknown: |
212 | return "Unknown" ; |
213 | case TokenType::kEndOfFile: |
214 | return "EndOfFile" ; |
215 | case TokenType::kNull: |
216 | return "Null" ; |
217 | // Older compilers warn even though the above code is exhaustive. |
218 | default: |
219 | LOG(FATAL) << "unreachable code" ; |
220 | } |
221 | } |
222 | |
223 | inline std::string Pretty(const TokenType& token_type) { |
224 | switch (token_type) { |
225 | case TokenType::kCommentStart: |
226 | return "`/*`" ; |
227 | case TokenType::kCommentEnd: |
228 | return "`*/`" ; |
229 | case TokenType::kLineComment: |
230 | return "`//`" ; |
231 | case TokenType::kComment: |
232 | return "comment" ; |
233 | case TokenType::kWhitespace: |
234 | return "whitespace" ; |
235 | case TokenType::kNewline: |
236 | return "newline" ; |
237 | case TokenType::kStringLiteral: |
238 | return "string literal" ; |
239 | case TokenType::kIdentifier: |
240 | return "identifier" ; |
241 | case TokenType::kLocal: |
242 | return "local variable" ; |
243 | case TokenType::kGlobal: |
244 | return "global variable" ; |
245 | case TokenType::kGraph: |
246 | return "graph variable" ; |
247 | case TokenType::kOp: |
248 | return "operator" ; |
249 | case TokenType::kOpenParen: |
250 | return "`(`" ; |
251 | case TokenType::kCloseParen: |
252 | return "`)`" ; |
253 | case TokenType::kAtSymbol: |
254 | return "`@`" ; |
255 | case TokenType::kPercent: |
256 | return "`%`" ; |
257 | case TokenType::kComma: |
258 | return "`,`" ; |
259 | case TokenType::kColon: |
260 | return "`:`" ; |
261 | case TokenType::kSemicolon: |
262 | return "`;`" ; |
263 | case TokenType::kPeriod: |
264 | return "`.`" ; |
265 | case TokenType::kEqual: |
266 | return "`=`" ; |
267 | case TokenType::kInteger: |
268 | return "integer" ; |
269 | case TokenType::kFloat: |
270 | return "float" ; |
271 | case TokenType::kPlus: |
272 | return "`+`" ; |
273 | case TokenType::kStar: |
274 | return "`*`" ; |
275 | case TokenType::kMinus: |
276 | return "`-`" ; |
277 | case TokenType::kDivision: |
278 | return "`/`" ; |
279 | case TokenType::kRAngle: |
280 | return "`<`" ; |
281 | case TokenType::kLAngle: |
282 | return "`>`" ; |
283 | case TokenType::kRCurly: |
284 | return "`}`" ; |
285 | case TokenType::kLCurly: |
286 | return "`{`" ; |
287 | case TokenType::kRSquare: |
288 | return "`]`" ; |
289 | case TokenType::kLSquare: |
290 | return "`[`" ; |
291 | case TokenType::kBang: |
292 | return "`!`" ; |
293 | case TokenType::kUnderscore: |
294 | return "`_`" ; |
295 | case TokenType::kAt: |
296 | return "`@`" ; |
297 | case TokenType::kLet: |
298 | return "`let`" ; |
299 | case TokenType::kIf: |
300 | return "`if`" ; |
301 | case TokenType::kElse: |
302 | return "`else`" ; |
303 | case TokenType::kFn: |
304 | return "`fn`" ; |
305 | case TokenType::kDefn: |
306 | return "`def`" ; |
307 | case TokenType::kTypeDef: |
308 | return "`type`" ; |
309 | case TokenType::kExtern: |
310 | return "`extern`" ; |
311 | case TokenType::kBoolean: |
312 | return "boolean" ; |
313 | case TokenType::kMetadata: |
314 | return "metadata section" ; |
315 | case TokenType::kMetaReference: |
316 | return "`meta`" ; |
317 | case TokenType::kFreeVar: |
318 | return "`free_var`" ; |
319 | case TokenType::kMatch: |
320 | return "`match`" ; |
321 | case TokenType::kPartialMatch: |
322 | return "`match?`" ; |
323 | case TokenType::kQuestion: |
324 | return "`?`" ; |
325 | case TokenType::kRef: |
326 | return "`ref`" ; |
327 | case TokenType::kRefRead: |
328 | return "`ref_read`" ; |
329 | case TokenType::kRefWrite: |
330 | return "`ref_write`" ; |
331 | case TokenType::kUnknown: |
332 | return "unknown" ; |
333 | case TokenType::kEndOfFile: |
334 | return "end of file" ; |
335 | case TokenType::kNull: |
336 | return "null" ; |
337 | case TokenType::kVersion: |
338 | return "version attribute" ; |
339 | // Older compilers warn even though the above code is exhaustive. |
340 | default: |
341 | LOG(FATAL) << "unreachable code" ; |
342 | } |
343 | } |
344 | |
345 | class Token; |
346 | |
347 | class TokenNode : public Object { |
348 | public: |
349 | Span span; |
350 | TokenType token_type; |
351 | mutable runtime::ObjectRef data; |
352 | |
353 | void VisitAttrs(AttrVisitor* v) {} |
354 | |
355 | static constexpr const char* _type_key = "parser.Token" ; |
356 | TVM_DECLARE_FINAL_OBJECT_INFO(TokenNode, Object); |
357 | }; |
358 | |
359 | TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable) |
360 | .set_dispatch<TokenNode>([](const ObjectRef& ref, ReprPrinter* p) { |
361 | auto* node = static_cast<const TokenNode*>(ref.get()); |
362 | p->stream << "Token(span=" << node->span << ", token_type=" << ToString(node->token_type) |
363 | << ", data=" << node->data << ")" ; |
364 | }); |
365 | |
366 | TVM_REGISTER_NODE_TYPE(TokenNode); |
367 | |
368 | class Token : public ObjectRef { |
369 | public: |
370 | TVM_DLL explicit Token(Span span, TokenType token_type, ObjectRef data = ObjectRef()); |
371 | |
372 | static Token Null(); |
373 | int64_t ToNumber() const; |
374 | std::string ToString() const; |
375 | Map<String, Array<ObjectRef>> ToMetadata() const; |
376 | TVM_DEFINE_OBJECT_REF_METHODS(Token, ObjectRef, TokenNode); |
377 | }; |
378 | |
379 | inline Token::Token(Span span, TokenType token_type, ObjectRef data) { |
380 | ObjectPtr<TokenNode> n = make_object<TokenNode>(); |
381 | n->span = span; |
382 | n->token_type = token_type; |
383 | n->data = data; |
384 | data_ = std::move(n); |
385 | } |
386 | |
387 | inline Token Token::Null() { return Token(Span(SourceName(), 0, 0, 0, 0), TokenType::kNull); } |
388 | |
389 | inline int64_t Token::ToNumber() const { |
390 | return Downcast<tvm::Integer>(this->operator->()->data).IntValue(); |
391 | } |
392 | |
393 | inline std::string Token::ToString() const { |
394 | return Downcast<tvm::String>(this->operator->()->data); |
395 | } |
396 | |
397 | inline Map<String, Array<ObjectRef>> Token::ToMetadata() const { |
398 | ObjectRef data = this->operator->()->data; |
399 | if (data.defined()) { |
400 | return Downcast<Map<String, Array<ObjectRef>>>(data); |
401 | } else { |
402 | return Map<String, Array<ObjectRef>>({}); |
403 | } |
404 | } |
405 | |
406 | } // namespace relay |
407 | } // namespace tvm |
408 | #endif // TVM_RELAY_PARSER_TOKEN_H_ |
409 | |