1 | /* |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, |
13 | * software distributed under the License is distributed on an |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
15 | * KIND, either express or implied. See the License for the |
16 | * specific language governing permissions and limitations |
17 | * under the License. |
18 | */ |
19 | |
20 | /*! |
21 | * \file token.h |
22 | * \brief The definition of tokens for the TVM parser. |
23 | */ |
24 | |
25 | #ifndef TVM_PARSER_TOKEN_H_ |
26 | #define TVM_PARSER_TOKEN_H_ |
27 | |
28 | #include <tvm/ir/span.h> |
29 | #include <tvm/runtime/object.h> |
30 | |
31 | #include <fstream> |
32 | #include <string> |
33 | #include <utility> |
34 | |
35 | namespace tvm { |
36 | namespace parser { |
37 | |
38 | using namespace runtime; |
39 | |
/*!
 * \brief The kinds of tokens known to the parser.
 *
 * The enumerators are unscoped-value compatible only through explicit casts
 * (this is an `enum class`). The first four enumerators are the comment
 * related kinds; they are listed in the same order in which ToString and
 * Pretty switch over them.
 */
enum class TokenType {
  kCommentStart,
  kCommentEnd,
  kLineComment,
  kComment,
  kWhitespace,
  kNewline,
  kStringLiteral,
  kIdentifier,
  kLocal,
  kGlobal,
  kOp,
  kGraph,
  kOpenParen,
  kCloseParen,
  kAtSymbol,
  kPercent,
  kComma,
  kPeriod,
  kEqual,
  kSemicolon,
  kColon,
  kInteger,
  kFloat,
  kDivision,
  kBoolean,
  kPlus,
  kStar,
  kMinus,
  kRAngle,
  kLAngle,
  kRCurly,
  kLCurly,
  kRSquare,
  kLSquare,
  kBang,
  kAt,
  kQuestion,
  kIf,
  kElse,
  kUnderscore,
  kLet,
  kFn,
  kDefn,
  kTypeDef,
  kExtern,
  kMatch,
  kPartialMatch,
  kMetadata,
  kMetaReference,
  kFreeVar,
  kRef,
  kRefRead,
  kRefWrite,
  kVersion,
  kUnknown,
  kEndOfFile,
  kNull,
};
99 | |
100 | std::string ToString(const TokenType& token_type) { |
101 | switch (token_type) { |
102 | case TokenType::kCommentStart: |
103 | return "CommentStart" ; |
104 | case TokenType::kCommentEnd: |
105 | return "CommentEnd" ; |
106 | case TokenType::kLineComment: |
107 | return "LineComment" ; |
108 | case TokenType::kComment: |
109 | return "Comment" ; |
110 | case TokenType::kWhitespace: |
111 | return "WhiteSpace" ; |
112 | case TokenType::kNewline: |
113 | return "Newline" ; |
114 | case TokenType::kStringLiteral: |
115 | return "StringLiteral" ; |
116 | case TokenType::kIdentifier: |
117 | return "Identifier" ; |
118 | case TokenType::kLocal: |
119 | return "Local" ; |
120 | case TokenType::kGlobal: |
121 | return "Global" ; |
122 | case TokenType::kGraph: |
123 | return "Graph" ; |
124 | case TokenType::kOp: |
125 | return "Op" ; |
126 | case TokenType::kOpenParen: |
127 | return "OpenParen" ; |
128 | case TokenType::kCloseParen: |
129 | return "CloseParen" ; |
130 | case TokenType::kAtSymbol: |
131 | return "AtSymbol" ; |
132 | case TokenType::kPercent: |
133 | return "Percent" ; |
134 | case TokenType::kComma: |
135 | return "Comma" ; |
136 | case TokenType::kColon: |
137 | return "Colon" ; |
138 | case TokenType::kSemicolon: |
139 | return "Semicolon" ; |
140 | case TokenType::kPeriod: |
141 | return "Period" ; |
142 | case TokenType::kEqual: |
143 | return "Equal" ; |
144 | case TokenType::kInteger: |
145 | return "Integer" ; |
146 | case TokenType::kFloat: |
147 | return "Float" ; |
148 | case TokenType::kPlus: |
149 | return "Plus" ; |
150 | case TokenType::kStar: |
151 | return "Star" ; |
152 | case TokenType::kMinus: |
153 | return "Minus" ; |
154 | case TokenType::kDivision: |
155 | return "Division" ; |
156 | case TokenType::kRAngle: |
157 | return "RAngle" ; |
158 | case TokenType::kLAngle: |
159 | return "LAngle" ; |
160 | case TokenType::kRCurly: |
161 | return "RCurly" ; |
162 | case TokenType::kLCurly: |
163 | return "LCurly" ; |
164 | case TokenType::kRSquare: |
165 | return "RSquare" ; |
166 | case TokenType::kLSquare: |
167 | return "LSquare" ; |
168 | case TokenType::kBang: |
169 | return "Bang" ; |
170 | case TokenType::kUnderscore: |
171 | return "Underscore" ; |
172 | case TokenType::kAt: |
173 | return "At" ; |
174 | case TokenType::kLet: |
175 | return "Let" ; |
176 | case TokenType::kIf: |
177 | return "If" ; |
178 | case TokenType::kElse: |
179 | return "Else" ; |
180 | case TokenType::kFn: |
181 | return "Fn" ; |
182 | case TokenType::kDefn: |
183 | return "Defn" ; |
184 | case TokenType::kTypeDef: |
185 | return "TypeDef" ; |
186 | case TokenType::kExtern: |
187 | return "Extern" ; |
188 | case TokenType::kMatch: |
189 | return "Match" ; |
190 | case TokenType::kPartialMatch: |
191 | return "PartialMatch" ; |
192 | case TokenType::kQuestion: |
193 | return "Question" ; |
194 | case TokenType::kBoolean: |
195 | return "Boolean" ; |
196 | case TokenType::kMetadata: |
197 | return "Metadata" ; |
198 | case TokenType::kMetaReference: |
199 | return "MetaReference" ; |
200 | case TokenType::kFreeVar: |
201 | return "FreeVar" ; |
202 | case TokenType::kVersion: |
203 | return "Version" ; |
204 | case TokenType::kRef: |
205 | return "Ref" ; |
206 | case TokenType::kRefRead: |
207 | return "RefRead" ; |
208 | case TokenType::kRefWrite: |
209 | return "RefWrite" ; |
210 | case TokenType::kUnknown: |
211 | return "Unknown" ; |
212 | case TokenType::kEndOfFile: |
213 | return "EndOfFile" ; |
214 | case TokenType::kNull: |
215 | return "Null" ; |
216 | // Older compilers warn even though the above code is exhaustive. |
217 | default: |
218 | LOG(FATAL) << "unreachable code" ; |
219 | } |
220 | } |
221 | |
222 | std::string Pretty(const TokenType& token_type) { |
223 | switch (token_type) { |
224 | case TokenType::kCommentStart: |
225 | return "`/*`" ; |
226 | case TokenType::kCommentEnd: |
227 | return "`*/`" ; |
228 | case TokenType::kLineComment: |
229 | return "`//`" ; |
230 | case TokenType::kComment: |
231 | return "comment" ; |
232 | case TokenType::kWhitespace: |
233 | return "whitespace" ; |
234 | case TokenType::kNewline: |
235 | return "newline" ; |
236 | case TokenType::kStringLiteral: |
237 | return "string literal" ; |
238 | case TokenType::kIdentifier: |
239 | return "identifier" ; |
240 | case TokenType::kLocal: |
241 | return "local variable" ; |
242 | case TokenType::kGlobal: |
243 | return "global variable" ; |
244 | case TokenType::kGraph: |
245 | return "graph variable" ; |
246 | case TokenType::kOp: |
247 | return "operator" ; |
248 | case TokenType::kOpenParen: |
249 | return "`(`" ; |
250 | case TokenType::kCloseParen: |
251 | return "`)`" ; |
252 | case TokenType::kAtSymbol: |
253 | return "`@`" ; |
254 | case TokenType::kPercent: |
255 | return "`%`" ; |
256 | case TokenType::kComma: |
257 | return "`,`" ; |
258 | case TokenType::kColon: |
259 | return "`:`" ; |
260 | case TokenType::kSemicolon: |
261 | return "`;`" ; |
262 | case TokenType::kPeriod: |
263 | return "`.`" ; |
264 | case TokenType::kEqual: |
265 | return "`=`" ; |
266 | case TokenType::kInteger: |
267 | return "integer" ; |
268 | case TokenType::kFloat: |
269 | return "float" ; |
270 | case TokenType::kPlus: |
271 | return "`+`" ; |
272 | case TokenType::kStar: |
273 | return "`*`" ; |
274 | case TokenType::kMinus: |
275 | return "`-`" ; |
276 | case TokenType::kDivision: |
277 | return "`/`" ; |
278 | case TokenType::kRAngle: |
279 | return "`<`" ; |
280 | case TokenType::kLAngle: |
281 | return "`>`" ; |
282 | case TokenType::kRCurly: |
283 | return "`}`" ; |
284 | case TokenType::kLCurly: |
285 | return "`{`" ; |
286 | case TokenType::kRSquare: |
287 | return "`]`" ; |
288 | case TokenType::kLSquare: |
289 | return "`[`" ; |
290 | case TokenType::kBang: |
291 | return "`!`" ; |
292 | case TokenType::kUnderscore: |
293 | return "`_`" ; |
294 | case TokenType::kAt: |
295 | return "`@`" ; |
296 | case TokenType::kLet: |
297 | return "`let`" ; |
298 | case TokenType::kIf: |
299 | return "`if`" ; |
300 | case TokenType::kElse: |
301 | return "`else`" ; |
302 | case TokenType::kFn: |
303 | return "`fn`" ; |
304 | case TokenType::kDefn: |
305 | return "`def`" ; |
306 | case TokenType::kTypeDef: |
307 | return "`type`" ; |
308 | case TokenType::kExtern: |
309 | return "`extern`" ; |
310 | case TokenType::kBoolean: |
311 | return "boolean" ; |
312 | case TokenType::kMetadata: |
313 | return "metadata section" ; |
314 | case TokenType::kMetaReference: |
315 | return "`meta`" ; |
316 | case TokenType::kFreeVar: |
317 | return "`free_var`" ; |
318 | case TokenType::kMatch: |
319 | return "`match`" ; |
320 | case TokenType::kPartialMatch: |
321 | return "`match?`" ; |
322 | case TokenType::kQuestion: |
323 | return "`?`" ; |
324 | case TokenType::kRef: |
325 | return "`ref`" ; |
326 | case TokenType::kRefRead: |
327 | return "`ref_read`" ; |
328 | case TokenType::kRefWrite: |
329 | return "`ref_write`" ; |
330 | case TokenType::kUnknown: |
331 | return "unknown" ; |
332 | case TokenType::kEndOfFile: |
333 | return "end of file" ; |
334 | case TokenType::kNull: |
335 | return "null" ; |
336 | case TokenType::kVersion: |
337 | return "version attribute" ; |
338 | // Older compilers warn even though the above code is exhaustive. |
339 | default: |
340 | LOG(FATAL) << "unreachable code" ; |
341 | } |
342 | } |
343 | |
344 | class Token; |
345 | |
346 | class TokenNode : public Object { |
347 | public: |
348 | Span span; |
349 | TokenType token_type; |
350 | mutable runtime::ObjectRef data; |
351 | |
352 | void VisitAttrs(AttrVisitor* v) {} |
353 | |
354 | static constexpr const char* _type_key = "parser.Token" ; |
355 | TVM_DECLARE_FINAL_OBJECT_INFO(TokenNode, Object); |
356 | }; |
357 | |
358 | TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable) |
359 | .set_dispatch<TokenNode>([](const ObjectRef& ref, ReprPrinter* p) { |
360 | auto* node = static_cast<const TokenNode*>(ref.get()); |
361 | p->stream << "Token(span=" << node->span << ", token_type=" << ToString(node->token_type) |
362 | << ", data=" << node->data << ")" ; |
363 | }); |
364 | |
365 | TVM_REGISTER_NODE_TYPE(TokenNode); |
366 | |
367 | class Token : public ObjectRef { |
368 | public: |
369 | TVM_DLL explicit Token(Span span, TokenType token_type, ObjectRef data = ObjectRef()); |
370 | |
371 | static Token Null(); |
372 | int64_t ToNumber() const; |
373 | std::string ToString() const; |
374 | Map<String, Array<ObjectRef>> ToMetadata() const; |
375 | TVM_DEFINE_OBJECT_REF_METHODS(Token, ObjectRef, TokenNode); |
376 | }; |
377 | |
378 | Token::Token(Span span, TokenType token_type, ObjectRef data) { |
379 | ObjectPtr<TokenNode> n = make_object<TokenNode>(); |
380 | n->span = span; |
381 | n->token_type = token_type; |
382 | n->data = data; |
383 | data_ = std::move(n); |
384 | } |
385 | |
386 | Token Token::Null() { return Token(Span(SourceName(), 0, 0, 0, 0), TokenType::kNull); } |
387 | |
388 | int64_t Token::ToNumber() const { |
389 | return Downcast<tvm::Integer>(this->operator->()->data).IntValue(); |
390 | } |
391 | |
392 | std::string Token::ToString() const { return Downcast<tvm::String>(this->operator->()->data); } |
393 | |
394 | Map<String, Array<ObjectRef>> Token::ToMetadata() const { |
395 | ObjectRef data = this->operator->()->data; |
396 | if (data.defined()) { |
397 | return Downcast<Map<String, Array<ObjectRef>>>(data); |
398 | } else { |
399 | return Map<String, Array<ObjectRef>>({}); |
400 | } |
401 | } |
402 | |
403 | } // namespace parser |
404 | } // namespace tvm |
405 | #endif // TVM_PARSER_TOKEN_H_ |
406 | |