/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
19
/*!
 * \file token.h
 * \brief The definition of tokens for the TVM parser.
 */
24
25#ifndef TVM_PARSER_TOKEN_H_
26#define TVM_PARSER_TOKEN_H_
27
28#include <tvm/ir/span.h>
29#include <tvm/runtime/object.h>
30
31#include <fstream>
32#include <string>
33#include <utility>
34
35namespace tvm {
36namespace parser {
37
38using namespace runtime;
39
/*!
 * \brief The kinds of token used by the TVM parser.
 *
 * Enumerators carry their implicit values (starting at 0), so the
 * declaration order below is significant and must not be rearranged.
 * The trailing notes repeat the human-readable rendering that Pretty()
 * produces for each kind.
 */
enum class TokenType {
  kCommentStart,   //!< `/*`
  kCommentEnd,     //!< `*/`
  kLineComment,    //!< `//`
  kComment,        //!< comment
  kWhitespace,     //!< whitespace
  kNewline,        //!< newline
  kStringLiteral,  //!< string literal
  kIdentifier,     //!< identifier
  kLocal,          //!< local variable
  kGlobal,         //!< global variable
  kOp,             //!< operator
  kGraph,          //!< graph variable
  kOpenParen,      //!< `(`
  kCloseParen,     //!< `)`
  kAtSymbol,       //!< `@`
  kPercent,        //!< `%`
  kComma,          //!< `,`
  kPeriod,         //!< `.`
  kEqual,          //!< `=`
  kSemicolon,      //!< `;`
  kColon,          //!< `:`
  kInteger,        //!< integer
  kFloat,          //!< float
  kDivision,       //!< `/`
  kBoolean,        //!< boolean
  kPlus,           //!< `+`
  kStar,           //!< `*`
  kMinus,          //!< `-`
  kRAngle,         //!< angle bracket
  kLAngle,         //!< angle bracket
  kRCurly,         //!< `}`
  kLCurly,         //!< `{`
  kRSquare,        //!< `]`
  kLSquare,        //!< `[`
  kBang,           //!< `!`
  kAt,             //!< `@`
  kQuestion,       //!< `?`
  kIf,             //!< `if`
  kElse,           //!< `else`
  kUnderscore,     //!< `_`
  kLet,            //!< `let`
  kFn,             //!< `fn`
  kDefn,           //!< `def`
  kTypeDef,        //!< `type`
  kExtern,         //!< `extern`
  kMatch,          //!< `match`
  kPartialMatch,   //!< `match?`
  kMetadata,       //!< metadata section
  kMetaReference,  //!< `meta`
  kFreeVar,        //!< `free_var`
  kRef,            //!< `ref`
  kRefRead,        //!< `ref_read`
  kRefWrite,       //!< `ref_write`
  kVersion,        //!< version attribute
  kUnknown,        //!< unknown
  kEndOfFile,      //!< end of file
  kNull,           //!< null
};
99
100std::string ToString(const TokenType& token_type) {
101 switch (token_type) {
102 case TokenType::kCommentStart:
103 return "CommentStart";
104 case TokenType::kCommentEnd:
105 return "CommentEnd";
106 case TokenType::kLineComment:
107 return "LineComment";
108 case TokenType::kComment:
109 return "Comment";
110 case TokenType::kWhitespace:
111 return "WhiteSpace";
112 case TokenType::kNewline:
113 return "Newline";
114 case TokenType::kStringLiteral:
115 return "StringLiteral";
116 case TokenType::kIdentifier:
117 return "Identifier";
118 case TokenType::kLocal:
119 return "Local";
120 case TokenType::kGlobal:
121 return "Global";
122 case TokenType::kGraph:
123 return "Graph";
124 case TokenType::kOp:
125 return "Op";
126 case TokenType::kOpenParen:
127 return "OpenParen";
128 case TokenType::kCloseParen:
129 return "CloseParen";
130 case TokenType::kAtSymbol:
131 return "AtSymbol";
132 case TokenType::kPercent:
133 return "Percent";
134 case TokenType::kComma:
135 return "Comma";
136 case TokenType::kColon:
137 return "Colon";
138 case TokenType::kSemicolon:
139 return "Semicolon";
140 case TokenType::kPeriod:
141 return "Period";
142 case TokenType::kEqual:
143 return "Equal";
144 case TokenType::kInteger:
145 return "Integer";
146 case TokenType::kFloat:
147 return "Float";
148 case TokenType::kPlus:
149 return "Plus";
150 case TokenType::kStar:
151 return "Star";
152 case TokenType::kMinus:
153 return "Minus";
154 case TokenType::kDivision:
155 return "Division";
156 case TokenType::kRAngle:
157 return "RAngle";
158 case TokenType::kLAngle:
159 return "LAngle";
160 case TokenType::kRCurly:
161 return "RCurly";
162 case TokenType::kLCurly:
163 return "LCurly";
164 case TokenType::kRSquare:
165 return "RSquare";
166 case TokenType::kLSquare:
167 return "LSquare";
168 case TokenType::kBang:
169 return "Bang";
170 case TokenType::kUnderscore:
171 return "Underscore";
172 case TokenType::kAt:
173 return "At";
174 case TokenType::kLet:
175 return "Let";
176 case TokenType::kIf:
177 return "If";
178 case TokenType::kElse:
179 return "Else";
180 case TokenType::kFn:
181 return "Fn";
182 case TokenType::kDefn:
183 return "Defn";
184 case TokenType::kTypeDef:
185 return "TypeDef";
186 case TokenType::kExtern:
187 return "Extern";
188 case TokenType::kMatch:
189 return "Match";
190 case TokenType::kPartialMatch:
191 return "PartialMatch";
192 case TokenType::kQuestion:
193 return "Question";
194 case TokenType::kBoolean:
195 return "Boolean";
196 case TokenType::kMetadata:
197 return "Metadata";
198 case TokenType::kMetaReference:
199 return "MetaReference";
200 case TokenType::kFreeVar:
201 return "FreeVar";
202 case TokenType::kVersion:
203 return "Version";
204 case TokenType::kRef:
205 return "Ref";
206 case TokenType::kRefRead:
207 return "RefRead";
208 case TokenType::kRefWrite:
209 return "RefWrite";
210 case TokenType::kUnknown:
211 return "Unknown";
212 case TokenType::kEndOfFile:
213 return "EndOfFile";
214 case TokenType::kNull:
215 return "Null";
216 // Older compilers warn even though the above code is exhaustive.
217 default:
218 LOG(FATAL) << "unreachable code";
219 }
220}
221
222std::string Pretty(const TokenType& token_type) {
223 switch (token_type) {
224 case TokenType::kCommentStart:
225 return "`/*`";
226 case TokenType::kCommentEnd:
227 return "`*/`";
228 case TokenType::kLineComment:
229 return "`//`";
230 case TokenType::kComment:
231 return "comment";
232 case TokenType::kWhitespace:
233 return "whitespace";
234 case TokenType::kNewline:
235 return "newline";
236 case TokenType::kStringLiteral:
237 return "string literal";
238 case TokenType::kIdentifier:
239 return "identifier";
240 case TokenType::kLocal:
241 return "local variable";
242 case TokenType::kGlobal:
243 return "global variable";
244 case TokenType::kGraph:
245 return "graph variable";
246 case TokenType::kOp:
247 return "operator";
248 case TokenType::kOpenParen:
249 return "`(`";
250 case TokenType::kCloseParen:
251 return "`)`";
252 case TokenType::kAtSymbol:
253 return "`@`";
254 case TokenType::kPercent:
255 return "`%`";
256 case TokenType::kComma:
257 return "`,`";
258 case TokenType::kColon:
259 return "`:`";
260 case TokenType::kSemicolon:
261 return "`;`";
262 case TokenType::kPeriod:
263 return "`.`";
264 case TokenType::kEqual:
265 return "`=`";
266 case TokenType::kInteger:
267 return "integer";
268 case TokenType::kFloat:
269 return "float";
270 case TokenType::kPlus:
271 return "`+`";
272 case TokenType::kStar:
273 return "`*`";
274 case TokenType::kMinus:
275 return "`-`";
276 case TokenType::kDivision:
277 return "`/`";
278 case TokenType::kRAngle:
279 return "`<`";
280 case TokenType::kLAngle:
281 return "`>`";
282 case TokenType::kRCurly:
283 return "`}`";
284 case TokenType::kLCurly:
285 return "`{`";
286 case TokenType::kRSquare:
287 return "`]`";
288 case TokenType::kLSquare:
289 return "`[`";
290 case TokenType::kBang:
291 return "`!`";
292 case TokenType::kUnderscore:
293 return "`_`";
294 case TokenType::kAt:
295 return "`@`";
296 case TokenType::kLet:
297 return "`let`";
298 case TokenType::kIf:
299 return "`if`";
300 case TokenType::kElse:
301 return "`else`";
302 case TokenType::kFn:
303 return "`fn`";
304 case TokenType::kDefn:
305 return "`def`";
306 case TokenType::kTypeDef:
307 return "`type`";
308 case TokenType::kExtern:
309 return "`extern`";
310 case TokenType::kBoolean:
311 return "boolean";
312 case TokenType::kMetadata:
313 return "metadata section";
314 case TokenType::kMetaReference:
315 return "`meta`";
316 case TokenType::kFreeVar:
317 return "`free_var`";
318 case TokenType::kMatch:
319 return "`match`";
320 case TokenType::kPartialMatch:
321 return "`match?`";
322 case TokenType::kQuestion:
323 return "`?`";
324 case TokenType::kRef:
325 return "`ref`";
326 case TokenType::kRefRead:
327 return "`ref_read`";
328 case TokenType::kRefWrite:
329 return "`ref_write`";
330 case TokenType::kUnknown:
331 return "unknown";
332 case TokenType::kEndOfFile:
333 return "end of file";
334 case TokenType::kNull:
335 return "null";
336 case TokenType::kVersion:
337 return "version attribute";
338 // Older compilers warn even though the above code is exhaustive.
339 default:
340 LOG(FATAL) << "unreachable code";
341 }
342}
343
344class Token;
345
346class TokenNode : public Object {
347 public:
348 Span span;
349 TokenType token_type;
350 mutable runtime::ObjectRef data;
351
352 void VisitAttrs(AttrVisitor* v) {}
353
354 static constexpr const char* _type_key = "parser.Token";
355 TVM_DECLARE_FINAL_OBJECT_INFO(TokenNode, Object);
356};
357
358TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable)
359 .set_dispatch<TokenNode>([](const ObjectRef& ref, ReprPrinter* p) {
360 auto* node = static_cast<const TokenNode*>(ref.get());
361 p->stream << "Token(span=" << node->span << ", token_type=" << ToString(node->token_type)
362 << ", data=" << node->data << ")";
363 });
364
365TVM_REGISTER_NODE_TYPE(TokenNode);
366
367class Token : public ObjectRef {
368 public:
369 TVM_DLL explicit Token(Span span, TokenType token_type, ObjectRef data = ObjectRef());
370
371 static Token Null();
372 int64_t ToNumber() const;
373 std::string ToString() const;
374 Map<String, Array<ObjectRef>> ToMetadata() const;
375 TVM_DEFINE_OBJECT_REF_METHODS(Token, ObjectRef, TokenNode);
376};
377
378Token::Token(Span span, TokenType token_type, ObjectRef data) {
379 ObjectPtr<TokenNode> n = make_object<TokenNode>();
380 n->span = span;
381 n->token_type = token_type;
382 n->data = data;
383 data_ = std::move(n);
384}
385
386Token Token::Null() { return Token(Span(SourceName(), 0, 0, 0, 0), TokenType::kNull); }
387
388int64_t Token::ToNumber() const {
389 return Downcast<tvm::Integer>(this->operator->()->data).IntValue();
390}
391
392std::string Token::ToString() const { return Downcast<tvm::String>(this->operator->()->data); }
393
394Map<String, Array<ObjectRef>> Token::ToMetadata() const {
395 ObjectRef data = this->operator->()->data;
396 if (data.defined()) {
397 return Downcast<Map<String, Array<ObjectRef>>>(data);
398 } else {
399 return Map<String, Array<ObjectRef>>({});
400 }
401}
402
403} // namespace parser
404} // namespace tvm
405#endif // TVM_PARSER_TOKEN_H_
406