1 | // Protocol Buffers - Google's data interchange format |
2 | // Copyright 2008 Google Inc. All rights reserved. |
3 | // https://developers.google.com/protocol-buffers/ |
4 | // |
5 | // Redistribution and use in source and binary forms, with or without |
6 | // modification, are permitted provided that the following conditions are |
7 | // met: |
8 | // |
9 | // * Redistributions of source code must retain the above copyright |
10 | // notice, this list of conditions and the following disclaimer. |
11 | // * Redistributions in binary form must reproduce the above |
12 | // copyright notice, this list of conditions and the following disclaimer |
13 | // in the documentation and/or other materials provided with the |
14 | // distribution. |
15 | // * Neither the name of Google Inc. nor the names of its |
16 | // contributors may be used to endorse or promote products derived from |
17 | // this software without specific prior written permission. |
18 | // |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
30 | |
31 | // Author: [email protected] (Joseph Schorr) |
32 | // Based on original Protocol Buffers design by |
33 | // Sanjay Ghemawat, Jeff Dean, and others. |
34 | // |
35 | // Utilities for printing and parsing protocol messages in a human-readable, |
36 | // text-based format. |
37 | |
38 | #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__ |
39 | #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__ |
40 | |
41 | #include <map> |
42 | #include <memory> |
43 | #include <string> |
44 | #include <vector> |
45 | |
46 | #include <google/protobuf/stubs/common.h> |
47 | #include <google/protobuf/descriptor.h> |
48 | #include <google/protobuf/message.h> |
49 | |
50 | namespace google { |
51 | namespace protobuf { |
52 | |
namespace io {
// Forward declarations only: this header refers to these types solely through
// pointers, so their full definitions are not required here.  Declaring the
// two stream types locally keeps the header from depending on another include
// happening to declare them.
class ErrorCollector;  // tokenizer.h
class ZeroCopyInputStream;
class ZeroCopyOutputStream;
}  // namespace io
56 | |
57 | // This class implements protocol buffer text format. Printing and parsing |
58 | // protocol messages in text format is useful for debugging and human editing |
59 | // of messages. |
60 | // |
61 | // This class is really a namespace that contains only static methods. |
62 | class LIBPROTOBUF_EXPORT TextFormat { |
63 | public: |
64 | // Outputs a textual representation of the given message to the given |
65 | // output stream. |
66 | static bool Print(const Message& message, io::ZeroCopyOutputStream* output); |
67 | |
68 | // Print the fields in an UnknownFieldSet. They are printed by tag number |
69 | // only. Embedded messages are heuristically identified by attempting to |
70 | // parse them. |
71 | static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, |
72 | io::ZeroCopyOutputStream* output); |
73 | |
74 | // Like Print(), but outputs directly to a string. |
75 | static bool PrintToString(const Message& message, string* output); |
76 | |
77 | // Like PrintUnknownFields(), but outputs directly to a string. |
78 | static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields, |
79 | string* output); |
80 | |
81 | // Outputs a textual representation of the value of the field supplied on |
82 | // the message supplied. For non-repeated fields, an index of -1 must |
83 | // be supplied. Note that this method will print the default value for a |
84 | // field if it is not set. |
85 | static void PrintFieldValueToString(const Message& message, |
86 | const FieldDescriptor* field, |
87 | int index, |
88 | string* output); |
89 | |
90 | // The default printer that converts scalar values from fields into |
91 | // their string representation. |
92 | // You can derive from this FieldValuePrinter if you want to have |
93 | // fields to be printed in a different way and register it at the |
94 | // Printer. |
95 | class LIBPROTOBUF_EXPORT FieldValuePrinter { |
96 | public: |
97 | FieldValuePrinter(); |
98 | virtual ~FieldValuePrinter(); |
99 | virtual string PrintBool(bool val) const; |
100 | virtual string PrintInt32(int32 val) const; |
101 | virtual string PrintUInt32(uint32 val) const; |
102 | virtual string PrintInt64(int64 val) const; |
103 | virtual string PrintUInt64(uint64 val) const; |
104 | virtual string PrintFloat(float val) const; |
105 | virtual string PrintDouble(double val) const; |
106 | virtual string PrintString(const string& val) const; |
107 | virtual string PrintBytes(const string& val) const; |
108 | virtual string PrintEnum(int32 val, const string& name) const; |
109 | virtual string PrintFieldName(const Message& message, |
110 | const Reflection* reflection, |
111 | const FieldDescriptor* field) const; |
112 | virtual string PrintMessageStart(const Message& message, |
113 | int field_index, |
114 | int field_count, |
115 | bool single_line_mode) const; |
116 | virtual string PrintMessageEnd(const Message& message, |
117 | int field_index, |
118 | int field_count, |
119 | bool single_line_mode) const; |
120 | |
121 | private: |
122 | GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter); |
123 | }; |
124 | |
125 | // Class for those users which require more fine-grained control over how |
126 | // a protobuffer message is printed out. |
127 | class LIBPROTOBUF_EXPORT Printer { |
128 | public: |
129 | Printer(); |
130 | ~Printer(); |
131 | |
132 | // Like TextFormat::Print |
133 | bool Print(const Message& message, io::ZeroCopyOutputStream* output) const; |
134 | // Like TextFormat::PrintUnknownFields |
135 | bool PrintUnknownFields(const UnknownFieldSet& unknown_fields, |
136 | io::ZeroCopyOutputStream* output) const; |
137 | // Like TextFormat::PrintToString |
138 | bool PrintToString(const Message& message, string* output) const; |
139 | // Like TextFormat::PrintUnknownFieldsToString |
140 | bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields, |
141 | string* output) const; |
142 | // Like TextFormat::PrintFieldValueToString |
143 | void PrintFieldValueToString(const Message& message, |
144 | const FieldDescriptor* field, |
145 | int index, |
146 | string* output) const; |
147 | |
148 | // Adjust the initial indent level of all output. Each indent level is |
149 | // equal to two spaces. |
150 | void SetInitialIndentLevel(int indent_level) { |
151 | initial_indent_level_ = indent_level; |
152 | } |
153 | |
154 | // If printing in single line mode, then the entire message will be output |
155 | // on a single line with no line breaks. |
156 | void SetSingleLineMode(bool single_line_mode) { |
157 | single_line_mode_ = single_line_mode; |
158 | } |
159 | |
160 | bool IsInSingleLineMode() { |
161 | return single_line_mode_; |
162 | } |
163 | |
164 | // If use_field_number is true, uses field number instead of field name. |
165 | void SetUseFieldNumber(bool use_field_number) { |
166 | use_field_number_ = use_field_number; |
167 | } |
168 | |
169 | // Set true to print repeated primitives in a format like: |
170 | // field_name: [1, 2, 3, 4] |
171 | // instead of printing each value on its own line. Short format applies |
172 | // only to primitive values -- i.e. everything except strings and |
173 | // sub-messages/groups. |
174 | void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) { |
175 | use_short_repeated_primitives_ = use_short_repeated_primitives; |
176 | } |
177 | |
178 | // Set true to output UTF-8 instead of ASCII. The only difference |
179 | // is that bytes >= 0x80 in string fields will not be escaped, |
180 | // because they are assumed to be part of UTF-8 multi-byte |
181 | // sequences. This will change the default FieldValuePrinter. |
182 | void SetUseUtf8StringEscaping(bool as_utf8); |
183 | |
184 | // Set the default FieldValuePrinter that is used for all fields that |
185 | // don't have a field-specific printer registered. |
186 | // Takes ownership of the printer. |
187 | void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer); |
188 | |
189 | // Sets whether we want to hide unknown fields or not. |
190 | // Usually unknown fields are printed in a generic way that includes the |
191 | // tag number of the field instead of field name. However, sometimes it |
192 | // is useful to be able to print the message without unknown fields (e.g. |
193 | // for the python protobuf version to maintain consistency between its pure |
194 | // python and c++ implementations). |
195 | void SetHideUnknownFields(bool hide) { |
196 | hide_unknown_fields_ = hide; |
197 | } |
198 | |
199 | // If print_message_fields_in_index_order is true, print fields of a proto |
200 | // message using the order defined in source code instead of the field |
201 | // number. By default, use the field number order. |
202 | void SetPrintMessageFieldsInIndexOrder( |
203 | bool print_message_fields_in_index_order) { |
204 | print_message_fields_in_index_order_ = |
205 | print_message_fields_in_index_order; |
206 | } |
207 | |
208 | // Register a custom field-specific FieldValuePrinter for fields |
209 | // with a particular FieldDescriptor. |
210 | // Returns "true" if the registration succeeded, or "false", if there is |
211 | // already a printer for that FieldDescriptor. |
212 | // Takes ownership of the printer on successful registration. |
213 | bool RegisterFieldValuePrinter(const FieldDescriptor* field, |
214 | const FieldValuePrinter* printer); |
215 | |
216 | private: |
217 | // Forward declaration of an internal class used to print the text |
218 | // output to the OutputStream (see text_format.cc for implementation). |
219 | class TextGenerator; |
220 | |
221 | // Internal Print method, used for writing to the OutputStream via |
222 | // the TextGenerator class. |
223 | void Print(const Message& message, |
224 | TextGenerator& generator) const; |
225 | |
226 | // Print a single field. |
227 | void PrintField(const Message& message, |
228 | const Reflection* reflection, |
229 | const FieldDescriptor* field, |
230 | TextGenerator& generator) const; |
231 | |
232 | // Print a repeated primitive field in short form. |
233 | void PrintShortRepeatedField(const Message& message, |
234 | const Reflection* reflection, |
235 | const FieldDescriptor* field, |
236 | TextGenerator& generator) const; |
237 | |
238 | // Print the name of a field -- i.e. everything that comes before the |
239 | // ':' for a single name/value pair. |
240 | void PrintFieldName(const Message& message, |
241 | const Reflection* reflection, |
242 | const FieldDescriptor* field, |
243 | TextGenerator& generator) const; |
244 | |
245 | // Outputs a textual representation of the value of the field supplied on |
246 | // the message supplied or the default value if not set. |
247 | void PrintFieldValue(const Message& message, |
248 | const Reflection* reflection, |
249 | const FieldDescriptor* field, |
250 | int index, |
251 | TextGenerator& generator) const; |
252 | |
253 | // Print the fields in an UnknownFieldSet. They are printed by tag number |
254 | // only. Embedded messages are heuristically identified by attempting to |
255 | // parse them. |
256 | void PrintUnknownFields(const UnknownFieldSet& unknown_fields, |
257 | TextGenerator& generator) const; |
258 | |
259 | int initial_indent_level_; |
260 | |
261 | bool single_line_mode_; |
262 | |
263 | bool use_field_number_; |
264 | |
265 | bool use_short_repeated_primitives_; |
266 | |
267 | bool hide_unknown_fields_; |
268 | |
269 | bool print_message_fields_in_index_order_; |
270 | |
271 | scoped_ptr<const FieldValuePrinter> default_field_value_printer_; |
272 | typedef map<const FieldDescriptor*, |
273 | const FieldValuePrinter*> CustomPrinterMap; |
274 | CustomPrinterMap custom_printers_; |
275 | }; |
276 | |
277 | // Parses a text-format protocol message from the given input stream to |
278 | // the given message object. This function parses the format written |
279 | // by Print(). |
280 | static bool Parse(io::ZeroCopyInputStream* input, Message* output); |
281 | // Like Parse(), but reads directly from a string. |
282 | static bool ParseFromString(const string& input, Message* output); |
283 | |
284 | // Like Parse(), but the data is merged into the given message, as if |
285 | // using Message::MergeFrom(). |
286 | static bool Merge(io::ZeroCopyInputStream* input, Message* output); |
287 | // Like Merge(), but reads directly from a string. |
288 | static bool MergeFromString(const string& input, Message* output); |
289 | |
290 | // Parse the given text as a single field value and store it into the |
291 | // given field of the given message. If the field is a repeated field, |
292 | // the new value will be added to the end |
293 | static bool ParseFieldValueFromString(const string& input, |
294 | const FieldDescriptor* field, |
295 | Message* message); |
296 | |
297 | // Interface that TextFormat::Parser can use to find extensions. |
298 | // This class may be extended in the future to find more information |
299 | // like fields, etc. |
300 | class LIBPROTOBUF_EXPORT Finder { |
301 | public: |
302 | virtual ~Finder(); |
303 | |
304 | // Try to find an extension of *message by fully-qualified field |
305 | // name. Returns NULL if no extension is known for this name or number. |
306 | virtual const FieldDescriptor* FindExtension( |
307 | Message* message, |
308 | const string& name) const = 0; |
309 | }; |
310 | |
// A position (line and column) within the parsed text.
struct ParseLocation {
  // Default location: both coordinates are -1.
  ParseLocation() : line(-1), column(-1) {}

  // Location at the given line and column.
  ParseLocation(int line_param, int column_param)
      : line(line_param), column(column_param) {}

  int line;
  int column;
};
320 | |
321 | // Data structure which is populated with the locations of each field |
322 | // value parsed from the text. |
323 | class LIBPROTOBUF_EXPORT ParseInfoTree { |
324 | public: |
325 | ParseInfoTree(); |
326 | ~ParseInfoTree(); |
327 | |
328 | // Returns the parse location for index-th value of the field in the parsed |
329 | // text. If none exists, returns a location with line = -1. Index should be |
330 | // -1 for not-repeated fields. |
331 | ParseLocation GetLocation(const FieldDescriptor* field, int index) const; |
332 | |
333 | // Returns the parse info tree for the given field, which must be a message |
334 | // type. The nested information tree is owned by the root tree and will be |
335 | // deleted when it is deleted. |
336 | ParseInfoTree* GetTreeForNested(const FieldDescriptor* field, |
337 | int index) const; |
338 | |
339 | private: |
340 | // Allow the text format parser to record information into the tree. |
341 | friend class TextFormat; |
342 | |
343 | // Records the starting location of a single value for a field. |
344 | void RecordLocation(const FieldDescriptor* field, ParseLocation location); |
345 | |
346 | // Create and records a nested tree for a nested message field. |
347 | ParseInfoTree* CreateNested(const FieldDescriptor* field); |
348 | |
349 | // Defines the map from the index-th field descriptor to its parse location. |
350 | typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap; |
351 | |
352 | // Defines the map from the index-th field descriptor to the nested parse |
353 | // info tree. |
354 | typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap; |
355 | |
356 | LocationMap locations_; |
357 | NestedMap nested_; |
358 | |
359 | GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree); |
360 | }; |
361 | |
362 | // For more control over parsing, use this class. |
363 | class LIBPROTOBUF_EXPORT Parser { |
364 | public: |
365 | Parser(); |
366 | ~Parser(); |
367 | |
368 | // Like TextFormat::Parse(). |
369 | bool Parse(io::ZeroCopyInputStream* input, Message* output); |
370 | // Like TextFormat::ParseFromString(). |
371 | bool ParseFromString(const string& input, Message* output); |
372 | // Like TextFormat::Merge(). |
373 | bool Merge(io::ZeroCopyInputStream* input, Message* output); |
374 | // Like TextFormat::MergeFromString(). |
375 | bool MergeFromString(const string& input, Message* output); |
376 | |
377 | // Set where to report parse errors. If NULL (the default), errors will |
378 | // be printed to stderr. |
379 | void RecordErrorsTo(io::ErrorCollector* error_collector) { |
380 | error_collector_ = error_collector; |
381 | } |
382 | |
383 | // Set how parser finds extensions. If NULL (the default), the |
384 | // parser will use the standard Reflection object associated with |
385 | // the message being parsed. |
386 | void SetFinder(Finder* finder) { |
387 | finder_ = finder; |
388 | } |
389 | |
390 | // Sets where location information about the parse will be written. If NULL |
391 | // (the default), then no location will be written. |
392 | void WriteLocationsTo(ParseInfoTree* tree) { |
393 | parse_info_tree_ = tree; |
394 | } |
395 | |
396 | // Normally parsing fails if, after parsing, output->IsInitialized() |
397 | // returns false. Call AllowPartialMessage(true) to skip this check. |
398 | void AllowPartialMessage(bool allow) { |
399 | allow_partial_ = allow; |
400 | } |
401 | |
402 | // Allow field names to be matched case-insensitively. |
403 | // This is not advisable if there are fields that only differ in case, or |
404 | // if you want to enforce writing in the canonical form. |
405 | // This is 'false' by default. |
406 | void AllowCaseInsensitiveField(bool allow) { |
407 | allow_case_insensitive_field_ = allow; |
408 | } |
409 | |
410 | // Like TextFormat::ParseFieldValueFromString |
411 | bool ParseFieldValueFromString(const string& input, |
412 | const FieldDescriptor* field, |
413 | Message* output); |
414 | |
415 | |
416 | void AllowFieldNumber(bool allow) { |
417 | allow_field_number_ = allow; |
418 | } |
419 | |
420 | private: |
421 | // Forward declaration of an internal class used to parse text |
422 | // representations (see text_format.cc for implementation). |
423 | class ParserImpl; |
424 | |
425 | // Like TextFormat::Merge(). The provided implementation is used |
426 | // to do the parsing. |
427 | bool MergeUsingImpl(io::ZeroCopyInputStream* input, |
428 | Message* output, |
429 | ParserImpl* parser_impl); |
430 | |
431 | io::ErrorCollector* error_collector_; |
432 | Finder* finder_; |
433 | ParseInfoTree* parse_info_tree_; |
434 | bool allow_partial_; |
435 | bool allow_case_insensitive_field_; |
436 | bool allow_unknown_field_; |
437 | bool allow_unknown_enum_; |
438 | bool allow_field_number_; |
439 | bool allow_relaxed_whitespace_; |
440 | bool allow_singular_overwrites_; |
441 | }; |
442 | |
443 | |
444 | private: |
445 | // Hack: ParseInfoTree declares TextFormat as a friend which should extend |
446 | // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some |
447 | // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide |
448 | // helpers for ParserImpl to call methods of ParseInfoTree. |
449 | static inline void RecordLocation(ParseInfoTree* info_tree, |
450 | const FieldDescriptor* field, |
451 | ParseLocation location); |
452 | static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree, |
453 | const FieldDescriptor* field); |
454 | |
455 | GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat); |
456 | }; |
457 | |
458 | inline void TextFormat::RecordLocation(ParseInfoTree* info_tree, |
459 | const FieldDescriptor* field, |
460 | ParseLocation location) { |
461 | info_tree->RecordLocation(field, location); |
462 | } |
463 | |
464 | |
465 | inline TextFormat::ParseInfoTree* TextFormat::CreateNested( |
466 | ParseInfoTree* info_tree, const FieldDescriptor* field) { |
467 | return info_tree->CreateNested(field); |
468 | } |
469 | |
470 | } // namespace protobuf |
471 | |
472 | } // namespace google |
473 | #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__ |
474 | |