// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
// Author: jschorr@google.com (Joseph Schorr)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// Utilities for printing and parsing protocol messages in a human-readable,
// text-based format.
37
38#ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
39#define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
40
41#include <map>
42#include <memory>
43#include <string>
44#include <vector>
45
46#include <google/protobuf/stubs/common.h>
47#include <google/protobuf/descriptor.h>
48#include <google/protobuf/message.h>
49
50namespace google {
51namespace protobuf {
52
// Forward declarations of the io types this header only ever uses through
// pointers, so the full io headers do not have to be pulled in here.
namespace io {
  class ErrorCollector;        // tokenizer.h
  class ZeroCopyInputStream;   // zero_copy_stream.h
  class ZeroCopyOutputStream;  // zero_copy_stream.h
}
56
// This class implements protocol buffer text format.  Printing and parsing
// protocol messages in text format is useful for debugging and human editing
// of messages.
//
// This class is really a namespace: it contains only static methods and
// nested helper classes.
62class LIBPROTOBUF_EXPORT TextFormat {
63 public:
64 // Outputs a textual representation of the given message to the given
65 // output stream.
66 static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
67
68 // Print the fields in an UnknownFieldSet. They are printed by tag number
69 // only. Embedded messages are heuristically identified by attempting to
70 // parse them.
71 static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
72 io::ZeroCopyOutputStream* output);
73
74 // Like Print(), but outputs directly to a string.
75 static bool PrintToString(const Message& message, string* output);
76
77 // Like PrintUnknownFields(), but outputs directly to a string.
78 static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
79 string* output);
80
81 // Outputs a textual representation of the value of the field supplied on
82 // the message supplied. For non-repeated fields, an index of -1 must
83 // be supplied. Note that this method will print the default value for a
84 // field if it is not set.
85 static void PrintFieldValueToString(const Message& message,
86 const FieldDescriptor* field,
87 int index,
88 string* output);
89
90 // The default printer that converts scalar values from fields into
91 // their string representation.
92 // You can derive from this FieldValuePrinter if you want to have
93 // fields to be printed in a different way and register it at the
94 // Printer.
95 class LIBPROTOBUF_EXPORT FieldValuePrinter {
96 public:
97 FieldValuePrinter();
98 virtual ~FieldValuePrinter();
99 virtual string PrintBool(bool val) const;
100 virtual string PrintInt32(int32 val) const;
101 virtual string PrintUInt32(uint32 val) const;
102 virtual string PrintInt64(int64 val) const;
103 virtual string PrintUInt64(uint64 val) const;
104 virtual string PrintFloat(float val) const;
105 virtual string PrintDouble(double val) const;
106 virtual string PrintString(const string& val) const;
107 virtual string PrintBytes(const string& val) const;
108 virtual string PrintEnum(int32 val, const string& name) const;
109 virtual string PrintFieldName(const Message& message,
110 const Reflection* reflection,
111 const FieldDescriptor* field) const;
112 virtual string PrintMessageStart(const Message& message,
113 int field_index,
114 int field_count,
115 bool single_line_mode) const;
116 virtual string PrintMessageEnd(const Message& message,
117 int field_index,
118 int field_count,
119 bool single_line_mode) const;
120
121 private:
122 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
123 };
124
125 // Class for those users which require more fine-grained control over how
126 // a protobuffer message is printed out.
127 class LIBPROTOBUF_EXPORT Printer {
128 public:
129 Printer();
130 ~Printer();
131
132 // Like TextFormat::Print
133 bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
134 // Like TextFormat::PrintUnknownFields
135 bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
136 io::ZeroCopyOutputStream* output) const;
137 // Like TextFormat::PrintToString
138 bool PrintToString(const Message& message, string* output) const;
139 // Like TextFormat::PrintUnknownFieldsToString
140 bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
141 string* output) const;
142 // Like TextFormat::PrintFieldValueToString
143 void PrintFieldValueToString(const Message& message,
144 const FieldDescriptor* field,
145 int index,
146 string* output) const;
147
148 // Adjust the initial indent level of all output. Each indent level is
149 // equal to two spaces.
150 void SetInitialIndentLevel(int indent_level) {
151 initial_indent_level_ = indent_level;
152 }
153
154 // If printing in single line mode, then the entire message will be output
155 // on a single line with no line breaks.
156 void SetSingleLineMode(bool single_line_mode) {
157 single_line_mode_ = single_line_mode;
158 }
159
160 bool IsInSingleLineMode() {
161 return single_line_mode_;
162 }
163
164 // If use_field_number is true, uses field number instead of field name.
165 void SetUseFieldNumber(bool use_field_number) {
166 use_field_number_ = use_field_number;
167 }
168
169 // Set true to print repeated primitives in a format like:
170 // field_name: [1, 2, 3, 4]
171 // instead of printing each value on its own line. Short format applies
172 // only to primitive values -- i.e. everything except strings and
173 // sub-messages/groups.
174 void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
175 use_short_repeated_primitives_ = use_short_repeated_primitives;
176 }
177
178 // Set true to output UTF-8 instead of ASCII. The only difference
179 // is that bytes >= 0x80 in string fields will not be escaped,
180 // because they are assumed to be part of UTF-8 multi-byte
181 // sequences. This will change the default FieldValuePrinter.
182 void SetUseUtf8StringEscaping(bool as_utf8);
183
184 // Set the default FieldValuePrinter that is used for all fields that
185 // don't have a field-specific printer registered.
186 // Takes ownership of the printer.
187 void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
188
189 // Sets whether we want to hide unknown fields or not.
190 // Usually unknown fields are printed in a generic way that includes the
191 // tag number of the field instead of field name. However, sometimes it
192 // is useful to be able to print the message without unknown fields (e.g.
193 // for the python protobuf version to maintain consistency between its pure
194 // python and c++ implementations).
195 void SetHideUnknownFields(bool hide) {
196 hide_unknown_fields_ = hide;
197 }
198
199 // If print_message_fields_in_index_order is true, print fields of a proto
200 // message using the order defined in source code instead of the field
201 // number. By default, use the field number order.
202 void SetPrintMessageFieldsInIndexOrder(
203 bool print_message_fields_in_index_order) {
204 print_message_fields_in_index_order_ =
205 print_message_fields_in_index_order;
206 }
207
208 // Register a custom field-specific FieldValuePrinter for fields
209 // with a particular FieldDescriptor.
210 // Returns "true" if the registration succeeded, or "false", if there is
211 // already a printer for that FieldDescriptor.
212 // Takes ownership of the printer on successful registration.
213 bool RegisterFieldValuePrinter(const FieldDescriptor* field,
214 const FieldValuePrinter* printer);
215
216 private:
217 // Forward declaration of an internal class used to print the text
218 // output to the OutputStream (see text_format.cc for implementation).
219 class TextGenerator;
220
221 // Internal Print method, used for writing to the OutputStream via
222 // the TextGenerator class.
223 void Print(const Message& message,
224 TextGenerator& generator) const;
225
226 // Print a single field.
227 void PrintField(const Message& message,
228 const Reflection* reflection,
229 const FieldDescriptor* field,
230 TextGenerator& generator) const;
231
232 // Print a repeated primitive field in short form.
233 void PrintShortRepeatedField(const Message& message,
234 const Reflection* reflection,
235 const FieldDescriptor* field,
236 TextGenerator& generator) const;
237
238 // Print the name of a field -- i.e. everything that comes before the
239 // ':' for a single name/value pair.
240 void PrintFieldName(const Message& message,
241 const Reflection* reflection,
242 const FieldDescriptor* field,
243 TextGenerator& generator) const;
244
245 // Outputs a textual representation of the value of the field supplied on
246 // the message supplied or the default value if not set.
247 void PrintFieldValue(const Message& message,
248 const Reflection* reflection,
249 const FieldDescriptor* field,
250 int index,
251 TextGenerator& generator) const;
252
253 // Print the fields in an UnknownFieldSet. They are printed by tag number
254 // only. Embedded messages are heuristically identified by attempting to
255 // parse them.
256 void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
257 TextGenerator& generator) const;
258
259 int initial_indent_level_;
260
261 bool single_line_mode_;
262
263 bool use_field_number_;
264
265 bool use_short_repeated_primitives_;
266
267 bool hide_unknown_fields_;
268
269 bool print_message_fields_in_index_order_;
270
271 scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
272 typedef map<const FieldDescriptor*,
273 const FieldValuePrinter*> CustomPrinterMap;
274 CustomPrinterMap custom_printers_;
275 };
276
277 // Parses a text-format protocol message from the given input stream to
278 // the given message object. This function parses the format written
279 // by Print().
280 static bool Parse(io::ZeroCopyInputStream* input, Message* output);
281 // Like Parse(), but reads directly from a string.
282 static bool ParseFromString(const string& input, Message* output);
283
284 // Like Parse(), but the data is merged into the given message, as if
285 // using Message::MergeFrom().
286 static bool Merge(io::ZeroCopyInputStream* input, Message* output);
287 // Like Merge(), but reads directly from a string.
288 static bool MergeFromString(const string& input, Message* output);
289
290 // Parse the given text as a single field value and store it into the
291 // given field of the given message. If the field is a repeated field,
292 // the new value will be added to the end
293 static bool ParseFieldValueFromString(const string& input,
294 const FieldDescriptor* field,
295 Message* message);
296
297 // Interface that TextFormat::Parser can use to find extensions.
298 // This class may be extended in the future to find more information
299 // like fields, etc.
300 class LIBPROTOBUF_EXPORT Finder {
301 public:
302 virtual ~Finder();
303
304 // Try to find an extension of *message by fully-qualified field
305 // name. Returns NULL if no extension is known for this name or number.
306 virtual const FieldDescriptor* FindExtension(
307 Message* message,
308 const string& name) const = 0;
309 };
310
311 // A location in the parsed text.
312 struct ParseLocation {
313 int line;
314 int column;
315
316 ParseLocation() : line(-1), column(-1) {}
317 ParseLocation(int line_param, int column_param)
318 : line(line_param), column(column_param) {}
319 };
320
321 // Data structure which is populated with the locations of each field
322 // value parsed from the text.
323 class LIBPROTOBUF_EXPORT ParseInfoTree {
324 public:
325 ParseInfoTree();
326 ~ParseInfoTree();
327
328 // Returns the parse location for index-th value of the field in the parsed
329 // text. If none exists, returns a location with line = -1. Index should be
330 // -1 for not-repeated fields.
331 ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
332
333 // Returns the parse info tree for the given field, which must be a message
334 // type. The nested information tree is owned by the root tree and will be
335 // deleted when it is deleted.
336 ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
337 int index) const;
338
339 private:
340 // Allow the text format parser to record information into the tree.
341 friend class TextFormat;
342
343 // Records the starting location of a single value for a field.
344 void RecordLocation(const FieldDescriptor* field, ParseLocation location);
345
346 // Create and records a nested tree for a nested message field.
347 ParseInfoTree* CreateNested(const FieldDescriptor* field);
348
349 // Defines the map from the index-th field descriptor to its parse location.
350 typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
351
352 // Defines the map from the index-th field descriptor to the nested parse
353 // info tree.
354 typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
355
356 LocationMap locations_;
357 NestedMap nested_;
358
359 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
360 };
361
362 // For more control over parsing, use this class.
363 class LIBPROTOBUF_EXPORT Parser {
364 public:
365 Parser();
366 ~Parser();
367
368 // Like TextFormat::Parse().
369 bool Parse(io::ZeroCopyInputStream* input, Message* output);
370 // Like TextFormat::ParseFromString().
371 bool ParseFromString(const string& input, Message* output);
372 // Like TextFormat::Merge().
373 bool Merge(io::ZeroCopyInputStream* input, Message* output);
374 // Like TextFormat::MergeFromString().
375 bool MergeFromString(const string& input, Message* output);
376
377 // Set where to report parse errors. If NULL (the default), errors will
378 // be printed to stderr.
379 void RecordErrorsTo(io::ErrorCollector* error_collector) {
380 error_collector_ = error_collector;
381 }
382
383 // Set how parser finds extensions. If NULL (the default), the
384 // parser will use the standard Reflection object associated with
385 // the message being parsed.
386 void SetFinder(Finder* finder) {
387 finder_ = finder;
388 }
389
390 // Sets where location information about the parse will be written. If NULL
391 // (the default), then no location will be written.
392 void WriteLocationsTo(ParseInfoTree* tree) {
393 parse_info_tree_ = tree;
394 }
395
396 // Normally parsing fails if, after parsing, output->IsInitialized()
397 // returns false. Call AllowPartialMessage(true) to skip this check.
398 void AllowPartialMessage(bool allow) {
399 allow_partial_ = allow;
400 }
401
402 // Allow field names to be matched case-insensitively.
403 // This is not advisable if there are fields that only differ in case, or
404 // if you want to enforce writing in the canonical form.
405 // This is 'false' by default.
406 void AllowCaseInsensitiveField(bool allow) {
407 allow_case_insensitive_field_ = allow;
408 }
409
410 // Like TextFormat::ParseFieldValueFromString
411 bool ParseFieldValueFromString(const string& input,
412 const FieldDescriptor* field,
413 Message* output);
414
415
416 void AllowFieldNumber(bool allow) {
417 allow_field_number_ = allow;
418 }
419
420 private:
421 // Forward declaration of an internal class used to parse text
422 // representations (see text_format.cc for implementation).
423 class ParserImpl;
424
425 // Like TextFormat::Merge(). The provided implementation is used
426 // to do the parsing.
427 bool MergeUsingImpl(io::ZeroCopyInputStream* input,
428 Message* output,
429 ParserImpl* parser_impl);
430
431 io::ErrorCollector* error_collector_;
432 Finder* finder_;
433 ParseInfoTree* parse_info_tree_;
434 bool allow_partial_;
435 bool allow_case_insensitive_field_;
436 bool allow_unknown_field_;
437 bool allow_unknown_enum_;
438 bool allow_field_number_;
439 bool allow_relaxed_whitespace_;
440 bool allow_singular_overwrites_;
441 };
442
443
444 private:
445 // Hack: ParseInfoTree declares TextFormat as a friend which should extend
446 // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
447 // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
448 // helpers for ParserImpl to call methods of ParseInfoTree.
449 static inline void RecordLocation(ParseInfoTree* info_tree,
450 const FieldDescriptor* field,
451 ParseLocation location);
452 static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
453 const FieldDescriptor* field);
454
455 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
456};
457
458inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
459 const FieldDescriptor* field,
460 ParseLocation location) {
461 info_tree->RecordLocation(field, location);
462}
463
464
465inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
466 ParseInfoTree* info_tree, const FieldDescriptor* field) {
467 return info_tree->CreateNested(field);
468}
469
470} // namespace protobuf
471
472} // namespace google
473#endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
474