1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// Date: Mon. Apr. 18 19:52:34 CST 2011
19
20// Iteratively split a string by one or multiple separators.
21
22#ifndef BUTIL_STRING_SPLITTER_H
23#define BUTIL_STRING_SPLITTER_H
24
25#include <stdlib.h>
26#include <stdint.h>
27#include "butil/strings/string_piece.h"
28
// It's common to encode data into strings separated by special characters
// and decode them back, but functions such as `split_string' have to modify
// the input string, which is bad. If we parse the string from scratch, the
// code will be filled with pointer operations and be hard to understand.
33//
34// What we want is:
35// - Scan the string once: just do simple things efficiently.
36// - Do not modify input string: Changing input is bad, it may bring hidden
37// bugs, concurrency issues and non-const propagations.
38// - Split the string in-place without additional buffer/array.
39//
40// StringSplitter does meet these requirements.
// Usage:
//     const char* the_string_to_split = ...;
//     for (StringSplitter s(the_string_to_split, '\t'); s; ++s) {
//         printf("%.*s\n", (int)s.length(), s.field());
//     }
46//
47// "s" behaves as an iterator and evaluates to true before ending.
48// "s.field()" and "s.length()" are address and length of current field
49// respectively. Notice that "s.field()" may not end with '\0' because
50// we don't modify input. You can copy the field to a dedicated buffer
51// or apply a function supporting length.
52
53namespace butil {
54
// Selects how the splitters treat zero-length fields.
enum EmptyFieldAction {
    // Zero-length fields are skipped (the default of every splitter
    // constructor declared in this header).
    SKIP_EMPTY_FIELD = 0,
    // Zero-length fields are kept.
    ALLOW_EMPTY_FIELD = 1
};
59
60// Split a string with one character
61class StringSplitter {
62public:
63 // Split `input' with `separator'. If `action' is SKIP_EMPTY_FIELD, zero-
64 // length() field() will be skipped.
65 inline StringSplitter(const char* input, char separator,
66 EmptyFieldAction action = SKIP_EMPTY_FIELD);
67 // Allows containing embedded '\0' characters and separator can be '\0',
68 // if str_end is not NULL.
69 inline StringSplitter(const char* str_begin, const char* str_end,
70 char separator,
71 EmptyFieldAction action = SKIP_EMPTY_FIELD);
72 // Allows containing embedded '\0' characters and separator can be '\0',
73 inline StringSplitter(const StringPiece& input, char separator,
74 EmptyFieldAction action = SKIP_EMPTY_FIELD);
75
76 // Move splitter forward.
77 inline StringSplitter& operator++();
78 inline StringSplitter operator++(int);
79
80 // True iff field() is valid.
81 inline operator const void*() const;
82
83 // Beginning address and length of the field. *(field() + length()) may
84 // not be '\0' because we don't modify `input'.
85 inline const char* field() const;
86 inline size_t length() const;
87 inline StringPiece field_sp() const;
88
89 // Cast field to specific type, and write the value into `pv'.
90 // Returns 0 on success, -1 otherwise.
91 // NOTE: If separator is a digit, casting functions always return -1.
92 inline int to_int8(int8_t *pv) const;
93 inline int to_uint8(uint8_t *pv) const;
94 inline int to_int(int *pv) const;
95 inline int to_uint(unsigned int *pv) const;
96 inline int to_long(long *pv) const;
97 inline int to_ulong(unsigned long *pv) const;
98 inline int to_longlong(long long *pv) const;
99 inline int to_ulonglong(unsigned long long *pv) const;
100 inline int to_float(float *pv) const;
101 inline int to_double(double *pv) const;
102
103private:
104 inline bool not_end(const char* p) const;
105 inline void init();
106
107 const char* _head;
108 const char* _tail;
109 const char* _str_tail;
110 const char _sep;
111 const EmptyFieldAction _empty_field_action;
112};
113
114// Split a string with one of the separators
115class StringMultiSplitter {
116public:
117 // Split `input' with one character of `separators'. If `action' is
118 // SKIP_EMPTY_FIELD, zero-length() field() will be skipped.
119 // NOTE: This utility stores pointer of `separators' directly rather than
120 // copying the content because this utility is intended to be used
121 // in ad-hoc manner where lifetime of `separators' is generally
122 // longer than this utility.
123 inline StringMultiSplitter(const char* input, const char* separators,
124 EmptyFieldAction action = SKIP_EMPTY_FIELD);
125 // Allows containing embedded '\0' characters if str_end is not NULL.
126 // NOTE: `separators` cannot contain embedded '\0' character.
127 inline StringMultiSplitter(const char* str_begin, const char* str_end,
128 const char* separators,
129 EmptyFieldAction action = SKIP_EMPTY_FIELD);
130
131 // Move splitter forward.
132 inline StringMultiSplitter& operator++();
133 inline StringMultiSplitter operator++(int);
134
135 // True iff field() is valid.
136 inline operator const void*() const;
137
138 // Beginning address and length of the field. *(field() + length()) may
139 // not be '\0' because we don't modify `input'.
140 inline const char* field() const;
141 inline size_t length() const;
142 inline StringPiece field_sp() const;
143
144 // Cast field to specific type, and write the value into `pv'.
145 // Returns 0 on success, -1 otherwise.
146 // NOTE: If separators contains digit, casting functions always return -1.
147 inline int to_int8(int8_t *pv) const;
148 inline int to_uint8(uint8_t *pv) const;
149 inline int to_int(int *pv) const;
150 inline int to_uint(unsigned int *pv) const;
151 inline int to_long(long *pv) const;
152 inline int to_ulong(unsigned long *pv) const;
153 inline int to_longlong(long long *pv) const;
154 inline int to_ulonglong(unsigned long long *pv) const;
155 inline int to_float(float *pv) const;
156 inline int to_double(double *pv) const;
157
158private:
159 inline bool is_sep(char c) const;
160 inline bool not_end(const char* p) const;
161 inline void init();
162
163 const char* _head;
164 const char* _tail;
165 const char* _str_tail;
166 const char* const _seps;
167 const EmptyFieldAction _empty_field_action;
168};
169
170// Split query in the format according to the given delimiters.
171// This class can also handle some exceptional cases.
// 1. consecutive pair_delimiter are omitted, for example,
//    suppose key_value_delimiter is '=' and pair_delimiter
//    is '&', then 'k1=v1&&&k2=v2' is normalized to 'k1=v1&k2=v2'.
175// 2. key or value can be empty or both can be empty.
176// 3. consecutive key_value_delimiter are not omitted, for example,
177// suppose input is 'k1===v2' and key_value_delimiter is '=', then
178// key() returns 'k1', value() returns '==v2'.
179class KeyValuePairsSplitter {
180public:
181 inline KeyValuePairsSplitter(const char* str_begin,
182 const char* str_end,
183 char pair_delimiter,
184 char key_value_delimiter)
185 : _sp(str_begin, str_end, pair_delimiter)
186 , _delim_pos(StringPiece::npos)
187 , _key_value_delim(key_value_delimiter) {
188 UpdateDelimiterPosition();
189 }
190
191 inline KeyValuePairsSplitter(const char* str_begin,
192 char pair_delimiter,
193 char key_value_delimiter)
194 : KeyValuePairsSplitter(str_begin, NULL,
195 pair_delimiter, key_value_delimiter) {}
196
197 inline KeyValuePairsSplitter(const StringPiece &sp,
198 char pair_delimiter,
199 char key_value_delimiter)
200 : KeyValuePairsSplitter(sp.begin(), sp.end(),
201 pair_delimiter, key_value_delimiter) {}
202
203 inline StringPiece key() {
204 return key_and_value().substr(0, _delim_pos);
205 }
206
207 inline StringPiece value() {
208 return key_and_value().substr(_delim_pos + 1);
209 }
210
211 // Get the current value of key and value
212 // in the format of "key=value"
213 inline StringPiece key_and_value() {
214 return StringPiece(_sp.field(), _sp.length());
215 }
216
217 // Move splitter forward.
218 inline KeyValuePairsSplitter& operator++() {
219 ++_sp;
220 UpdateDelimiterPosition();
221 return *this;
222 }
223
224 inline KeyValuePairsSplitter operator++(int) {
225 KeyValuePairsSplitter tmp = *this;
226 operator++();
227 return tmp;
228 }
229
230 inline operator const void*() const { return _sp; }
231
232private:
233 inline void UpdateDelimiterPosition();
234
235private:
236 StringSplitter _sp;
237 StringPiece::size_type _delim_pos;
238 const char _key_value_delim;
239};
240
241} // namespace butil
242
243#include "butil/string_splitter_inl.h"
244
245#endif // BUTIL_STRING_SPLITTER_H
246