1 | /* Generated by re2c */ |
2 | // Copyright 2011 Google Inc. All Rights Reserved. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | // you may not use this file except in compliance with the License. |
6 | // You may obtain a copy of the License at |
7 | // |
8 | // http://www.apache.org/licenses/LICENSE-2.0 |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software |
11 | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | // See the License for the specific language governing permissions and |
14 | // limitations under the License. |
15 | |
16 | #include "depfile_parser.h" |
17 | #include "util.h" |
18 | |
19 | #include <algorithm> |
20 | |
21 | using namespace std; |
22 | |
23 | DepfileParser::DepfileParser(DepfileParserOptions options) |
24 | : options_(options) |
25 | { |
26 | } |
27 | |
28 | // A note on backslashes in Makefiles, from reading the docs: |
29 | // Backslash-newline is the line continuation character. |
30 | // Backslash-# escapes a # (otherwise meaningful as a comment start). |
31 | // Backslash-% escapes a % (otherwise meaningful as a special). |
32 | // Finally, quoting the GNU manual, "Backslashes that are not in danger |
33 | // of quoting ‘%’ characters go unmolested." |
34 | // How do you end a line with a backslash? The netbsd Make docs suggest |
35 | // reading the result of a shell command echoing a backslash! |
36 | // |
37 | // Rather than implement all of above, we follow what GCC/Clang produces: |
38 | // Backslashes escape a space or hash sign. |
// When a space is preceded by 2N+1 backslashes, it represents N backslashes
40 | // followed by space. |
41 | // When a space is preceded by 2N backslashes, it represents 2N backslashes at |
42 | // the end of a filename. |
43 | // A hash sign is escaped by a single backslash. All other backslashes remain |
44 | // unchanged. |
45 | // |
46 | // If anyone actually has depfiles that rely on the more complicated |
47 | // behavior we can adjust this. |
// Parses the Makefile-style dependency text held in |content|.
//
// The character buffer behind |content| is both scanned and rewritten in
// place: escape sequences are collapsed by writing the de-escaped bytes back
// over the input, and every filename is recorded as a StringPiece constructed
// from a pointer into that same buffer.
// NOTE(review): StringPiece is presumably a non-owning view, in which case
// |content| must outlive outs_ and ins_ -- confirm against its definition.
//
// Filenames seen while parsing targets are appended to outs_, filenames seen
// after a target's trailing ':' are appended to ins_; both lists are
// de-duplicated with a linear search.
//
// Returns true on success. Returns false and sets |*err| when:
//  - a not-yet-seen input appears in a rule whose target list contained a
//    name already recorded as an input ("inputs may not also have inputs"), or
//  - no filename ending in ':' was found at all, which includes empty input
//    because the main loop never runs ("expected ':' in depfile").
//
// NOTE(review): the generated scanner contains no comparison against |end|;
// it stops on reading a 0x00 byte, so it assumes the string buffer is
// NUL-terminated at content->size() -- confirm for the supported toolchains.
bool DepfileParser::Parse(string* content, string* err) {
  // in: current parser input point.
  // end: end of input.
  // parsing_targets: whether we are parsing targets or dependencies.
  char* in = &(*content)[0];
  char* end = in + content->size();
  // have_target: set once any filename with a trailing ':' has been seen.
  bool have_target = false;
  bool parsing_targets = true;
  // poisoned_input: set when the current rule's target list named something
  // already recorded in ins_; cleared again at the next newline.
  bool poisoned_input = false;
  // Each iteration of this loop scans exactly one filename (possibly empty).
  while (in < end) {
    bool have_newline = false;
    // out: current output point (typically same as in, but can fall behind
    // as we de-escape backslashes).
    char* out = in;
    // filename: start of the current parsed filename.
    char* filename = out;
    // Each iteration of this loop matches one token. Actions that `continue`
    // extend the current filename; actions that `break` terminate it.
    for (;;) {
      // start: beginning of the current parsed span.
      const char* start = in;
      // yymarker: backtrack position, set after the first backslash and used
      // when a "\\\r" turns out not to be followed by '\n'.
      char* yymarker = NULL;

      {
        unsigned char yych;
        // Character class table indexed by byte value: bit 128 is set for
        // bytes that may appear inside a run of plain filename text.
        static const unsigned char yybm[] = {
          0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0,
          0, 128, 0, 0, 0, 128, 0, 0,
          128, 128, 0, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 0, 0, 128, 0, 0,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 0, 128, 0, 128,
          0, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 0, 128, 128, 0,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
          128, 128, 128, 128, 128, 128, 128, 128,
        };
        // Initial state: dispatch on the first byte of the token.
        yych = *in;
        if (yybm[0+yych] & 128) {
          goto yy9;
        }
        // Not a plain-text byte: '\n' -> yy6, '\r' -> yy8, '$' -> yy12,
        // '\\' -> yy13, 0x00 falls through below, anything else -> yy4.
        if (yych <= '\r') {
          if (yych <= '\t') {
            if (yych >= 0x01) goto yy4;
          } else {
            if (yych <= '\n') goto yy6;
            if (yych <= '\f') goto yy4;
            goto yy8;
          }
        } else {
          if (yych <= '$') {
            if (yych <= '#') goto yy4;
            goto yy12;
          } else {
            if (yych <= '?') goto yy4;
            if (yych <= '\\') goto yy13;
            goto yy4;
          }
        }
        // A 0x00 byte: consume it and end the current filename.
        ++in;
        {
          break;
        }
// Consume one byte that is neither plain text nor special.
yy4:
        ++in;
// Shared action; also reached after a lone '\r', a lone '$', a backslash
// followed by 0x00, and a backtracked "\\\r".
yy5:
        {
          // For any other character (e.g. whitespace), swallow it here,
          // allowing the outer logic to loop around again.
          break;
        }
// Consume a '\n' (also the '\n' of a "\r\n" pair coming from yy8).
yy6:
        ++in;
        {
          // A newline ends the current file name and the current rule.
          have_newline = true;
          break;
        }
// Saw '\r': "\r\n" is handled as a newline, a lone '\r' like any other
// swallowed character.
yy8:
        yych = *++in;
        if (yych == '\n') goto yy6;
        goto yy5;
// Consume a maximal run of plain-text bytes.
yy9:
        yych = *++in;
        if (yybm[0+yych] & 128) {
          goto yy9;
        }
// Plain-text action; also used for backslash sequences that are kept
// verbatim.
yy11:
        {
          // Got a span of plain text.
          int len = (int)(in - start);
          // Need to shift it over if we're overwriting backslashes.
          if (out < start)
            memmove(out, start, len);
          out += len;
          continue;
        }
// Saw '$': only "$$" is an escape; a lone '$' is swallowed via yy5.
yy12:
        yych = *++in;
        if (yych == '$') goto yy14;
        goto yy5;
// Saw the first backslash (an odd count so far). Remember the position just
// after it in yymarker, then dispatch on the following byte:
// 0x00 -> yy5, '\n' -> yy17, '\r' -> yy19, ' ' -> yy21, '#' -> yy23,
// ':' -> yy25, '\\' -> yy27, anything else -> yy16.
yy13:
        yych = *(yymarker = ++in);
        if (yych <= ' ') {
          if (yych <= '\n') {
            if (yych <= 0x00) goto yy5;
            if (yych <= '\t') goto yy16;
            goto yy17;
          } else {
            if (yych == '\r') goto yy19;
            if (yych <= 0x1F) goto yy16;
            goto yy21;
          }
        } else {
          if (yych <= '9') {
            if (yych == '#') goto yy23;
            goto yy16;
          } else {
            if (yych <= ':') goto yy25;
            if (yych == '\\') goto yy27;
            goto yy16;
          }
        }
// Second '$' of "$$".
yy14:
        ++in;
        {
          // De-escape dollar character.
          *out++ = '$';
          continue;
        }
// Backslash(es) followed by an ordinary byte: consume that byte and keep the
// whole span unchanged as plain text.
yy16:
        ++in;
        goto yy11;
// Consume the '\n' of a backslash-newline (or backslash-"\r\n") sequence.
yy17:
        ++in;
        {
          // A line continuation ends the current file name.
          break;
        }
// Single backslash followed by '\r': with a following '\n' this is a line
// continuation; otherwise rewind to just after the backslash and swallow it.
yy19:
        yych = *++in;
        if (yych == '\n') goto yy17;
        in = yymarker;
        goto yy5;
// Odd number of backslashes followed by a space; consume the space.
yy21:
        ++in;
        {
          // 2N+1 backslashes plus space -> N backslashes plus space.
          // len counts the backslashes plus the space, so len / 2 - 1 == N.
          int len = (int)(in - start);
          int n = len / 2 - 1;
          if (out < start)
            memset(out, '\\', n);
          out += n;
          *out++ = ' ';
          continue;
        }
// One or more backslashes followed by '#'; consume the '#'.
yy23:
        ++in;
        {
          // De-escape hash sign, but preserve other leading backslashes.
          // len counts the backslashes plus '#'; one backslash is dropped.
          int len = (int)(in - start);
          if (len > 2 && out < start)
            memset(out, '\\', len - 2);
          out += len - 2;
          *out++ = '#';
          continue;
        }
// One or more backslashes followed by ':'. Step past the ':' and peek at the
// next byte: 0x00, '\t', '\n', '\r' or ' ' -> yy28, anything else falls
// through to yy26 without consuming the peeked byte.
yy25:
        yych = *++in;
        if (yych <= '\f') {
          if (yych <= 0x00) goto yy28;
          if (yych <= 0x08) goto yy26;
          if (yych <= '\n') goto yy28;
        } else {
          if (yych <= '\r') goto yy28;
          if (yych == ' ') goto yy28;
        }
yy26:
        {
          // De-escape colon sign, but preserve other leading backslashes.
          // Regular expression uses lookahead to make sure that no whitespace
          // nor EOF follows. In that case it'd be the : at the end of a target
          int len = (int)(in - start);
          if (len > 2 && out < start)
            memset(out, '\\', len - 2);
          out += len - 2;
          *out++ = ':';
          continue;
        }
// Saw an even number of backslashes so far. 0x00, '\n' and '\r' end the run
// and the backslashes are kept as plain text (yy11); ' ' -> yy30,
// '#' -> yy23, ':' -> yy25, another '\\' -> yy32, anything else -> yy16.
yy27:
        yych = *++in;
        if (yych <= ' ') {
          if (yych <= '\n') {
            if (yych <= 0x00) goto yy11;
            if (yych <= '\t') goto yy16;
            goto yy11;
          } else {
            if (yych == '\r') goto yy11;
            if (yych <= 0x1F) goto yy16;
            goto yy30;
          }
        } else {
          if (yych <= '9') {
            if (yych == '#') goto yy23;
            goto yy16;
          } else {
            if (yych <= ':') goto yy25;
            if (yych == '\\') goto yy32;
            goto yy16;
          }
        }
// Backslash(es) + ':' followed by whitespace or 0x00; consume that byte too.
yy28:
        ++in;
        {
          // Backslash followed by : and whitespace.
          // It is therefore normal text and not an escaped colon
          // The "- 1" excludes the trailing whitespace byte just consumed.
          int len = (int)(in - start - 1);
          // Need to shift it over if we're overwriting backslashes.
          if (out < start)
            memmove(out, start, len);
          out += len;
          // The consumed byte may itself have been the rule-ending newline.
          if (*(in - 1) == '\n')
            have_newline = true;
          break;
        }
// Even number of backslashes followed by a space; consume the space.
yy30:
        ++in;
        {
          // 2N backslashes plus space -> 2N backslashes, end of filename.
          // len counts the backslashes plus the space; the space is dropped.
          int len = (int)(in - start);
          if (out < start)
            memset(out, '\\', len - 1);
          out += len - 1;
          break;
        }
// Saw an odd number (three or more) of backslashes so far. Same dispatch as
// yy27 except that ' ' -> yy21 and another '\\' goes back to yy27.
yy32:
        yych = *++in;
        if (yych <= ' ') {
          if (yych <= '\n') {
            if (yych <= 0x00) goto yy11;
            if (yych <= '\t') goto yy16;
            goto yy11;
          } else {
            if (yych == '\r') goto yy11;
            if (yych <= 0x1F) goto yy16;
            goto yy21;
          }
        } else {
          if (yych <= '9') {
            if (yych == '#') goto yy23;
            goto yy16;
          } else {
            if (yych <= ':') goto yy25;
            if (yych == '\\') goto yy27;
            goto yy16;
          }
        }
      }

    }

    // The de-escaped filename now occupies [filename, out).
    int len = (int)(out - filename);
    // Capture the role before a trailing ':' on this very token flips
    // parsing_targets, so that "foo:" itself is still treated as a target.
    const bool is_dependency = !parsing_targets;
    if (len > 0 && filename[len - 1] == ':') {
      len--;  // Strip off trailing colon, if any.
      parsing_targets = false;
      have_target = true;
    }

    if (len > 0) {
      StringPiece piece = StringPiece(filename, len);
      // If we've seen this as an input before, skip it.
      std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
      if (pos == ins_.end()) {
        if (is_dependency) {
          if (poisoned_input) {
            *err = "inputs may not also have inputs" ;
            return false;
          }
          // New input.
          ins_.push_back(piece);
        } else {
          // Check for a new output.
          if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
            outs_.push_back(piece);
        }
      } else if (!is_dependency) {
        // We've passed an input on the left side; reject new inputs.
        poisoned_input = true;
      }
    }

    if (have_newline) {
      // A newline ends a rule so the next filename will be a new target.
      parsing_targets = true;
      poisoned_input = false;
    }
  }
  // No filename with a trailing ':' was ever seen (also true for empty input).
  if (!have_target) {
    *err = "expected ':' in depfile" ;
    return false;
  }
  return true;
}
372 | |