1/* Generated by re2c */
2// Copyright 2011 Google Inc. All Rights Reserved.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#include "depfile_parser.h"
17#include "util.h"
18
19#include <algorithm>
20
21using namespace std;
22
23DepfileParser::DepfileParser(DepfileParserOptions options)
24 : options_(options)
25{
26}
27
28// A note on backslashes in Makefiles, from reading the docs:
29// Backslash-newline is the line continuation character.
30// Backslash-# escapes a # (otherwise meaningful as a comment start).
31// Backslash-% escapes a % (otherwise meaningful as a special).
32// Finally, quoting the GNU manual, "Backslashes that are not in danger
33// of quoting ‘%’ characters go unmolested."
34// How do you end a line with a backslash? The netbsd Make docs suggest
35// reading the result of a shell command echoing a backslash!
36//
37// Rather than implement all of the above, we follow what GCC/Clang produces:
38// Backslashes escape a space or hash sign.
39// When a space is preceded by 2N+1 backslashes, it represents N backslashes
40// followed by a space.
41// When a space is preceded by 2N backslashes, it represents 2N backslashes at
42// the end of a filename.
43// A hash sign is escaped by a single backslash, as is a colon that is not
44// followed by whitespace or EOF. All other backslashes remain unchanged.
45//
46// If anyone actually has depfiles that rely on the more complicated
47// behavior we can adjust this.
// Parses the Makefile-style dependency text in *content and records the file
// names it finds. Within a rule, tokens read up to and including the first
// token that ends in ':' are treated as targets and appended to outs_; tokens
// after it are treated as dependencies and appended to ins_. A rule ends at a
// newline. Both lists are de-duplicated with a linear std::find, and a token
// that is already present in ins_ is never added to outs_.
//
// content: the depfile text. It is rewritten in place while escapes are
//   removed (see the memmove/memset and "*out++" writes below), and every
//   StringPiece stored in ins_/outs_ is built from a pointer into this
//   buffer. Presumably StringPiece is a non-owning view, in which case
//   *content must outlive the parse results -- confirm against StringPiece.
// err: receives the message when parsing fails.
//
// Returns false with *err set to
//   "inputs may not also have inputs" when a name already recorded as an
//     input appears as a target and the same rule then introduces a new
//     input, or
//   "expected ':' in depfile" when no token ending in ':' was seen at all
//     (this includes empty content).
// Returns true otherwise.
//
// NOTE(review): inside the inner loop the scanner advances with "*++in" and
// never compares `in` against `end`; it stops on a 0 byte instead. It
// therefore assumes the byte at content[size()] is readable and 0
// (presumably std::string's terminator) -- confirm for the targeted standard.
//
// The code between the inner "for (;;) {" and its closing brace is scanner
// output (the file banner says "Generated by re2c"); the yyNN labels are the
// generated states and the braced blocks under them are the rule actions.
48bool DepfileParser::Parse(string* content, string* err) {
49 // in: current parser input point.
50 // end: end of input.
51 // parsing_targets: whether we are parsing targets or dependencies.
    // have_target: set once any token ending in ':' has been seen.
    // poisoned_input: set when a name already in ins_ shows up on the target
    // side of the current rule; cleared again at the next newline.
52 char* in = &(*content)[0];
53 char* end = in + content->size();
54 bool have_target = false;
55 bool parsing_targets = true;
56 bool poisoned_input = false;
    // Each pass of this outer loop produces at most one filename token.
57 while (in < end) {
58 bool have_newline = false;
59 // out: current output point (typically same as in, but can fall behind
60 // as we de-escape backslashes).
61 char* out = in;
62 // filename: start of the current parsed filename.
63 char* filename = out;
    // Each pass of this inner loop matches one scanner rule starting at `in`.
    // A rule action either uses `continue` (keep extending the current
    // filename) or `break` (the filename, possibly empty, is complete).
64 for (;;) {
65 // start: beginning of the current parsed span.
66 const char* start = in;
    // yymarker: backtrack position. Set in yy13 to the byte just past the
    // first backslash and restored in yy19.
67 char* yymarker = NULL;
68
69 {
70 unsigned char yych;
    // yybm: per-byte class table indexed by the byte value. Bit 128 marks
    // bytes that belong to a plain-text span (consumed by the yy9 loop):
    // letters, digits, the punctuation ! % ( ) + , - . / : = @ [ ] _ { } ~
    // and every byte >= 0x80. Note that ':' is in this class, so a colon is
    // scanned as part of the filename and stripped after the inner loop.
71 static const unsigned char yybm[] = {
72 0, 0, 0, 0, 0, 0, 0, 0,
73 0, 0, 0, 0, 0, 0, 0, 0,
74 0, 0, 0, 0, 0, 0, 0, 0,
75 0, 0, 0, 0, 0, 0, 0, 0,
76 0, 128, 0, 0, 0, 128, 0, 0,
77 128, 128, 0, 128, 128, 128, 128, 128,
78 128, 128, 128, 128, 128, 128, 128, 128,
79 128, 128, 128, 0, 0, 128, 0, 0,
80 128, 128, 128, 128, 128, 128, 128, 128,
81 128, 128, 128, 128, 128, 128, 128, 128,
82 128, 128, 128, 128, 128, 128, 128, 128,
83 128, 128, 128, 128, 0, 128, 0, 128,
84 0, 128, 128, 128, 128, 128, 128, 128,
85 128, 128, 128, 128, 128, 128, 128, 128,
86 128, 128, 128, 128, 128, 128, 128, 128,
87 128, 128, 128, 128, 0, 128, 128, 0,
88 128, 128, 128, 128, 128, 128, 128, 128,
89 128, 128, 128, 128, 128, 128, 128, 128,
90 128, 128, 128, 128, 128, 128, 128, 128,
91 128, 128, 128, 128, 128, 128, 128, 128,
92 128, 128, 128, 128, 128, 128, 128, 128,
93 128, 128, 128, 128, 128, 128, 128, 128,
94 128, 128, 128, 128, 128, 128, 128, 128,
95 128, 128, 128, 128, 128, 128, 128, 128,
96 128, 128, 128, 128, 128, 128, 128, 128,
97 128, 128, 128, 128, 128, 128, 128, 128,
98 128, 128, 128, 128, 128, 128, 128, 128,
99 128, 128, 128, 128, 128, 128, 128, 128,
100 128, 128, 128, 128, 128, 128, 128, 128,
101 128, 128, 128, 128, 128, 128, 128, 128,
102 128, 128, 128, 128, 128, 128, 128, 128,
103 128, 128, 128, 128, 128, 128, 128, 128,
104 };
    // Initial state: dispatch on the first byte of the span.
    //   plain-text byte -> yy9      LF  -> yy6       CR -> yy8
    //   '$'             -> yy12     backslash -> yy13
    //   0 byte          -> falls through to the bare "++in; break" below
    //   anything else   -> yy4 (swallowed, ends the filename)
105 yych = *in;
106 if (yybm[0+yych] & 128) {
107 goto yy9;
108 }
109 if (yych <= '\r') {
110 if (yych <= '\t') {
111 if (yych >= 0x01) goto yy4;
112 } else {
113 if (yych <= '\n') goto yy6;
114 if (yych <= '\f') goto yy4;
115 goto yy8;
116 }
117 } else {
118 if (yych <= '$') {
119 if (yych <= '#') goto yy4;
120 goto yy12;
121 } else {
122 if (yych <= '?') goto yy4;
123 if (yych <= '\\') goto yy13;
124 goto yy4;
125 }
126 }
    // Only a 0 byte reaches this point: consume it and end the filename.
127 ++in;
128 {
129 break;
130 }
    // yy4: consume one separator byte. yy5 is also entered directly by states
    // that have already advanced `in` themselves.
131yy4:
132 ++in;
133yy5:
134 {
135 // For any other character (e.g. whitespace), swallow it here,
136 // allowing the outer logic to loop around again.
137 break;
138 }
139yy6:
140 ++in;
141 {
142 // A newline ends the current file name and the current rule.
143 have_newline = true;
144 break;
145 }
    // yy8: saw CR. CR LF is handled as a newline; a lone CR is swallowed.
146yy8:
147 yych = *++in;
148 if (yych == '\n') goto yy6;
149 goto yy5;
    // yy9: extend the span over consecutive plain-text bytes.
150yy9:
151 yych = *++in;
152 if (yybm[0+yych] & 128) {
153 goto yy9;
154 }
    // yy11: also reached from the backslash states when the backslashes (and
    // possibly one following byte, via yy16) are to be kept verbatim.
155yy11:
156 {
157 // Got a span of plain text.
158 int len = (int)(in - start);
159 // Need to shift it over if we're overwriting backslashes.
160 if (out < start)
161 memmove(out, start, len);
162 out += len;
163 continue;
164 }
    // yy12: saw '$'. "$$" goes to yy14; a lone '$' is consumed and ends the
    // filename via yy5 (the following byte is left unconsumed).
165yy12:
166 yych = *++in;
167 if (yych == '$') goto yy14;
168 goto yy5;
    // yy13: saw one backslash; dispatch on the byte after it.
    //   0 byte -> yy5 (backslash swallowed)   LF -> yy17 (continuation)
    //   CR     -> yy19                        ' ' -> yy21
    //   '#'    -> yy23                        ':' -> yy25
    //   another backslash -> yy27             anything else -> yy16
169yy13:
170 yych = *(yymarker = ++in);
171 if (yych <= ' ') {
172 if (yych <= '\n') {
173 if (yych <= 0x00) goto yy5;
174 if (yych <= '\t') goto yy16;
175 goto yy17;
176 } else {
177 if (yych == '\r') goto yy19;
178 if (yych <= 0x1F) goto yy16;
179 goto yy21;
180 }
181 } else {
182 if (yych <= '9') {
183 if (yych == '#') goto yy23;
184 goto yy16;
185 } else {
186 if (yych <= ':') goto yy25;
187 if (yych == '\\') goto yy27;
188 goto yy16;
189 }
190 }
191yy14:
192 ++in;
193 {
194 // De-escape dollar character.
195 *out++ = '$';
196 continue;
197 }
    // yy16: backslash(es) followed by an ordinary byte; consume that byte and
    // keep the whole span unchanged as plain text.
198yy16:
199 ++in;
200 goto yy11;
201yy17:
202 ++in;
203 {
204 // A line continuation ends the current file name.
205 break;
206 }
    // yy19: saw backslash CR. If LF follows, this is a line continuation.
    // Otherwise rewind to just past the backslash so only the backslash is
    // swallowed and the CR is rescanned.
207yy19:
208 yych = *++in;
209 if (yych == '\n') goto yy17;
210 in = yymarker;
211 goto yy5;
    // yy21: reached with an odd number of backslashes followed by a space
    // (from yy13 with one backslash, from yy32 with three, five, ...).
212yy21:
213 ++in;
214 {
215 // 2N+1 backslashes plus space -> N backslashes plus space.
    // len counts the backslashes plus the space (2N+2), so len / 2 - 1 == N.
    // The memset is only needed when `out` has fallen behind `start`;
    // otherwise the bytes at `out` already are backslashes.
216 int len = (int)(in - start);
217 int n = len / 2 - 1;
218 if (out < start)
219 memset(out, '\\', n);
220 out += n;
221 *out++ = ' ';
222 continue;
223 }
224yy23:
225 ++in;
226 {
227 // De-escape hash sign, but preserve other leading backslashes.
    // len counts the backslashes plus '#', so len - 2 backslashes are kept.
228 int len = (int)(in - start);
229 if (len > 2 && out < start)
230 memset(out, '\\', len - 2);
231 out += len - 2;
232 *out++ = '#';
233 continue;
234 }
    // yy25: saw backslash(es) then ':'; look at the next byte without
    // consuming it. A 0 byte, tab, LF, CR or space selects yy28; any other
    // byte falls through to yy26.
235yy25:
236 yych = *++in;
237 if (yych <= '\f') {
238 if (yych <= 0x00) goto yy28;
239 if (yych <= 0x08) goto yy26;
240 if (yych <= '\n') goto yy28;
241 } else {
242 if (yych <= '\r') goto yy28;
243 if (yych == ' ') goto yy28;
244 }
245yy26:
246 {
247 // De-escape colon sign, but preserve other leading backslashes.
248 // Regular expression uses lookahead to make sure that no whitespace
249 // nor EOF follows. In that case it'd be the : at the end of a target
250 int len = (int)(in - start);
251 if (len > 2 && out < start)
252 memset(out, '\\', len - 2);
253 out += len - 2;
254 *out++ = ':';
255 continue;
256 }
    // yy27: an even number of backslashes has been read so far. A space sends
    // the scanner to yy30; a 0 byte, LF or CR keeps the backslashes as plain
    // text (yy11) without consuming that byte; another backslash moves to
    // yy32 (odd count).
257yy27:
258 yych = *++in;
259 if (yych <= ' ') {
260 if (yych <= '\n') {
261 if (yych <= 0x00) goto yy11;
262 if (yych <= '\t') goto yy16;
263 goto yy11;
264 } else {
265 if (yych == '\r') goto yy11;
266 if (yych <= 0x1F) goto yy16;
267 goto yy30;
268 }
269 } else {
270 if (yych <= '9') {
271 if (yych == '#') goto yy23;
272 goto yy16;
273 } else {
274 if (yych <= ':') goto yy25;
275 if (yych == '\\') goto yy32;
276 goto yy16;
277 }
278 }
    // yy28: consume the byte that followed the colon (0 byte or whitespace).
279yy28:
280 ++in;
281 {
282 // Backslash followed by : and whitespace.
283 // It is therefore normal text and not an escaped colon
    // len excludes the trailing byte just consumed, so the backslashes and
    // the colon are copied and that trailing byte is dropped.
284 int len = (int)(in - start - 1);
285 // Need to shift it over if we're overwriting backslashes.
286 if (out < start)
287 memmove(out, start, len);
288 out += len;
289 if (*(in - 1) == '\n')
290 have_newline = true;
291 break;
292 }
293yy30:
294 ++in;
295 {
296 // 2N backslashes plus space -> 2N backslashes, end of filename.
    // len counts the backslashes plus the space; the space itself is dropped.
297 int len = (int)(in - start);
298 if (out < start)
299 memset(out, '\\', len - 1);
300 out += len - 1;
301 break;
302 }
    // yy32: an odd number (three or more) of backslashes has been read so
    // far. Same dispatch as yy27, except that a space goes to yy21 and
    // another backslash goes back to yy27.
303yy32:
304 yych = *++in;
305 if (yych <= ' ') {
306 if (yych <= '\n') {
307 if (yych <= 0x00) goto yy11;
308 if (yych <= '\t') goto yy16;
309 goto yy11;
310 } else {
311 if (yych == '\r') goto yy11;
312 if (yych <= 0x1F) goto yy16;
313 goto yy21;
314 }
315 } else {
316 if (yych <= '9') {
317 if (yych == '#') goto yy23;
318 goto yy16;
319 } else {
320 if (yych <= ':') goto yy25;
321 if (yych == '\\') goto yy27;
322 goto yy16;
323 }
324 }
325 }
326
327 }
328
    // The de-escaped token now occupies [filename, out). is_dependency is
    // captured before the colon check below, so the token that carries the
    // ':' is itself still classified by the state that preceded it.
329 int len = (int)(out - filename);
330 const bool is_dependency = !parsing_targets;
331 if (len > 0 && filename[len - 1] == ':') {
332 len--; // Strip off trailing colon, if any.
333 parsing_targets = false;
334 have_target = true;
335 }
336
    // Empty tokens (separators, or a bare ":") record nothing.
337 if (len > 0) {
338 StringPiece piece = StringPiece(filename, len);
339 // If we've seen this as an input before, skip it.
340 std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
341 if (pos == ins_.end()) {
342 if (is_dependency) {
343 if (poisoned_input) {
344 *err = "inputs may not also have inputs";
345 return false;
346 }
347 // New input.
348 ins_.push_back(piece);
349 } else {
350 // Check for a new output.
351 if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
352 outs_.push_back(piece);
353 }
354 } else if (!is_dependency) {
355 // We've passed an input on the left side; reject new inputs.
356 poisoned_input = true;
357 }
358 }
359
360 if (have_newline) {
361 // A newline ends a rule so the next filename will be a new target.
362 parsing_targets = true;
363 poisoned_input = false;
364 }
365 }
    // No token ending in ':' was seen anywhere in the input (also the case
    // when the input is empty).
366 if (!have_target) {
367 *err = "expected ':' in depfile";
368 return false;
369 }
370 return true;
371}
372