1 | // Copyright 2012 Google Inc. All Rights Reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "deps_log.h" |
16 | |
17 | #include <assert.h> |
18 | #include <stdio.h> |
19 | #include <errno.h> |
20 | #include <string.h> |
21 | #ifndef _WIN32 |
22 | #include <unistd.h> |
23 | #elif defined(_MSC_VER) && (_MSC_VER < 1900) |
24 | typedef __int32 int32_t; |
25 | typedef unsigned __int32 uint32_t; |
26 | #endif |
27 | |
28 | #include "graph.h" |
29 | #include "metrics.h" |
30 | #include "state.h" |
31 | #include "util.h" |
32 | |
33 | using namespace std; |
34 | |
35 | // The version is stored as 4 bytes after the signature and also serves as a |
36 | // byte order mark. Signature and version combined are 16 bytes long. |
37 | const char kFileSignature[] = "# ninjadeps\n" ; |
38 | const int kCurrentVersion = 4; |
39 | |
40 | // Record size is currently limited to less than the full 32 bit, due to |
41 | // internal buffers having to have this size. |
42 | const unsigned kMaxRecordSize = (1 << 19) - 1; |
43 | |
44 | DepsLog::~DepsLog() { |
45 | Close(); |
46 | } |
47 | |
48 | bool DepsLog::OpenForWrite(const string& path, string* err) { |
49 | if (needs_recompaction_) { |
50 | if (!Recompact(path, err)) |
51 | return false; |
52 | } |
53 | |
54 | assert(!file_); |
55 | file_path_ = path; // we don't actually open the file right now, but will do |
56 | // so on the first write attempt |
57 | return true; |
58 | } |
59 | |
60 | bool DepsLog::RecordDeps(Node* node, TimeStamp mtime, |
61 | const vector<Node*>& nodes) { |
62 | return RecordDeps(node, mtime, nodes.size(), |
63 | nodes.empty() ? NULL : (Node**)&nodes.front()); |
64 | } |
65 | |
66 | bool DepsLog::RecordDeps(Node* node, TimeStamp mtime, |
67 | int node_count, Node** nodes) { |
68 | // Track whether there's any new data to be recorded. |
69 | bool made_change = false; |
70 | |
71 | // Assign ids to all nodes that are missing one. |
72 | if (node->id() < 0) { |
73 | if (!RecordId(node)) |
74 | return false; |
75 | made_change = true; |
76 | } |
77 | for (int i = 0; i < node_count; ++i) { |
78 | if (nodes[i]->id() < 0) { |
79 | if (!RecordId(nodes[i])) |
80 | return false; |
81 | made_change = true; |
82 | } |
83 | } |
84 | |
85 | // See if the new data is different than the existing data, if any. |
86 | if (!made_change) { |
87 | Deps* deps = GetDeps(node); |
88 | if (!deps || |
89 | deps->mtime != mtime || |
90 | deps->node_count != node_count) { |
91 | made_change = true; |
92 | } else { |
93 | for (int i = 0; i < node_count; ++i) { |
94 | if (deps->nodes[i] != nodes[i]) { |
95 | made_change = true; |
96 | break; |
97 | } |
98 | } |
99 | } |
100 | } |
101 | |
102 | // Don't write anything if there's no new info. |
103 | if (!made_change) |
104 | return true; |
105 | |
106 | // Update on-disk representation. |
107 | unsigned size = 4 * (1 + 2 + node_count); |
108 | if (size > kMaxRecordSize) { |
109 | errno = ERANGE; |
110 | return false; |
111 | } |
112 | |
113 | if (!OpenForWriteIfNeeded()) { |
114 | return false; |
115 | } |
116 | size |= 0x80000000; // Deps record: set high bit. |
117 | if (fwrite(&size, 4, 1, file_) < 1) |
118 | return false; |
119 | int id = node->id(); |
120 | if (fwrite(&id, 4, 1, file_) < 1) |
121 | return false; |
122 | uint32_t mtime_part = static_cast<uint32_t>(mtime & 0xffffffff); |
123 | if (fwrite(&mtime_part, 4, 1, file_) < 1) |
124 | return false; |
125 | mtime_part = static_cast<uint32_t>((mtime >> 32) & 0xffffffff); |
126 | if (fwrite(&mtime_part, 4, 1, file_) < 1) |
127 | return false; |
128 | for (int i = 0; i < node_count; ++i) { |
129 | id = nodes[i]->id(); |
130 | if (fwrite(&id, 4, 1, file_) < 1) |
131 | return false; |
132 | } |
133 | if (fflush(file_) != 0) |
134 | return false; |
135 | |
136 | // Update in-memory representation. |
137 | Deps* deps = new Deps(mtime, node_count); |
138 | for (int i = 0; i < node_count; ++i) |
139 | deps->nodes[i] = nodes[i]; |
140 | UpdateDeps(node->id(), deps); |
141 | |
142 | return true; |
143 | } |
144 | |
145 | void DepsLog::Close() { |
146 | OpenForWriteIfNeeded(); // create the file even if nothing has been recorded |
147 | if (file_) |
148 | fclose(file_); |
149 | file_ = NULL; |
150 | } |
151 | |
152 | LoadStatus DepsLog::Load(const string& path, State* state, string* err) { |
153 | METRIC_RECORD(".ninja_deps load" ); |
154 | char buf[kMaxRecordSize + 1]; |
155 | FILE* f = fopen(path.c_str(), "rb" ); |
156 | if (!f) { |
157 | if (errno == ENOENT) |
158 | return LOAD_NOT_FOUND; |
159 | *err = strerror(errno); |
160 | return LOAD_ERROR; |
161 | } |
162 | |
163 | bool = true; |
164 | int version = 0; |
165 | if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1) |
166 | valid_header = false; |
167 | // Note: For version differences, this should migrate to the new format. |
168 | // But the v1 format could sometimes (rarely) end up with invalid data, so |
169 | // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days, |
170 | // and there was no release with it, so pretend that it never happened.) |
171 | if (!valid_header || strcmp(buf, kFileSignature) != 0 || |
172 | version != kCurrentVersion) { |
173 | if (version == 1) |
174 | *err = "deps log version change; rebuilding" ; |
175 | else |
176 | *err = "bad deps log signature or version; starting over" ; |
177 | fclose(f); |
178 | unlink(path.c_str()); |
179 | // Don't report this as a failure. An empty deps log will cause |
180 | // us to rebuild the outputs anyway. |
181 | return LOAD_SUCCESS; |
182 | } |
183 | |
184 | long offset; |
185 | bool read_failed = false; |
186 | int unique_dep_record_count = 0; |
187 | int total_dep_record_count = 0; |
188 | for (;;) { |
189 | offset = ftell(f); |
190 | |
191 | unsigned size; |
192 | if (fread(&size, 4, 1, f) < 1) { |
193 | if (!feof(f)) |
194 | read_failed = true; |
195 | break; |
196 | } |
197 | bool is_deps = (size >> 31) != 0; |
198 | size = size & 0x7FFFFFFF; |
199 | |
200 | if (size > kMaxRecordSize || fread(buf, size, 1, f) < 1) { |
201 | read_failed = true; |
202 | break; |
203 | } |
204 | |
205 | if (is_deps) { |
206 | assert(size % 4 == 0); |
207 | int* deps_data = reinterpret_cast<int*>(buf); |
208 | int out_id = deps_data[0]; |
209 | TimeStamp mtime; |
210 | mtime = (TimeStamp)(((uint64_t)(unsigned int)deps_data[2] << 32) | |
211 | (uint64_t)(unsigned int)deps_data[1]); |
212 | deps_data += 3; |
213 | int deps_count = (size / 4) - 3; |
214 | |
215 | Deps* deps = new Deps(mtime, deps_count); |
216 | for (int i = 0; i < deps_count; ++i) { |
217 | assert(deps_data[i] < (int)nodes_.size()); |
218 | assert(nodes_[deps_data[i]]); |
219 | deps->nodes[i] = nodes_[deps_data[i]]; |
220 | } |
221 | |
222 | total_dep_record_count++; |
223 | if (!UpdateDeps(out_id, deps)) |
224 | ++unique_dep_record_count; |
225 | } else { |
226 | int path_size = size - 4; |
227 | assert(path_size > 0); // CanonicalizePath() rejects empty paths. |
228 | // There can be up to 3 bytes of padding. |
229 | if (buf[path_size - 1] == '\0') --path_size; |
230 | if (buf[path_size - 1] == '\0') --path_size; |
231 | if (buf[path_size - 1] == '\0') --path_size; |
232 | StringPiece subpath(buf, path_size); |
233 | // It is not necessary to pass in a correct slash_bits here. It will |
234 | // either be a Node that's in the manifest (in which case it will already |
235 | // have a correct slash_bits that GetNode will look up), or it is an |
236 | // implicit dependency from a .d which does not affect the build command |
237 | // (and so need not have its slashes maintained). |
238 | Node* node = state->GetNode(subpath, 0); |
239 | |
240 | // Check that the expected index matches the actual index. This can only |
241 | // happen if two ninja processes write to the same deps log concurrently. |
242 | // (This uses unary complement to make the checksum look less like a |
243 | // dependency record entry.) |
244 | unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4); |
245 | int expected_id = ~checksum; |
246 | int id = nodes_.size(); |
247 | if (id != expected_id) { |
248 | read_failed = true; |
249 | break; |
250 | } |
251 | |
252 | assert(node->id() < 0); |
253 | node->set_id(id); |
254 | nodes_.push_back(node); |
255 | } |
256 | } |
257 | |
258 | if (read_failed) { |
259 | // An error occurred while loading; try to recover by truncating the |
260 | // file to the last fully-read record. |
261 | if (ferror(f)) { |
262 | *err = strerror(ferror(f)); |
263 | } else { |
264 | *err = "premature end of file" ; |
265 | } |
266 | fclose(f); |
267 | |
268 | if (!Truncate(path, offset, err)) |
269 | return LOAD_ERROR; |
270 | |
271 | // The truncate succeeded; we'll just report the load error as a |
272 | // warning because the build can proceed. |
273 | *err += "; recovering" ; |
274 | return LOAD_SUCCESS; |
275 | } |
276 | |
277 | fclose(f); |
278 | |
279 | // Rebuild the log if there are too many dead records. |
280 | int kMinCompactionEntryCount = 1000; |
281 | int kCompactionRatio = 3; |
282 | if (total_dep_record_count > kMinCompactionEntryCount && |
283 | total_dep_record_count > unique_dep_record_count * kCompactionRatio) { |
284 | needs_recompaction_ = true; |
285 | } |
286 | |
287 | return LOAD_SUCCESS; |
288 | } |
289 | |
290 | DepsLog::Deps* DepsLog::GetDeps(Node* node) { |
291 | // Abort if the node has no id (never referenced in the deps) or if |
292 | // there's no deps recorded for the node. |
293 | if (node->id() < 0 || node->id() >= (int)deps_.size()) |
294 | return NULL; |
295 | return deps_[node->id()]; |
296 | } |
297 | |
298 | Node* DepsLog::GetFirstReverseDepsNode(Node* node) { |
299 | for (size_t id = 0; id < deps_.size(); ++id) { |
300 | Deps* deps = deps_[id]; |
301 | if (!deps) |
302 | continue; |
303 | for (int i = 0; i < deps->node_count; ++i) { |
304 | if (deps->nodes[i] == node) |
305 | return nodes_[id]; |
306 | } |
307 | } |
308 | return NULL; |
309 | } |
310 | |
311 | bool DepsLog::Recompact(const string& path, string* err) { |
312 | METRIC_RECORD(".ninja_deps recompact" ); |
313 | |
314 | Close(); |
315 | string temp_path = path + ".recompact" ; |
316 | |
317 | // OpenForWrite() opens for append. Make sure it's not appending to a |
318 | // left-over file from a previous recompaction attempt that crashed somehow. |
319 | unlink(temp_path.c_str()); |
320 | |
321 | DepsLog new_log; |
322 | if (!new_log.OpenForWrite(temp_path, err)) |
323 | return false; |
324 | |
325 | // Clear all known ids so that new ones can be reassigned. The new indices |
326 | // will refer to the ordering in new_log, not in the current log. |
327 | for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i) |
328 | (*i)->set_id(-1); |
329 | |
330 | // Write out all deps again. |
331 | for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) { |
332 | Deps* deps = deps_[old_id]; |
333 | if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps. |
334 | |
335 | if (!IsDepsEntryLiveFor(nodes_[old_id])) |
336 | continue; |
337 | |
338 | if (!new_log.RecordDeps(nodes_[old_id], deps->mtime, |
339 | deps->node_count, deps->nodes)) { |
340 | new_log.Close(); |
341 | return false; |
342 | } |
343 | } |
344 | |
345 | new_log.Close(); |
346 | |
347 | // All nodes now have ids that refer to new_log, so steal its data. |
348 | deps_.swap(new_log.deps_); |
349 | nodes_.swap(new_log.nodes_); |
350 | |
351 | if (unlink(path.c_str()) < 0) { |
352 | *err = strerror(errno); |
353 | return false; |
354 | } |
355 | |
356 | if (rename(temp_path.c_str(), path.c_str()) < 0) { |
357 | *err = strerror(errno); |
358 | return false; |
359 | } |
360 | |
361 | return true; |
362 | } |
363 | |
364 | bool DepsLog::IsDepsEntryLiveFor(const Node* node) { |
365 | // Skip entries that don't have in-edges or whose edges don't have a |
366 | // "deps" attribute. They were in the deps log from previous builds, but |
367 | // the the files they were for were removed from the build and their deps |
368 | // entries are no longer needed. |
369 | // (Without the check for "deps", a chain of two or more nodes that each |
370 | // had deps wouldn't be collected in a single recompaction.) |
371 | return node->in_edge() && !node->in_edge()->GetBinding("deps" ).empty(); |
372 | } |
373 | |
374 | bool DepsLog::UpdateDeps(int out_id, Deps* deps) { |
375 | if (out_id >= (int)deps_.size()) |
376 | deps_.resize(out_id + 1); |
377 | |
378 | bool delete_old = deps_[out_id] != NULL; |
379 | if (delete_old) |
380 | delete deps_[out_id]; |
381 | deps_[out_id] = deps; |
382 | return delete_old; |
383 | } |
384 | |
385 | bool DepsLog::RecordId(Node* node) { |
386 | int path_size = node->path().size(); |
387 | int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary. |
388 | |
389 | unsigned size = path_size + padding + 4; |
390 | if (size > kMaxRecordSize) { |
391 | errno = ERANGE; |
392 | return false; |
393 | } |
394 | |
395 | if (!OpenForWriteIfNeeded()) { |
396 | return false; |
397 | } |
398 | if (fwrite(&size, 4, 1, file_) < 1) |
399 | return false; |
400 | if (fwrite(node->path().data(), path_size, 1, file_) < 1) { |
401 | assert(!node->path().empty()); |
402 | return false; |
403 | } |
404 | if (padding && fwrite("\0\0" , padding, 1, file_) < 1) |
405 | return false; |
406 | int id = nodes_.size(); |
407 | unsigned checksum = ~(unsigned)id; |
408 | if (fwrite(&checksum, 4, 1, file_) < 1) |
409 | return false; |
410 | if (fflush(file_) != 0) |
411 | return false; |
412 | |
413 | node->set_id(id); |
414 | nodes_.push_back(node); |
415 | |
416 | return true; |
417 | } |
418 | |
419 | bool DepsLog::OpenForWriteIfNeeded() { |
420 | if (file_path_.empty()) { |
421 | return true; |
422 | } |
423 | file_ = fopen(file_path_.c_str(), "ab" ); |
424 | if (!file_) { |
425 | return false; |
426 | } |
427 | // Set the buffer size to this and flush the file buffer after every record |
428 | // to make sure records aren't written partially. |
429 | if (setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1) != 0) { |
430 | return false; |
431 | } |
432 | SetCloseOnExec(fileno(file_)); |
433 | |
434 | // Opening a file in append mode doesn't set the file pointer to the file's |
435 | // end on Windows. Do that explicitly. |
436 | fseek(file_, 0, SEEK_END); |
437 | |
438 | if (ftell(file_) == 0) { |
439 | if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) { |
440 | return false; |
441 | } |
442 | if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) { |
443 | return false; |
444 | } |
445 | } |
446 | if (fflush(file_) != 0) { |
447 | return false; |
448 | } |
449 | file_path_.clear(); |
450 | return true; |
451 | } |
452 | |