1/*
2 * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
3 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of Redis nor the names of its contributors may be used
15 * to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "server.h"
32#include <sys/stat.h>
33#include <sys/types.h>
34#include <regex.h>
35#include <libgen.h>
36
/* Result codes returned by checkSingleAof(). */
#define AOF_CHECK_OK                  0 /* Valid, or nothing had to be cut. */
#define AOF_CHECK_EMPTY               1 /* The file has zero length. */
#define AOF_CHECK_TRUNCATED           2 /* Invalid tail removed (--fix). */
#define AOF_CHECK_TIMESTAMP_TRUNCATED 3 /* Cut at a timestamp annotation. */

/* Kind of file handed to the tool, as detected by getInputFileType(). */
typedef enum {
    AOF_RESP,         /* Old-style AOF. */
    AOF_RDB_PREAMBLE, /* Old-style RDB-preamble AOF. */
    AOF_MULTI_PART,   /* Manifest of a Multi Part AOF. */
} input_file_type;
47
48aofManifest *aofManifestCreate(void);
49void aofManifestFree(aofManifest *am);
50aofManifest *aofLoadManifestFromFile(sds am_filepath);
51
/* Record a printf-style error message in the global 'error' buffer, prefixed
 * with the file offset currently stored in 'epos' (printed in hex).
 *
 * The message is first rendered into a 1024 byte scratch buffer; 'error' is
 * 20 bytes larger, which is exactly the room taken by the "0x", the 16
 * character offset field and the ": " separator.
 *
 * Wrapped in do { } while (0) so that 'ERROR(...);' behaves as one statement
 * everywhere, including an unbraced if/else branch.
 *
 * NOTE(review): "%16llx" pads the offset with spaces, not zeros; confirm
 * whether "%016llx" was intended before changing the output format. */
#define ERROR(...) do { \
    char errmsg_[1024]; \
    snprintf(errmsg_, sizeof(errmsg_), __VA_ARGS__); \
    snprintf(error, sizeof(error), "0x%16llx: %s", (unsigned long long)epos, errmsg_); \
} while (0)

/* Last error recorded by ERROR(); an empty string means "no error so far". */
static char error[1044];
/* File offset saved by the read helpers right before they touch the file. */
static off_t epos;
/* Starts at 1 and grows by one for every "\r\n" accepted by consumeNewline(). */
static long long line = 1;
/* Target of --truncate-to-timestamp; 0 means timestamp mode is off. */
static time_t to_timestamp = 0;
62
63int consumeNewline(char *buf) {
64 if (strncmp(buf,"\r\n",2) != 0) {
65 ERROR("Expected \\r\\n, got: %02x%02x",buf[0],buf[1]);
66 return 0;
67 }
68 line += 1;
69 return 1;
70}
71
72int readLong(FILE *fp, char prefix, long *target) {
73 char buf[128], *eptr;
74 epos = ftello(fp);
75 if (fgets(buf,sizeof(buf),fp) == NULL) {
76 return 0;
77 }
78 if (buf[0] != prefix) {
79 ERROR("Expected prefix '%c', got: '%c'",prefix,buf[0]);
80 return 0;
81 }
82 *target = strtol(buf+1,&eptr,10);
83 return consumeNewline(eptr);
84}
85
86int readBytes(FILE *fp, char *target, long length) {
87 long real;
88 epos = ftello(fp);
89 real = fread(target,1,length,fp);
90 if (real != length) {
91 ERROR("Expected to read %ld bytes, got %ld bytes",length,real);
92 return 0;
93 }
94 return 1;
95}
96
/* Read one "$<len>\r\n<payload>\r\n" bulk string from 'fp'.
 *
 * On success returns 1 and stores in '*target' a zmalloc'ed, null terminated
 * copy of the payload that the caller releases with zfree(). On any failure
 * returns 0 and leaves '*target' set to NULL. */
int readString(FILE *fp, char** target) {
    long payload_len;

    *target = NULL;
    if (!readLong(fp,'$',&payload_len)) return 0;

    if (payload_len < 0 || payload_len > LONG_MAX - 2) {
        ERROR("Expected to read string of %ld bytes, which is not in the suitable range",payload_len);
        return 0;
    }

    /* Two extra bytes so the trailing \r\n is consumed with the payload. */
    long total_len = payload_len + 2;
    char *data = (char*)zmalloc(total_len);

    if (!readBytes(fp,data,total_len) || !consumeNewline(data+payload_len)) {
        zfree(data);
        return 0;
    }

    /* Overwrite the '\r' to turn the payload into a C string. */
    data[payload_len] = '\0';
    *target = data;
    return 1;
}
125
/* Read the "*<count>\r\n" line that opens a command and store the count in
 * 'target'. Same return value as readLong(). */
int readArgc(FILE *fp, long *target) {
    const char argc_prefix = '*';
    return readLong(fp, argc_prefix, target);
}
129
/* Decode one RESP-encoded command from the AOF and track MULTI/EXEC nesting.
 *
 * fp:        stream positioned on the '*' that starts the command.
 * filename:  name used in error messages only.
 * out_multi: transaction depth kept by the caller. It is incremented when the
 *            command name is MULTI and decremented when it is EXEC (names are
 *            compared case-insensitively), so the caller can tell whether the
 *            file ends inside an unclosed transaction.
 *
 * Returns 1 when the whole command was read. Returns 0 when the argument
 * count or one of the arguments cannot be read, on a MULTI found while a
 * transaction is already open, or on an EXEC that does not bring the depth
 * back to zero (the last two record an error).
 **/
int processRESP(FILE *fp, char *filename, int *out_multi) {
    long argc;
    char *str;

    if (!readArgc(fp, &argc)) return 0;

    /* The index has the same type as 'argc': the count comes straight from
     * the file, and an int index could overflow before reaching it. */
    for (long i = 0; i < argc; i++) {
        if (!readString(fp, &str)) return 0;
        if (i == 0) {
            /* Only the command name (first argument) is inspected. */
            if (strcasecmp(str, "multi") == 0) {
                if ((*out_multi)++) {
                    ERROR("Unexpected MULTI in AOF %s", filename);
                    zfree(str);
                    return 0;
                }
            } else if (strcasecmp(str, "exec") == 0) {
                if (--(*out_multi)) {
                    ERROR("Unexpected EXEC in AOF %s", filename);
                    zfree(str);
                    return 0;
                }
            }
        }
        zfree(str);
    }

    return 1;
}
165
166/* Used to parse an annotation in the AOF file, the annotation starts with '#'
167 * in AOF. Currently AOF only contains timestamp annotations, but this function
168 * can easily be extended to handle other annotations.
169 *
170 * The processing rule of time annotation is that once the timestamp is found to
171 * be greater than 'to_timestamp', the AOF after the annotation is truncated.
172 * Note that in Multi Part AOF, this truncation is only allowed when the last_file
173 * parameter is 1.
174 **/
175int processAnnotations(FILE *fp, char *filename, int last_file) {
176 char buf[AOF_ANNOTATION_LINE_MAX_LEN];
177
178 epos = ftello(fp);
179 if (fgets(buf, sizeof(buf), fp) == NULL) {
180 printf("Failed to read annotations from AOF %s, aborting...\n", filename);
181 exit(1);
182 }
183
184 if (to_timestamp && strncmp(buf, "#TS:", 4) == 0) {
185 char *endptr;
186 errno = 0;
187 time_t ts = strtol(buf+4, &endptr, 10);
188 if (errno != 0 || *endptr != '\r') {
189 printf("Invalid timestamp annotation\n");
190 exit(1);
191 }
192 if (ts <= to_timestamp) return 1;
193 if (epos == 0) {
194 printf("AOF %s has nothing before timestamp %ld, "
195 "aborting...\n", filename, to_timestamp);
196 exit(1);
197 }
198 if (!last_file) {
199 printf("Failed to truncate AOF %s to timestamp %ld to offset %ld because it is not the last file.\n",
200 filename, to_timestamp, (long int)epos);
201 printf("If you insist, please delete all files after this file according to the manifest "
202 "file and delete the corresponding records in manifest file manually. Then re-run redis-check-aof.\n");
203 exit(1);
204 }
205 /* Truncate remaining AOF if exceeding 'to_timestamp' */
206 if (ftruncate(fileno(fp), epos) == -1) {
207 printf("Failed to truncate AOF %s to timestamp %ld\n",
208 filename, to_timestamp);
209 exit(1);
210 } else {
211 return 0;
212 }
213 }
214 return 1;
215}
216
217/* Used to check the validity of a single AOF file. The AOF file can be:
218 * 1. Old-style AOF
219 * 2. Old-style RDB-preamble AOF
220 * 3. BASE or INCR in Multi Part AOF
221 * */
222int checkSingleAof(char *aof_filename, char *aof_filepath, int last_file, int fix, int preamble) {
223 off_t pos = 0, diff;
224 int multi = 0;
225 char buf[2];
226
227 FILE *fp = fopen(aof_filepath, "r+");
228 if (fp == NULL) {
229 printf("Cannot open file %s: %s, aborting...\n", aof_filepath, strerror(errno));
230 exit(1);
231 }
232
233 struct redis_stat sb;
234 if (redis_fstat(fileno(fp),&sb) == -1) {
235 printf("Cannot stat file: %s, aborting...\n", aof_filename);
236 exit(1);
237 }
238
239 off_t size = sb.st_size;
240 if (size == 0) {
241 return AOF_CHECK_EMPTY;
242 }
243
244 if (preamble) {
245 char *argv[2] = {NULL, aof_filename};
246 if (redis_check_rdb_main(2, argv, fp) == C_ERR) {
247 printf("RDB preamble of AOF file is not sane, aborting.\n");
248 exit(1);
249 } else {
250 printf("RDB preamble is OK, proceeding with AOF tail...\n");
251 }
252 }
253
254 while(1) {
255 if (!multi) pos = ftello(fp);
256 if (fgets(buf, sizeof(buf), fp) == NULL) {
257 if (feof(fp)) {
258 break;
259 }
260 printf("Failed to read from AOF %s, aborting...\n", aof_filename);
261 exit(1);
262 }
263
264 if (fseek(fp, -1, SEEK_CUR) == -1) {
265 printf("Failed to fseek in AOF %s: %s", aof_filename, strerror(errno));
266 exit(1);
267 }
268
269 if (buf[0] == '#') {
270 if (!processAnnotations(fp, aof_filepath, last_file)) {
271 fclose(fp);
272 return AOF_CHECK_TIMESTAMP_TRUNCATED;
273 }
274 } else if (buf[0] == '*'){
275 if (!processRESP(fp, aof_filepath, &multi)) break;
276 } else {
277 printf("AOF %s format error\n", aof_filename);
278 break;
279 }
280 }
281
282 if (feof(fp) && multi && strlen(error) == 0) {
283 ERROR("Reached EOF before reading EXEC for MULTI");
284 }
285
286 if (strlen(error) > 0) {
287 printf("%s\n", error);
288 }
289
290 diff = size-pos;
291
292 /* In truncate-to-timestamp mode, just exit if there is nothing to truncate. */
293 if (diff == 0 && to_timestamp) {
294 printf("Truncate nothing in AOF %s to timestamp %ld\n", aof_filename, to_timestamp);
295 fclose(fp);
296 return AOF_CHECK_OK;
297 }
298
299 printf("AOF analyzed: filename=%s, size=%lld, ok_up_to=%lld, ok_up_to_line=%lld, diff=%lld\n",
300 aof_filename, (long long) size, (long long) pos, line, (long long) diff);
301 if (diff > 0) {
302 if (fix) {
303 if (!last_file) {
304 printf("Failed to truncate AOF %s because it is not the last file\n", aof_filename);
305 exit(1);
306 }
307
308 char buf[2];
309 printf("This will shrink the AOF %s from %lld bytes, with %lld bytes, to %lld bytes\n",
310 aof_filename, (long long)size, (long long)diff, (long long)pos);
311 printf("Continue? [y/N]: ");
312 if (fgets(buf, sizeof(buf), stdin) == NULL || strncasecmp(buf, "y", 1) != 0) {
313 printf("Aborting...\n");
314 exit(1);
315 }
316 if (ftruncate(fileno(fp), pos) == -1) {
317 printf("Failed to truncate AOF %s\n", aof_filename);
318 exit(1);
319 } else {
320 fclose(fp);
321 return AOF_CHECK_TRUNCATED;
322 }
323 } else {
324 printf("AOF %s is not valid. Use the --fix option to try fixing it.\n", aof_filename);
325 exit(1);
326 }
327 }
328 fclose(fp);
329 return AOF_CHECK_OK;
330}
331
332/* Used to determine whether the file is a RDB file. These two possibilities:
333 * 1. The file is an old style RDB-preamble AOF
334 * 2. The file is a BASE AOF in Multi Part AOF
335 * */
336int fileIsRDB(char *filepath) {
337 FILE *fp = fopen(filepath, "r");
338 if (fp == NULL) {
339 printf("Cannot open file %s: %s\n", filepath, strerror(errno));
340 exit(1);
341 }
342
343 struct redis_stat sb;
344 if (redis_fstat(fileno(fp), &sb) == -1) {
345 printf("Cannot stat file: %s\n", filepath);
346 exit(1);
347 }
348
349 off_t size = sb.st_size;
350 if (size == 0) {
351 fclose(fp);
352 return 0;
353 }
354
355 if (size >= 8) { /* There must be at least room for the RDB header. */
356 char sig[5];
357 int rdb_file = fread(sig, sizeof(sig), 1, fp) == 1 &&
358 memcmp(sig, "REDIS", sizeof(sig)) == 0;
359 if (rdb_file) {
360 fclose(fp);
361 return 1;
362 }
363 }
364
365 fclose(fp);
366 return 0;
367}
368
369/* Used to determine whether the file is a manifest file. */
370#define MANIFEST_MAX_LINE 1024
371int fileIsManifest(char *filepath) {
372 int is_manifest = 0;
373 FILE *fp = fopen(filepath, "r");
374 if (fp == NULL) {
375 printf("Cannot open file %s: %s\n", filepath, strerror(errno));
376 exit(1);
377 }
378
379 struct redis_stat sb;
380 if (redis_fstat(fileno(fp), &sb) == -1) {
381 printf("Cannot stat file: %s\n", filepath);
382 exit(1);
383 }
384
385 off_t size = sb.st_size;
386 if (size == 0) {
387 fclose(fp);
388 return 0;
389 }
390
391 char buf[MANIFEST_MAX_LINE+1];
392 while (1) {
393 if (fgets(buf, MANIFEST_MAX_LINE+1, fp) == NULL) {
394 if (feof(fp)) {
395 break;
396 } else {
397 printf("Cannot read file: %s\n", filepath);
398 exit(1);
399 }
400 }
401
402 /* Skip comments lines */
403 if (buf[0] == '#') {
404 continue;
405 } else if (!memcmp(buf, "file", strlen("file"))) {
406 is_manifest = 1;
407 }
408 }
409
410 fclose(fp);
411 return is_manifest;
412}
413
414/* Get the format of the file to be checked. It can be:
415 * AOF_RESP: Old-style AOF
416 * AOF_RDB_PREAMBLE: Old-style RDB-preamble AOF
417 * AOF_MULTI_PART: manifest in Multi Part AOF
418 *
419 * redis-check-aof tool will automatically perform different
420 * verification logic according to different file formats.
421 * */
422input_file_type getInputFileType(char *filepath) {
423 if (fileIsManifest(filepath)) {
424 return AOF_MULTI_PART;
425 } else if (fileIsRDB(filepath)) {
426 return AOF_RDB_PREAMBLE;
427 } else {
428 return AOF_RESP;
429 }
430}
431
432/* Check if Multi Part AOF is valid. It will check the BASE file and INCR files
433 * at once according to the manifest instructions (this is somewhat similar to
434 * redis' AOF loading).
435 *
436 * When the verification is successful, we can guarantee:
437 * 1. The manifest file format is valid
438 * 2. Both BASE AOF and INCR AOFs format are valid
439 * 3. No BASE or INCR AOFs files are missing
440 *
441 * Note that in Multi Part AOF, we only allow truncation for the last AOF file.
442 * */
443void checkMultiPartAof(char *dirpath, char *manifest_filepath, int fix) {
444 int total_num = 0, aof_num = 0, last_file;
445 int ret;
446
447 printf("Start checking Multi Part AOF\n");
448 aofManifest *am = aofLoadManifestFromFile(manifest_filepath);
449
450 if (am->base_aof_info) total_num++;
451 if (am->incr_aof_list) total_num += listLength(am->incr_aof_list);
452
453 if (am->base_aof_info) {
454 sds aof_filename = am->base_aof_info->file_name;
455 sds aof_filepath = makePath(dirpath, aof_filename);
456 last_file = ++aof_num == total_num;
457 int aof_preable = fileIsRDB(aof_filepath);
458
459 printf("Start to check BASE AOF (%s format).\n", aof_preable ? "RDB":"RESP");
460 ret = checkSingleAof(aof_filename, aof_filepath, last_file, fix, aof_preable);
461 if (ret == AOF_CHECK_OK) {
462 printf("BASE AOF %s is valid\n", aof_filename);
463 } else if (ret == AOF_CHECK_EMPTY) {
464 printf("BASE AOF %s is empty\n", aof_filename);
465 } else if (ret == AOF_CHECK_TIMESTAMP_TRUNCATED) {
466 printf("Successfully truncated AOF %s to timestamp %ld\n",
467 aof_filename, to_timestamp);
468 } else if (ret == AOF_CHECK_TRUNCATED) {
469 printf("Successfully truncated AOF %s\n", aof_filename);
470 }
471 sdsfree(aof_filepath);
472 }
473
474 if (listLength(am->incr_aof_list)) {
475 listNode *ln;
476 listIter li;
477
478 printf("Start to check INCR files.\n");
479 listRewind(am->incr_aof_list, &li);
480 while ((ln = listNext(&li)) != NULL) {
481 aofInfo *ai = (aofInfo*)ln->value;
482 sds aof_filename = (char*)ai->file_name;
483 sds aof_filepath = makePath(dirpath, aof_filename);
484 last_file = ++aof_num == total_num;
485 ret = checkSingleAof(aof_filename, aof_filepath, last_file, fix, 0);
486 if (ret == AOF_CHECK_OK) {
487 printf("INCR AOF %s is valid\n", aof_filename);
488 } else if (ret == AOF_CHECK_EMPTY) {
489 printf("INCR AOF %s is empty\n", aof_filename);
490 } else if (ret == AOF_CHECK_TIMESTAMP_TRUNCATED) {
491 printf("Successfully truncated AOF %s to timestamp %ld\n",
492 aof_filename, to_timestamp);
493 } else if (ret == AOF_CHECK_TRUNCATED) {
494 printf("Successfully truncated AOF %s\n", aof_filename);
495 }
496 sdsfree(aof_filepath);
497 }
498 }
499
500 aofManifestFree(am);
501 printf("All AOF files and manifest are valid\n");
502}
503
504/* Check if old style AOF is valid. Internally, it will identify whether
505 * the AOF is in RDB-preamble format, and will eventually call `checkSingleAof`
506 * to do the check. */
507void checkOldStyleAof(char *filepath, int fix, int preamble) {
508 printf("Start checking Old-Style AOF\n");
509 int ret = checkSingleAof(filepath, filepath, 1, fix, preamble);
510 if (ret == AOF_CHECK_OK) {
511 printf("AOF %s is valid\n", filepath);
512 } else if (ret == AOF_CHECK_EMPTY) {
513 printf("AOF %s is empty\n", filepath);
514 } else if (ret == AOF_CHECK_TIMESTAMP_TRUNCATED) {
515 printf("Successfully truncated AOF %s to timestamp %ld\n",
516 filepath, to_timestamp);
517 } else if (ret == AOF_CHECK_TRUNCATED) {
518 printf("Successfully truncated AOF %s\n", filepath);
519 }
520}
521
522int redis_check_aof_main(int argc, char **argv) {
523 char *filepath;
524 char temp_filepath[PATH_MAX + 1];
525 char *dirpath;
526 int fix = 0;
527
528 if (argc < 2) {
529 goto invalid_args;
530 } else if (argc == 2) {
531 filepath = argv[1];
532 } else if (argc == 3) {
533 if (!strcmp(argv[1], "--fix")) {
534 filepath = argv[2];
535 fix = 1;
536 } else {
537 goto invalid_args;
538 }
539 } else if (argc == 4) {
540 if (!strcmp(argv[1], "--truncate-to-timestamp")) {
541 char *endptr;
542 errno = 0;
543 to_timestamp = strtol(argv[2], &endptr, 10);
544 if (errno != 0 || *endptr != '\0') {
545 printf("Invalid timestamp, aborting...\n");
546 exit(1);
547 }
548 filepath = argv[3];
549 } else {
550 goto invalid_args;
551 }
552 } else {
553 goto invalid_args;
554 }
555
556 /* In the glibc implementation dirname may modify their argument. */
557 memcpy(temp_filepath, filepath, strlen(filepath) + 1);
558 dirpath = dirname(temp_filepath);
559
560 /* Select the corresponding verification method according to the input file type. */
561 input_file_type type = getInputFileType(filepath);
562 switch (type) {
563 case AOF_MULTI_PART:
564 checkMultiPartAof(dirpath, filepath, fix);
565 break;
566 case AOF_RESP:
567 checkOldStyleAof(filepath, fix, 0);
568 break;
569 case AOF_RDB_PREAMBLE:
570 checkOldStyleAof(filepath, fix, 1);
571 break;
572 }
573
574 exit(0);
575
576invalid_args:
577 printf("Usage: %s [--fix|--truncate-to-timestamp $timestamp] <file.manifest|file.aof>\n",
578 argv[0]);
579 exit(1);
580}
581