1 | /*************************************************************************** |
2 | * _ _ ____ _ |
3 | * Project ___| | | | _ \| | |
4 | * / __| | | | |_) | | |
5 | * | (__| |_| | _ <| |___ |
6 | * \___|\___/|_| \_\_____| |
7 | * |
8 | * Copyright (C) 1998 - 2022, Daniel Stenberg, <[email protected]>, et al. |
9 | * |
10 | * This software is licensed as described in the file COPYING, which |
11 | * you should have received as part of this distribution. The terms |
12 | * are also available at https://curl.se/docs/copyright.html. |
13 | * |
14 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell |
15 | * copies of the Software, and permit persons to whom the Software is |
16 | * furnished to do so, under the terms of the COPYING file. |
17 | * |
18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
19 | * KIND, either express or implied. |
20 | * |
21 | * SPDX-License-Identifier: curl |
22 | * |
23 | ***************************************************************************/ |
24 | /* |
25 | A brief summary of the date string formats this parser groks: |
26 | |
27 | RFC 2616 3.3.1 |
28 | |
29 | Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 |
30 | Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 |
31 | Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format |
32 | |
33 | we support dates without week day name: |
34 | |
35 | 06 Nov 1994 08:49:37 GMT |
36 | 06-Nov-94 08:49:37 GMT |
37 | Nov 6 08:49:37 1994 |
38 | |
39 | without the time zone: |
40 | |
41 | 06 Nov 1994 08:49:37 |
42 | 06-Nov-94 08:49:37 |
43 | |
44 | weird order: |
45 | |
46 | 1994 Nov 6 08:49:37 (GNU date fails) |
47 | GMT 08:49:37 06-Nov-94 Sunday |
48 | 94 6 Nov 08:49:37 (GNU date fails) |
49 | |
50 | time left out: |
51 | |
52 | 1994 Nov 6 |
53 | 06-Nov-94 |
54 | Sun Nov 6 94 |
55 | |
56 | unusual separators: |
57 | |
58 | 1994.Nov.6 |
59 | Sun/Nov/6/94/GMT |
60 | |
61 | commonly used time zone names: |
62 | |
63 | Sun, 06 Nov 1994 08:49:37 CET |
64 | 06 Nov 1994 08:49:37 EST |
65 | |
66 | time zones specified using RFC822 style: |
67 | |
68 | Sun, 12 Sep 2004 15:05:58 -0700 |
69 | Sat, 11 Sep 2004 21:32:11 +0200 |
70 | |
71 | compact numerical date strings: |
72 | |
73 | 20040912 15:05:58 -0700 |
74 | 20040911 +0200 |
75 | |
76 | */ |
77 | |
78 | #include "curl_setup.h" |
79 | |
80 | #include <limits.h> |
81 | |
82 | #include <curl/curl.h> |
83 | #include "strcase.h" |
84 | #include "warnless.h" |
85 | #include "parsedate.h" |
86 | |
87 | /* |
88 | * parsedate() |
89 | * |
90 | * Returns: |
91 | * |
92 | * PARSEDATE_OK - a fine conversion |
93 | * PARSEDATE_FAIL - failed to convert |
94 | * PARSEDATE_LATER - time overflow at the far end of time_t |
95 | * PARSEDATE_SOONER - time underflow at the low end of time_t |
96 | */ |
97 | |
/* Result codes produced by parsedate() */
#define PARSEDATE_OK       0   /* a fine conversion */
#define PARSEDATE_FAIL   (-1)  /* failed to convert */
#define PARSEDATE_LATER    1   /* time overflow at the far end of time_t */
#define PARSEDATE_SOONER   2   /* time underflow at the low end of time_t */

/* Forward declaration; the implementation follows further down and depends
   on whether date parsing is compiled in. */
static int parsedate(const char *date, time_t *output);
104 | |
#if !(defined(CURL_DISABLE_PARSEDATE) && defined(CURL_DISABLE_FTP) && \
      defined(CURL_DISABLE_FILE))
/* Abbreviated English day and month names. These names are also used by FTP
   and FILE code, so they are kept unless all three features are disabled. */
const char * const Curl_wkday[] = {
  "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
};
const char * const Curl_month[] = {
  "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
#endif
114 | |
115 | #ifndef CURL_DISABLE_PARSEDATE |
/* Full English week day names, Monday first so that the index matches the
   abbreviated Curl_wkday[] table. */
static const char * const weekday[] = {
  "Monday",
  "Tuesday",
  "Wednesday",
  "Thursday",
  "Friday",
  "Saturday",
  "Sunday"
};
119 | |
/* One known time zone abbreviation and its distance from GMT. */
struct tzinfo {
  char name[5]; /* abbreviation, up to four letters plus the terminator */
  int offset;   /* +/- in minutes */
};

/* Here's a bunch of frequently used time zone names. These were supported
   by the old getdate parser. */
#define tDAYZONE -60       /* offset for daylight savings time */
static const struct tzinfo tz[]= {
  {"GMT",  0},                    /* Greenwich Mean */
  {"UT",   0},                    /* Universal Time */
  {"UTC",  0},                    /* Universal (Coordinated) */
  {"WET",  0},                    /* Western European */
  {"BST",  0 + (tDAYZONE)},       /* British Summer */
  {"WAT",  60},                   /* West Africa */
  {"AST",  240},                  /* Atlantic Standard */
  {"ADT",  240 + (tDAYZONE)},     /* Atlantic Daylight */
  {"EST",  300},                  /* Eastern Standard */
  {"EDT",  300 + (tDAYZONE)},     /* Eastern Daylight */
  {"CST",  360},                  /* Central Standard */
  {"CDT",  360 + (tDAYZONE)},     /* Central Daylight */
  {"MST",  420},                  /* Mountain Standard */
  {"MDT",  420 + (tDAYZONE)},     /* Mountain Daylight */
  {"PST",  480},                  /* Pacific Standard */
  {"PDT",  480 + (tDAYZONE)},     /* Pacific Daylight */
  {"YST",  540},                  /* Yukon Standard */
  {"YDT",  540 + (tDAYZONE)},     /* Yukon Daylight */
  {"HST",  600},                  /* Hawaii Standard */
  {"HDT",  600 + (tDAYZONE)},     /* Hawaii Daylight */
  {"CAT",  600},                  /* Central Alaska */
  {"AHST", 600},                  /* Alaska-Hawaii Standard */
  {"NT",   660},                  /* Nome */
  {"IDLW", 720},                  /* International Date Line West */
  {"CET",  -60},                  /* Central European */
  {"MET",  -60},                  /* Middle European */
  {"MEWT", -60},                  /* Middle European Winter */
  {"MEST", -60 + (tDAYZONE)},     /* Middle European Summer */
  {"CEST", -60 + (tDAYZONE)},     /* Central European Summer */
  {"MESZ", -60 + (tDAYZONE)},     /* Middle European Summer */
  {"FWT",  -60},                  /* French Winter */
  {"FST",  -60 + (tDAYZONE)},     /* French Summer */
  {"EET",  -120},                 /* Eastern Europe, USSR Zone 1 */
  {"WAST", -420},                 /* West Australian Standard */
  {"WADT", -420 + (tDAYZONE)},    /* West Australian Daylight */
  {"CCT",  -480},                 /* China Coast, USSR Zone 7 */
  {"JST",  -540},                 /* Japan Standard, USSR Zone 8 */
  {"EAST", -600},                 /* Eastern Australian Standard */
  {"EADT", -600 + (tDAYZONE)},    /* Eastern Australian Daylight */
  {"GST",  -600},                 /* Guam Standard, USSR Zone 9 */
  {"NZT",  -720},                 /* New Zealand */
  {"NZST", -720},                 /* New Zealand Standard */
  {"NZDT", -720 + (tDAYZONE)},    /* New Zealand Daylight */
  {"IDLE", -720},                 /* International Date Line East */

  /* Next up: Military timezone names. RFC822 allowed these, but (as noted in
     RFC 1123) had their signs wrong. Here we use the correct signs to match
     actual military usage.
   */
  {"A",  1 * 60},                 /* Alpha */
  {"B",  2 * 60},                 /* Bravo */
  {"C",  3 * 60},                 /* Charlie */
  {"D",  4 * 60},                 /* Delta */
  {"E",  5 * 60},                 /* Echo */
  {"F",  6 * 60},                 /* Foxtrot */
  {"G",  7 * 60},                 /* Golf */
  {"H",  8 * 60},                 /* Hotel */
  {"I",  9 * 60},                 /* India */
  /* "J" (Juliet) is not a time zone of its own: it denotes the observer's
     local time and is therefore left out */
  {"K", 10 * 60},                 /* Kilo */
  {"L", 11 * 60},                 /* Lima */
  {"M", 12 * 60},                 /* Mike */
  {"N",  -1 * 60},                /* November */
  {"O",  -2 * 60},                /* Oscar */
  {"P",  -3 * 60},                /* Papa */
  {"Q",  -4 * 60},                /* Quebec */
  {"R",  -5 * 60},                /* Romeo */
  {"S",  -6 * 60},                /* Sierra */
  {"T",  -7 * 60},                /* Tango */
  {"U",  -8 * 60},                /* Uniform */
  {"V",  -9 * 60},                /* Victor */
  {"W", -10 * 60},                /* Whiskey */
  {"X", -11 * 60},                /* X-ray */
  {"Y", -12 * 60},                /* Yankee */
  {"Z", 0},                       /* Zulu, zero meridian, a.k.a. UTC */
};
205 | |
206 | /* returns: |
207 | -1 no day |
208 | 0 monday - 6 sunday |
209 | */ |
210 | |
211 | static int checkday(const char *check, size_t len) |
212 | { |
213 | int i; |
214 | const char * const *what; |
215 | bool found = FALSE; |
216 | if(len > 3) |
217 | what = &weekday[0]; |
218 | else |
219 | what = &Curl_wkday[0]; |
220 | for(i = 0; i<7; i++) { |
221 | if(strcasecompare(check, what[0])) { |
222 | found = TRUE; |
223 | break; |
224 | } |
225 | what++; |
226 | } |
227 | return found?i:-1; |
228 | } |
229 | |
230 | static int checkmonth(const char *check) |
231 | { |
232 | int i; |
233 | const char * const *what; |
234 | bool found = FALSE; |
235 | |
236 | what = &Curl_month[0]; |
237 | for(i = 0; i<12; i++) { |
238 | if(strcasecompare(check, what[0])) { |
239 | found = TRUE; |
240 | break; |
241 | } |
242 | what++; |
243 | } |
244 | return found?i:-1; /* return the offset or -1, no real offset is -1 */ |
245 | } |
246 | |
247 | /* return the time zone offset between GMT and the input one, in number |
248 | of seconds or -1 if the timezone wasn't found/legal */ |
249 | |
250 | static int checktz(const char *check) |
251 | { |
252 | unsigned int i; |
253 | const struct tzinfo *what; |
254 | bool found = FALSE; |
255 | |
256 | what = tz; |
257 | for(i = 0; i< sizeof(tz)/sizeof(tz[0]); i++) { |
258 | if(strcasecompare(check, what->name)) { |
259 | found = TRUE; |
260 | break; |
261 | } |
262 | what++; |
263 | } |
264 | return found?what->offset*60:-1; |
265 | } |
266 | |
/* Advance '*date' past every character that is neither a letter nor a digit,
   stopping at the end of the string at the latest. */
static void skip(const char **date)
{
  const char *p = *date;

  while(*p && !ISALNUM(*p))
    p++;

  *date = p;
}
273 | |
/* What parsedate() takes the next plain number in the date string to mean */
enum assume {
  DATE_MDAY, /* a day of the month */
  DATE_YEAR, /* a year */
  DATE_TIME  /* a time */
};
279 | |
/*
 * time2epoch: convert a broken-down GMT time stamp to seconds since the
 * epoch (1970-01-01 00:00:00). Similar to mktime but for GMT only.
 *
 * 'mon' is zero based (0 is January) and is used as an array index, so it
 * must be within 0 - 11. 'mday' is one based and 'year' is the full year.
 */
static time_t time2epoch(int sec, int min, int hour,
                         int mday, int mon, int year)
{
  /* number of days in a non-leap year that precede the first of each month */
  static const int days_before_month[12] = {
    0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
  };
  /* January and February come before a possible leap day in 'year', so for
     those months only the years up to the previous one count */
  const int counted_year = year - (mon <= 1);
  const int leaps_since_epoch =
    (counted_year / 4) - (counted_year / 100) + (counted_year / 400)
    - (1969 / 4) + (1969 / 100) - (1969 / 400);
  time_t days = (time_t)(year - 1970) * 365;
  time_t hours;
  time_t minutes;

  days = days + leaps_since_epoch + days_before_month[mon] + mday - 1;
  hours = days * 24 + hour;
  minutes = hours * 60 + min;
  return minutes * 60 + sec;
}
296 | |
297 | /* |
298 | * parsedate() |
299 | * |
300 | * Returns: |
301 | * |
302 | * PARSEDATE_OK - a fine conversion |
303 | * PARSEDATE_FAIL - failed to convert |
304 | * PARSEDATE_LATER - time overflow at the far end of time_t |
305 | * PARSEDATE_SOONER - time underflow at the low end of time_t |
306 | */ |
307 | |
308 | static int parsedate(const char *date, time_t *output) |
309 | { |
310 | time_t t = 0; |
311 | int wdaynum = -1; /* day of the week number, 0-6 (mon-sun) */ |
312 | int monnum = -1; /* month of the year number, 0-11 */ |
313 | int mdaynum = -1; /* day of month, 1 - 31 */ |
314 | int hournum = -1; |
315 | int minnum = -1; |
316 | int secnum = -1; |
317 | int yearnum = -1; |
318 | int tzoff = -1; |
319 | enum assume dignext = DATE_MDAY; |
320 | const char *indate = date; /* save the original pointer */ |
321 | int part = 0; /* max 6 parts */ |
322 | |
323 | while(*date && (part < 6)) { |
324 | bool found = FALSE; |
325 | |
326 | skip(&date); |
327 | |
328 | if(ISALPHA(*date)) { |
329 | /* a name coming up */ |
330 | char buf[32]="" ; |
331 | size_t len; |
332 | if(sscanf(date, "%31[ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
333 | "abcdefghijklmnopqrstuvwxyz]" , buf)) |
334 | len = strlen(buf); |
335 | else |
336 | len = 0; |
337 | |
338 | if(wdaynum == -1) { |
339 | wdaynum = checkday(buf, len); |
340 | if(wdaynum != -1) |
341 | found = TRUE; |
342 | } |
343 | if(!found && (monnum == -1)) { |
344 | monnum = checkmonth(buf); |
345 | if(monnum != -1) |
346 | found = TRUE; |
347 | } |
348 | |
349 | if(!found && (tzoff == -1)) { |
350 | /* this just must be a time zone string */ |
351 | tzoff = checktz(buf); |
352 | if(tzoff != -1) |
353 | found = TRUE; |
354 | } |
355 | |
356 | if(!found) |
357 | return PARSEDATE_FAIL; /* bad string */ |
358 | |
359 | date += len; |
360 | } |
361 | else if(ISDIGIT(*date)) { |
362 | /* a digit */ |
363 | int val; |
364 | char *end; |
365 | int len = 0; |
366 | if((secnum == -1) && |
367 | (3 == sscanf(date, "%02d:%02d:%02d%n" , |
368 | &hournum, &minnum, &secnum, &len))) { |
369 | /* time stamp! */ |
370 | date += len; |
371 | } |
372 | else if((secnum == -1) && |
373 | (2 == sscanf(date, "%02d:%02d%n" , &hournum, &minnum, &len))) { |
374 | /* time stamp without seconds */ |
375 | date += len; |
376 | secnum = 0; |
377 | } |
378 | else { |
379 | long lval; |
380 | int error; |
381 | int old_errno; |
382 | |
383 | old_errno = errno; |
384 | errno = 0; |
385 | lval = strtol(date, &end, 10); |
386 | error = errno; |
387 | if(errno != old_errno) |
388 | errno = old_errno; |
389 | |
390 | if(error) |
391 | return PARSEDATE_FAIL; |
392 | |
393 | #if LONG_MAX != INT_MAX |
394 | if((lval > (long)INT_MAX) || (lval < (long)INT_MIN)) |
395 | return PARSEDATE_FAIL; |
396 | #endif |
397 | |
398 | val = curlx_sltosi(lval); |
399 | |
400 | if((tzoff == -1) && |
401 | ((end - date) == 4) && |
402 | (val <= 1400) && |
403 | (indate< date) && |
404 | ((date[-1] == '+' || date[-1] == '-'))) { |
405 | /* four digits and a value less than or equal to 1400 (to take into |
406 | account all sorts of funny time zone diffs) and it is preceded |
407 | with a plus or minus. This is a time zone indication. 1400 is |
408 | picked since +1300 is frequently used and +1400 is mentioned as |
409 | an edge number in the document "ISO C 200X Proposal: Timezone |
410 | Functions" at http://david.tribble.com/text/c0xtimezone.html If |
411 | anyone has a more authoritative source for the exact maximum time |
412 | zone offsets, please speak up! */ |
413 | found = TRUE; |
414 | tzoff = (val/100 * 60 + val%100)*60; |
415 | |
416 | /* the + and - prefix indicates the local time compared to GMT, |
417 | this we need their reversed math to get what we want */ |
418 | tzoff = date[-1]=='+'?-tzoff:tzoff; |
419 | } |
420 | |
421 | if(((end - date) == 8) && |
422 | (yearnum == -1) && |
423 | (monnum == -1) && |
424 | (mdaynum == -1)) { |
425 | /* 8 digits, no year, month or day yet. This is YYYYMMDD */ |
426 | found = TRUE; |
427 | yearnum = val/10000; |
428 | monnum = (val%10000)/100-1; /* month is 0 - 11 */ |
429 | mdaynum = val%100; |
430 | } |
431 | |
432 | if(!found && (dignext == DATE_MDAY) && (mdaynum == -1)) { |
433 | if((val > 0) && (val<32)) { |
434 | mdaynum = val; |
435 | found = TRUE; |
436 | } |
437 | dignext = DATE_YEAR; |
438 | } |
439 | |
440 | if(!found && (dignext == DATE_YEAR) && (yearnum == -1)) { |
441 | yearnum = val; |
442 | found = TRUE; |
443 | if(yearnum < 100) { |
444 | if(yearnum > 70) |
445 | yearnum += 1900; |
446 | else |
447 | yearnum += 2000; |
448 | } |
449 | if(mdaynum == -1) |
450 | dignext = DATE_MDAY; |
451 | } |
452 | |
453 | if(!found) |
454 | return PARSEDATE_FAIL; |
455 | |
456 | date = end; |
457 | } |
458 | } |
459 | |
460 | part++; |
461 | } |
462 | |
463 | if(-1 == secnum) |
464 | secnum = minnum = hournum = 0; /* no time, make it zero */ |
465 | |
466 | if((-1 == mdaynum) || |
467 | (-1 == monnum) || |
468 | (-1 == yearnum)) |
469 | /* lacks vital info, fail */ |
470 | return PARSEDATE_FAIL; |
471 | |
472 | #ifdef HAVE_TIME_T_UNSIGNED |
473 | if(yearnum < 1970) { |
474 | /* only positive numbers cannot return earlier */ |
475 | *output = TIME_T_MIN; |
476 | return PARSEDATE_SOONER; |
477 | } |
478 | #endif |
479 | |
480 | #if (SIZEOF_TIME_T < 5) |
481 | |
482 | #ifdef HAVE_TIME_T_UNSIGNED |
483 | /* an unsigned 32 bit time_t can only hold dates to 2106 */ |
484 | if(yearnum > 2105) { |
485 | *output = TIME_T_MAX; |
486 | return PARSEDATE_LATER; |
487 | } |
488 | #else |
489 | /* a signed 32 bit time_t can only hold dates to the beginning of 2038 */ |
490 | if(yearnum > 2037) { |
491 | *output = TIME_T_MAX; |
492 | return PARSEDATE_LATER; |
493 | } |
494 | if(yearnum < 1903) { |
495 | *output = TIME_T_MIN; |
496 | return PARSEDATE_SOONER; |
497 | } |
498 | #endif |
499 | |
500 | #else |
501 | /* The Gregorian calendar was introduced 1582 */ |
502 | if(yearnum < 1583) |
503 | return PARSEDATE_FAIL; |
504 | #endif |
505 | |
506 | if((mdaynum > 31) || (monnum > 11) || |
507 | (hournum > 23) || (minnum > 59) || (secnum > 60)) |
508 | return PARSEDATE_FAIL; /* clearly an illegal date */ |
509 | |
510 | /* time2epoch() returns a time_t. time_t is often 32 bits, sometimes even on |
511 | architectures that feature 64 bit 'long' but ultimately time_t is the |
512 | correct data type to use. |
513 | */ |
514 | t = time2epoch(secnum, minnum, hournum, mdaynum, monnum, yearnum); |
515 | |
516 | /* Add the time zone diff between local time zone and GMT. */ |
517 | if(tzoff == -1) |
518 | tzoff = 0; |
519 | |
520 | if((tzoff > 0) && (t > TIME_T_MAX - tzoff)) { |
521 | *output = TIME_T_MAX; |
522 | return PARSEDATE_LATER; /* time_t overflow */ |
523 | } |
524 | |
525 | t += tzoff; |
526 | |
527 | *output = t; |
528 | |
529 | return PARSEDATE_OK; |
530 | } |
531 | #else |
532 | /* disabled */ |
533 | static int parsedate(const char *date, time_t *output) |
534 | { |
535 | (void)date; |
536 | *output = 0; |
537 | return PARSEDATE_OK; /* a lie */ |
538 | } |
539 | #endif |
540 | |
541 | time_t curl_getdate(const char *p, const time_t *now) |
542 | { |
543 | time_t parsed = -1; |
544 | int rc = parsedate(p, &parsed); |
545 | (void)now; /* legacy argument from the past that we ignore */ |
546 | |
547 | if(rc == PARSEDATE_OK) { |
548 | if(parsed == -1) |
549 | /* avoid returning -1 for a working scenario */ |
550 | parsed++; |
551 | return parsed; |
552 | } |
553 | /* everything else is fail */ |
554 | return -1; |
555 | } |
556 | |
557 | /* Curl_getdate_capped() differs from curl_getdate() in that this will return |
558 | TIME_T_MAX in case the parsed time value was too big, instead of an |
559 | error. */ |
560 | |
561 | time_t Curl_getdate_capped(const char *p) |
562 | { |
563 | time_t parsed = -1; |
564 | int rc = parsedate(p, &parsed); |
565 | |
566 | switch(rc) { |
567 | case PARSEDATE_OK: |
568 | if(parsed == -1) |
569 | /* avoid returning -1 for a working scenario */ |
570 | parsed++; |
571 | return parsed; |
572 | case PARSEDATE_LATER: |
573 | /* this returns the maximum time value */ |
574 | return parsed; |
575 | default: |
576 | return -1; /* everything else is fail */ |
577 | } |
578 | /* UNREACHABLE */ |
579 | } |
580 | |
581 | /* |
582 | * Curl_gmtime() is a gmtime() replacement for portability. Do not use the |
583 | * gmtime_r() or gmtime() functions anywhere else but here. |
584 | * |
585 | */ |
586 | |
587 | CURLcode Curl_gmtime(time_t intime, struct tm *store) |
588 | { |
589 | const struct tm *tm; |
590 | #ifdef HAVE_GMTIME_R |
591 | /* thread-safe version */ |
592 | tm = (struct tm *)gmtime_r(&intime, store); |
593 | #else |
594 | /* !checksrc! disable BANNEDFUNC 1 */ |
595 | tm = gmtime(&intime); |
596 | if(tm) |
597 | *store = *tm; /* copy the pointed struct to the local copy */ |
598 | #endif |
599 | |
600 | if(!tm) |
601 | return CURLE_BAD_FUNCTION_ARGUMENT; |
602 | return CURLE_OK; |
603 | } |
604 | |