1/***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2022, Daniel Stenberg, <[email protected]>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25#include "curl_setup.h"
26
27#include "urldata.h"
28#include "urlapi-int.h"
29#include "strcase.h"
30#include "dotdot.h"
31#include "url.h"
32#include "escape.h"
33#include "curl_ctype.h"
34#include "inet_pton.h"
35#include "inet_ntop.h"
36
37/* The last 3 #include files should be in this order */
38#include "curl_printf.h"
39#include "curl_memory.h"
40#include "memdebug.h"
41
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * The argument is parenthesized so that pointer expressions such as
 * 'ptr + 1' expand correctly. Note that 'str' is evaluated more than once,
 * so it must not have side effects. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
47
/* Drive prefix as it may appear in a URL: an ASCII letter, then ':' or '|',
 * then a slash, a backslash or the end of the string. 'str' is evaluated
 * more than once. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((((str)[0] >= 'a' && (str)[0] <= 'z') || \
    ((str)[0] >= 'A' && (str)[0] <= 'Z')) && \
   ((str)[1] == '|' || (str)[1] == ':') && \
   ((str)[2] == 0 || (str)[2] == '/' || (str)[2] == '\\'))
55
/* The scheme is not URL encoded. This is the maximum number of scheme
   characters Curl_is_absolute_url() examines; buffers that receive a scheme
   must be larger than this to also fit the terminating zero. */
#define MAX_SCHEME_LEN 40
58
/* Internal representation of CURLU. Point to URL-encoded strings.
   All char pointers are heap allocated and released by free_urlhandle(); a
   part that is not set is NULL. */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port; /* port number as a decimal string */
  char *path;
  char *query; /* stored without the leading '?' */
  char *fragment; /* stored without the leading '#' */

  char *scratch; /* temporary scratch area */
  char *temppath; /* temporary path pointer */
  long portnum; /* the numerical version */
};
76
/* scheme used by seturl() for a URL without scheme when CURLU_DEFAULT_SCHEME
   is set */
#define DEFAULT_SCHEME "https"
78
79static void free_urlhandle(struct Curl_URL *u)
80{
81 free(u->scheme);
82 free(u->user);
83 free(u->password);
84 free(u->options);
85 free(u->host);
86 free(u->zoneid);
87 free(u->port);
88 free(u->path);
89 free(u->query);
90 free(u->fragment);
91 free(u->scratch);
92 free(u->temppath);
93}
94
/*
 * Return a pointer to the first '/' or '?' found after the host name starts,
 * whichever comes first, or to the terminating zero if there is neither. The
 * host name is considered to start right after the first "//", or at the
 * start of the string if there is no "//". This also handles URLs like
 * http://www.url.com?id=2380
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* skip past the double slash when there is one */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  qmark = strchr(host, '?');

  /* a missing separator counts as being at the end of the string */
  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  return (qmark <= slash) ? qmark : slash;
}
122
123/*
124 * Decide in an encoding-independent manner whether a character in an
125 * URL must be escaped. The same criterion must be used in strlen_url()
126 * and strcpy_url().
127 */
128static bool urlchar_needs_escaping(int c)
129{
130 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
131}
132
133/*
134 * strlen_url() returns the length of the given URL if the spaces within the
135 * URL were properly URL encoded.
136 * URL encoding should be skipped for host names, otherwise IDN resolution
137 * will fail.
138 */
139static size_t strlen_url(const char *url, bool relative)
140{
141 const unsigned char *ptr;
142 size_t newlen = 0;
143 bool left = TRUE; /* left side of the ? */
144 const unsigned char *host_sep = (const unsigned char *) url;
145
146 if(!relative)
147 host_sep = (const unsigned char *) find_host_sep(url);
148
149 for(ptr = (unsigned char *)url; *ptr; ptr++) {
150
151 if(ptr < host_sep) {
152 ++newlen;
153 continue;
154 }
155
156 if(*ptr == ' ') {
157 if(left)
158 newlen += 3;
159 else
160 newlen++;
161 continue;
162 }
163
164 if (*ptr == '?')
165 left = FALSE;
166
167 if(urlchar_needs_escaping(*ptr))
168 newlen += 2;
169
170 newlen++;
171 }
172
173 return newlen;
174}
175
176/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
177 * the source URL accordingly.
178 * URL encoding should be skipped for host names, otherwise IDN resolution
179 * will fail.
180 *
181 * Returns TRUE if something was updated.
182 */
183static bool strcpy_url(char *output, const char *url, bool relative)
184{
185 /* we must add this with whitespace-replacing */
186 bool left = TRUE;
187 const unsigned char *iptr;
188 char *optr = output;
189 const unsigned char *host_sep = (const unsigned char *) url;
190 bool changed = FALSE;
191
192 if(!relative)
193 host_sep = (const unsigned char *) find_host_sep(url);
194
195 for(iptr = (unsigned char *)url; /* read from here */
196 *iptr; /* until zero byte */
197 iptr++) {
198
199 if(iptr < host_sep) {
200 *optr++ = *iptr;
201 continue;
202 }
203
204 if(*iptr == ' ') {
205 if(left) {
206 *optr++='%'; /* add a '%' */
207 *optr++='2'; /* add a '2' */
208 *optr++='0'; /* add a '0' */
209 }
210 else
211 *optr++='+'; /* add a '+' here */
212 changed = TRUE;
213 continue;
214 }
215
216 if(*iptr == '?')
217 left = FALSE;
218
219 if(urlchar_needs_escaping(*iptr)) {
220 msnprintf(optr, 4, "%%%02x", *iptr);
221 changed = TRUE;
222 optr += 3;
223 }
224 else
225 *optr++ = *iptr;
226 }
227 *optr = 0; /* null-terminate output buffer */
228
229 return changed;
230}
231
232/*
233 * Returns true if the given URL is absolute (as opposed to relative). Returns
234 * the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must
235 * be larger than MAX_SCHEME_LEN if buf is set.
236 */
237bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
238{
239 int i;
240 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
241 (void)buflen; /* only used in debug-builds */
242 if(buf)
243 buf[0] = 0; /* always leave a defined value in buf */
244#ifdef WIN32
245 if(STARTS_WITH_DRIVE_PREFIX(url))
246 return FALSE;
247#endif
248 for(i = 0; i < MAX_SCHEME_LEN; ++i) {
249 char s = url[i];
250 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
251 /* RFC 3986 3.1 explains:
252 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
253 */
254 }
255 else {
256 break;
257 }
258 }
259 if(i && (url[i] == ':') && (url[i + 1] == '/')) {
260 if(buf) {
261 buf[i] = 0;
262 while(i--) {
263 buf[i] = (char)TOLOWER(url[i]);
264 }
265 }
266 return TRUE;
267 }
268 return FALSE;
269}
270
/*
 * Concatenate a relative URL to a base URL making it absolute.
 * URL-encodes any spaces.
 *
 * Three kinds of 'relurl' are handled:
 *  - one that does not start with a slash: appended after the last slash of
 *    the base (or at the end if it starts with '?'), after the base query
 *    has been cut off and leading "./" and "../" have been applied
 *  - one that starts with "//": replaces everything after the "//" of the
 *    base
 *  - one that starts with a single slash: replaces the path of the base
 *
 * The returned pointer must be freed by the caller unless NULL
 * (returns NULL on out of memory).
 */
static char *concat_url(const char *base, const char *relurl)
{
  /***
   TRY to append this new path to the old URL
   to the right of the host part. Oh crap, this is doomed to cause
   problems in the future...
  */
  char *newest;
  char *protsep;
  char *pathsep;
  size_t newlen;
  bool host_changed = FALSE;

  const char *useurl = relurl;
  size_t urllen;

  /* we must make our own copy of the URL to play with, as it may
     point to read-only data */
  char *url_clone = strdup(base);

  if(!url_clone)
    return NULL; /* skip out of this NOW */

  /* protsep points to the start of the host name */
  protsep = strstr(url_clone, "//");
  if(!protsep)
    protsep = url_clone;
  else
    protsep += 2; /* pass the slashes */

  if('/' != relurl[0]) {
    int level = 0;

    /* First we need to find out if there's a ?-letter in the URL,
       and cut it and the right-side of that off */
    pathsep = strchr(protsep, '?');
    if(pathsep)
      *pathsep = 0;

    /* we have a relative path to append to the last slash if there's one
       available, or if the new URL is just a query string (starts with a
       '?')  we append the new one at the end of the entire currently worked
       out URL */
    if(useurl[0] != '?') {
      pathsep = strrchr(protsep, '/');
      if(pathsep)
        *pathsep = 0;
    }

    /* Check if there's any slash after the host name, and if so, remember
       that position instead. From here on protsep is either the start of
       the path part or NULL when the clone has no path. */
    pathsep = strchr(protsep, '/');
    if(pathsep)
      protsep = pathsep + 1;
    else
      protsep = NULL;

    /* now deal with one "./" or any amount of "../" in the newurl
       and act accordingly */

    if((useurl[0] == '.') && (useurl[1] == '/'))
      useurl += 2; /* just skip the "./" */

    while((useurl[0] == '.') &&
          (useurl[1] == '.') &&
          (useurl[2] == '/')) {
      level++;
      useurl += 3; /* pass the "../" */
    }

    if(protsep) {
      while(level--) {
        /* cut off one more level from the right of the original URL */
        pathsep = strrchr(protsep, '/');
        if(pathsep)
          *pathsep = 0;
        else {
          /* nothing more to cut, the path part becomes empty */
          *protsep = 0;
          break;
        }
      }
    }
  }
  else {
    /* We got a new absolute path for this server */

    if(relurl[1] == '/') {
      /* the new URL starts with //, just keep the protocol part from the
         original one */
      *protsep = 0;
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
                              skip the new ones */
      host_changed = TRUE;
    }
    else {
      /* cut off the original URL from the first slash, or deal with URLs
         without slash */
      pathsep = strchr(protsep, '/');
      if(pathsep) {
        /* When people use badly formatted URLs, such as
           "http://www.url.com?dir=/home/daniel" we must not use the first
           slash, if there's a ?-letter before it! */
        char *sep = strchr(protsep, '?');
        if(sep && (sep < pathsep))
          pathsep = sep;
        *pathsep = 0;
      }
      else {
        /* There was no slash. Now, since we might be operating on a badly
           formatted URL, such as "http://www.url.com?id=2380" which doesn't
           use a slash separator as it is supposed to, we need to check for a
           ?-letter as well! */
        pathsep = strchr(protsep, '?');
        if(pathsep)
          *pathsep = 0;
      }
    }
  }

  /* If the new part contains a space, this is a mighty stupid redirect
     but we still make an effort to do "right". To the left of a '?'
     letter we replace each space with %20 while it is replaced with '+'
     on the right side of the '?' letter.

     The new part only contains a host name (that must be left unencoded)
     when it started with "//", so in all other cases it is handled as
     relative.
  */
  newlen = strlen_url(useurl, !host_changed);

  urllen = strlen(url_clone);

  newest = malloc(urllen + 1 + /* possible slash */
                  newlen + 1 /* zero byte */);

  if(!newest) {
    free(url_clone); /* don't leak this */
    return NULL;
  }

  /* copy over the root url part */
  memcpy(newest, url_clone, urllen);

  /* check if we need to append a slash: not when the new part brings its
     own slash, is a query, or when the path part was cut down to nothing */
  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
    ;
  else
    newest[urllen++]='/';

  /* then append the new piece on the right side, this also writes the
     terminating zero */
  strcpy_url(&newest[urllen], useurl, !host_changed);

  free(url_clone);

  return newest;
}
429
430/* scan for byte values < 31 or 127 */
431static bool junkscan(const char *part, unsigned int flags)
432{
433 if(part) {
434 static const char badbytes[]={
435 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
436 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
437 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
438 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
439 0x7f, 0x00 /* null-terminate */
440 };
441 size_t n = strlen(part);
442 size_t nfine = strcspn(part, badbytes);
443 if(nfine != n)
444 /* since we don't know which part is scanned, return a generic error
445 code */
446 return TRUE;
447 if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
448 return TRUE;
449 }
450 return FALSE;
451}
452
453/*
454 * parse_hostname_login()
455 *
456 * Parse the login details (user name, password and options) from the URL and
457 * strip them out of the host name
458 *
459 */
460static CURLUcode parse_hostname_login(struct Curl_URL *u,
461 char **hostname,
462 unsigned int flags)
463{
464 CURLUcode result = CURLUE_OK;
465 CURLcode ccode;
466 char *userp = NULL;
467 char *passwdp = NULL;
468 char *optionsp = NULL;
469 const struct Curl_handler *h = NULL;
470
471 /* At this point, we're hoping all the other special cases have
472 * been taken care of, so conn->host.name is at most
473 * [user[:password][;options]]@]hostname
474 *
475 * We need somewhere to put the embedded details, so do that first.
476 */
477
478 char *ptr = strchr(*hostname, '@');
479 char *login = *hostname;
480
481 if(!ptr)
482 goto out;
483
484 /* We will now try to extract the
485 * possible login information in a string like:
486 * ftp://user:[email protected]:8021/README */
487 *hostname = ++ptr;
488
489 /* if this is a known scheme, get some details */
490 if(u->scheme)
491 h = Curl_builtin_scheme(u->scheme);
492
493 /* We could use the login information in the URL so extract it. Only parse
494 options if the handler says we should. Note that 'h' might be NULL! */
495 ccode = Curl_parse_login_details(login, ptr - login - 1,
496 &userp, &passwdp,
497 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
498 &optionsp:NULL);
499 if(ccode) {
500 result = CURLUE_BAD_LOGIN;
501 goto out;
502 }
503
504 if(userp) {
505 if(flags & CURLU_DISALLOW_USER) {
506 /* Option DISALLOW_USER is set and url contains username. */
507 result = CURLUE_USER_NOT_ALLOWED;
508 goto out;
509 }
510 if(junkscan(userp, flags)) {
511 result = CURLUE_BAD_USER;
512 goto out;
513 }
514 u->user = userp;
515 }
516
517 if(passwdp) {
518 if(junkscan(passwdp, flags)) {
519 result = CURLUE_BAD_PASSWORD;
520 goto out;
521 }
522 u->password = passwdp;
523 }
524
525 if(optionsp) {
526 if(junkscan(optionsp, flags)) {
527 result = CURLUE_BAD_LOGIN;
528 goto out;
529 }
530 u->options = optionsp;
531 }
532
533 return CURLUE_OK;
534 out:
535
536 free(userp);
537 free(passwdp);
538 free(optionsp);
539 u->user = NULL;
540 u->password = NULL;
541 u->options = NULL;
542
543 return result;
544}
545
/*
 * Curl_parse_port()
 *
 * Find an optional ":port" at the end of 'hostname' and parse it. For a
 * bracketed IPv6 address the port is only looked for right after the closing
 * bracket, otherwise the first colon in the string starts the port.
 *
 * When a port number is found, the host name is zero terminated at the
 * colon, u->portnum gets the number and u->port gets a newly allocated
 * decimal string. A colon without anything after it is cut off and accepted
 * only if 'has_scheme' is TRUE.
 *
 * Returns CURLUE_OK, CURLUE_BAD_IPV6, CURLUE_BAD_PORT_NUMBER or
 * CURLUE_OUT_OF_MEMORY.
 */
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
                                   bool has_scheme)
{
  char *portptr = NULL;
  char endbracket;
  int len;

  /*
   * Find the end of an IPv6 address, either on the ']' ending bracket or
   * a percent-encoded zone index.
   */
  if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
                 &endbracket, &len)) {
    /* 'len' is the number of bytes consumed, including the one stored in
       'endbracket' */
    if(']' == endbracket)
      portptr = &hostname[len];
    else if('%' == endbracket) {
      int zonelen = len;
      /* skip over the zone id, up to and including the ending bracket */
      if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
        if(']' != endbracket)
          return CURLUE_BAD_IPV6;
        /* this adds up to zonelen + len, the index of the byte right after
           the ending bracket */
        portptr = &hostname[--zonelen + len + 1];
      }
      else
        return CURLUE_BAD_IPV6;
    }
    else
      return CURLUE_BAD_IPV6;

    /* this is a RFC2732-style specified IP-address */
    if(portptr && *portptr) {
      /* only a colon may follow the ending bracket */
      if(*portptr != ':')
        return CURLUE_BAD_IPV6;
    }
    else
      portptr = NULL;
  }
  else
    portptr = strchr(hostname, ':');

  if(portptr) {
    char *rest;
    long port;
    char portbuf[7];

    /* Browser behavior adaptation. If there's a colon with no digits after,
       just cut off the name there which makes us ignore the colon and just
       use the default port. Firefox, Chrome and Safari all do that.

       Don't do it if the URL has no scheme, to make something that looks like
       a scheme not work!
    */
    if(!portptr[1]) {
      *portptr = '\0';
      return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
    }

    /* this also rejects a sign or leading whitespace, that strtol() would
       otherwise accept */
    if(!ISDIGIT(portptr[1]))
      return CURLUE_BAD_PORT_NUMBER;

    port = strtol(portptr + 1, &rest, 10);  /* Port number must be decimal */

    if(port > 0xffff)
      return CURLUE_BAD_PORT_NUMBER;

    /* anything trailing the digits makes it a bad port number */
    if(rest[0])
      return CURLUE_BAD_PORT_NUMBER;

    *portptr++ = '\0'; /* cut off the name there */
    *rest = 0;
    /* generate a new port number string to get rid of leading zeroes etc */
    msnprintf(portbuf, sizeof(portbuf), "%ld", port);
    u->portnum = port;
    u->port = strdup(portbuf);
    if(!u->port)
      return CURLUE_OUT_OF_MEMORY;
  }

  return CURLUE_OK;
}
625
/*
 * hostname_check()
 *
 * Verify the host name and possibly modify it in place.
 *
 * A name that starts with '[' must be a bracketed IPv6 address. A zone id
 * following a '%' within the brackets is moved over to u->zoneid and cut out
 * of the name. When built with ENABLE_IPV6, the address must also be
 * accepted by Curl_inet_pton() and it is replaced by the form that
 * Curl_inet_ntop() outputs if that is shorter.
 *
 * Any other name is rejected if it contains one of " \r\n\t/:#?!@".
 *
 * Returns CURLUE_OK, CURLUE_BAD_IPV6, CURLUE_MALFORMED_INPUT,
 * CURLUE_BAD_HOSTNAME, CURLUE_NO_HOST or CURLUE_OUT_OF_MEMORY.
 */
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
{
  size_t len;
  size_t hlen = strlen(hostname);

  if(hostname[0] == '[') {
    const char *l = "0123456789abcdefABCDEF:.";
    if(hlen < 4) /* '[::]' is the shortest possible valid string */
      return CURLUE_BAD_IPV6;
    /* from here on 'hostname' points past the '[' and 'hlen' is the length
       of what is between the brackets */
    hostname++;
    hlen -= 2;

    if(hostname[hlen] != ']')
      return CURLUE_BAD_IPV6;

    /* only valid letters are ok */
    len = strspn(hostname, l);
    if(hlen != len) {
      hlen = len;
      if(hostname[len] == '%') {
        /* this could now be '%[zone id]' */
        char zoneid[16];
        int i = 0;
        char *h = &hostname[len + 1];
        /* pass '25' if present and is a url encoded percent sign */
        if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
          h += 2;
        /* copy at most 15 bytes of zone id */
        while(*h && (*h != ']') && (i < 15))
          zoneid[i++] = *h++;
        /* an empty zone id, or one that did not end at the bracket */
        if(!i || (']' != *h))
          /* impossible to reach? */
          return CURLUE_MALFORMED_INPUT;
        zoneid[i] = 0;
        u->zoneid = strdup(zoneid);
        if(!u->zoneid)
          return CURLUE_OUT_OF_MEMORY;
        hostname[len] = ']'; /* insert end bracket */
        hostname[len + 1] = 0; /* terminate the hostname */
      }
      else
        return CURLUE_BAD_IPV6;
      /* hostname is fine */
    }
#ifdef ENABLE_IPV6
    {
      char dest[16]; /* fits a binary IPv6 address */
      char norm[MAX_IPADR_LEN];
      hostname[hlen] = 0; /* end the address there */
      if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
        return CURLUE_BAD_IPV6;

      /* check if it can be done shorter */
      if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
         (strlen(norm) < hlen)) {
        strcpy(hostname, norm);
        hlen = strlen(norm);
        /* terminate after the bracket that gets restored below */
        hostname[hlen + 1] = 0;
      }
      hostname[hlen] = ']'; /* restore ending bracket */
    }
#endif
  }
  else {
    /* letters from the second string are not ok */
    len = strcspn(hostname, " \r\n\t/:#?!@");
    if(hlen != len)
      /* hostname with bad content */
      return CURLUE_BAD_HOSTNAME;
  }
  if(!hostname[0])
    return CURLUE_NO_HOST;
  return CURLUE_OK;
}
699
/* non-zero for the characters that end the host name + port number part of
   a URL: the start of the path, the query or the fragment */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
701
702/*
703 * Handle partial IPv4 numerical addresses and different bases, like
704 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
705 *
706 * If the given input string is syntactically wrong or any part for example is
707 * too big, this function returns FALSE and doesn't create any output.
708 *
709 * Output the "normalized" version of that input string in plain quad decimal
710 * integers and return TRUE.
711 */
712static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
713{
714 bool done = FALSE;
715 int n = 0;
716 const char *c = hostname;
717 unsigned long parts[4] = {0, 0, 0, 0};
718
719 while(!done) {
720 char *endp;
721 unsigned long l;
722 if((*c < '0') || (*c > '9'))
723 /* most importantly this doesn't allow a leading plus or minus */
724 return FALSE;
725 l = strtoul(c, &endp, 0);
726
727 /* overflow or nothing parsed at all */
728 if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
729 return FALSE;
730
731#if SIZEOF_LONG > 4
732 /* a value larger than 32 bits */
733 if(l > UINT_MAX)
734 return FALSE;
735#endif
736
737 parts[n] = l;
738 c = endp;
739
740 switch (*c) {
741 case '.' :
742 if(n == 3)
743 return FALSE;
744 n++;
745 c++;
746 break;
747
748 case '\0':
749 done = TRUE;
750 break;
751
752 default:
753 return FALSE;
754 }
755 }
756
757 /* this is deemed a valid IPv4 numerical address */
758
759 switch(n) {
760 case 0: /* a -- 32 bits */
761 msnprintf(outp, olen, "%u.%u.%u.%u",
762 parts[0] >> 24, (parts[0] >> 16) & 0xff,
763 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
764 break;
765 case 1: /* a.b -- 8.24 bits */
766 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
767 return FALSE;
768 msnprintf(outp, olen, "%u.%u.%u.%u",
769 parts[0], (parts[1] >> 16) & 0xff,
770 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
771 break;
772 case 2: /* a.b.c -- 8.8.16 bits */
773 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
774 return FALSE;
775 msnprintf(outp, olen, "%u.%u.%u.%u",
776 parts[0], parts[1], (parts[2] >> 8) & 0xff,
777 parts[2] & 0xff);
778 break;
779 case 3: /* a.b.c.d -- 8.8.8.8 bits */
780 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
781 (parts[3] > 0xff))
782 return FALSE;
783 msnprintf(outp, olen, "%u.%u.%u.%u",
784 parts[0], parts[1], parts[2], parts[3]);
785 break;
786 }
787 return TRUE;
788}
789
790/* return strdup'ed version in 'outp', possibly percent decoded */
791static CURLUcode decode_host(char *hostname, char **outp)
792{
793 char *per = NULL;
794 if(hostname[0] != '[')
795 /* only decode if not an ipv6 numerical */
796 per = strchr(hostname, '%');
797 if(!per) {
798 *outp = strdup(hostname);
799 if(!*outp)
800 return CURLUE_OUT_OF_MEMORY;
801 }
802 else {
803 /* might be encoded */
804 size_t dlen;
805 CURLcode result = Curl_urldecode(hostname, 0, outp, &dlen, REJECT_CTRL);
806 if(result)
807 return CURLUE_BAD_HOSTNAME;
808 }
809
810 return CURLUE_OK;
811}
812
813static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
814{
815 char *path;
816 bool path_alloced = FALSE;
817 bool uncpath = FALSE;
818 char *hostname;
819 char *query = NULL;
820 char *fragment = NULL;
821 CURLUcode result;
822 bool url_has_scheme = FALSE;
823 char schemebuf[MAX_SCHEME_LEN + 1];
824 const char *schemep = NULL;
825 size_t schemelen = 0;
826 size_t urllen;
827
828 DEBUGASSERT(url);
829
830 /*************************************************************
831 * Parse the URL.
832 ************************************************************/
833 /* allocate scratch area */
834 urllen = strlen(url);
835 if(urllen > CURL_MAX_INPUT_LENGTH)
836 /* excessive input length */
837 return CURLUE_MALFORMED_INPUT;
838
839 path = u->scratch = malloc(urllen * 2 + 2);
840 if(!path)
841 return CURLUE_OUT_OF_MEMORY;
842
843 hostname = &path[urllen + 1];
844 hostname[0] = 0;
845
846 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
847 url_has_scheme = TRUE;
848 schemelen = strlen(schemebuf);
849 }
850
851 /* handle the file: scheme */
852 if(url_has_scheme && !strcmp(schemebuf, "file")) {
853 if(urllen <= 6)
854 /* file:/ is not enough to actually be a complete file: URL */
855 return CURLUE_BAD_FILE_URL;
856
857 /* path has been allocated large enough to hold this */
858 strcpy(path, &url[5]);
859
860 u->scheme = strdup("file");
861 if(!u->scheme)
862 return CURLUE_OUT_OF_MEMORY;
863
864 /* Extra handling URLs with an authority component (i.e. that start with
865 * "file://")
866 *
867 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
868 * RFC 8089, but not the (current) WHAT-WG URL spec.
869 */
870 if(path[0] == '/' && path[1] == '/') {
871 /* swallow the two slashes */
872 char *ptr = &path[2];
873
874 /*
875 * According to RFC 8089, a file: URL can be reliably dereferenced if:
876 *
877 * o it has no/blank hostname, or
878 *
879 * o the hostname matches "localhost" (case-insensitively), or
880 *
881 * o the hostname is a FQDN that resolves to this machine, or
882 *
883 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
884 * Appendix E.3).
885 *
886 * For brevity, we only consider URLs with empty, "localhost", or
887 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
888 *
889 * Additionally, there is an exception for URLs with a Windows drive
890 * letter in the authority (which was accidentally omitted from RFC 8089
891 * Appendix E, but believe me, it was meant to be there. --MK)
892 */
893 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
894 /* the URL includes a host name, it must match "localhost" or
895 "127.0.0.1" to be valid */
896 if(checkprefix("localhost/", ptr) ||
897 checkprefix("127.0.0.1/", ptr)) {
898 ptr += 9; /* now points to the slash after the host */
899 }
900 else {
901#if defined(WIN32)
902 size_t len;
903
904 /* the host name, NetBIOS computer name, can not contain disallowed
905 chars, and the delimiting slash character must be appended to the
906 host name */
907 path = strpbrk(ptr, "/\\:*?\"<>|");
908 if(!path || *path != '/')
909 return CURLUE_BAD_FILE_URL;
910
911 len = path - ptr;
912 if(len) {
913 memcpy(hostname, ptr, len);
914 hostname[len] = 0;
915 uncpath = TRUE;
916 }
917
918 ptr -= 2; /* now points to the // before the host in UNC */
919#else
920 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
921 none */
922 return CURLUE_BAD_FILE_URL;
923#endif
924 }
925 }
926
927 path = ptr;
928 }
929
930 if(!uncpath)
931 hostname = NULL; /* no host for file: URLs by default */
932
933#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
934 /* Don't allow Windows drive letters when not in Windows.
935 * This catches both "file:/c:" and "file:c:" */
936 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
937 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
938 /* File drive letters are only accepted in MSDOS/Windows */
939 return CURLUE_BAD_FILE_URL;
940 }
941#else
942 /* If the path starts with a slash and a drive letter, ditch the slash */
943 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
944 /* This cannot be done with strcpy, as the memory chunks overlap! */
945 memmove(path, &path[1], strlen(&path[1]) + 1);
946 }
947#endif
948
949 }
950 else {
951 /* clear path */
952 const char *p;
953 const char *hostp;
954 size_t len;
955 path[0] = 0;
956
957 if(url_has_scheme) {
958 int i = 0;
959 p = &url[schemelen + 1];
960 while(p && (*p == '/') && (i < 4)) {
961 p++;
962 i++;
963 }
964 if((i < 1) || (i>3))
965 /* less than one or more than three slashes */
966 return CURLUE_BAD_SLASHES;
967
968 schemep = schemebuf;
969 if(!Curl_builtin_scheme(schemep) &&
970 !(flags & CURLU_NON_SUPPORT_SCHEME))
971 return CURLUE_UNSUPPORTED_SCHEME;
972
973 if(junkscan(schemep, flags))
974 return CURLUE_BAD_SCHEME;
975 }
976 else {
977 /* no scheme! */
978
979 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
980 return CURLUE_BAD_SCHEME;
981 if(flags & CURLU_DEFAULT_SCHEME)
982 schemep = DEFAULT_SCHEME;
983
984 /*
985 * The URL was badly formatted, let's try without scheme specified.
986 */
987 p = url;
988 }
989 hostp = p; /* host name starts here */
990
991 /* find the end of the host name + port number */
992 while(*p && !HOSTNAME_END(*p))
993 p++;
994
995 len = p - hostp;
996 if(len) {
997 memcpy(hostname, hostp, len);
998 hostname[len] = 0;
999 }
1000 else {
1001 if(!(flags & CURLU_NO_AUTHORITY))
1002 return CURLUE_NO_HOST;
1003 }
1004
1005 strcpy(path, p);
1006
1007 if(schemep) {
1008 u->scheme = strdup(schemep);
1009 if(!u->scheme)
1010 return CURLUE_OUT_OF_MEMORY;
1011 }
1012 }
1013
1014 if((flags & CURLU_URLENCODE) && path[0]) {
1015 /* worst case output length is 3x the original! */
1016 char *newp = malloc(strlen(path) * 3);
1017 if(!newp)
1018 return CURLUE_OUT_OF_MEMORY;
1019 path_alloced = TRUE;
1020 strcpy_url(newp, path, TRUE); /* consider it relative */
1021 u->temppath = path = newp;
1022 }
1023
1024 fragment = strchr(path, '#');
1025 if(fragment) {
1026 *fragment++ = 0;
1027 if(junkscan(fragment, flags))
1028 return CURLUE_BAD_FRAGMENT;
1029 if(fragment[0]) {
1030 u->fragment = strdup(fragment);
1031 if(!u->fragment)
1032 return CURLUE_OUT_OF_MEMORY;
1033 }
1034 }
1035
1036 query = strchr(path, '?');
1037 if(query) {
1038 *query++ = 0;
1039 if(junkscan(query, flags))
1040 return CURLUE_BAD_QUERY;
1041 /* done even if the query part is a blank string */
1042 u->query = strdup(query);
1043 if(!u->query)
1044 return CURLUE_OUT_OF_MEMORY;
1045 }
1046
1047 if(junkscan(path, flags))
1048 return CURLUE_BAD_PATH;
1049
1050 if(!path[0])
1051 /* if there's no path left set, unset */
1052 path = NULL;
1053 else {
1054 if(!(flags & CURLU_PATH_AS_IS)) {
1055 /* remove ../ and ./ sequences according to RFC3986 */
1056 char *newp = Curl_dedotdotify(path);
1057 if(!newp)
1058 return CURLUE_OUT_OF_MEMORY;
1059
1060 if(strcmp(newp, path)) {
1061 /* if we got a new version */
1062 if(path_alloced)
1063 Curl_safefree(u->temppath);
1064 u->temppath = path = newp;
1065 path_alloced = TRUE;
1066 }
1067 else
1068 free(newp);
1069 }
1070
1071 u->path = path_alloced?path:strdup(path);
1072 if(!u->path)
1073 return CURLUE_OUT_OF_MEMORY;
1074 u->temppath = NULL; /* used now */
1075 }
1076
1077 if(hostname) {
1078 char normalized_ipv4[sizeof("255.255.255.255") + 1];
1079
1080 /*
1081 * Parse the login details and strip them out of the host name.
1082 */
1083 result = parse_hostname_login(u, &hostname, flags);
1084 if(result)
1085 return result;
1086
1087 result = Curl_parse_port(u, hostname, url_has_scheme);
1088 if(result)
1089 return result;
1090
1091 if(junkscan(hostname, flags))
1092 return CURLUE_BAD_HOSTNAME;
1093
1094 if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
1095 /* Skip hostname check, it's allowed to be empty. */
1096 u->host = strdup("");
1097 }
1098 else {
1099 if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
1100 u->host = strdup(normalized_ipv4);
1101 else {
1102 result = decode_host(hostname, &u->host);
1103 if(result)
1104 return result;
1105 result = hostname_check(u, u->host);
1106 if(result)
1107 return result;
1108 }
1109 }
1110 if(!u->host)
1111 return CURLUE_OUT_OF_MEMORY;
1112 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1113 /* legacy curl-style guess based on host name */
1114 if(checkprefix("ftp.", hostname))
1115 schemep = "ftp";
1116 else if(checkprefix("dict.", hostname))
1117 schemep = "dict";
1118 else if(checkprefix("ldap.", hostname))
1119 schemep = "ldap";
1120 else if(checkprefix("imap.", hostname))
1121 schemep = "imap";
1122 else if(checkprefix("smtp.", hostname))
1123 schemep = "smtp";
1124 else if(checkprefix("pop3.", hostname))
1125 schemep = "pop3";
1126 else
1127 schemep = "http";
1128
1129 u->scheme = strdup(schemep);
1130 if(!u->scheme)
1131 return CURLUE_OUT_OF_MEMORY;
1132 }
1133 }
1134
1135 Curl_safefree(u->scratch);
1136 Curl_safefree(u->temppath);
1137
1138 return CURLUE_OK;
1139}
1140
1141/*
1142 * Parse the URL and set the relevant members of the Curl_URL struct.
1143 */
1144static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1145{
1146 CURLUcode result = seturl(url, u, flags);
1147 if(result) {
1148 free_urlhandle(u);
1149 memset(u, 0, sizeof(struct Curl_URL));
1150 }
1151 return result;
1152}
1153
1154/*
1155 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1156 */
1157static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1158 unsigned int flags)
1159{
1160 CURLUcode result;
1161 CURLU tmpurl;
1162 memset(&tmpurl, 0, sizeof(tmpurl));
1163 result = parseurl(url, &tmpurl, flags);
1164 if(!result) {
1165 free_urlhandle(u);
1166 *u = tmpurl;
1167 }
1168 else
1169 free_urlhandle(&tmpurl);
1170 return result;
1171}
1172
1173/*
1174 */
1175CURLU *curl_url(void)
1176{
1177 return calloc(sizeof(struct Curl_URL), 1);
1178}
1179
1180void curl_url_cleanup(CURLU *u)
1181{
1182 if(u) {
1183 free_urlhandle(u);
1184 free(u);
1185 }
1186}
1187
/*
 * Duplicate the string member 'name' from the struct 'src' points to into
 * the one 'dest' points to, if it is set in the source. Jumps to the 'fail'
 * label, that the function using this macro must provide, if the
 * duplication fails.
 *
 * 'dest' and 'src' are parenthesized so that any pointer expression can be
 * passed. 'src' is evaluated twice.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1196
1197CURLU *curl_url_dup(CURLU *in)
1198{
1199 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1200 if(u) {
1201 DUP(u, in, scheme);
1202 DUP(u, in, user);
1203 DUP(u, in, password);
1204 DUP(u, in, options);
1205 DUP(u, in, host);
1206 DUP(u, in, port);
1207 DUP(u, in, path);
1208 DUP(u, in, query);
1209 DUP(u, in, fragment);
1210 u->portnum = in->portnum;
1211 }
1212 return u;
1213 fail:
1214 curl_url_cleanup(u);
1215 return NULL;
1216}
1217
1218CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1219 char **part, unsigned int flags)
1220{
1221 char *ptr;
1222 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1223 char portbuf[7];
1224 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1225 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1226 bool plusdecode = FALSE;
1227 (void)flags;
1228 if(!u)
1229 return CURLUE_BAD_HANDLE;
1230 if(!part)
1231 return CURLUE_BAD_PARTPOINTER;
1232 *part = NULL;
1233
1234 switch(what) {
1235 case CURLUPART_SCHEME:
1236 ptr = u->scheme;
1237 ifmissing = CURLUE_NO_SCHEME;
1238 urldecode = FALSE; /* never for schemes */
1239 break;
1240 case CURLUPART_USER:
1241 ptr = u->user;
1242 ifmissing = CURLUE_NO_USER;
1243 break;
1244 case CURLUPART_PASSWORD:
1245 ptr = u->password;
1246 ifmissing = CURLUE_NO_PASSWORD;
1247 break;
1248 case CURLUPART_OPTIONS:
1249 ptr = u->options;
1250 ifmissing = CURLUE_NO_OPTIONS;
1251 break;
1252 case CURLUPART_HOST:
1253 ptr = u->host;
1254 ifmissing = CURLUE_NO_HOST;
1255 break;
1256 case CURLUPART_ZONEID:
1257 ptr = u->zoneid;
1258 ifmissing = CURLUE_NO_ZONEID;
1259 break;
1260 case CURLUPART_PORT:
1261 ptr = u->port;
1262 ifmissing = CURLUE_NO_PORT;
1263 urldecode = FALSE; /* never for port */
1264 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1265 /* there's no stored port number, but asked to deliver
1266 a default one for the scheme */
1267 const struct Curl_handler *h =
1268 Curl_builtin_scheme(u->scheme);
1269 if(h) {
1270 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1271 ptr = portbuf;
1272 }
1273 }
1274 else if(ptr && u->scheme) {
1275 /* there is a stored port number, but ask to inhibit if
1276 it matches the default one for the scheme */
1277 const struct Curl_handler *h =
1278 Curl_builtin_scheme(u->scheme);
1279 if(h && (h->defport == u->portnum) &&
1280 (flags & CURLU_NO_DEFAULT_PORT))
1281 ptr = NULL;
1282 }
1283 break;
1284 case CURLUPART_PATH:
1285 ptr = u->path;
1286 if(!ptr) {
1287 ptr = u->path = strdup("/");
1288 if(!u->path)
1289 return CURLUE_OUT_OF_MEMORY;
1290 }
1291 break;
1292 case CURLUPART_QUERY:
1293 ptr = u->query;
1294 ifmissing = CURLUE_NO_QUERY;
1295 plusdecode = urldecode;
1296 break;
1297 case CURLUPART_FRAGMENT:
1298 ptr = u->fragment;
1299 ifmissing = CURLUE_NO_FRAGMENT;
1300 break;
1301 case CURLUPART_URL: {
1302 char *url;
1303 char *scheme;
1304 char *options = u->options;
1305 char *port = u->port;
1306 char *allochost = NULL;
1307 if(u->scheme && strcasecompare("file", u->scheme)) {
1308 url = aprintf("file://%s%s%s",
1309 u->path,
1310 u->fragment? "#": "",
1311 u->fragment? u->fragment : "");
1312 }
1313 else if(!u->host)
1314 return CURLUE_NO_HOST;
1315 else {
1316 const struct Curl_handler *h = NULL;
1317 if(u->scheme)
1318 scheme = u->scheme;
1319 else if(flags & CURLU_DEFAULT_SCHEME)
1320 scheme = (char *) DEFAULT_SCHEME;
1321 else
1322 return CURLUE_NO_SCHEME;
1323
1324 h = Curl_builtin_scheme(scheme);
1325 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1326 /* there's no stored port number, but asked to deliver
1327 a default one for the scheme */
1328 if(h) {
1329 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1330 port = portbuf;
1331 }
1332 }
1333 else if(port) {
1334 /* there is a stored port number, but asked to inhibit if it matches
1335 the default one for the scheme */
1336 if(h && (h->defport == u->portnum) &&
1337 (flags & CURLU_NO_DEFAULT_PORT))
1338 port = NULL;
1339 }
1340
1341 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1342 options = NULL;
1343
1344 if(u->host[0] == '[') {
1345 if(u->zoneid) {
1346 /* make it '[ host %25 zoneid ]' */
1347 size_t hostlen = strlen(u->host);
1348 size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
1349 allochost = malloc(alen);
1350 if(!allochost)
1351 return CURLUE_OUT_OF_MEMORY;
1352 memcpy(allochost, u->host, hostlen - 1);
1353 msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
1354 "%%25%s]", u->zoneid);
1355 }
1356 }
1357 else if(urlencode) {
1358 allochost = curl_easy_escape(NULL, u->host, 0);
1359 if(!allochost)
1360 return CURLUE_OUT_OF_MEMORY;
1361 }
1362 else {
1363 /* only encode '%' in output host name */
1364 char *host = u->host;
1365 size_t pcount = 0;
1366 /* first, count number of percents present in the name */
1367 while(*host) {
1368 if(*host == '%')
1369 pcount++;
1370 host++;
1371 }
1372 /* if there were percents, encode the host name */
1373 if(pcount) {
1374 size_t hostlen = strlen(u->host);
1375 size_t alen = hostlen + 2 * pcount + 1;
1376 char *o = allochost = malloc(alen);
1377 if(!allochost)
1378 return CURLUE_OUT_OF_MEMORY;
1379
1380 host = u->host;
1381 while(*host) {
1382 if(*host == '%') {
1383 memcpy(o, "%25", 3);
1384 o += 3;
1385 host++;
1386 continue;
1387 }
1388 *o++ = *host++;
1389 }
1390 *o = '\0';
1391 }
1392 }
1393
1394 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1395 scheme,
1396 u->user ? u->user : "",
1397 u->password ? ":": "",
1398 u->password ? u->password : "",
1399 options ? ";" : "",
1400 options ? options : "",
1401 (u->user || u->password || options) ? "@": "",
1402 allochost ? allochost : u->host,
1403 port ? ":": "",
1404 port ? port : "",
1405 (u->path && (u->path[0] != '/')) ? "/": "",
1406 u->path ? u->path : "/",
1407 (u->query && u->query[0]) ? "?": "",
1408 (u->query && u->query[0]) ? u->query : "",
1409 u->fragment? "#": "",
1410 u->fragment? u->fragment : "");
1411 free(allochost);
1412 }
1413 if(!url)
1414 return CURLUE_OUT_OF_MEMORY;
1415 *part = url;
1416 return CURLUE_OK;
1417 }
1418 default:
1419 ptr = NULL;
1420 break;
1421 }
1422 if(ptr) {
1423 *part = strdup(ptr);
1424 if(!*part)
1425 return CURLUE_OUT_OF_MEMORY;
1426 if(plusdecode) {
1427 /* convert + to space */
1428 char *plus;
1429 for(plus = *part; *plus; ++plus) {
1430 if(*plus == '+')
1431 *plus = ' ';
1432 }
1433 }
1434 if(urldecode) {
1435 char *decoded;
1436 size_t dlen;
1437 /* this unconditional rejection of control bytes is documented
1438 API behavior */
1439 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1440 free(*part);
1441 if(res) {
1442 *part = NULL;
1443 return CURLUE_URLDECODE;
1444 }
1445 *part = decoded;
1446 }
1447 if(urlencode) {
1448 /* worst case output length is 3x the original! */
1449 char *newp = malloc(strlen(*part) * 3);
1450 if(!newp)
1451 return CURLUE_OUT_OF_MEMORY;
1452 if(strcpy_url(newp, *part, TRUE)) { /* consider it relative */
1453 free(*part);
1454 *part = newp;
1455 }
1456 else
1457 free(newp);
1458 }
1459
1460 return CURLUE_OK;
1461 }
1462 else
1463 return ifmissing;
1464}
1465
1466CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1467 const char *part, unsigned int flags)
1468{
1469 char **storep = NULL;
1470 long port = 0;
1471 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1472 bool plusencode = FALSE;
1473 bool urlskipslash = FALSE;
1474 bool appendquery = FALSE;
1475 bool equalsencode = FALSE;
1476
1477 if(!u)
1478 return CURLUE_BAD_HANDLE;
1479 if(!part) {
1480 /* setting a part to NULL clears it */
1481 switch(what) {
1482 case CURLUPART_URL:
1483 break;
1484 case CURLUPART_SCHEME:
1485 storep = &u->scheme;
1486 break;
1487 case CURLUPART_USER:
1488 storep = &u->user;
1489 break;
1490 case CURLUPART_PASSWORD:
1491 storep = &u->password;
1492 break;
1493 case CURLUPART_OPTIONS:
1494 storep = &u->options;
1495 break;
1496 case CURLUPART_HOST:
1497 storep = &u->host;
1498 break;
1499 case CURLUPART_ZONEID:
1500 storep = &u->zoneid;
1501 break;
1502 case CURLUPART_PORT:
1503 u->portnum = 0;
1504 storep = &u->port;
1505 break;
1506 case CURLUPART_PATH:
1507 storep = &u->path;
1508 break;
1509 case CURLUPART_QUERY:
1510 storep = &u->query;
1511 break;
1512 case CURLUPART_FRAGMENT:
1513 storep = &u->fragment;
1514 break;
1515 default:
1516 return CURLUE_UNKNOWN_PART;
1517 }
1518 if(storep && *storep) {
1519 Curl_safefree(*storep);
1520 }
1521 else if(!storep) {
1522 free_urlhandle(u);
1523 memset(u, 0, sizeof(struct Curl_URL));
1524 }
1525 return CURLUE_OK;
1526 }
1527
1528 switch(what) {
1529 case CURLUPART_SCHEME:
1530 if(strlen(part) > MAX_SCHEME_LEN)
1531 /* too long */
1532 return CURLUE_BAD_SCHEME;
1533 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1534 /* verify that it is a fine scheme */
1535 !Curl_builtin_scheme(part))
1536 return CURLUE_UNSUPPORTED_SCHEME;
1537 storep = &u->scheme;
1538 urlencode = FALSE; /* never */
1539 break;
1540 case CURLUPART_USER:
1541 storep = &u->user;
1542 break;
1543 case CURLUPART_PASSWORD:
1544 storep = &u->password;
1545 break;
1546 case CURLUPART_OPTIONS:
1547 storep = &u->options;
1548 break;
1549 case CURLUPART_HOST: {
1550 size_t len = strcspn(part, " \r\n");
1551 if(strlen(part) != len)
1552 /* hostname with bad content */
1553 return CURLUE_BAD_HOSTNAME;
1554 storep = &u->host;
1555 Curl_safefree(u->zoneid);
1556 break;
1557 }
1558 case CURLUPART_ZONEID:
1559 storep = &u->zoneid;
1560 break;
1561 case CURLUPART_PORT:
1562 {
1563 char *endp;
1564 urlencode = FALSE; /* never */
1565 port = strtol(part, &endp, 10); /* Port number must be decimal */
1566 if((port <= 0) || (port > 0xffff))
1567 return CURLUE_BAD_PORT_NUMBER;
1568 if(*endp)
1569 /* weirdly provided number, not good! */
1570 return CURLUE_BAD_PORT_NUMBER;
1571 storep = &u->port;
1572 }
1573 break;
1574 case CURLUPART_PATH:
1575 urlskipslash = TRUE;
1576 storep = &u->path;
1577 break;
1578 case CURLUPART_QUERY:
1579 plusencode = urlencode;
1580 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1581 equalsencode = appendquery;
1582 storep = &u->query;
1583 break;
1584 case CURLUPART_FRAGMENT:
1585 storep = &u->fragment;
1586 break;
1587 case CURLUPART_URL: {
1588 /*
1589 * Allow a new URL to replace the existing (if any) contents.
1590 *
1591 * If the existing contents is enough for a URL, allow a relative URL to
1592 * replace it.
1593 */
1594 CURLUcode result;
1595 char *oldurl;
1596 char *redired_url;
1597
1598 /* if the new thing is absolute or the old one is not
1599 * (we could not get an absolute url in 'oldurl'),
1600 * then replace the existing with the new. */
1601 if(Curl_is_absolute_url(part, NULL, 0)
1602 || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1603 return parseurl_and_replace(part, u, flags);
1604 }
1605
1606 /* apply the relative part to create a new URL
1607 * and replace the existing one with it. */
1608 redired_url = concat_url(oldurl, part);
1609 free(oldurl);
1610 if(!redired_url)
1611 return CURLUE_OUT_OF_MEMORY;
1612
1613 result = parseurl_and_replace(redired_url, u, flags);
1614 free(redired_url);
1615 return result;
1616 }
1617 default:
1618 return CURLUE_UNKNOWN_PART;
1619 }
1620 DEBUGASSERT(storep);
1621 {
1622 const char *newp = part;
1623 size_t nalloc = strlen(part);
1624
1625 if(nalloc > CURL_MAX_INPUT_LENGTH)
1626 /* excessive input length */
1627 return CURLUE_MALFORMED_INPUT;
1628
1629 if(urlencode) {
1630 const unsigned char *i;
1631 char *o;
1632 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
1633 if(!enc)
1634 return CURLUE_OUT_OF_MEMORY;
1635 for(i = (const unsigned char *)part, o = enc; *i; i++) {
1636 if((*i == ' ') && plusencode) {
1637 *o = '+';
1638 o++;
1639 }
1640 else if(Curl_isunreserved(*i) ||
1641 ((*i == '/') && urlskipslash) ||
1642 ((*i == '=') && equalsencode)) {
1643 if((*i == '=') && equalsencode)
1644 /* only skip the first equals sign */
1645 equalsencode = FALSE;
1646 *o = *i;
1647 o++;
1648 }
1649 else {
1650 msnprintf(o, 4, "%%%02x", *i);
1651 o += 3;
1652 }
1653 }
1654 *o = 0; /* null-terminate */
1655 newp = enc;
1656 }
1657 else {
1658 char *p;
1659 newp = strdup(part);
1660 if(!newp)
1661 return CURLUE_OUT_OF_MEMORY;
1662 p = (char *)newp;
1663 while(*p) {
1664 /* make sure percent encoded are lower case */
1665 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1666 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1667 p[1] = (char)TOLOWER(p[1]);
1668 p[2] = (char)TOLOWER(p[2]);
1669 p += 3;
1670 }
1671 else
1672 p++;
1673 }
1674 }
1675
1676 if(appendquery) {
1677 /* Append the string onto the old query. Add a '&' separator if none is
1678 present at the end of the exsting query already */
1679 size_t querylen = u->query ? strlen(u->query) : 0;
1680 bool addamperand = querylen && (u->query[querylen -1] != '&');
1681 if(querylen) {
1682 size_t newplen = strlen(newp);
1683 char *p = malloc(querylen + addamperand + newplen + 1);
1684 if(!p) {
1685 free((char *)newp);
1686 return CURLUE_OUT_OF_MEMORY;
1687 }
1688 strcpy(p, u->query); /* original query */
1689 if(addamperand)
1690 p[querylen] = '&'; /* ampersand */
1691 strcpy(&p[querylen + addamperand], newp); /* new suffix */
1692 free((char *)newp);
1693 free(*storep);
1694 *storep = p;
1695 return CURLUE_OK;
1696 }
1697 }
1698
1699 if(what == CURLUPART_HOST) {
1700 if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1701 /* Skip hostname check, it's allowed to be empty. */
1702 }
1703 else {
1704 if(hostname_check(u, (char *)newp)) {
1705 free((char *)newp);
1706 return CURLUE_BAD_HOSTNAME;
1707 }
1708 }
1709 }
1710
1711 free(*storep);
1712 *storep = (char *)newp;
1713 }
1714 /* set after the string, to make it not assigned if the allocation above
1715 fails */
1716 if(port)
1717 u->portnum = port;
1718 return CURLUE_OK;
1719}
1720