urlapi.c source code [tensorflow/external/curl/lib/urlapi.c]

1	/***************************************************************************
2	* _ _ ____ _
3	* Project ___\| \| \| \| _ \\| \|
4	* / __\| \| \| \| \|_) \| \|
5	* \| (__\| \|_\| \| _ <\| \|___
6	* \___\|\___/\|_\| \_\_____\|
7	*
8	* Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
9	*
10	* This software is licensed as described in the file COPYING, which
11	* you should have received as part of this distribution. The terms
12	* are also available at https://curl.se/docs/copyright.html.
13	*
14	* You may opt to use, copy, modify, merge, publish, distribute and/or sell
15	* copies of the Software, and permit persons to whom the Software is
16	* furnished to do so, under the terms of the COPYING file.
17	*
18	* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19	* KIND, either express or implied.
20	*
21	* SPDX-License-Identifier: curl
22	*
23	***************************************************************************/
24
25	#include "curl_setup.h"
26
27	#include "urldata.h"
28	#include "urlapi-int.h"
29	#include "strcase.h"
30	#include "dotdot.h"
31	#include "url.h"
32	#include "escape.h"
33	#include "curl_ctype.h"
34	#include "inet_pton.h"
35	#include "inet_ntop.h"
36
37	/* The last 3 #include files should be in this order */
38	#include "curl_printf.h"
39	#include "curl_memory.h"
40	#include "memdebug.h"
41
/* MSDOS/Windows style drive prefix, eg c: in c:foo.
 * The argument is parenthesized so that any pointer expression can be passed,
 * matching STARTS_WITH_URL_DRIVE_PREFIX(). */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
47
/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash, a backslash or NUL */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
55
/* scheme is not URL encoded, the longest libcurl supported ones are... */
#define MAX_SCHEME_LEN 40
58
/* Internal representation of CURLU. Point to URL-encoded strings. */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;

  char *scratch; /* temporary scratch area */
  char *temppath; /* temporary path pointer */
  long portnum; /* the numerical version */
};
76
/* scheme assumed when CURLU_DEFAULT_SCHEME is set and the URL has none */
#define DEFAULT_SCHEME "https"
78
79	static void free_urlhandle(struct Curl_URL *u)
80	{
81	free(u->scheme);
82	free(u->user);
83	free(u->password);
84	free(u->options);
85	free(u->host);
86	free(u->zoneid);
87	free(u->port);
88	free(u->path);
89	free(u->query);
90	free(u->fragment);
91	free(u->scratch);
92	free(u->temppath);
93	}
94
/*
 * Find the separator at the end of the host name, or the '?' in cases like
 * http://www.url.com?id=2380
 *
 * Returns a pointer into 'url': the first '/' or '?' found after the leading
 * "//" (or after the start of the string if there is no "//"), whichever
 * comes first, or the terminating NUL if neither is present.
 */
static const char *find_host_sep(const char *url)
{
  const char *sep;
  const char *query;

  /* Find the start of the hostname */
  sep = strstr(url, "//");
  if(!sep)
    sep = url;
  else
    sep += 2;

  query = strchr(sep, '?');
  sep = strchr(sep, '/');

  /* a missing separator is treated as being at the end of the string */
  if(!sep)
    sep = url + strlen(url);

  if(!query)
    query = url + strlen(url);

  return sep < query ? sep : query;
}
122
/*
 * Decide in an encoding-independent manner whether a character in an
 * URL must be escaped. The same criterion must be used in strlen_url()
 * and strcpy_url().
 *
 * Returns TRUE for any value that is neither a control character, a
 * whitespace character nor a graphic character according to the ISCNTRL,
 * ISSPACE and ISGRAPH macros.
 */
static bool urlchar_needs_escaping(int c)
{
  return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
}
132
133	/*
134	* strlen_url() returns the length of the given URL if the spaces within the
135	* URL were properly URL encoded.
136	* URL encoding should be skipped for host names, otherwise IDN resolution
137	* will fail.
138	*/
139	static size_t strlen_url(const char *url, bool relative)
140	{
141	const unsigned char *ptr;
142	size_t newlen = `0`;
143	bool left = TRUE; / left side of the ? /
144	const unsigned char host_sep = (const* unsigned char *) url;
145
146	if(!relative)
147	host_sep = (const unsigned char *) find_host_sep(url);
148
149	for(ptr = (unsigned char )url; ptr; ptr++) {
150
151	if(ptr < host_sep) {
152	++newlen;
153	continue;
154	}
155
156	if(*ptr == `' '`) {
157	if(left)
158	newlen += `3`;
159	else
160	newlen++;
161	continue;
162	}
163
164	if (*ptr == `'?'`)
165	left = FALSE;
166
167	if(urlchar_needs_escaping(*ptr))
168	newlen += `2`;
169
170	newlen++;
171	}
172
173	return newlen;
174	}
175
176	/ strcpy_url() copies a url to a output buffer and URL-encodes the spaces in*
177	* the source URL accordingly.
178	* URL encoding should be skipped for host names, otherwise IDN resolution
179	* will fail.
180	*
181	* Returns TRUE if something was updated.
182	*/
183	static bool strcpy_url(char output, const* char *url, bool relative)
184	{
185	/ we must add this with whitespace-replacing /
186	bool left = TRUE;
187	const unsigned char *iptr;
188	char *optr = output;
189	const unsigned char host_sep = (const* unsigned char *) url;
190	bool changed = FALSE;
191
192	if(!relative)
193	host_sep = (const unsigned char *) find_host_sep(url);
194
195	for(iptr = (unsigned char )url; /* read from here /
196	iptr; /* until zero byte /
197	iptr++) {
198
199	if(iptr < host_sep) {
200	optr++ = iptr;
201	continue;
202	}
203
204	if(*iptr == `' '`) {
205	if(left) {
206	optr++=`'%'`; /* add a '%' /
207	optr++=`'2'`; /* add a '2' /
208	optr++=`'0'`; /* add a '0' /
209	}
210	else
211	optr++=`'+'`; /* add a '+' here /
212	changed = TRUE;
213	continue;
214	}
215
216	if(*iptr == `'?'`)
217	left = FALSE;
218
219	if(urlchar_needs_escaping(*iptr)) {
220	msnprintf(optr, `4`, "%%%02x", *iptr);
221	changed = TRUE;
222	optr += `3`;
223	}
224	else
225	optr++ = iptr;
226	}
227	optr = `0`; /* null-terminate output buffer /
228
229	return changed;
230	}
231
232	/*
233	* Returns true if the given URL is absolute (as opposed to relative). Returns
234	* the scheme in the buffer if TRUE and 'buf' is non-NULL. The buflen must
235	* be larger than MAX_SCHEME_LEN if buf is set.
236	*/
237	bool Curl_is_absolute_url(const char url, char* *buf, size_t buflen)
238	{
239	int i;
240	DEBUGASSERT(!buf \|\| (buflen > MAX_SCHEME_LEN));
241	(void)buflen; / only used in debug-builds /
242	if(buf)
243	buf[`0`] = `0`; / always leave a defined value in buf /
244	#ifdef WIN32
245	if(STARTS_WITH_DRIVE_PREFIX(url))
246	return FALSE;
247	#endif
248	for(i = `0`; i < MAX_SCHEME_LEN; ++i) {
249	char s = url[i];
250	if(s && (ISALNUM(s) \|\| (s == `'+'`) \|\| (s == `'-'`) \|\| (s == `'.'`) )) {
251	/ RFC 3986 3.1 explains:*
252	scheme = ALPHA ( ALPHA / DIGIT / "+" / "-" / "." )*
253	*/
254	}
255	else {
256	break;
257	}
258	}
259	if(i && (url[i] == `':'`) && (url[i + `1`] == `'/'`)) {
260	if(buf) {
261	buf[i] = `0`;
262	while(i--) {
263	buf[i] = (char)TOLOWER(url[i]);
264	}
265	}
266	return TRUE;
267	}
268	return FALSE;
269	}
270
271	/*
272	* Concatenate a relative URL to a base URL making it absolute.
273	* URL-encodes any spaces.
274	* The returned pointer must be freed by the caller unless NULL
275	* (returns NULL on out of memory).
276	*/
277	static char concat_url(const* char base, const* char *relurl)
278	{
279	/***
280	TRY to append this new path to the old URL
281	to the right of the host part. Oh crap, this is doomed to cause
282	problems in the future...
283	*/
284	char *newest;
285	char *protsep;
286	char *pathsep;
287	size_t newlen;
288	bool host_changed = FALSE;
289
290	const char *useurl = relurl;
291	size_t urllen;
292
293	/ we must make our own copy of the URL to play with, as it may*
294	point to read-only data /*
295	char *url_clone = strdup(base);
296
297	if(!url_clone)
298	return NULL; / skip out of this NOW /
299
300	/ protsep points to the start of the host name /
301	protsep = strstr(url_clone, "//");
302	if(!protsep)
303	protsep = url_clone;
304	else
305	protsep += `2`; / pass the slashes /
306
307	if(`'/'` != relurl[`0`]) {
308	int level = `0`;
309
310	/ First we need to find out if there's a ?-letter in the URL,*
311	and cut it and the right-side of that off /*
312	pathsep = strchr(protsep, `'?'`);
313	if(pathsep)
314	*pathsep = `0`;
315
316	/ we have a relative path to append to the last slash if there's one*
317	available, or if the new URL is just a query string (starts with a
318	'?') we append the new one at the end of the entire currently worked
319	out URL /*
320	if(useurl[`0`] != `'?'`) {
321	pathsep = strrchr(protsep, `'/'`);
322	if(pathsep)
323	*pathsep = `0`;
324	}
325
326	/ Check if there's any slash after the host name, and if so, remember*
327	that position instead /*
328	pathsep = strchr(protsep, `'/'`);
329	if(pathsep)
330	protsep = pathsep + `1`;
331	else
332	protsep = NULL;
333
334	/ now deal with one "./" or any amount of "../" in the newurl*
335	and act accordingly /*
336
337	if((useurl[`0`] == `'.'`) && (useurl[`1`] == `'/'`))
338	useurl += `2`; / just skip the "./" /
339
340	while((useurl[`0`] == `'.'`) &&
341	(useurl[`1`] == `'.'`) &&
342	(useurl[`2`] == `'/'`)) {
343	level++;
344	useurl += `3`; / pass the "../" /
345	}
346
347	if(protsep) {
348	while(level--) {
349	/ cut off one more level from the right of the original URL /
350	pathsep = strrchr(protsep, `'/'`);
351	if(pathsep)
352	*pathsep = `0`;
353	else {
354	*protsep = `0`;
355	break;
356	}
357	}
358	}
359	}
360	else {
361	/ We got a new absolute path for this server /
362
363	if(relurl[`1`] == `'/'`) {
364	/ the new URL starts with //, just keep the protocol part from the*
365	original one /*
366	*protsep = `0`;
367	useurl = &relurl[`2`]; / we keep the slashes from the original, so we*
368	skip the new ones /*
369	host_changed = TRUE;
370	}
371	else {
372	/ cut off the original URL from the first slash, or deal with URLs*
373	without slash /*
374	pathsep = strchr(protsep, `'/'`);
375	if(pathsep) {
376	/ When people use badly formatted URLs, such as*
377	"http://www.url.com?dir=/home/daniel" we must not use the first
378	slash, if there's a ?-letter before it! /*
379	char *sep = strchr(protsep, `'?'`);
380	if(sep && (sep < pathsep))
381	pathsep = sep;
382	*pathsep = `0`;
383	}
384	else {
385	/ There was no slash. Now, since we might be operating on a badly*
386	formatted URL, such as "http://www.url.com?id=2380" which doesn't
387	use a slash separator as it is supposed to, we need to check for a
388	?-letter as well! /*
389	pathsep = strchr(protsep, `'?'`);
390	if(pathsep)
391	*pathsep = `0`;
392	}
393	}
394	}
395
396	/ If the new part contains a space, this is a mighty stupid redirect*
397	but we still make an effort to do "right". To the left of a '?'
398	letter we replace each space with %20 while it is replaced with '+'
399	on the right side of the '?' letter.
400	*/
401	newlen = strlen_url(useurl, !host_changed);
402
403	urllen = strlen(url_clone);
404
405	newest = malloc(urllen + `1` + / possible slash /
406	newlen + `1` / zero byte /);
407
408	if(!newest) {
409	free(url_clone); / don't leak this /
410	return NULL;
411	}
412
413	/ copy over the root url part /
414	memcpy(newest, url_clone, urllen);
415
416	/ check if we need to append a slash /
417	if((`'/'` == useurl[`0`]) \|\| (protsep && !*protsep) \|\| (`'?'` == useurl[`0`]))
418	;
419	else
420	newest[urllen++]=`'/'`;
421
422	/ then append the new piece on the right side /
423	strcpy_url(&newest[urllen], useurl, !host_changed);
424
425	free(url_clone);
426
427	return newest;
428	}
429
430	/ scan for byte values < 31 or 127 /
431	static bool junkscan(const char part, unsigned* int flags)
432	{
433	if(part) {
434	static const char badbytes[]={
435	/ / `0x01`, `0x02`, `0x03`, `0x04`, `0x05`, `0x06`, `0x07`,
436	`0x08`, `0x09`, `0x0a`, `0x0b`, `0x0c`, `0x0d`, `0x0e`, `0x0f`,
437	`0x10`, `0x11`, `0x12`, `0x13`, `0x14`, `0x15`, `0x16`, `0x17`,
438	`0x18`, `0x19`, `0x1a`, `0x1b`, `0x1c`, `0x1d`, `0x1e`, `0x1f`,
439	`0x7f`, `0x00` / null-terminate /
440	};
441	size_t n = strlen(part);
442	size_t nfine = strcspn(part, badbytes);
443	if(nfine != n)
444	/ since we don't know which part is scanned, return a generic error*
445	code /*
446	return TRUE;
447	if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, `' '`))
448	return TRUE;
449	}
450	return FALSE;
451	}
452
453	/*
454	* parse_hostname_login()
455	*
456	* Parse the login details (user name, password and options) from the URL and
457	* strip them out of the host name
458	*
459	*/
460	static CURLUcode parse_hostname_login(struct Curl_URL *u,
461	char **hostname,
462	unsigned int flags)
463	{
464	CURLUcode result = CURLUE_OK;
465	CURLcode ccode;
466	char *userp = NULL;
467	char *passwdp = NULL;
468	char *optionsp = NULL;
469	const struct Curl_handler *h = NULL;
470
471	/ At this point, we're hoping all the other special cases have*
472	* been taken care of, so conn->host.name is at most
473	* [user[:password][;options]]@]hostname
474	*
475	* We need somewhere to put the embedded details, so do that first.
476	*/
477
478	char ptr = strchr(hostname, `'@'`);
479	char login = hostname;
480
481	if(!ptr)
482	goto out;
483
484	/ We will now try to extract the*
485	* possible login information in a string like:
486	* ftp://user:[email protected]:8021/README */
487	*hostname = ++ptr;
488
489	/ if this is a known scheme, get some details /
490	if(u->scheme)
491	h = Curl_builtin_scheme(u->scheme);
492
493	/ We could use the login information in the URL so extract it. Only parse*
494	options if the handler says we should. Note that 'h' might be NULL! /*
495	ccode = Curl_parse_login_details(login, ptr - login - `1`,
496	&userp, &passwdp,
497	(h && (h->flags & PROTOPT_URLOPTIONS)) ?
498	&optionsp:NULL);
499	if(ccode) {
500	result = CURLUE_BAD_LOGIN;
501	goto out;
502	}
503
504	if(userp) {
505	if(flags & CURLU_DISALLOW_USER) {
506	/ Option DISALLOW_USER is set and url contains username. /
507	result = CURLUE_USER_NOT_ALLOWED;
508	goto out;
509	}
510	if(junkscan(userp, flags)) {
511	result = CURLUE_BAD_USER;
512	goto out;
513	}
514	u->user = userp;
515	}
516
517	if(passwdp) {
518	if(junkscan(passwdp, flags)) {
519	result = CURLUE_BAD_PASSWORD;
520	goto out;
521	}
522	u->password = passwdp;
523	}
524
525	if(optionsp) {
526	if(junkscan(optionsp, flags)) {
527	result = CURLUE_BAD_LOGIN;
528	goto out;
529	}
530	u->options = optionsp;
531	}
532
533	return CURLUE_OK;
534	out:
535
536	free(userp);
537	free(passwdp);
538	free(optionsp);
539	u->user = NULL;
540	u->password = NULL;
541	u->options = NULL;
542
543	return result;
544	}
545
546	UNITTEST CURLUcode Curl_parse_port(struct Curl_URL u, char* *hostname,
547	bool has_scheme)
548	{
549	char *portptr = NULL;
550	char endbracket;
551	int len;
552
553	/*
554	* Find the end of an IPv6 address, either on the ']' ending bracket or
555	* a percent-encoded zone index.
556	*/
557	if(`1` == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
558	&endbracket, &len)) {
559	if(`']'` == endbracket)
560	portptr = &hostname[len];
561	else if(`'%'` == endbracket) {
562	int zonelen = len;
563	if(`1` == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
564	if(`']'` != endbracket)
565	return CURLUE_BAD_IPV6;
566	portptr = &hostname[--zonelen + len + `1`];
567	}
568	else
569	return CURLUE_BAD_IPV6;
570	}
571	else
572	return CURLUE_BAD_IPV6;
573
574	/ this is a RFC2732-style specified IP-address /
575	if(portptr && *portptr) {
576	if(*portptr != `':'`)
577	return CURLUE_BAD_IPV6;
578	}
579	else
580	portptr = NULL;
581	}
582	else
583	portptr = strchr(hostname, `':'`);
584
585	if(portptr) {
586	char *rest;
587	long port;
588	char portbuf[`7`];
589
590	/ Browser behavior adaptation. If there's a colon with no digits after,*
591	just cut off the name there which makes us ignore the colon and just
592	use the default port. Firefox, Chrome and Safari all do that.
593
594	Don't do it if the URL has no scheme, to make something that looks like
595	a scheme not work!
596	*/
597	if(!portptr[`1`]) {
598	*portptr = `'\0'`;
599	return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
600	}
601
602	if(!ISDIGIT(portptr[`1`]))
603	return CURLUE_BAD_PORT_NUMBER;
604
605	port = strtol(portptr + `1`, &rest, `10`); / Port number must be decimal /
606
607	if(port > `0xffff`)
608	return CURLUE_BAD_PORT_NUMBER;
609
610	if(rest[`0`])
611	return CURLUE_BAD_PORT_NUMBER;
612
613	portptr++ = `'\0'`; /* cut off the name there /
614	*rest = `0`;
615	/ generate a new port number string to get rid of leading zeroes etc /
616	msnprintf(portbuf, sizeof(portbuf), "%ld", port);
617	u->portnum = port;
618	u->port = strdup(portbuf);
619	if(!u->port)
620	return CURLUE_OUT_OF_MEMORY;
621	}
622
623	return CURLUE_OK;
624	}
625
626	static CURLUcode hostname_check(struct Curl_URL u, char* *hostname)
627	{
628	size_t len;
629	size_t hlen = strlen(hostname);
630
631	if(hostname[`0`] == `'['`) {
632	const char *l = "0123456789abcdefABCDEF:.";
633	if(hlen < `4`) / '[::]' is the shortest possible valid string /
634	return CURLUE_BAD_IPV6;
635	hostname++;
636	hlen -= `2`;
637
638	if(hostname[hlen] != `']'`)
639	return CURLUE_BAD_IPV6;
640
641	/ only valid letters are ok /
642	len = strspn(hostname, l);
643	if(hlen != len) {
644	hlen = len;
645	if(hostname[len] == `'%'`) {
646	/ this could now be '%[zone id]' /
647	char zoneid[`16`];
648	int i = `0`;
649	char *h = &hostname[len + `1`];
650	/ pass '25' if present and is a url encoded percent sign /
651	if(!strncmp(h, "25", `2`) && h[`2`] && (h[`2`] != `']'`))
652	h += `2`;
653	while(h && (h != `']'`) && (i < `15`))
654	zoneid[i++] = *h++;
655	if(!i \|\| (`']'` != *h))
656	/ impossible to reach? /
657	return CURLUE_MALFORMED_INPUT;
658	zoneid[i] = `0`;
659	u->zoneid = strdup(zoneid);
660	if(!u->zoneid)
661	return CURLUE_OUT_OF_MEMORY;
662	hostname[len] = `']'`; / insert end bracket /
663	hostname[len + `1`] = `0`; / terminate the hostname /
664	}
665	else
666	return CURLUE_BAD_IPV6;
667	/ hostname is fine /
668	}
669	#ifdef ENABLE_IPV6
670	{
671	char dest[`16`]; / fits a binary IPv6 address /
672	char norm[MAX_IPADR_LEN];
673	hostname[hlen] = `0`; / end the address there /
674	if(`1` != Curl_inet_pton(AF_INET6, hostname, dest))
675	return CURLUE_BAD_IPV6;
676
677	/ check if it can be done shorter /
678	if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
679	(strlen(norm) < hlen)) {
680	strcpy(hostname, norm);
681	hlen = strlen(norm);
682	hostname[hlen + `1`] = `0`;
683	}
684	hostname[hlen] = `']'`; / restore ending bracket /
685	}
686	#endif
687	}
688	else {
689	/ letters from the second string are not ok /
690	len = strcspn(hostname, " \r\n\t/:#?!@");
691	if(hlen != len)
692	/ hostname with bad content /
693	return CURLUE_BAD_HOSTNAME;
694	}
695	if(!hostname[`0`])
696	return CURLUE_NO_HOST;
697	return CURLUE_OK;
698	}
699
/* TRUE for a character that ends the host name part of a URL */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
701
702	/*
703	* Handle partial IPv4 numerical addresses and different bases, like
704	* '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
705	*
706	* If the given input string is syntactically wrong or any part for example is
707	* too big, this function returns FALSE and doesn't create any output.
708	*
709	* Output the "normalized" version of that input string in plain quad decimal
710	* integers and return TRUE.
711	*/
712	static bool ipv4_normalize(const char hostname, char* *outp, size_t olen)
713	{
714	bool done = FALSE;
715	int n = `0`;
716	const char *c = hostname;
717	unsigned long parts[`4`] = {`0`, `0`, `0`, `0`};
718
719	while(!done) {
720	char *endp;
721	unsigned long l;
722	if((c < `'0'`) \|\| (c > `'9'`))
723	/ most importantly this doesn't allow a leading plus or minus /
724	return FALSE;
725	l = strtoul(c, &endp, `0`);
726
727	/ overflow or nothing parsed at all /
728	if(((l == ULONG_MAX) && (errno == ERANGE)) \|\| (endp == c))
729	return FALSE;
730
731	#if SIZEOF_LONG > 4
732	/ a value larger than 32 bits /
733	if(l > UINT_MAX)
734	return FALSE;
735	#endif
736
737	parts[n] = l;
738	c = endp;
739
740	switch (*c) {
741	case `'.'` :
742	if(n == `3`)
743	return FALSE;
744	n++;
745	c++;
746	break;
747
748	case `'\0'`:
749	done = TRUE;
750	break;
751
752	default:
753	return FALSE;
754	}
755	}
756
757	/ this is deemed a valid IPv4 numerical address /
758
759	switch(n) {
760	case `0`: / a -- 32 bits /
761	msnprintf(outp, olen, "%u.%u.%u.%u",
762	parts[`0`] >> `24`, (parts[`0`] >> `16`) & `0xff`,
763	(parts[`0`] >> `8`) & `0xff`, parts[`0`] & `0xff`);
764	break;
765	case `1`: / a.b -- 8.24 bits /
766	if((parts[`0`] > `0xff`) \|\| (parts[`1`] > `0xffffff`))
767	return FALSE;
768	msnprintf(outp, olen, "%u.%u.%u.%u",
769	parts[`0`], (parts[`1`] >> `16`) & `0xff`,
770	(parts[`1`] >> `8`) & `0xff`, parts[`1`] & `0xff`);
771	break;
772	case `2`: / a.b.c -- 8.8.16 bits /
773	if((parts[`0`] > `0xff`) \|\| (parts[`1`] > `0xff`) \|\| (parts[`2`] > `0xffff`))
774	return FALSE;
775	msnprintf(outp, olen, "%u.%u.%u.%u",
776	parts[`0`], parts[`1`], (parts[`2`] >> `8`) & `0xff`,
777	parts[`2`] & `0xff`);
778	break;
779	case `3`: / a.b.c.d -- 8.8.8.8 bits /
780	if((parts[`0`] > `0xff`) \|\| (parts[`1`] > `0xff`) \|\| (parts[`2`] > `0xff`) \|\|
781	(parts[`3`] > `0xff`))
782	return FALSE;
783	msnprintf(outp, olen, "%u.%u.%u.%u",
784	parts[`0`], parts[`1`], parts[`2`], parts[`3`]);
785	break;
786	}
787	return TRUE;
788	}
789
790	/ return strdup'ed version in 'outp', possibly percent decoded /
791	static CURLUcode decode_host(char hostname, char* **outp)
792	{
793	char *per = NULL;
794	if(hostname[`0`] != `'['`)
795	/ only decode if not an ipv6 numerical /
796	per = strchr(hostname, `'%'`);
797	if(!per) {
798	*outp = strdup(hostname);
799	if(!*outp)
800	return CURLUE_OUT_OF_MEMORY;
801	}
802	else {
803	/ might be encoded /
804	size_t dlen;
805	CURLcode result = Curl_urldecode(hostname, `0`, outp, &dlen, REJECT_CTRL);
806	if(result)
807	return CURLUE_BAD_HOSTNAME;
808	}
809
810	return CURLUE_OK;
811	}
812
813	static CURLUcode seturl(const char url, CURLU u, unsigned int flags)
814	{
815	char *path;
816	bool path_alloced = FALSE;
817	bool uncpath = FALSE;
818	char *hostname;
819	char *query = NULL;
820	char *fragment = NULL;
821	CURLUcode result;
822	bool url_has_scheme = FALSE;
823	char schemebuf[MAX_SCHEME_LEN + `1`];
824	const char *schemep = NULL;
825	size_t schemelen = `0`;
826	size_t urllen;
827
828	DEBUGASSERT(url);
829
830	/*************************************************************
831	* Parse the URL.
832	************************************************************/
833	/ allocate scratch area /
834	urllen = strlen(url);
835	if(urllen > CURL_MAX_INPUT_LENGTH)
836	/ excessive input length /
837	return CURLUE_MALFORMED_INPUT;
838
839	path = u->scratch = malloc(urllen * `2` + `2`);
840	if(!path)
841	return CURLUE_OUT_OF_MEMORY;
842
843	hostname = &path[urllen + `1`];
844	hostname[`0`] = `0`;
845
846	if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
847	url_has_scheme = TRUE;
848	schemelen = strlen(schemebuf);
849	}
850
851	/ handle the file: scheme /
852	if(url_has_scheme && !strcmp(schemebuf, "file")) {
853	if(urllen <= `6`)
854	/ file:/ is not enough to actually be a complete file: URL /
855	return CURLUE_BAD_FILE_URL;
856
857	/ path has been allocated large enough to hold this /
858	strcpy(path, &url[`5`]);
859
860	u->scheme = strdup("file");
861	if(!u->scheme)
862	return CURLUE_OUT_OF_MEMORY;
863
864	/ Extra handling URLs with an authority component (i.e. that start with*
865	* "file://")
866	*
867	* We allow omitted hostname (e.g. file:/<path>) -- valid according to
868	* RFC 8089, but not the (current) WHAT-WG URL spec.
869	*/
870	if(path[`0`] == `'/'` && path[`1`] == `'/'`) {
871	/ swallow the two slashes /
872	char *ptr = &path[`2`];
873
874	/*
875	* According to RFC 8089, a file: URL can be reliably dereferenced if:
876	*
877	* o it has no/blank hostname, or
878	*
879	* o the hostname matches "localhost" (case-insensitively), or
880	*
881	* o the hostname is a FQDN that resolves to this machine, or
882	*
883	* o it is an UNC String transformed to an URI (Windows only, RFC 8089
884	* Appendix E.3).
885	*
886	* For brevity, we only consider URLs with empty, "localhost", or
887	* "127.0.0.1" hostnames as local, otherwise as an UNC String.
888	*
889	* Additionally, there is an exception for URLs with a Windows drive
890	* letter in the authority (which was accidentally omitted from RFC 8089
891	* Appendix E, but believe me, it was meant to be there. --MK)
892	*/
893	if(ptr[`0`] != `'/'` && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
894	/ the URL includes a host name, it must match "localhost" or*
895	"127.0.0.1" to be valid /*
896	if(checkprefix("localhost/", ptr) \|\|
897	checkprefix("127.0.0.1/", ptr)) {
898	ptr += `9`; / now points to the slash after the host /
899	}
900	else {
901	#if defined(WIN32)
902	size_t len;
903
904	/ the host name, NetBIOS computer name, can not contain disallowed*
905	chars, and the delimiting slash character must be appended to the
906	host name /*
907	path = strpbrk(ptr, "/\\:*?\"<>\|");
908	if(!path \|\| *path != `'/'`)
909	return CURLUE_BAD_FILE_URL;
910
911	len = path - ptr;
912	if(len) {
913	memcpy(hostname, ptr, len);
914	hostname[len] = `0`;
915	uncpath = TRUE;
916	}
917
918	ptr -= `2`; / now points to the // before the host in UNC /
919	#else
920	/ Invalid file://hostname/, expected localhost or 127.0.0.1 or*
921	none /*
922	return CURLUE_BAD_FILE_URL;
923	#endif
924	}
925	}
926
927	path = ptr;
928	}
929
930	if(!uncpath)
931	hostname = NULL; / no host for file: URLs by default /
932
933	#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
934	/ Don't allow Windows drive letters when not in Windows.*
935	* This catches both "file:/c:" and "file:c:" */
936	if((`'/'` == path[`0`] && STARTS_WITH_URL_DRIVE_PREFIX(&path[`1`])) \|\|
937	STARTS_WITH_URL_DRIVE_PREFIX(path)) {
938	/ File drive letters are only accepted in MSDOS/Windows /
939	return CURLUE_BAD_FILE_URL;
940	}
941	#else
942	/ If the path starts with a slash and a drive letter, ditch the slash /
943	if(`'/'` == path[`0`] && STARTS_WITH_URL_DRIVE_PREFIX(&path[`1`])) {
944	/ This cannot be done with strcpy, as the memory chunks overlap! /
945	memmove(path, &path[`1`], strlen(&path[`1`]) + `1`);
946	}
947	#endif
948
949	}
950	else {
951	/ clear path /
952	const char *p;
953	const char *hostp;
954	size_t len;
955	path[`0`] = `0`;
956
957	if(url_has_scheme) {
958	int i = `0`;
959	p = &url[schemelen + `1`];
960	while(p && (*p == `'/'`) && (i < `4`)) {
961	p++;
962	i++;
963	}
964	if((i < `1`) \|\| (i>`3`))
965	/ less than one or more than three slashes /
966	return CURLUE_BAD_SLASHES;
967
968	schemep = schemebuf;
969	if(!Curl_builtin_scheme(schemep) &&
970	!(flags & CURLU_NON_SUPPORT_SCHEME))
971	return CURLUE_UNSUPPORTED_SCHEME;
972
973	if(junkscan(schemep, flags))
974	return CURLUE_BAD_SCHEME;
975	}
976	else {
977	/ no scheme! /
978
979	if(!(flags & (CURLU_DEFAULT_SCHEME\|CURLU_GUESS_SCHEME)))
980	return CURLUE_BAD_SCHEME;
981	if(flags & CURLU_DEFAULT_SCHEME)
982	schemep = DEFAULT_SCHEME;
983
984	/*
985	* The URL was badly formatted, let's try without scheme specified.
986	*/
987	p = url;
988	}
989	hostp = p; / host name starts here /
990
991	/ find the end of the host name + port number /
992	while(p && !HOSTNAME_END(p))
993	p++;
994
995	len = p - hostp;
996	if(len) {
997	memcpy(hostname, hostp, len);
998	hostname[len] = `0`;
999	}
1000	else {
1001	if(!(flags & CURLU_NO_AUTHORITY))
1002	return CURLUE_NO_HOST;
1003	}
1004
1005	strcpy(path, p);
1006
1007	if(schemep) {
1008	u->scheme = strdup(schemep);
1009	if(!u->scheme)
1010	return CURLUE_OUT_OF_MEMORY;
1011	}
1012	}
1013
1014	if((flags & CURLU_URLENCODE) && path[`0`]) {
1015	/ worst case output length is 3x the original! /
1016	char newp = malloc(strlen(path) `3`);
1017	if(!newp)
1018	return CURLUE_OUT_OF_MEMORY;
1019	path_alloced = TRUE;
1020	strcpy_url(newp, path, TRUE); / consider it relative /
1021	u->temppath = path = newp;
1022	}
1023
1024	fragment = strchr(path, `'#'`);
1025	if(fragment) {
1026	*fragment++ = `0`;
1027	if(junkscan(fragment, flags))
1028	return CURLUE_BAD_FRAGMENT;
1029	if(fragment[`0`]) {
1030	u->fragment = strdup(fragment);
1031	if(!u->fragment)
1032	return CURLUE_OUT_OF_MEMORY;
1033	}
1034	}
1035
1036	query = strchr(path, `'?'`);
1037	if(query) {
1038	*query++ = `0`;
1039	if(junkscan(query, flags))
1040	return CURLUE_BAD_QUERY;
1041	/ done even if the query part is a blank string /
1042	u->query = strdup(query);
1043	if(!u->query)
1044	return CURLUE_OUT_OF_MEMORY;
1045	}
1046
1047	if(junkscan(path, flags))
1048	return CURLUE_BAD_PATH;
1049
1050	if(!path[`0`])
1051	/ if there's no path left set, unset /
1052	path = NULL;
1053	else {
1054	if(!(flags & CURLU_PATH_AS_IS)) {
1055	/ remove ../ and ./ sequences according to RFC3986 /
1056	char *newp = Curl_dedotdotify(path);
1057	if(!newp)
1058	return CURLUE_OUT_OF_MEMORY;
1059
1060	if(strcmp(newp, path)) {
1061	/ if we got a new version /
1062	if(path_alloced)
1063	Curl_safefree(u->temppath);
1064	u->temppath = path = newp;
1065	path_alloced = TRUE;
1066	}
1067	else
1068	free(newp);
1069	}
1070
1071	u->path = path_alloced?path:strdup(path);
1072	if(!u->path)
1073	return CURLUE_OUT_OF_MEMORY;
1074	u->temppath = NULL; / used now /
1075	}
1076
1077	if(hostname) {
1078	char normalized_ipv4[sizeof("255.255.255.255") + `1`];
1079
1080	/*
1081	* Parse the login details and strip them out of the host name.
1082	*/
1083	result = parse_hostname_login(u, &hostname, flags);
1084	if(result)
1085	return result;
1086
1087	result = Curl_parse_port(u, hostname, url_has_scheme);
1088	if(result)
1089	return result;
1090
1091	if(junkscan(hostname, flags))
1092	return CURLUE_BAD_HOSTNAME;
1093
1094	if(`0` == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
1095	/ Skip hostname check, it's allowed to be empty. /
1096	u->host = strdup("");
1097	}
1098	else {
1099	if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
1100	u->host = strdup(normalized_ipv4);
1101	else {
1102	result = decode_host(hostname, &u->host);
1103	if(result)
1104	return result;
1105	result = hostname_check(u, u->host);
1106	if(result)
1107	return result;
1108	}
1109	}
1110	if(!u->host)
1111	return CURLUE_OUT_OF_MEMORY;
1112	if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1113	/ legacy curl-style guess based on host name /
1114	if(checkprefix("ftp.", hostname))
1115	schemep = "ftp";
1116	else if(checkprefix("dict.", hostname))
1117	schemep = "dict";
1118	else if(checkprefix("ldap.", hostname))
1119	schemep = "ldap";
1120	else if(checkprefix("imap.", hostname))
1121	schemep = "imap";
1122	else if(checkprefix("smtp.", hostname))
1123	schemep = "smtp";
1124	else if(checkprefix("pop3.", hostname))
1125	schemep = "pop3";
1126	else
1127	schemep = "http";
1128
1129	u->scheme = strdup(schemep);
1130	if(!u->scheme)
1131	return CURLUE_OUT_OF_MEMORY;
1132	}
1133	}
1134
1135	Curl_safefree(u->scratch);
1136	Curl_safefree(u->temppath);
1137
1138	return CURLUE_OK;
1139	}
1140
1141	/*
1142	* Parse the URL and set the relevant members of the Curl_URL struct.
1143	*/
1144	static CURLUcode parseurl(const char url, CURLU u, unsigned int flags)
1145	{
1146	CURLUcode result = seturl(url, u, flags);
1147	if(result) {
1148	free_urlhandle(u);
1149	memset(u, `0`, sizeof(struct Curl_URL));
1150	}
1151	return result;
1152	}
1153
1154	/*
1155	* Parse the URL and, if successful, replace everything in the Curl_URL struct.
1156	*/
1157	static CURLUcode parseurl_and_replace(const char url, CURLU u,
1158	unsigned int flags)
1159	{
1160	CURLUcode result;
1161	CURLU tmpurl;
1162	memset(&tmpurl, `0`, sizeof(tmpurl));
1163	result = parseurl(url, &tmpurl, flags);
1164	if(!result) {
1165	free_urlhandle(u);
1166	*u = tmpurl;
1167	}
1168	else
1169	free_urlhandle(&tmpurl);
1170	return result;
1171	}
1172
1173	/*
1174	*/
1175	CURLU curl_url(void*)
1176	{
1177	return calloc(sizeof(struct Curl_URL), `1`);
1178	}
1179
1180	void curl_url_cleanup(CURLU *u)
1181	{
1182	if(u) {
1183	free_urlhandle(u);
1184	free(u);
1185	}
1186	}
1187
/*
 * DUP() copies the string member 'name' from the struct pointed to by 'src'
 * into the struct pointed to by 'dest', when the source member is set. A
 * member that is NULL in 'src' is left untouched in 'dest'.
 *
 * On allocation failure it jumps to a label named 'fail', which the function
 * using the macro must provide.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1196
1197	CURLU curl_url_dup(CURLU in)
1198	{
1199	struct Curl_URL u = calloc(sizeof(struct* Curl_URL), `1`);
1200	if(u) {
1201	DUP(u, in, scheme);
1202	DUP(u, in, user);
1203	DUP(u, in, password);
1204	DUP(u, in, options);
1205	DUP(u, in, host);
1206	DUP(u, in, port);
1207	DUP(u, in, path);
1208	DUP(u, in, query);
1209	DUP(u, in, fragment);
1210	u->portnum = in->portnum;
1211	}
1212	return u;
1213	fail:
1214	curl_url_cleanup(u);
1215	return NULL;
1216	}
1217
1218	CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1219	char *part, unsigned* int flags)
1220	{
1221	char *ptr;
1222	CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1223	char portbuf[`7`];
1224	bool urldecode = (flags & CURLU_URLDECODE)?`1`:`0`;
1225	bool urlencode = (flags & CURLU_URLENCODE)?`1`:`0`;
1226	bool plusdecode = FALSE;
1227	(void)flags;
1228	if(!u)
1229	return CURLUE_BAD_HANDLE;
1230	if(!part)
1231	return CURLUE_BAD_PARTPOINTER;
1232	*part = NULL;
1233
1234	switch(what) {
1235	case CURLUPART_SCHEME:
1236	ptr = u->scheme;
1237	ifmissing = CURLUE_NO_SCHEME;
1238	urldecode = FALSE; / never for schemes /
1239	break;
1240	case CURLUPART_USER:
1241	ptr = u->user;
1242	ifmissing = CURLUE_NO_USER;
1243	break;
1244	case CURLUPART_PASSWORD:
1245	ptr = u->password;
1246	ifmissing = CURLUE_NO_PASSWORD;
1247	break;
1248	case CURLUPART_OPTIONS:
1249	ptr = u->options;
1250	ifmissing = CURLUE_NO_OPTIONS;
1251	break;
1252	case CURLUPART_HOST:
1253	ptr = u->host;
1254	ifmissing = CURLUE_NO_HOST;
1255	break;
1256	case CURLUPART_ZONEID:
1257	ptr = u->zoneid;
1258	ifmissing = CURLUE_NO_ZONEID;
1259	break;
1260	case CURLUPART_PORT:
1261	ptr = u->port;
1262	ifmissing = CURLUE_NO_PORT;
1263	urldecode = FALSE; / never for port /
1264	if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1265	/ there's no stored port number, but asked to deliver*
1266	a default one for the scheme /*
1267	const struct Curl_handler *h =
1268	Curl_builtin_scheme(u->scheme);
1269	if(h) {
1270	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1271	ptr = portbuf;
1272	}
1273	}
1274	else if(ptr && u->scheme) {
1275	/ there is a stored port number, but ask to inhibit if*
1276	it matches the default one for the scheme /*
1277	const struct Curl_handler *h =
1278	Curl_builtin_scheme(u->scheme);
1279	if(h && (h->defport == u->portnum) &&
1280	(flags & CURLU_NO_DEFAULT_PORT))
1281	ptr = NULL;
1282	}
1283	break;
1284	case CURLUPART_PATH:
1285	ptr = u->path;
1286	if(!ptr) {
1287	ptr = u->path = strdup("/");
1288	if(!u->path)
1289	return CURLUE_OUT_OF_MEMORY;
1290	}
1291	break;
1292	case CURLUPART_QUERY:
1293	ptr = u->query;
1294	ifmissing = CURLUE_NO_QUERY;
1295	plusdecode = urldecode;
1296	break;
1297	case CURLUPART_FRAGMENT:
1298	ptr = u->fragment;
1299	ifmissing = CURLUE_NO_FRAGMENT;
1300	break;
1301	case CURLUPART_URL: {
1302	char *url;
1303	char *scheme;
1304	char *options = u->options;
1305	char *port = u->port;
1306	char *allochost = NULL;
1307	if(u->scheme && strcasecompare("file", u->scheme)) {
1308	url = aprintf("file://%s%s%s",
1309	u->path,
1310	u->fragment? "#": "",
1311	u->fragment? u->fragment : "");
1312	}
1313	else if(!u->host)
1314	return CURLUE_NO_HOST;
1315	else {
1316	const struct Curl_handler *h = NULL;
1317	if(u->scheme)
1318	scheme = u->scheme;
1319	else if(flags & CURLU_DEFAULT_SCHEME)
1320	scheme = (char *) DEFAULT_SCHEME;
1321	else
1322	return CURLUE_NO_SCHEME;
1323
1324	h = Curl_builtin_scheme(scheme);
1325	if(!port && (flags & CURLU_DEFAULT_PORT)) {
1326	/ there's no stored port number, but asked to deliver*
1327	a default one for the scheme /*
1328	if(h) {
1329	msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1330	port = portbuf;
1331	}
1332	}
1333	else if(port) {
1334	/ there is a stored port number, but asked to inhibit if it matches*
1335	the default one for the scheme /*
1336	if(h && (h->defport == u->portnum) &&
1337	(flags & CURLU_NO_DEFAULT_PORT))
1338	port = NULL;
1339	}
1340
1341	if(h && !(h->flags & PROTOPT_URLOPTIONS))
1342	options = NULL;
1343
1344	if(u->host[`0`] == `'['`) {
1345	if(u->zoneid) {
1346	/ make it '[ host %25 zoneid ]' /
1347	size_t hostlen = strlen(u->host);
1348	size_t alen = hostlen + `3` + strlen(u->zoneid) + `1`;
1349	allochost = malloc(alen);
1350	if(!allochost)
1351	return CURLUE_OUT_OF_MEMORY;
1352	memcpy(allochost, u->host, hostlen - `1`);
1353	msnprintf(&allochost[hostlen - `1`], alen - hostlen + `1`,
1354	"%%25%s]", u->zoneid);
1355	}
1356	}
1357	else if(urlencode) {
1358	allochost = curl_easy_escape(NULL, u->host, `0`);
1359	if(!allochost)
1360	return CURLUE_OUT_OF_MEMORY;
1361	}
1362	else {
1363	/ only encode '%' in output host name /
1364	char *host = u->host;
1365	size_t pcount = `0`;
1366	/ first, count number of percents present in the name /
1367	while(*host) {
1368	if(*host == `'%'`)
1369	pcount++;
1370	host++;
1371	}
1372	/ if there were percents, encode the host name /
1373	if(pcount) {
1374	size_t hostlen = strlen(u->host);
1375	size_t alen = hostlen + `2` * pcount + `1`;
1376	char *o = allochost = malloc(alen);
1377	if(!allochost)
1378	return CURLUE_OUT_OF_MEMORY;
1379
1380	host = u->host;
1381	while(*host) {
1382	if(*host == `'%'`) {
1383	memcpy(o, "%25", `3`);
1384	o += `3`;
1385	host++;
1386	continue;
1387	}
1388	o++ = host++;
1389	}
1390	*o = `'\0'`;
1391	}
1392	}
1393
1394	url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1395	scheme,
1396	u->user ? u->user : "",
1397	u->password ? ":": "",
1398	u->password ? u->password : "",
1399	options ? ";" : "",
1400	options ? options : "",
1401	(u->user \|\| u->password \|\| options) ? "@": "",
1402	allochost ? allochost : u->host,
1403	port ? ":": "",
1404	port ? port : "",
1405	(u->path && (u->path[`0`] != `'/'`)) ? "/": "",
1406	u->path ? u->path : "/",
1407	(u->query && u->query[`0`]) ? "?": "",
1408	(u->query && u->query[`0`]) ? u->query : "",
1409	u->fragment? "#": "",
1410	u->fragment? u->fragment : "");
1411	free(allochost);
1412	}
1413	if(!url)
1414	return CURLUE_OUT_OF_MEMORY;
1415	*part = url;
1416	return CURLUE_OK;
1417	}
1418	default:
1419	ptr = NULL;
1420	break;
1421	}
1422	if(ptr) {
1423	*part = strdup(ptr);
1424	if(!*part)
1425	return CURLUE_OUT_OF_MEMORY;
1426	if(plusdecode) {
1427	/ convert + to space /
1428	char *plus;
1429	for(plus = part; plus; ++plus) {
1430	if(*plus == `'+'`)
1431	*plus = `' '`;
1432	}
1433	}
1434	if(urldecode) {
1435	char *decoded;
1436	size_t dlen;
1437	/ this unconditional rejection of control bytes is documented*
1438	API behavior /*
1439	CURLcode res = Curl_urldecode(*part, `0`, &decoded, &dlen, REJECT_CTRL);
1440	free(*part);
1441	if(res) {
1442	*part = NULL;
1443	return CURLUE_URLDECODE;
1444	}
1445	*part = decoded;
1446	}
1447	if(urlencode) {
1448	/ worst case output length is 3x the original! /
1449	char newp = malloc(strlen(part) * `3`);
1450	if(!newp)
1451	return CURLUE_OUT_OF_MEMORY;
1452	if(strcpy_url(newp, part, TRUE)) { /* consider it relative /
1453	free(*part);
1454	*part = newp;
1455	}
1456	else
1457	free(newp);
1458	}
1459
1460	return CURLUE_OK;
1461	}
1462	else
1463	return ifmissing;
1464	}
1465
1466	CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1467	const char part, unsigned* int flags)
1468	{
1469	char **storep = NULL;
1470	long port = `0`;
1471	bool urlencode = (flags & CURLU_URLENCODE)? `1` : `0`;
1472	bool plusencode = FALSE;
1473	bool urlskipslash = FALSE;
1474	bool appendquery = FALSE;
1475	bool equalsencode = FALSE;
1476
1477	if(!u)
1478	return CURLUE_BAD_HANDLE;
1479	if(!part) {
1480	/ setting a part to NULL clears it /
1481	switch(what) {
1482	case CURLUPART_URL:
1483	break;
1484	case CURLUPART_SCHEME:
1485	storep = &u->scheme;
1486	break;
1487	case CURLUPART_USER:
1488	storep = &u->user;
1489	break;
1490	case CURLUPART_PASSWORD:
1491	storep = &u->password;
1492	break;
1493	case CURLUPART_OPTIONS:
1494	storep = &u->options;
1495	break;
1496	case CURLUPART_HOST:
1497	storep = &u->host;
1498	break;
1499	case CURLUPART_ZONEID:
1500	storep = &u->zoneid;
1501	break;
1502	case CURLUPART_PORT:
1503	u->portnum = `0`;
1504	storep = &u->port;
1505	break;
1506	case CURLUPART_PATH:
1507	storep = &u->path;
1508	break;
1509	case CURLUPART_QUERY:
1510	storep = &u->query;
1511	break;
1512	case CURLUPART_FRAGMENT:
1513	storep = &u->fragment;
1514	break;
1515	default:
1516	return CURLUE_UNKNOWN_PART;
1517	}
1518	if(storep && *storep) {
1519	Curl_safefree(*storep);
1520	}
1521	else if(!storep) {
1522	free_urlhandle(u);
1523	memset(u, `0`, sizeof(struct Curl_URL));
1524	}
1525	return CURLUE_OK;
1526	}
1527
1528	switch(what) {
1529	case CURLUPART_SCHEME:
1530	if(strlen(part) > MAX_SCHEME_LEN)
1531	/ too long /
1532	return CURLUE_BAD_SCHEME;
1533	if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1534	/ verify that it is a fine scheme /
1535	!Curl_builtin_scheme(part))
1536	return CURLUE_UNSUPPORTED_SCHEME;
1537	storep = &u->scheme;
1538	urlencode = FALSE; / never /
1539	break;
1540	case CURLUPART_USER:
1541	storep = &u->user;
1542	break;
1543	case CURLUPART_PASSWORD:
1544	storep = &u->password;
1545	break;
1546	case CURLUPART_OPTIONS:
1547	storep = &u->options;
1548	break;
1549	case CURLUPART_HOST: {
1550	size_t len = strcspn(part, " \r\n");
1551	if(strlen(part) != len)
1552	/ hostname with bad content /
1553	return CURLUE_BAD_HOSTNAME;
1554	storep = &u->host;
1555	Curl_safefree(u->zoneid);
1556	break;
1557	}
1558	case CURLUPART_ZONEID:
1559	storep = &u->zoneid;
1560	break;
1561	case CURLUPART_PORT:
1562	{
1563	char *endp;
1564	urlencode = FALSE; / never /
1565	port = strtol(part, &endp, `10`); / Port number must be decimal /
1566	if((port <= `0`) \|\| (port > `0xffff`))
1567	return CURLUE_BAD_PORT_NUMBER;
1568	if(*endp)
1569	/ weirdly provided number, not good! /
1570	return CURLUE_BAD_PORT_NUMBER;
1571	storep = &u->port;
1572	}
1573	break;
1574	case CURLUPART_PATH:
1575	urlskipslash = TRUE;
1576	storep = &u->path;
1577	break;
1578	case CURLUPART_QUERY:
1579	plusencode = urlencode;
1580	appendquery = (flags & CURLU_APPENDQUERY)?`1`:`0`;
1581	equalsencode = appendquery;
1582	storep = &u->query;
1583	break;
1584	case CURLUPART_FRAGMENT:
1585	storep = &u->fragment;
1586	break;
1587	case CURLUPART_URL: {
1588	/*
1589	* Allow a new URL to replace the existing (if any) contents.
1590	*
1591	* If the existing contents is enough for a URL, allow a relative URL to
1592	* replace it.
1593	*/
1594	CURLUcode result;
1595	char *oldurl;
1596	char *redired_url;
1597
1598	/ if the new thing is absolute or the old one is not*
1599	* (we could not get an absolute url in 'oldurl'),
1600	* then replace the existing with the new. */
1601	if(Curl_is_absolute_url(part, NULL, `0`)
1602	\|\| curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1603	return parseurl_and_replace(part, u, flags);
1604	}
1605
1606	/ apply the relative part to create a new URL*
1607	* and replace the existing one with it. */
1608	redired_url = concat_url(oldurl, part);
1609	free(oldurl);
1610	if(!redired_url)
1611	return CURLUE_OUT_OF_MEMORY;
1612
1613	result = parseurl_and_replace(redired_url, u, flags);
1614	free(redired_url);
1615	return result;
1616	}
1617	default:
1618	return CURLUE_UNKNOWN_PART;
1619	}
1620	DEBUGASSERT(storep);
1621	{
1622	const char *newp = part;
1623	size_t nalloc = strlen(part);
1624
1625	if(nalloc > CURL_MAX_INPUT_LENGTH)
1626	/ excessive input length /
1627	return CURLUE_MALFORMED_INPUT;
1628
1629	if(urlencode) {
1630	const unsigned char *i;
1631	char *o;
1632	char enc = malloc(nalloc `3` + `1`); / for worst case! /
1633	if(!enc)
1634	return CURLUE_OUT_OF_MEMORY;
1635	for(i = (const unsigned char )part, o = enc; i; i++) {
1636	if((*i == `' '`) && plusencode) {
1637	*o = `'+'`;
1638	o++;
1639	}
1640	else if(Curl_isunreserved(*i) \|\|
1641	((*i == `'/'`) && urlskipslash) \|\|
1642	((*i == `'='`) && equalsencode)) {
1643	if((*i == `'='`) && equalsencode)
1644	/ only skip the first equals sign /
1645	equalsencode = FALSE;
1646	o = i;
1647	o++;
1648	}
1649	else {
1650	msnprintf(o, `4`, "%%%02x", *i);
1651	o += `3`;
1652	}
1653	}
1654	o = `0`; /* null-terminate /
1655	newp = enc;
1656	}
1657	else {
1658	char *p;
1659	newp = strdup(part);
1660	if(!newp)
1661	return CURLUE_OUT_OF_MEMORY;
1662	p = (char *)newp;
1663	while(*p) {
1664	/ make sure percent encoded are lower case /
1665	if((*p == `'%'`) && ISXDIGIT(p[`1`]) && ISXDIGIT(p[`2`]) &&
1666	(ISUPPER(p[`1`]) \|\| ISUPPER(p[`2`]))) {
1667	p[`1`] = (char)TOLOWER(p[`1`]);
1668	p[`2`] = (char)TOLOWER(p[`2`]);
1669	p += `3`;
1670	}
1671	else
1672	p++;
1673	}
1674	}
1675
1676	if(appendquery) {
1677	/ Append the string onto the old query. Add a '&' separator if none is*
1678	present at the end of the exsting query already /*
1679	size_t querylen = u->query ? strlen(u->query) : `0`;
1680	bool addamperand = querylen && (u->query[querylen -`1`] != `'&'`);
1681	if(querylen) {
1682	size_t newplen = strlen(newp);
1683	char *p = malloc(querylen + addamperand + newplen + `1`);
1684	if(!p) {
1685	free((char *)newp);
1686	return CURLUE_OUT_OF_MEMORY;
1687	}
1688	strcpy(p, u->query); / original query /
1689	if(addamperand)
1690	p[querylen] = `'&'`; / ampersand /
1691	strcpy(&p[querylen + addamperand], newp); / new suffix /
1692	free((char *)newp);
1693	free(*storep);
1694	*storep = p;
1695	return CURLUE_OK;
1696	}
1697	}
1698
1699	if(what == CURLUPART_HOST) {
1700	if(`0` == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1701	/ Skip hostname check, it's allowed to be empty. /
1702	}
1703	else {
1704	if(hostname_check(u, (char *)newp)) {
1705	free((char *)newp);
1706	return CURLUE_BAD_HOSTNAME;
1707	}
1708	}
1709	}
1710
1711	free(*storep);
1712	storep = (char* *)newp;
1713	}
1714	/ set after the string, to make it not assigned if the allocation above*
1715	fails /*
1716	if(port)
1717	u->portnum = port;
1718	return CURLUE_OK;
1719	}
1720

Browse the source code of tensorflow/external/curl/lib/urlapi.c