1 | /* |
2 | * Copyright (c) Facebook, Inc. and its affiliates. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/Uri.h> |
18 | |
19 | #include <algorithm> |
20 | #include <cctype> |
21 | |
22 | #include <boost/regex.hpp> |
23 | |
24 | namespace folly { |
25 | |
26 | namespace { |
27 | |
28 | std::string submatch(const boost::cmatch& m, int idx) { |
29 | const auto& sub = m[idx]; |
30 | return std::string(sub.first, sub.second); |
31 | } |
32 | |
33 | } // namespace |
34 | |
35 | Uri::Uri(StringPiece str) : hasAuthority_(false), port_(0) { |
36 | static const boost::regex uriRegex( |
37 | "([a-zA-Z][a-zA-Z0-9+.-]*):" // scheme: |
38 | "([^?#]*)" // authority and path |
39 | "(?:\\?([^#]*))?" // ?query |
40 | "(?:#(.*))?" ); // #fragment |
41 | static const boost::regex authorityAndPathRegex("//([^/]*)(/.*)?" ); |
42 | |
43 | boost::cmatch match; |
44 | if (UNLIKELY(!boost::regex_match(str.begin(), str.end(), match, uriRegex))) { |
45 | throw std::invalid_argument(to<std::string>("invalid URI " , str)); |
46 | } |
47 | |
48 | scheme_ = submatch(match, 1); |
49 | std::transform(scheme_.begin(), scheme_.end(), scheme_.begin(), ::tolower); |
50 | |
51 | StringPiece authorityAndPath(match[2].first, match[2].second); |
52 | boost::cmatch authorityAndPathMatch; |
53 | if (!boost::regex_match( |
54 | authorityAndPath.begin(), |
55 | authorityAndPath.end(), |
56 | authorityAndPathMatch, |
57 | authorityAndPathRegex)) { |
58 | // Does not start with //, doesn't have authority |
59 | hasAuthority_ = false; |
60 | path_ = authorityAndPath.str(); |
61 | } else { |
62 | static const boost::regex authorityRegex( |
63 | "(?:([^@:]*)(?::([^@]*))?@)?" // username, password |
64 | "(\\[[^\\]]*\\]|[^\\[:]*)" // host (IP-literal (e.g. '['+IPv6+']', |
65 | // dotted-IPv4, or named host) |
66 | "(?::(\\d*))?" ); // port |
67 | |
68 | const auto authority = authorityAndPathMatch[1]; |
69 | boost::cmatch authorityMatch; |
70 | if (!boost::regex_match( |
71 | authority.first, |
72 | authority.second, |
73 | authorityMatch, |
74 | authorityRegex)) { |
75 | throw std::invalid_argument(to<std::string>( |
76 | "invalid URI authority " , |
77 | StringPiece(authority.first, authority.second))); |
78 | } |
79 | |
80 | StringPiece port(authorityMatch[4].first, authorityMatch[4].second); |
81 | if (!port.empty()) { |
82 | try { |
83 | port_ = to<uint16_t>(port); |
84 | } catch (ConversionError const& e) { |
85 | throw std::invalid_argument( |
86 | to<std::string>("invalid URI port: " , e.what())); |
87 | } |
88 | } |
89 | |
90 | hasAuthority_ = true; |
91 | username_ = submatch(authorityMatch, 1); |
92 | password_ = submatch(authorityMatch, 2); |
93 | host_ = submatch(authorityMatch, 3); |
94 | path_ = submatch(authorityAndPathMatch, 2); |
95 | } |
96 | |
97 | query_ = submatch(match, 3); |
98 | fragment_ = submatch(match, 4); |
99 | } |
100 | |
101 | std::string Uri::authority() const { |
102 | std::string result; |
103 | |
104 | // Port is 5 characters max and we have up to 3 delimiters. |
105 | result.reserve(host().size() + username().size() + password().size() + 8); |
106 | |
107 | if (!username().empty() || !password().empty()) { |
108 | result.append(username()); |
109 | |
110 | if (!password().empty()) { |
111 | result.push_back(':'); |
112 | result.append(password()); |
113 | } |
114 | |
115 | result.push_back('@'); |
116 | } |
117 | |
118 | result.append(host()); |
119 | |
120 | if (port() != 0) { |
121 | result.push_back(':'); |
122 | toAppend(port(), &result); |
123 | } |
124 | |
125 | return result; |
126 | } |
127 | |
128 | std::string Uri::hostname() const { |
129 | if (!host_.empty() && host_[0] == '[') { |
130 | // If it starts with '[', then it should end with ']', this is ensured by |
131 | // regex |
132 | return host_.substr(1, host_.size() - 2); |
133 | } |
134 | return host_; |
135 | } |
136 | |
137 | const std::vector<std::pair<std::string, std::string>>& Uri::getQueryParams() { |
138 | if (!query_.empty() && queryParams_.empty()) { |
139 | // Parse query string |
140 | static const boost::regex queryParamRegex( |
141 | "(^|&)" /*start of query or start of parameter "&"*/ |
142 | "([^=&]*)=?" /*parameter name and "=" if value is expected*/ |
143 | "([^=&]*)" /*parameter value*/ |
144 | "(?=(&|$))" /*forward reference, next should be end of query or |
145 | start of next parameter*/); |
146 | const boost::cregex_iterator paramBeginItr( |
147 | query_.data(), query_.data() + query_.size(), queryParamRegex); |
148 | boost::cregex_iterator paramEndItr; |
149 | for (auto itr = paramBeginItr; itr != paramEndItr; ++itr) { |
150 | if (itr->length(2) == 0) { |
151 | // key is empty, ignore it |
152 | continue; |
153 | } |
154 | queryParams_.emplace_back( |
155 | std::string((*itr)[2].first, (*itr)[2].second), // parameter name |
156 | std::string((*itr)[3].first, (*itr)[3].second) // parameter value |
157 | ); |
158 | } |
159 | } |
160 | return queryParams_; |
161 | } |
162 | |
163 | } // namespace folly |
164 | |