Line | Branch | Exec | Source |
---|---|---|---|
1 | // | ||
2 | // Copyright (c) 2022 Alan de Freitas (alandefreitas@gmail.com) | ||
3 | // | ||
4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
6 | // | ||
7 | // Official repository: https://github.com/boostorg/url | ||
8 | // | ||
9 | |||
10 | |||
11 | #include <boost/url/detail/config.hpp> | ||
12 | #include "pattern.hpp" | ||
13 | #include "pct_format.hpp" | ||
14 | #include "boost/url/detail/replacement_field_rule.hpp" | ||
15 | #include <boost/url/grammar/alpha_chars.hpp> | ||
16 | #include <boost/url/grammar/optional_rule.hpp> | ||
17 | #include <boost/url/grammar/token_rule.hpp> | ||
18 | #include "../rfc/detail/charsets.hpp" | ||
19 | #include "../rfc/detail/host_rule.hpp" | ||
20 | #include "boost/url/rfc/detail/path_rules.hpp" | ||
21 | #include "../rfc/detail/port_rule.hpp" | ||
22 | #include "../rfc/detail/scheme_rule.hpp" | ||
23 | |||
24 | namespace boost { | ||
25 | namespace urls { | ||
26 | namespace detail { | ||
27 | |||
// Character set used for the text between the brackets of an IP-literal
// host template: every character in host_chars plus ':'.
static constexpr auto lhost_chars = host_chars + ':';
29 | |||
30 | void | ||
31 | 140 | pattern:: | |
32 | apply( | ||
33 | url_base& u, | ||
34 | format_args const& args) const | ||
35 | { | ||
36 | // measure total | ||
37 | struct sizes | ||
38 | { | ||
39 | std::size_t scheme = 0; | ||
40 | std::size_t user = 0; | ||
41 | std::size_t pass = 0; | ||
42 | std::size_t host = 0; | ||
43 | std::size_t port = 0; | ||
44 | std::size_t path = 0; | ||
45 | std::size_t query = 0; | ||
46 | std::size_t frag = 0; | ||
47 | }; | ||
48 | 140 | sizes n; | |
49 | |||
50 | 140 | format_parse_context pctx(nullptr, nullptr, 0); | |
51 |
1/2✓ Branch 1 taken 140 times.
✗ Branch 2 not taken.
|
140 | measure_context mctx(args); |
52 |
2/2✓ Branch 1 taken 54 times.
✓ Branch 2 taken 86 times.
|
140 | if (!scheme.empty()) |
53 | { | ||
54 | 54 | pctx = {scheme, pctx.next_arg_id()}; | |
55 |
1/2✓ Branch 2 taken 54 times.
✗ Branch 3 not taken.
|
54 | n.scheme = pct_vmeasure( |
56 | grammar::alpha_chars, pctx, mctx); | ||
57 | 54 | mctx.advance_to(0); | |
58 | } | ||
59 |
2/2✓ Branch 0 taken 47 times.
✓ Branch 1 taken 93 times.
|
140 | if (has_authority) |
60 | { | ||
61 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 39 times.
|
47 | if (has_user) |
62 | { | ||
63 | 8 | pctx = {user, pctx.next_arg_id()}; | |
64 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | n.user = pct_vmeasure( |
65 | user_chars, pctx, mctx); | ||
66 | 8 | mctx.advance_to(0); | |
67 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | if (has_pass) |
68 | { | ||
69 | 6 | pctx = {pass, pctx.next_arg_id()}; | |
70 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | n.pass = pct_vmeasure( |
71 | password_chars, pctx, mctx); | ||
72 | 6 | mctx.advance_to(0); | |
73 | } | ||
74 | } | ||
75 |
2/2✓ Branch 1 taken 1 times.
✓ Branch 2 taken 46 times.
|
47 | if (host.starts_with('[')) |
76 | { | ||
77 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | BOOST_ASSERT(host.ends_with(']')); |
78 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()}; |
79 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | n.host = pct_vmeasure( |
80 | 1 | lhost_chars, pctx, mctx) + 2; | |
81 | 1 | mctx.advance_to(0); | |
82 | } | ||
83 | else | ||
84 | { | ||
85 | 46 | pctx = {host, pctx.next_arg_id()}; | |
86 |
1/2✓ Branch 1 taken 46 times.
✗ Branch 2 not taken.
|
46 | n.host = pct_vmeasure( |
87 | host_chars, pctx, mctx); | ||
88 | 46 | mctx.advance_to(0); | |
89 | } | ||
90 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 34 times.
|
47 | if (has_port) |
91 | { | ||
92 | 13 | pctx = {port, pctx.next_arg_id()}; | |
93 |
1/2✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
|
13 | n.port = pct_vmeasure( |
94 | grammar::digit_chars, pctx, mctx); | ||
95 | 13 | mctx.advance_to(0); | |
96 | } | ||
97 | } | ||
98 |
2/2✓ Branch 1 taken 102 times.
✓ Branch 2 taken 38 times.
|
140 | if (!path.empty()) |
99 | { | ||
100 | 102 | pctx = {path, pctx.next_arg_id()}; | |
101 |
2/2✓ Branch 1 taken 100 times.
✓ Branch 2 taken 2 times.
|
102 | n.path = pct_vmeasure( |
102 | path_chars, pctx, mctx); | ||
103 | 100 | mctx.advance_to(0); | |
104 | } | ||
105 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 125 times.
|
138 | if (has_query) |
106 | { | ||
107 | 13 | pctx = {query, pctx.next_arg_id()}; | |
108 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | n.query = pct_vmeasure( |
109 | query_chars, pctx, mctx); | ||
110 | 13 | mctx.advance_to(0); | |
111 | } | ||
112 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 131 times.
|
138 | if (has_frag) |
113 | { | ||
114 | 7 | pctx = {frag, pctx.next_arg_id()}; | |
115 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | n.frag = pct_vmeasure( |
116 | fragment_chars, pctx, mctx); | ||
117 | 7 | mctx.advance_to(0); | |
118 | } | ||
119 | 138 | std::size_t const n_total = | |
120 | 138 | n.scheme + | |
121 | 138 | (n.scheme != 0) * 1 + // ":" | |
122 | 138 | has_authority * 2 + // "//" | |
123 | 138 | n.user + | |
124 | 138 | has_pass * 1 + // ":" | |
125 | 138 | n.pass + | |
126 | 138 | has_user * 1 + // "@" | |
127 | 138 | n.host + | |
128 | 138 | has_port * 1 + // ":" | |
129 | 138 | n.port + | |
130 | 138 | n.path + | |
131 | 138 | has_query * 1 + // "?" | |
132 | 138 | n.query + | |
133 | 138 | has_frag * 1 + // "#" | |
134 | 138 | n.frag; | |
135 |
2/2✓ Branch 1 taken 137 times.
✓ Branch 2 taken 1 times.
|
138 | u.reserve(n_total); |
136 | |||
137 | // Apply | ||
138 | 137 | pctx = {nullptr, nullptr, 0}; | |
139 | 137 | format_context fctx(nullptr, args); | |
140 | 274 | url_base::op_t op(u); | |
141 | using parts = parts_base; | ||
142 |
2/2✓ Branch 1 taken 53 times.
✓ Branch 2 taken 84 times.
|
137 | if (!scheme.empty()) |
143 | { | ||
144 | 106 | auto dest = u.resize_impl( | |
145 | parts::id_scheme, | ||
146 |
1/2✓ Branch 1 taken 53 times.
✗ Branch 2 not taken.
|
53 | n.scheme + 1, op); |
147 | 53 | pctx = {scheme, pctx.next_arg_id()}; | |
148 | 53 | fctx.advance_to(dest); | |
149 |
1/2✓ Branch 2 taken 53 times.
✗ Branch 3 not taken.
|
53 | const char* dest1 = pct_vformat( |
150 | grammar::alpha_chars, pctx, fctx); | ||
151 | 53 | dest[n.scheme] = ':'; | |
152 | // validate | ||
153 |
2/2✓ Branch 3 taken 1 times.
✓ Branch 4 taken 52 times.
|
53 | if (!grammar::parse({dest, dest1}, scheme_rule())) |
154 | { | ||
155 | 1 | throw_invalid_argument(); | |
156 | } | ||
157 | } | ||
158 |
2/2✓ Branch 0 taken 45 times.
✓ Branch 1 taken 91 times.
|
136 | if (has_authority) |
159 | { | ||
160 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 37 times.
|
45 | if (has_user) |
161 | { | ||
162 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | auto dest = u.set_user_impl( |
163 | n.user, op); | ||
164 | 8 | pctx = {user, pctx.next_arg_id()}; | |
165 | 8 | fctx.advance_to(dest); | |
166 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | char const* dest1 = pct_vformat( |
167 | user_chars, pctx, fctx); | ||
168 | 8 | u.impl_.decoded_[parts::id_user] = | |
169 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | pct_string_view(dest, dest1 - dest) |
170 | 8 | ->decoded_size(); | |
171 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | if (has_pass) |
172 | { | ||
173 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | char* destp = u.set_password_impl( |
174 | n.pass, op); | ||
175 | 6 | pctx = {pass, pctx.next_arg_id()}; | |
176 | 6 | fctx.advance_to(destp); | |
177 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | dest1 = pct_vformat( |
178 | password_chars, pctx, fctx); | ||
179 | 6 | u.impl_.decoded_[parts::id_pass] = | |
180 |
1/2✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
|
6 | pct_string_view({destp, dest1}) |
181 | 6 | ->decoded_size() + 1; | |
182 | } | ||
183 | } | ||
184 |
1/2✓ Branch 1 taken 45 times.
✗ Branch 2 not taken.
|
45 | auto dest = u.set_host_impl( |
185 | n.host, op); | ||
186 |
2/2✓ Branch 1 taken 1 times.
✓ Branch 2 taken 44 times.
|
45 | if (host.starts_with('[')) |
187 | { | ||
188 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | BOOST_ASSERT(host.ends_with(']')); |
189 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | pctx = {host.substr(1, host.size() - 2), pctx.next_arg_id()}; |
190 | 1 | *dest++ = '['; | |
191 | 1 | fctx.advance_to(dest); | |
192 | char* dest1 = | ||
193 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | pct_vformat(lhost_chars, pctx, fctx); |
194 | 1 | *dest1++ = ']'; | |
195 | 1 | u.impl_.decoded_[parts::id_host] = | |
196 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
2 | pct_string_view(dest - 1, dest1 - dest) |
197 | 1 | ->decoded_size(); | |
198 | } | ||
199 | else | ||
200 | { | ||
201 | 44 | pctx = {host, pctx.next_arg_id()}; | |
202 | 44 | fctx.advance_to(dest); | |
203 | char const* dest1 = | ||
204 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
44 | pct_vformat(host_chars, pctx, fctx); |
205 | 44 | u.impl_.decoded_[parts::id_host] = | |
206 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
88 | pct_string_view(dest, dest1 - dest) |
207 | 44 | ->decoded_size(); | |
208 | } | ||
209 | 45 | auto uh = u.encoded_host(); | |
210 |
1/2✓ Branch 4 taken 45 times.
✗ Branch 5 not taken.
|
45 | auto h = grammar::parse(uh, host_rule).value(); |
211 | 45 | std::memcpy( | |
212 | 45 | u.impl_.ip_addr_, | |
213 | h.addr, | ||
214 | sizeof(u.impl_.ip_addr_)); | ||
215 | 45 | u.impl_.host_type_ = h.host_type; | |
216 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 32 times.
|
45 | if (has_port) |
217 | { | ||
218 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | dest = u.set_port_impl(n.port, op); |
219 | 13 | pctx = {port, pctx.next_arg_id()}; | |
220 | 13 | fctx.advance_to(dest); | |
221 |
1/2✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
|
13 | char const* dest1 = pct_vformat( |
222 | grammar::digit_chars, pctx, fctx); | ||
223 | 13 | u.impl_.decoded_[parts::id_port] = | |
224 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | pct_string_view(dest, dest1 - dest) |
225 | 13 | ->decoded_size() + 1; | |
226 | 13 | core::string_view up = {dest - 1, dest1}; | |
227 |
1/2✓ Branch 3 taken 13 times.
✗ Branch 4 not taken.
|
13 | auto p = grammar::parse(up, detail::port_part_rule).value(); |
228 |
1/2✓ Branch 0 taken 13 times.
✗ Branch 1 not taken.
|
13 | if (p.has_port) |
229 | 13 | u.impl_.port_number_ = p.port_number; | |
230 | } | ||
231 | } | ||
232 |
2/2✓ Branch 1 taken 100 times.
✓ Branch 2 taken 36 times.
|
136 | if (!path.empty()) |
233 | { | ||
234 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | auto dest = u.resize_impl( |
235 | parts::id_path, | ||
236 | n.path, op); | ||
237 | 100 | pctx = {path, pctx.next_arg_id()}; | |
238 | 100 | fctx.advance_to(dest); | |
239 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | auto dest1 = pct_vformat( |
240 | path_chars, pctx, fctx); | ||
241 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | pct_string_view npath(dest, dest1 - dest); |
242 | 100 | u.impl_.decoded_[parts::id_path] += | |
243 | 100 | npath.decoded_size(); | |
244 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | if (!npath.empty()) |
245 | { | ||
246 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | u.impl_.nseg_ = std::count( |
247 | 100 | npath.begin() + 1, | |
248 | 200 | npath.end(), '/') + 1; | |
249 | } | ||
250 | // handle edge cases | ||
251 | // 1) path is first component and the | ||
252 | // first segment contains an unencoded ':' | ||
253 | // This is impossible because the template | ||
254 | // "{}" would be a host. | ||
255 |
4/4✓ Branch 2 taken 78 times.
✓ Branch 3 taken 22 times.
✓ Branch 4 taken 78 times.
✓ Branch 5 taken 22 times.
|
178 | if (u.scheme().empty() && |
256 |
1/2✓ Branch 1 taken 78 times.
✗ Branch 2 not taken.
|
78 | !u.has_authority()) |
257 | { | ||
258 | 78 | auto fseg = u.encoded_segments().front(); | |
259 |
1/2✓ Branch 2 taken 78 times.
✗ Branch 3 not taken.
|
78 | std::size_t nc = std::count( |
260 | 78 | fseg.begin(), fseg.end(), ':'); | |
261 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 74 times.
|
78 | if (nc) |
262 | { | ||
263 | 4 | std::size_t diff = nc * 2; | |
264 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | u.reserve(n_total + diff); |
265 | 8 | dest = u.resize_impl( | |
266 | parts::id_path, | ||
267 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | n.path + diff, op); |
268 | 4 | char* dest0 = dest + diff; | |
269 | 4 | std::memmove(dest0, dest, n.path); | |
270 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 4 times.
|
27 | while (dest0 != dest) |
271 | { | ||
272 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 8 times.
|
23 | if (*dest0 != ':') |
273 | { | ||
274 | 15 | *dest++ = *dest0++; | |
275 | } | ||
276 | else | ||
277 | { | ||
278 | 8 | *dest++ = '%'; | |
279 | 8 | *dest++ = '3'; | |
280 | 8 | *dest++ = 'A'; | |
281 | 8 | dest0++; | |
282 | } | ||
283 | } | ||
284 | } | ||
285 | } | ||
286 | // 2) url has no authority and path | ||
287 | // starts with "//" | ||
288 |
4/4✓ Branch 1 taken 86 times.
✓ Branch 2 taken 14 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 84 times.
|
186 | if (!u.has_authority() && |
289 |
2/2✓ Branch 2 taken 2 times.
✓ Branch 3 taken 98 times.
|
186 | u.encoded_path().starts_with("//")) |
290 | { | ||
291 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | u.reserve(n_total + 2); |
292 | 4 | dest = u.resize_impl( | |
293 | parts::id_path, | ||
294 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | n.path + 2, op); |
295 | 2 | std::memmove(dest + 2, dest, n.path); | |
296 | 2 | *dest++ = '/'; | |
297 | 2 | *dest = '.'; | |
298 | } | ||
299 | } | ||
300 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 123 times.
|
136 | if (has_query) |
301 | { | ||
302 | 26 | auto dest = u.resize_impl( | |
303 | parts::id_query, | ||
304 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | n.query + 1, op); |
305 | 13 | *dest++ = '?'; | |
306 | 13 | pctx = {query, pctx.next_arg_id()}; | |
307 | 13 | fctx.advance_to(dest); | |
308 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | auto dest1 = pct_vformat( |
309 | query_chars, pctx, fctx); | ||
310 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | pct_string_view nquery(dest, dest1 - dest); |
311 | 13 | u.impl_.decoded_[parts::id_query] += | |
312 | 13 | nquery.decoded_size() + 1; | |
313 |
1/2✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
|
13 | if (!nquery.empty()) |
314 | { | ||
315 |
1/2✓ Branch 2 taken 13 times.
✗ Branch 3 not taken.
|
13 | u.impl_.nparam_ = std::count( |
316 | nquery.begin(), | ||
317 | 26 | nquery.end(), '&') + 1; | |
318 | } | ||
319 | } | ||
320 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 129 times.
|
136 | if (has_frag) |
321 | { | ||
322 | 14 | auto dest = u.resize_impl( | |
323 | parts::id_frag, | ||
324 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | n.frag + 1, op); |
325 | 7 | *dest++ = '#'; | |
326 | 7 | pctx = {frag, pctx.next_arg_id()}; | |
327 | 7 | fctx.advance_to(dest); | |
328 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | auto dest1 = pct_vformat( |
329 | fragment_chars, pctx, fctx); | ||
330 | 7 | u.impl_.decoded_[parts::id_frag] += | |
331 | 7 | make_pct_string_view( | |
332 | 7 | core::string_view(dest, dest1 - dest)) | |
333 | 7 | ->decoded_size() + 1; | |
334 | } | ||
335 | 136 | } | |
336 | |||
337 | // This rule represents a pct-encoded string | ||
338 | // that contains an arbitrary number of | ||
339 | // replacement ids in it | ||
340 | template<class CharSet> | ||
341 | struct pct_encoded_fmt_string_rule_t | ||
342 | { | ||
343 | using value_type = pct_string_view; | ||
344 | |||
345 | constexpr | ||
346 | pct_encoded_fmt_string_rule_t( | ||
347 | CharSet const& cs) noexcept | ||
348 | : cs_(cs) | ||
349 | { | ||
350 | } | ||
351 | |||
352 | template<class CharSet_> | ||
353 | friend | ||
354 | constexpr | ||
355 | auto | ||
356 | pct_encoded_fmt_string_rule( | ||
357 | CharSet_ const& cs) noexcept -> | ||
358 | pct_encoded_fmt_string_rule_t<CharSet_>; | ||
359 | |||
360 | system::result<value_type> | ||
361 | 482 | parse( | |
362 | char const*& it, | ||
363 | char const* end) const noexcept | ||
364 | { | ||
365 | 482 | auto const start = it; | |
366 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 240 times.
|
482 | if(it == end) |
367 | { | ||
368 | // this might be empty | ||
369 | 2 | return {}; | |
370 | } | ||
371 | |||
372 | // consume some with literal rule | ||
373 | // this might be an empty literal | ||
374 | 480 | auto literal_rule = pct_encoded_rule(cs_); | |
375 | 480 | auto rv = literal_rule.parse(it, end); | |
376 |
1/2✓ Branch 1 taken 470 times.
✗ Branch 2 not taken.
|
940 | while (rv) |
377 | { | ||
378 | 940 | auto it0 = it; | |
379 | // consume some with replacement id | ||
380 | // rule | ||
381 |
2/2✓ Branch 2 taken 240 times.
✓ Branch 3 taken 230 times.
|
940 | if (!replacement_field_rule.parse(it, end)) |
382 | { | ||
383 | 480 | it = it0; | |
384 | 480 | break; | |
385 | } | ||
386 | 460 | rv = literal_rule.parse(it, end); | |
387 | } | ||
388 | |||
389 | 480 | return core::string_view(start, it - start); | |
390 | } | ||
391 | |||
392 | private: | ||
393 | CharSet cs_; | ||
394 | }; | ||
395 | |||
396 | template<class CharSet> | ||
397 | constexpr | ||
398 | auto | ||
399 | pct_encoded_fmt_string_rule( | ||
400 | CharSet const& cs) noexcept -> | ||
401 | pct_encoded_fmt_string_rule_t<CharSet> | ||
402 | { | ||
403 | // If an error occurs here it means that | ||
404 | // the value of your type does not meet | ||
405 | // the requirements. Please check the | ||
406 | // documentation! | ||
407 | static_assert( | ||
408 | grammar::is_charset<CharSet>::value, | ||
409 | "CharSet requirements not met"); | ||
410 | |||
411 | return pct_encoded_fmt_string_rule_t<CharSet>(cs); | ||
412 | } | ||
413 | |||
414 | // This rule represents a regular string with | ||
415 | // only chars from the specified charset and | ||
416 | // an arbitrary number of replacement ids in it | ||
417 | template<class CharSet> | ||
418 | struct fmt_token_rule_t | ||
419 | { | ||
420 | using value_type = pct_string_view; | ||
421 | |||
422 | constexpr | ||
423 | fmt_token_rule_t( | ||
424 | CharSet const& cs) noexcept | ||
425 | : cs_(cs) | ||
426 | { | ||
427 | } | ||
428 | |||
429 | template<class CharSet_> | ||
430 | friend | ||
431 | constexpr | ||
432 | auto | ||
433 | fmt_token_rule( | ||
434 | CharSet_ const& cs) noexcept -> | ||
435 | fmt_token_rule_t<CharSet_>; | ||
436 | |||
437 | system::result<value_type> | ||
438 | 13 | parse( | |
439 | char const*& it, | ||
440 | char const* end) const noexcept | ||
441 | { | ||
442 | 13 | auto const start = it; | |
443 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 13 times.
|
13 | BOOST_ASSERT(it != end); |
444 | /* | ||
445 | // This should never happen because | ||
446 | // all tokens are optional and will | ||
447 | // already return `none`: | ||
448 | if(it == end) | ||
449 | { | ||
450 | BOOST_URL_RETURN_EC( | ||
451 | grammar::error::need_more); | ||
452 | } | ||
453 | */ | ||
454 | |||
455 | // consume some with literal rule | ||
456 | // this might be an empty literal | ||
457 | auto partial_token_rule = | ||
458 | 13 | grammar::optional_rule( | |
459 | 13 | grammar::token_rule(cs_)); | |
460 | 26 | auto rv = partial_token_rule.parse(it, end); | |
461 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | while (rv) |
462 | { | ||
463 | 24 | auto it0 = it; | |
464 | // consume some with replacement id | ||
465 |
2/2✓ Branch 2 taken 13 times.
✓ Branch 3 taken 11 times.
|
24 | if (!replacement_field_rule.parse(it, end)) |
466 | { | ||
467 | // no replacement and no more cs | ||
468 | // before: nothing else to consume | ||
469 | 13 | it = it0; | |
470 | 13 | break; | |
471 | } | ||
472 | // after {...}, consume any more chars | ||
473 | // in the charset | ||
474 | 11 | rv = partial_token_rule.parse(it, end); | |
475 | } | ||
476 | |||
477 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 12 times.
|
13 | if(it == start) |
478 | { | ||
479 | // it != end but we consumed nothing | ||
480 | 1 | BOOST_URL_RETURN_EC( | |
481 | grammar::error::need_more); | ||
482 | } | ||
483 | |||
484 | 12 | return core::string_view(start, it - start); | |
485 | } | ||
486 | |||
487 | private: | ||
488 | CharSet cs_; | ||
489 | }; | ||
490 | |||
491 | template<class CharSet> | ||
492 | constexpr | ||
493 | auto | ||
494 | fmt_token_rule( | ||
495 | CharSet const& cs) noexcept -> | ||
496 | fmt_token_rule_t<CharSet> | ||
497 | { | ||
498 | // If an error occurs here it means that | ||
499 | // the value of your type does not meet | ||
500 | // the requirements. Please check the | ||
501 | // documentation! | ||
502 | static_assert( | ||
503 | grammar::is_charset<CharSet>::value, | ||
504 | "CharSet requirements not met"); | ||
505 | |||
506 | return fmt_token_rule_t<CharSet>(cs); | ||
507 | } | ||
508 | |||
509 | struct userinfo_template_rule_t | ||
510 | { | ||
511 | struct value_type | ||
512 | { | ||
513 | core::string_view user; | ||
514 | core::string_view password; | ||
515 | bool has_password = false; | ||
516 | }; | ||
517 | |||
518 | auto | ||
519 | 48 | parse( | |
520 | char const*& it, | ||
521 | char const* end | ||
522 | ) const noexcept -> | ||
523 | system::result<value_type> | ||
524 | { | ||
525 | static constexpr auto uchars = | ||
526 | unreserved_chars + | ||
527 | sub_delim_chars; | ||
528 | static constexpr auto pwchars = | ||
529 | uchars + ':'; | ||
530 | |||
531 | 48 | value_type t; | |
532 | |||
533 | // user | ||
534 | static constexpr auto user_fmt_rule = | ||
535 | pct_encoded_fmt_string_rule(uchars); | ||
536 | auto rv = grammar::parse( | ||
537 | 48 | it, end, user_fmt_rule); | |
538 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
|
48 | BOOST_ASSERT(rv); |
539 | 48 | t.user = *rv; | |
540 | |||
541 | // ':' | ||
542 |
2/2✓ Branch 0 taken 31 times.
✓ Branch 1 taken 17 times.
|
48 | if( it == end || |
543 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 16 times.
|
31 | *it != ':') |
544 | { | ||
545 | 32 | t.has_password = false; | |
546 | 32 | t.password = {}; | |
547 | 32 | return t; | |
548 | } | ||
549 | 16 | ++it; | |
550 | |||
551 | // pass | ||
552 | static constexpr auto pass_fmt_rule = | ||
553 | pct_encoded_fmt_string_rule(grammar::ref(pwchars)); | ||
554 | rv = grammar::parse( | ||
555 | 16 | it, end, pass_fmt_rule); | |
556 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
|
16 | BOOST_ASSERT(rv); |
557 | 16 | t.has_password = true; | |
558 | 16 | t.password = *rv; | |
559 | |||
560 | 16 | return t; | |
561 | } | ||
562 | }; | ||
563 | |||
564 | constexpr userinfo_template_rule_t userinfo_template_rule{}; | ||
565 | |||
566 | struct host_template_rule_t | ||
567 | { | ||
568 | using value_type = core::string_view; | ||
569 | |||
570 | auto | ||
571 | 49 | parse( | |
572 | char const*& it, | ||
573 | char const* end | ||
574 | ) const noexcept -> | ||
575 | system::result<value_type> | ||
576 | { | ||
577 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 48 times.
|
49 | if(it == end) |
578 | { | ||
579 | // empty host | ||
580 | 1 | return {}; | |
581 | } | ||
582 | |||
583 | // the host type will be ultimately | ||
584 | // validated when applying the replacement | ||
585 | // strings. Any chars allowed in hosts | ||
586 | // are allowed here. | ||
587 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 2 times.
|
48 | if (*it != '[') |
588 | { | ||
589 | // IPv4address and reg-name have the | ||
590 | // same char sets. | ||
591 | 46 | constexpr auto any_host_template_rule = | |
592 | pct_encoded_fmt_string_rule(host_chars); | ||
593 | auto rv = grammar::parse( | ||
594 | 46 | it, end, any_host_template_rule); | |
595 | // any_host_template_rule can always | ||
596 | // be empty, so it's never invalid | ||
597 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 46 times.
|
46 | BOOST_ASSERT(rv); |
598 | 46 | return detail::to_sv(*rv); | |
599 | } | ||
600 | // IP-literals need to be enclosed in | ||
601 | // "[]" if using ':' in the template | ||
602 | // string, because the ':' would be | ||
603 | // ambiguous with the port in fmt string. | ||
604 | // The "[]:" can be used in replacement | ||
605 | // strings without the "[]" though. | ||
606 | 2 | constexpr auto ip_literal_template_rule = | |
607 | pct_encoded_fmt_string_rule(lhost_chars); | ||
608 | 2 | auto it0 = it; | |
609 | auto rv = grammar::parse( | ||
610 | it, end, | ||
611 | 2 | grammar::optional_rule( | |
612 | 2 | grammar::tuple_rule( | |
613 | 2 | grammar::squelch( | |
614 | 2 | grammar::delim_rule('[')), | |
615 | ip_literal_template_rule, | ||
616 | 2 | grammar::squelch( | |
617 | 4 | grammar::delim_rule(']'))))); | |
618 | // ip_literal_template_rule can always | ||
619 | // be empty, so it's never invalid, but | ||
620 | // the rule might fail to match the | ||
621 | // closing "]" | ||
622 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
|
2 | BOOST_ASSERT(rv); |
623 | 2 | return core::string_view{it0, it}; | |
624 | } | ||
625 | }; | ||
626 | |||
627 | constexpr host_template_rule_t host_template_rule{}; | ||
628 | |||
629 | struct authority_template_rule_t | ||
630 | { | ||
631 | using value_type = pattern; | ||
632 | |||
633 | system::result<value_type> | ||
634 | 49 | parse( | |
635 | char const*& it, | ||
636 | char const* end | ||
637 | ) const noexcept | ||
638 | { | ||
639 | 49 | pattern u; | |
640 | |||
641 | // [ userinfo "@" ] | ||
642 | { | ||
643 | auto rv = grammar::parse( | ||
644 | it, end, | ||
645 | 49 | grammar::optional_rule( | |
646 | 49 | grammar::tuple_rule( | |
647 | userinfo_template_rule, | ||
648 | 49 | grammar::squelch( | |
649 | 98 | grammar::delim_rule('@'))))); | |
650 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 49 times.
|
49 | BOOST_ASSERT(rv); |
651 |
2/2✓ Branch 2 taken 9 times.
✓ Branch 3 taken 40 times.
|
49 | if(rv->has_value()) |
652 | { | ||
653 | 9 | auto& r = **rv; | |
654 | 9 | u.has_user = true; | |
655 | 9 | u.user = r.user; | |
656 | 9 | u.has_pass = r.has_password; | |
657 | 9 | u.pass = r.password; | |
658 | } | ||
659 | } | ||
660 | |||
661 | // host | ||
662 | { | ||
663 | auto rv = grammar::parse( | ||
664 | it, end, | ||
665 | 49 | host_template_rule); | |
666 | // host is allowed to be empty | ||
667 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 49 times.
|
49 | BOOST_ASSERT(rv); |
668 | 49 | u.host = *rv; | |
669 | } | ||
670 | |||
671 | // [ ":" port ] | ||
672 | { | ||
673 | constexpr auto port_template_rule = | ||
674 | grammar::optional_rule( | ||
675 | fmt_token_rule(grammar::digit_chars)); | ||
676 | 49 | auto it0 = it; | |
677 | auto rv = grammar::parse( | ||
678 | it, end, | ||
679 | 49 | grammar::tuple_rule( | |
680 | 49 | grammar::squelch( | |
681 | 49 | grammar::delim_rule(':')), | |
682 | 98 | port_template_rule)); | |
683 |
2/2✓ Branch 1 taken 35 times.
✓ Branch 2 taken 14 times.
|
49 | if (!rv) |
684 | { | ||
685 | 35 | it = it0; | |
686 | } | ||
687 | else | ||
688 | { | ||
689 | 14 | u.has_port = true; | |
690 |
2/2✓ Branch 2 taken 12 times.
✓ Branch 3 taken 2 times.
|
14 | if (rv->has_value()) |
691 | { | ||
692 | 12 | u.port = **rv; | |
693 | } | ||
694 | } | ||
695 | } | ||
696 | |||
697 | 49 | return u; | |
698 | } | ||
699 | }; | ||
700 | |||
701 | constexpr authority_template_rule_t authority_template_rule{}; | ||
702 | |||
703 | struct scheme_template_rule_t | ||
704 | { | ||
705 | using value_type = core::string_view; | ||
706 | |||
707 | system::result<value_type> | ||
708 | 147 | parse( | |
709 | char const*& it, | ||
710 | char const* end) const noexcept | ||
711 | { | ||
712 | 147 | auto const start = it; | |
713 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 146 times.
|
147 | if(it == end) |
714 | { | ||
715 | // scheme can't be empty | ||
716 | 1 | BOOST_URL_RETURN_EC( | |
717 | grammar::error::mismatch); | ||
718 | } | ||
719 |
4/4✓ Branch 1 taken 124 times.
✓ Branch 2 taken 22 times.
✓ Branch 3 taken 20 times.
✓ Branch 4 taken 126 times.
|
270 | if(!grammar::alpha_chars(*it) && |
720 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 104 times.
|
124 | *it != '{') |
721 | { | ||
722 | // expected alpha | ||
723 | 20 | BOOST_URL_RETURN_EC( | |
724 | grammar::error::mismatch); | ||
725 | } | ||
726 | |||
727 | // it starts with replacement id or alpha char | ||
728 |
2/2✓ Branch 1 taken 104 times.
✓ Branch 2 taken 22 times.
|
126 | if (!grammar::alpha_chars(*it)) |
729 | { | ||
730 |
2/2✓ Branch 2 taken 2 times.
✓ Branch 3 taken 102 times.
|
104 | if (!replacement_field_rule.parse(it, end)) |
731 | { | ||
732 | // replacement_field_rule is invalid | ||
733 | 2 | BOOST_URL_RETURN_EC( | |
734 | grammar::error::mismatch); | ||
735 | } | ||
736 | } | ||
737 | else | ||
738 | { | ||
739 | // skip first | ||
740 | 22 | ++it; | |
741 | } | ||
742 | |||
743 | static | ||
744 | constexpr | ||
745 | grammar::lut_chars scheme_chars( | ||
746 | "0123456789" "+-." | ||
747 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | ||
748 | "abcdefghijklmnopqrstuvwxyz"); | ||
749 | |||
750 | // non-scheme chars might be a new | ||
751 | // replacement-id or just an invalid char | ||
752 | 124 | it = grammar::find_if_not( | |
753 | it, end, scheme_chars); | ||
754 |
2/2✓ Branch 0 taken 75 times.
✓ Branch 1 taken 51 times.
|
126 | while (it != end) |
755 | { | ||
756 | 75 | auto it0 = it; | |
757 |
2/2✓ Branch 2 taken 73 times.
✓ Branch 3 taken 2 times.
|
75 | if (!replacement_field_rule.parse(it, end)) |
758 | { | ||
759 | 73 | it = it0; | |
760 | 73 | break; | |
761 | } | ||
762 | 2 | it = grammar::find_if_not( | |
763 | it, end, scheme_chars); | ||
764 | } | ||
765 | 124 | return core::string_view(start, it - start); | |
766 | } | ||
767 | }; | ||
768 | |||
769 | constexpr scheme_template_rule_t scheme_template_rule{}; | ||
770 | |||
771 | // This rule should consider all url types at the | ||
772 | // same time according to the format string | ||
773 | // - relative urls with no scheme/authority | ||
774 | // - absolute urls have no fragment | ||
775 | struct pattern_rule_t | ||
776 | { | ||
777 | using value_type = pattern; | ||
778 | |||
779 | system::result<value_type> | ||
780 | 147 | parse( | |
781 | char const*& it, | ||
782 | char const* const end | ||
783 | ) const noexcept | ||
784 | { | ||
785 | 147 | pattern u; | |
786 | |||
787 | // optional scheme | ||
788 | { | ||
789 | 147 | auto it0 = it; | |
790 | auto rv = grammar::parse( | ||
791 | it, end, | ||
792 | 147 | grammar::tuple_rule( | |
793 | scheme_template_rule, | ||
794 | 147 | grammar::squelch( | |
795 | 147 | grammar::delim_rule(':')))); | |
796 |
2/2✓ Branch 1 taken 59 times.
✓ Branch 2 taken 88 times.
|
147 | if(rv) |
797 | 59 | u.scheme = *rv; | |
798 | else | ||
799 | 88 | it = it0; | |
800 | } | ||
801 | |||
802 | // hier_part (authority + path) | ||
803 | // if there are less than 2 chars left, | ||
804 | // we are parsing the path | ||
805 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 143 times.
|
147 | if (it == end) |
806 | { | ||
807 | // this is over, so we can consider | ||
808 | // that a "path-empty" | ||
809 | 4 | return u; | |
810 | } | ||
811 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 138 times.
|
143 | if(end - it == 1) |
812 | { | ||
813 | // only one char left | ||
814 | // it can be a single separator "/", | ||
815 | // representing an empty absolute path, | ||
816 | // or a single-char segment | ||
817 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
|
5 | if(*it == '/') |
818 | { | ||
819 | // path-absolute | ||
820 | 2 | u.path = {it, 1}; | |
821 | 2 | ++it; | |
822 | 2 | return u; | |
823 | } | ||
824 | // this can be a: | ||
825 | // - path-noscheme if there's no scheme, or | ||
826 | // - path-rootless with a single char, or | ||
827 | // - path-empty (and consume nothing) | ||
828 |
3/4✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 3 times.
✗ Branch 4 not taken.
|
4 | if (!u.scheme.empty() || |
829 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | *it != ':') |
830 | { | ||
831 | // path-rootless with a single char | ||
832 | // this needs to be a segment because | ||
833 | // the authority needs two slashes | ||
834 | // "//" | ||
835 | // path-noscheme also matches here | ||
836 | // because we already validated the | ||
837 | // first char | ||
838 | auto rv = grammar::parse( | ||
839 | 3 | it, end, urls::detail::segment_rule); | |
840 |
2/2✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
|
3 | if(! rv) |
841 | 1 | return rv.error(); | |
842 | 2 | u.path = *rv; | |
843 | } | ||
844 | 2 | return u; | |
845 | } | ||
846 | |||
847 | // authority | ||
848 |
2/2✓ Branch 0 taken 62 times.
✓ Branch 1 taken 76 times.
|
138 | if( it[0] == '/' && |
849 |
2/2✓ Branch 0 taken 49 times.
✓ Branch 1 taken 13 times.
|
62 | it[1] == '/') |
850 | { | ||
851 | // "//" always indicates authority | ||
852 | 49 | it += 2; | |
853 | auto rv = grammar::parse( | ||
854 | it, end, | ||
855 | 49 | authority_template_rule); | |
856 | // authority is allowed to be empty | ||
857 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 49 times.
|
49 | BOOST_ASSERT(rv); |
858 | 49 | u.has_authority = true; | |
859 | 49 | u.has_user = rv->has_user; | |
860 | 49 | u.user = rv->user; | |
861 | 49 | u.has_pass = rv->has_pass; | |
862 | 49 | u.pass = rv->pass; | |
863 | 49 | u.host = rv->host; | |
864 | 49 | u.has_port = rv->has_port; | |
865 | 49 | u.port = rv->port; | |
866 | } | ||
867 | |||
868 | // the authority requires an absolute path | ||
869 | // or an empty path | ||
870 |
2/2✓ Branch 0 taken 111 times.
✓ Branch 1 taken 27 times.
|
138 | if (it == end || |
871 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 89 times.
|
111 | (u.has_authority && |
872 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 14 times.
|
22 | (*it != '/' && |
873 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
|
8 | *it != '?' && |
874 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | *it != '#'))) |
875 | { | ||
876 | // path-empty | ||
877 | 29 | return u; | |
878 | } | ||
879 | |||
880 | // path-abempty | ||
881 | // consume the whole path at once because | ||
882 | // we're going to count number of segments | ||
883 | // later after the replacements happen | ||
884 | static constexpr auto segment_fmt_rule = | ||
885 | pct_encoded_fmt_string_rule(path_chars); | ||
886 | auto rp = grammar::parse( | ||
887 | 109 | it, end, segment_fmt_rule); | |
888 | // path-abempty is allowed to be empty | ||
889 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 109 times.
|
109 | BOOST_ASSERT(rp); |
890 | 109 | u.path = *rp; | |
891 | |||
892 | // [ "?" query ] | ||
893 | { | ||
894 | static constexpr auto query_fmt_rule = | ||
895 | pct_encoded_fmt_string_rule(query_chars); | ||
896 | auto rv = grammar::parse( | ||
897 | it, end, | ||
898 | 109 | grammar::tuple_rule( | |
899 | 109 | grammar::squelch( | |
900 | 109 | grammar::delim_rule('?')), | |
901 | 109 | query_fmt_rule)); | |
902 | // query is allowed to be empty but | ||
903 | // delim rule is not | ||
904 |
2/2✓ Branch 1 taken 13 times.
✓ Branch 2 taken 96 times.
|
109 | if (rv) |
905 | { | ||
906 | 13 | u.has_query = true; | |
907 | 13 | u.query = *rv; | |
908 | } | ||
909 | } | ||
910 | |||
911 | // [ "#" fragment ] | ||
912 | { | ||
913 | static constexpr auto frag_fmt_rule = | ||
914 | pct_encoded_fmt_string_rule(fragment_chars); | ||
915 | auto rv = grammar::parse( | ||
916 | it, end, | ||
917 | 109 | grammar::tuple_rule( | |
918 | 109 | grammar::squelch( | |
919 | 109 | grammar::delim_rule('#')), | |
920 | 109 | frag_fmt_rule)); | |
921 | // frag is allowed to be empty but | ||
922 | // delim rule is not | ||
923 |
2/2✓ Branch 1 taken 7 times.
✓ Branch 2 taken 102 times.
|
109 | if (rv) |
924 | { | ||
925 | 7 | u.has_frag = true; | |
926 | 7 | u.frag = *rv; | |
927 | } | ||
928 | } | ||
929 | |||
930 | 109 | return u; | |
931 | } | ||
932 | }; | ||
933 | |||
934 | constexpr pattern_rule_t pattern_rule{}; | ||
935 | |||
936 | system::result<pattern> | ||
937 | 147 | parse_pattern( | |
938 | core::string_view s) | ||
939 | { | ||
940 | return grammar::parse( | ||
941 | 147 | s, pattern_rule); | |
942 | } | ||
943 | |||
944 | } // detail | ||
945 | } // urls | ||
946 | } // boost | ||
947 | |||
948 |