Line | Branch | Exec | Source |
---|---|---|---|
1 | // | ||
2 | // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com) | ||
3 | // | ||
4 | // Distributed under the Boost Software License, Version 1.0. (See accompanying | ||
5 | // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | ||
6 | // | ||
7 | // Official repository: https://github.com/boostorg/url | ||
8 | // | ||
9 | |||
10 | #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP | ||
11 | #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP | ||
12 | |||
13 | #include <boost/url/detail/config.hpp> | ||
14 | #include <boost/url/grammar/detail/charset.hpp> | ||
15 | #include <cstdint> | ||
16 | #include <type_traits> | ||
17 | |||
18 | // Credit to Peter Dimov for ideas regarding | ||
19 | // SIMD constexpr, and character set masks. | ||
20 | |||
21 | namespace boost { | ||
22 | namespace urls { | ||
23 | namespace grammar { | ||
24 | |||
25 | #ifndef BOOST_URL_DOCS | ||
26 | namespace detail { | ||
27 | template<class T, class = void> | ||
28 | struct is_pred : std::false_type {}; | ||
29 | |||
30 | template<class T> | ||
31 | struct is_pred<T, void_t< | ||
32 | decltype( | ||
33 | std::declval<bool&>() = | ||
34 | std::declval<T const&>().operator()( | ||
35 | std::declval<char>()) | ||
36 | ) > > : std::true_type | ||
37 | { | ||
38 | }; | ||
39 | } // detail | ||
40 | #endif | ||
41 | |||
42 | /** A set of characters | ||
43 | |||
44 | The characters defined by instances of | ||
45 | this set are provided upon construction. | ||
46 | The `constexpr` implementation allows | ||
47 | these to become compile-time constants. | ||
48 | |||
49 | @par Example | ||
50 | Character sets are used with rules and the | ||
51 | functions @ref find_if and @ref find_if_not. | ||
52 | @code | ||
53 | constexpr lut_chars vowel_chars = "AEIOU" "aeiou"; | ||
54 | |||
55 | system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) ); | ||
56 | @endcode | ||
57 | |||
58 | @see | ||
59 | @ref find_if, | ||
60 | @ref find_if_not, | ||
61 | @ref parse, | ||
62 | @ref token_rule. | ||
63 | */ | ||
64 | class lut_chars | ||
65 | { | ||
66 | std::uint64_t mask_[4] = {}; | ||
67 | |||
68 | constexpr | ||
69 | static | ||
70 | std::uint64_t | ||
71 | 155542 | lo(char c) noexcept | |
72 | { | ||
73 | 155542 | return static_cast< | |
74 | 155542 | unsigned char>(c) & 3; | |
75 | } | ||
76 | |||
77 | constexpr | ||
78 | static | ||
79 | std::uint64_t | ||
80 | 136531 | hi(char c) noexcept | |
81 | { | ||
82 | 136531 | return 1ULL << (static_cast< | |
83 | 136531 | unsigned char>(c) >> 2); | |
84 | } | ||
85 | |||
86 | constexpr | ||
87 | static | ||
88 | lut_chars | ||
89 | construct( | ||
90 | char const* s) noexcept | ||
91 | { | ||
92 | return *s | ||
93 | ? lut_chars(*s) + | ||
94 | construct(s+1) | ||
95 | : lut_chars(); | ||
96 | } | ||
97 | |||
98 | constexpr | ||
99 | static | ||
100 | lut_chars | ||
101 | 34048 | construct( | |
102 | unsigned char ch, | ||
103 | bool b) noexcept | ||
104 | { | ||
105 | return b | ||
106 | 5824 | ? lut_chars(ch) | |
107 |
4/4✓ Branch 0 taken 5824 times.
✓ Branch 1 taken 28224 times.
✓ Branch 3 taken 84672 times.
✓ Branch 4 taken 28224 times.
|
118720 | : lut_chars(); |
108 | } | ||
109 | |||
110 | template<class Pred> | ||
111 | constexpr | ||
112 | static | ||
113 | lut_chars | ||
114 | 68096 | construct( | |
115 | Pred pred, | ||
116 | unsigned char ch) noexcept | ||
117 | { | ||
118 | return ch == 255 | ||
119 | 266 | ? construct(ch, pred(static_cast<char>(ch))) | |
120 | 67830 | : construct(ch, pred(static_cast<char>(ch))) + | |
121 |
2/2✓ Branch 0 taken 133 times.
✓ Branch 1 taken 33915 times.
|
136192 | construct(pred, ch + 1); |
122 | } | ||
123 | |||
124 | constexpr | ||
125 | 28224 | lut_chars() = default; | |
126 | |||
127 | constexpr | ||
128 | 34105 | lut_chars( | |
129 | std::uint64_t m0, | ||
130 | std::uint64_t m1, | ||
131 | std::uint64_t m2, | ||
132 | std::uint64_t m3) noexcept | ||
133 | 34105 | : mask_{ m0, m1, m2, m3 } | |
134 | { | ||
135 | 34105 | } | |
136 | |||
137 | public: | ||
138 | /** Constructor | ||
139 | |||
140 | This function constructs a character | ||
141 | set which has as a single member, | ||
142 | the character `ch`. | ||
143 | |||
144 | @par Example | ||
145 | @code | ||
146 | constexpr lut_chars asterisk( '*' ); | ||
147 | @endcode | ||
148 | |||
149 | @par Complexity | ||
150 | Constant. | ||
151 | |||
152 | @par Exception Safety | ||
153 | Throws nothing. | ||
154 | |||
155 | @param ch A character. | ||
156 | */ | ||
157 | constexpr | ||
158 | 6337 | lut_chars(char ch) noexcept | |
159 | 6337 | : mask_ { | |
160 |
2/2✓ Branch 1 taken 1362 times.
✓ Branch 2 taken 4975 times.
|
6337 | lo(ch) == 0 ? hi(ch) : 0, |
161 |
2/2✓ Branch 0 taken 1901 times.
✓ Branch 1 taken 4436 times.
|
6337 | lo(ch) == 1 ? hi(ch) : 0, |
162 |
2/2✓ Branch 0 taken 1738 times.
✓ Branch 1 taken 4599 times.
|
6337 | lo(ch) == 2 ? hi(ch) : 0, |
163 |
2/2✓ Branch 3 taken 1336 times.
✓ Branch 4 taken 5001 times.
|
19011 | lo(ch) == 3 ? hi(ch) : 0 } |
164 | { | ||
165 | 6337 | } | |
166 | |||
167 | /** Constructor | ||
168 | |||
169 | This function constructs a character | ||
170 | set which has as members, all of the | ||
171 | characters present in the null-terminated | ||
172 | string `s`. | ||
173 | |||
174 | @par Example | ||
175 | @code | ||
176 | constexpr lut_chars digits = "0123456789"; | ||
177 | @endcode | ||
178 | |||
179 | @par Complexity | ||
180 | Linear in `::strlen(s)`, or constant | ||
181 | if `s` is a constant expression. | ||
182 | |||
183 | @par Exception Safety | ||
184 | Throws nothing. | ||
185 | |||
186 | @param s A null-terminated string. | ||
187 | */ | ||
188 | constexpr | ||
189 | lut_chars( | ||
190 | char const* s) noexcept | ||
191 | : lut_chars(construct(s)) | ||
192 | { | ||
193 | } | ||
194 | |||
195 | /** Constructor. | ||
196 | |||
197 | This function constructs a character | ||
198 | set which has as members, every value | ||
199 | of `char ch` for which the expression | ||
200 | `pred(ch)` returns `true`. | ||
201 | |||
202 | @par Example | ||
203 | @code | ||
204 | struct is_digit | ||
205 | { | ||
206 | constexpr bool | ||
207 | operator()(char c ) const noexcept | ||
208 | { | ||
209 | return c >= '0' && c <= '9'; | ||
210 | } | ||
211 | }; | ||
212 | |||
213 | constexpr lut_chars digits( is_digit{} ); | ||
214 | @endcode | ||
215 | |||
216 | @par Complexity | ||
217 | Linear in `pred`, or constant if | ||
218 | `pred(ch)` is a constant expression. | ||
219 | |||
220 | @par Exception Safety | ||
221 | Throws nothing. | ||
222 | |||
223 | @param pred The function object to | ||
224 | use for determining membership in | ||
225 | the character set. | ||
226 | */ | ||
227 | template<class Pred | ||
228 | #ifndef BOOST_URL_DOCS | ||
229 | ,class = typename std::enable_if< | ||
230 | detail::is_pred<Pred>::value && | ||
231 | ! std::is_base_of< | ||
232 | lut_chars, Pred>::value>::type | ||
233 | #endif | ||
234 | > | ||
235 | constexpr | ||
236 | 266 | lut_chars(Pred const& pred) noexcept | |
237 | : lut_chars( | ||
238 | 266 | construct(pred, 0)) | |
239 | { | ||
240 | 266 | } | |
241 | |||
242 | /** Return true if ch is in the character set. | ||
243 | |||
244 | This function returns true if the | ||
245 | character `ch` is in the set, otherwise | ||
246 | it returns false. | ||
247 | |||
248 | @par Complexity | ||
249 | Constant. | ||
250 | |||
251 | @par Exception Safety | ||
252 | Throws nothing. | ||
253 | |||
254 | @param ch The character to test. | ||
255 | */ | ||
256 | constexpr | ||
257 | bool | ||
258 | 1280 | operator()( | |
259 | unsigned char ch) const noexcept | ||
260 | { | ||
261 | 1280 | return operator()(static_cast<char>(ch)); | |
262 | } | ||
263 | |||
264 | /// @copydoc operator()(unsigned char) const | ||
265 | constexpr | ||
266 | bool | ||
267 | 130194 | operator()(char ch) const noexcept | |
268 | { | ||
269 | 130194 | return mask_[lo(ch)] & hi(ch); | |
270 | } | ||
271 | |||
272 | /** Return the union of two character sets. | ||
273 | |||
274 | This function returns a new character | ||
275 | set which contains all of the characters | ||
276 | in `cs0` as well as all of the characters | ||
277 | in `cs1`. | ||
278 | |||
279 | @par Example | ||
280 | This creates a character set which | ||
281 | includes all letters and numbers | ||
282 | @code | ||
283 | constexpr lut_chars alpha_chars( | ||
284 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | ||
285 | "abcdefghijklmnopqrstuvwxyz"); | ||
286 | |||
287 | constexpr lut_chars alnum_chars = alpha_chars + "0123456789"; | ||
288 | @endcode | ||
289 | |||
290 | @par Complexity | ||
291 | Constant. | ||
292 | |||
293 | @return The new character set. | ||
294 | |||
295 | @param cs0 A character set to join | ||
296 | |||
297 | @param cs1 A character set to join | ||
298 | */ | ||
299 | friend | ||
300 | constexpr | ||
301 | lut_chars | ||
302 | 33919 | operator+( | |
303 | lut_chars const& cs0, | ||
304 | lut_chars const& cs1) noexcept | ||
305 | { | ||
306 | return lut_chars( | ||
307 | 33919 | cs0.mask_[0] | cs1.mask_[0], | |
308 | 33919 | cs0.mask_[1] | cs1.mask_[1], | |
309 | 33919 | cs0.mask_[2] | cs1.mask_[2], | |
310 | 33919 | cs0.mask_[3] | cs1.mask_[3]); | |
311 | } | ||
312 | |||
313 | /** Return the difference of two character sets. | ||
314 | |||
315 | This function returns a new character | ||
316 | set which is formed from all of the | ||
317 | characters in `cs0` which are not in `cs1`. | ||
318 | |||
319 | @par Example | ||
320 | This statement declares a character set | ||
321 | containing all the lowercase letters | ||
322 | which are not vowels: | ||
323 | @code | ||
324 | constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou"; | ||
325 | @endcode | ||
326 | |||
327 | @par Complexity | ||
328 | Constant. | ||
329 | |||
330 | @return The new character set. | ||
331 | |||
332 | @param cs0 The character set to subtract from. | ||
333 | |||
334 | @param cs1 The character set whose members are removed. | ||
335 | */ | ||
336 | friend | ||
337 | constexpr | ||
338 | lut_chars | ||
339 | 186 | operator-( | |
340 | lut_chars const& cs0, | ||
341 | lut_chars const& cs1) noexcept | ||
342 | { | ||
343 | return lut_chars( | ||
344 | 186 | cs0.mask_[0] & ~cs1.mask_[0], | |
345 | 186 | cs0.mask_[1] & ~cs1.mask_[1], | |
346 | 186 | cs0.mask_[2] & ~cs1.mask_[2], | |
347 | 186 | cs0.mask_[3] & ~cs1.mask_[3]); | |
348 | } | ||
349 | |||
350 | /** Return a new character set which is the complement of another character set. | ||
351 | |||
352 | This function returns a new character | ||
353 | set which contains all of the characters | ||
354 | that are not in `*this`. | ||
355 | |||
356 | @par Example | ||
357 | This statement declares a character set | ||
358 | containing everything but vowels: | ||
359 | @code | ||
360 | constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" ); | ||
361 | @endcode | ||
362 | |||
363 | @par Complexity | ||
364 | Constant. | ||
365 | |||
366 | @par Exception Safety | ||
367 | Throws nothing. | ||
368 | |||
369 | @return The new character set. | ||
370 | */ | ||
371 | constexpr | ||
372 | lut_chars | ||
373 | operator~() const noexcept | ||
374 | { | ||
375 | return lut_chars( | ||
376 | ~mask_[0], | ||
377 | ~mask_[1], | ||
378 | ~mask_[2], | ||
379 | ~mask_[3] | ||
380 | ); | ||
381 | } | ||
382 | |||
383 | #ifndef BOOST_URL_DOCS | ||
384 | #ifdef BOOST_URL_USE_SSE2 | ||
385 | char const* | ||
386 | 1603 | find_if( | |
387 | char const* first, | ||
388 | char const* last) const noexcept | ||
389 | { | ||
390 | 1603 | return detail::find_if_pred( | |
391 | 1603 | *this, first, last); | |
392 | } | ||
393 | |||
394 | char const* | ||
395 | 13897 | find_if_not( | |
396 | char const* first, | ||
397 | char const* last) const noexcept | ||
398 | { | ||
399 | 13897 | return detail::find_if_not_pred( | |
400 | 13897 | *this, first, last); | |
401 | } | ||
402 | #endif | ||
403 | #endif | ||
404 | }; | ||
405 | |||
406 | } // grammar | ||
407 | } // urls | ||
408 | } // boost | ||
409 | |||
410 | #endif | ||
411 |