//
// Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
//                    Vinnie Falco (vinnie.falco@gmail.com)
// Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/boostorg/json
//
11  

11  

12  
#ifndef BOOST_JSON_DETAIL_SSE2_HPP
12  
#ifndef BOOST_JSON_DETAIL_SSE2_HPP
13  
#define BOOST_JSON_DETAIL_SSE2_HPP
13  
#define BOOST_JSON_DETAIL_SSE2_HPP
14  

14  

15  
#include <boost/json/detail/config.hpp>
15  
#include <boost/json/detail/config.hpp>
16  
#include <boost/json/detail/utf8.hpp>
16  
#include <boost/json/detail/utf8.hpp>
17  
#include <cstddef>
17  
#include <cstddef>
18  
#include <cstring>
18  
#include <cstring>
19  
#ifdef BOOST_JSON_USE_SSE2
19  
#ifdef BOOST_JSON_USE_SSE2
20  
# include <emmintrin.h>
20  
# include <emmintrin.h>
21  
# include <xmmintrin.h>
21  
# include <xmmintrin.h>
22  
# ifdef _MSC_VER
22  
# ifdef _MSC_VER
23  
#  include <intrin.h>
23  
#  include <intrin.h>
24  
# endif
24  
# endif
25  
#endif
25  
#endif
26  

26  

27  
namespace boost {
27  
namespace boost {
28  
namespace json {
28  
namespace json {
29  
namespace detail {
29  
namespace detail {
30  

30  

31  
#ifdef BOOST_JSON_USE_SSE2
31  
#ifdef BOOST_JSON_USE_SSE2
32  

32  

// Finds the first byte in [p, end) that may not appear unescaped inside a
// JSON string: a double quote (0x22), a backslash, or a control character
// (any byte <= 0x1F). Returns a pointer to that byte, or `end` when the
// range contains no such byte.
//
// This primary template does not look at UTF-8 structure at all; bytes
// >= 0x80 are accepted as-is. The template argument is not used in the body.
template<bool AllowBadUTF8>
inline
const char*
count_valid(
    char const* p,
    const char* end) noexcept
{
    __m128i const quote     = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const backslash = _mm_set1_epi8( '\\' );
    __m128i const ctrl_max  = _mm_set1_epi8( 0x1F );

    // Vector path: classify 16 bytes per iteration.
    while(end - p >= 16)
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        __m128i const is_quote = _mm_cmpeq_epi8( chunk, quote );
        __m128i const is_slash = _mm_cmpeq_epi8( chunk, backslash );

        // min(b, 0x1F) == b holds exactly when b <= 0x1F as an unsigned
        // byte, so this detects control characters without needing an
        // unsigned "less than" instruction.
        __m128i const clamped = _mm_min_epu8( chunk, ctrl_max );
        __m128i const is_ctrl = _mm_cmpeq_epi8( clamped, chunk );

        __m128i const stop = _mm_or_si128(
            _mm_or_si128( is_quote, is_slash ), is_ctrl );

        // One bit per byte: bit i is set when byte i is a stop character.
        int const mask = _mm_movemask_epi8( stop );

        if( mask != 0 )
        {
            // Index of the lowest set bit == offset of the first stop byte.
            int first;
#if defined(__GNUC__) || defined(__clang__)
            first = __builtin_ffs( mask ) - 1;
#else
            unsigned long index;
            _BitScanForward( &index, mask );
            first = static_cast<int>(index);
#endif
            return p + first;
        }

        p += 16;
    }

    // Scalar tail: fewer than 16 bytes remain.
    for(; p != end; ++p)
    {
        const unsigned char c = *p;
        if(c == '\x22' || c == '\\' || c < 0x20)
            break;
    }

    return p;
}
82  

82  

83  
template<>
83  
template<>
84  
inline
84  
inline
85  
const char*
85  
const char*
86  
count_valid<false>(
86  
count_valid<false>(
87  
    char const* p,
87  
    char const* p,
88  
    const char* end) noexcept
88  
    const char* end) noexcept
89  
{
89  
{
90  
    __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
90  
    __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
91  
    __m128i const q2 = _mm_set1_epi8( '\\' );
91  
    __m128i const q2 = _mm_set1_epi8( '\\' );
92  
    __m128i const q3 = _mm_set1_epi8( 0x20 );
92  
    __m128i const q3 = _mm_set1_epi8( 0x20 );
93  

93  

94  
    while(end - p >= 16)
94  
    while(end - p >= 16)
95  
    {
95  
    {
96  
        __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
96  
        __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
97  

97  

98  
        __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
98  
        __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
99  
        __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
99  
        __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
100  
        __m128i v4 = _mm_cmplt_epi8( v1, q3 );
100  
        __m128i v4 = _mm_cmplt_epi8( v1, q3 );
101  

101  

102  
        __m128i v5 = _mm_or_si128( v2, v3 );
102  
        __m128i v5 = _mm_or_si128( v2, v3 );
103  
        __m128i v6 = _mm_or_si128( v5, v4 );
103  
        __m128i v6 = _mm_or_si128( v5, v4 );
104  

104  

105  
        int w = _mm_movemask_epi8( v6 );
105  
        int w = _mm_movemask_epi8( v6 );
106  

106  

107  
        if( w != 0 )
107  
        if( w != 0 )
108  
        {
108  
        {
109  
            int m;
109  
            int m;
110  
#if defined(__GNUC__) || defined(__clang__)
110  
#if defined(__GNUC__) || defined(__clang__)
111  
            m = __builtin_ffs( w ) - 1;
111  
            m = __builtin_ffs( w ) - 1;
112  
#else
112  
#else
113  
            unsigned long index;
113  
            unsigned long index;
114  
            _BitScanForward( &index, w );
114  
            _BitScanForward( &index, w );
115  
            m = index;
115  
            m = index;
116  
#endif
116  
#endif
117  
            p += m;
117  
            p += m;
118  
            break;
118  
            break;
119  
        }
119  
        }
120  

120  

121  
        p += 16;
121  
        p += 16;
122  
    }
122  
    }
123  

123  

124  
    while(p != end)
124  
    while(p != end)
125  
    {
125  
    {
126  
        const unsigned char c = *p;
126  
        const unsigned char c = *p;
127  
        if(c == '\x22' || c == '\\' || c < 0x20)
127  
        if(c == '\x22' || c == '\\' || c < 0x20)
128  
            break;
128  
            break;
129  
        if(c < 0x80)
129  
        if(c < 0x80)
130  
        {
130  
        {
131  
            ++p;
131  
            ++p;
132  
            continue;
132  
            continue;
133  
        }
133  
        }
134  
        // validate utf-8
134  
        // validate utf-8
135  
        uint16_t first = classify_utf8(c);
135  
        uint16_t first = classify_utf8(c);
136  
        uint8_t len = first & 0xFF;
136  
        uint8_t len = first & 0xFF;
137  
        if(BOOST_JSON_UNLIKELY(end - p < len))
137  
        if(BOOST_JSON_UNLIKELY(end - p < len))
138  
            break;
138  
            break;
139  
        if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
139  
        if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
140  
            break;
140  
            break;
141  
        p += len;
141  
        p += len;
142  
    }
142  
    }
143  

143  

144  
    return p;
144  
    return p;
145  
}
145  
}
146  

146  

147  
#else
147  
#else
148  

148  

// Portable (non-SSE2) version.
//
// Finds the first byte in [p, end) that may not appear unescaped inside a
// JSON string: a double quote (0x22), a backslash, or a control character
// (< 0x20). Returns a pointer to that byte, or `end` when the range
// contains no such byte. No UTF-8 validation is performed here, and the
// template argument is not used in the body.
template<bool AllowBadUTF8>
char const*
count_valid(
    char const* p,
    char const* end) noexcept
{
    for(; p != end; ++p)
    {
        // Compare as unsigned so bytes >= 0x80 are not mistaken for
        // control characters on platforms where char is signed.
        const unsigned char c = *p;
        if(c == '\x22' || c == '\\' || c < 0x20)
            break;
    }

    return p;
}
165  

165  

166  
template<>
166  
template<>
167  
inline
167  
inline
168  
char const*
168  
char const*
169  
count_valid<false>(
169  
count_valid<false>(
170  
    char const* p,
170  
    char const* p,
171  
    char const* end) noexcept
171  
    char const* end) noexcept
172  
{
172  
{
173  
    while(p != end)
173  
    while(p != end)
174  
    {
174  
    {
175  
        const unsigned char c = *p;
175  
        const unsigned char c = *p;
176  
        if(c == '\x22' || c == '\\' || c < 0x20)
176  
        if(c == '\x22' || c == '\\' || c < 0x20)
177  
            break;
177  
            break;
178  
        if(c < 0x80)
178  
        if(c < 0x80)
179  
        {
179  
        {
180  
            ++p;
180  
            ++p;
181  
            continue;
181  
            continue;
182  
        }
182  
        }
183  
        // validate utf-8
183  
        // validate utf-8
184  
        uint16_t first = classify_utf8(c);
184  
        uint16_t first = classify_utf8(c);
185  
        uint8_t len = first & 0xFF;
185  
        uint8_t len = first & 0xFF;
186  
        if(BOOST_JSON_UNLIKELY(end - p < len))
186  
        if(BOOST_JSON_UNLIKELY(end - p < len))
187  
            break;
187  
            break;
188  
        if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
188  
        if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
189  
            break;
189  
            break;
190  
        p += len;
190  
        p += len;
191  
    }
191  
    }
192  

192  

193  
    return p;
193  
    return p;
194  
}
194  
}
195  

195  

196  
#endif
196  
#endif
197  

197  

// KRYSTIAN NOTE: does not stop to validate
// count_unescaped
200  

200  

201  
#ifdef BOOST_JSON_USE_SSE2
201  
#ifdef BOOST_JSON_USE_SSE2
202  

202  

// Counts leading bytes of s[0..n) that need no escaping in a JSON string,
// i.e. bytes that are not a double quote, a backslash, or a control
// character (<= 0x1F).
//
// Only whole 16-byte chunks are examined. If fewer than 16 bytes remain
// and no stop character has been seen, scanning ends and those trailing
// bytes are NOT inspected. The result is therefore a lower bound: the
// caller must still check whatever follows the returned count.
inline
std::size_t
count_unescaped(
    char const* s,
    std::size_t n) noexcept
{
    __m128i const quote     = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const backslash = _mm_set1_epi8( '\\' );
    __m128i const ctrl_max  = _mm_set1_epi8( 0x1F );

    char const* const begin = s;

    while( n >= 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(s) );

        __m128i const is_quote = _mm_cmpeq_epi8( chunk, quote );
        __m128i const is_slash = _mm_cmpeq_epi8( chunk, backslash );

        // min(b, 0x1F) == b holds exactly when b <= 0x1F (unsigned).
        __m128i const clamped = _mm_min_epu8( chunk, ctrl_max );
        __m128i const is_ctrl = _mm_cmpeq_epi8( clamped, chunk );

        __m128i const stop = _mm_or_si128(
            _mm_or_si128( is_quote, is_slash ), is_ctrl );

        // One bit per byte: bit i is set when byte i is a stop character.
        int const mask = _mm_movemask_epi8( stop );

        if( mask != 0 )
        {
            int first;
#if defined(__GNUC__) || defined(__clang__)
            first = __builtin_ffs( mask ) - 1;
#else
            unsigned long index;
            _BitScanForward( &index, mask );
            first = static_cast<int>(index);
#endif
            s += first;
            break;
        }

        s += 16;
        n -= 16;
    }

    return static_cast<std::size_t>(s - begin);
}
249  

249  

250  
#else
250  
#else
251  

251  

// Portable (non-SSE2) stand-in: there is no fast path, so it always
// reports that zero bytes were skipped and leaves all work to the caller.
//
// This overload mirrors the (pointer, length) signature of the SSE2
// implementation so that a call written against one build configuration
// also compiles in the other.
inline
std::size_t
count_unescaped(
    char const*,
    std::size_t) noexcept
{
    return 0;
}

// Three-argument form retained for backward compatibility with any
// existing caller; it likewise always returns 0.
inline
std::size_t
count_unescaped(
    char const*,
    char const*,
    std::size_t) noexcept
{
    return 0;
}
260  

260  

261  
#endif
261  
#endif
262  

262  

// count_digits
264  

264  

265  
#ifdef BOOST_JSON_USE_SSE2
265  
#ifdef BOOST_JSON_USE_SSE2
266  

266  

// Returns how many of the 16 bytes starting at p are leading ASCII
// decimal digits ('0'..'9'); the result is in the range 0..16.
//
// assumes p..p+15 are valid (16 bytes are always loaded)
inline int count_digits( char const* p ) noexcept
{
    __m128i chunk = _mm_loadu_si128(
        reinterpret_cast<__m128i const*>(p) );

    // Adding 70 (with 8-bit wrap-around) maps '0'..'9' (48..57) onto
    // 118..127, the top ten values of the signed 8-bit range. Every other
    // byte lands below 118 when read as signed, so the signed "< 118"
    // compare below marks exactly the NON-digit bytes.
    chunk = _mm_add_epi8( chunk, _mm_set1_epi8(70) );
    chunk = _mm_cmplt_epi8( chunk, _mm_set1_epi8(118) );

    // One bit per byte: bit i is set when byte i is not a digit.
    int const non_digit = _mm_movemask_epi8( chunk );

    if( non_digit == 0 )
    {
        // All 16 bytes are digits.
        return 16;
    }

    // Offset of the first non-digit == number of leading digits.
    int n;
#if defined(__GNUC__) || defined(__clang__)
    n = __builtin_ffs( non_digit ) - 1;
#else
    unsigned long index;
    _BitScanForward( &index, non_digit );
    n = static_cast<int>(index);
#endif

    return n;
}
295  

295  

296  
#else
296  
#else
297  

297  

// Portable (non-SSE2) version.
//
// Returns how many of the 16 bytes starting at p are leading ASCII
// decimal digits ('0'..'9'); the result is in the range 0..16.
//
// assumes p..p+15 are valid
inline int count_digits( char const* p ) noexcept
{
    int n = 0;

    while( n < 16 )
    {
        // After subtracting '0', digits become 0..9; anything else wraps
        // to a value above 9 once narrowed to unsigned char.
        unsigned char const d = *p++ - '0';
        if(d > 9) break;
        ++n;
    }

    return n;
}
311  

311  

312  
#endif
312  
#endif
313  

313  

// parse_unsigned
315  

315  

316  
inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
316  
inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
317  
{
317  
{
318  
    while( n >= 4 )
318  
    while( n >= 4 )
319  
    {
319  
    {
320  
        // faster on on clang for x86,
320  
        // faster on on clang for x86,
321  
        // slower on gcc
321  
        // slower on gcc
322  
#ifdef __clang__
322  
#ifdef __clang__
323  
        r = r * 10 + p[0] - '0';
323  
        r = r * 10 + p[0] - '0';
324  
        r = r * 10 + p[1] - '0';
324  
        r = r * 10 + p[1] - '0';
325  
        r = r * 10 + p[2] - '0';
325  
        r = r * 10 + p[2] - '0';
326  
        r = r * 10 + p[3] - '0';
326  
        r = r * 10 + p[3] - '0';
327  
#else
327  
#else
328  
        uint32_t v;
328  
        uint32_t v;
329  
        std::memcpy( &v, p, 4 );
329  
        std::memcpy( &v, p, 4 );
330  
        endian::native_to_little_inplace(v);
330  
        endian::native_to_little_inplace(v);
331  

331  

332  
        v -= 0x30303030;
332  
        v -= 0x30303030;
333  

333  

334  
        unsigned w0 = v & 0xFF;
334  
        unsigned w0 = v & 0xFF;
335  
        unsigned w1 = (v >> 8) & 0xFF;
335  
        unsigned w1 = (v >> 8) & 0xFF;
336  
        unsigned w2 = (v >> 16) & 0xFF;
336  
        unsigned w2 = (v >> 16) & 0xFF;
337  
        unsigned w3 = (v >> 24);
337  
        unsigned w3 = (v >> 24);
338  

338  

339  
        r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3;
339  
        r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3;
340  
#endif
340  
#endif
341  
        p += 4;
341  
        p += 4;
342  
        n -= 4;
342  
        n -= 4;
343  
    }
343  
    }
344  

344  

345  
    switch( n )
345  
    switch( n )
346  
    {
346  
    {
347  
    case 0:
347  
    case 0:
348  
        break;
348  
        break;
349  
    case 1:
349  
    case 1:
350  
        r = r * 10 + p[0] - '0';
350  
        r = r * 10 + p[0] - '0';
351  
        break;
351  
        break;
352  
    case 2:
352  
    case 2:
353  
        r = r * 10 + p[0] - '0';
353  
        r = r * 10 + p[0] - '0';
354  
        r = r * 10 + p[1] - '0';
354  
        r = r * 10 + p[1] - '0';
355  
        break;
355  
        break;
356  
    case 3:
356  
    case 3:
357  
        r = r * 10 + p[0] - '0';
357  
        r = r * 10 + p[0] - '0';
358  
        r = r * 10 + p[1] - '0';
358  
        r = r * 10 + p[1] - '0';
359  
        r = r * 10 + p[2] - '0';
359  
        r = r * 10 + p[2] - '0';
360  
        break;
360  
        break;
361  
    }
361  
    }
362  
    return r;
362  
    return r;
363  
}
363  
}
364  

364  

// KRYSTIAN: this function is unused
// count_leading

/*
#ifdef BOOST_JSON_USE_SSE2

// assumes p..p+15 are valid
inline std::size_t count_leading( char const * p, char ch ) noexcept
{
    __m128i const q1 = _mm_set1_epi8( ch );

    __m128i v = _mm_loadu_si128( (__m128i const*)p );

    __m128i w = _mm_cmpeq_epi8( v, q1 );

    int m = _mm_movemask_epi8( w ) ^ 0xFFFF;

    std::size_t n;

    if( m == 0 )
    {
        n = 16;
    }
    else
    {
#if defined(__GNUC__) || defined(__clang__)
        n = __builtin_ffs( m ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, m );
        n = index;
#endif
    }

    return n;
}

#else

// assumes p..p+15 are valid
inline std::size_t count_leading( char const * p, char ch ) noexcept
{
    std::size_t n = 0;

    for( ; n < 16 && *p == ch; ++p, ++n );

    return n;
}

#endif
*/
416  

416  

// count_whitespace
418  

418  

419  
#ifdef BOOST_JSON_USE_SSE2
419  
#ifdef BOOST_JSON_USE_SSE2
420  

420  

// Skips leading JSON whitespace (space, '\t', '\r', '\n') in [p, end) and
// returns a pointer to the first byte that is not whitespace, or `end`
// when the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    if( p == end )
    {
        return p;
    }

    // Quick exit before any vector constants are set up: all four
    // whitespace characters are <= 0x20, so a larger first byte means
    // there is nothing to skip.
    if( static_cast<unsigned char>( *p ) > 0x20 )
    {
        return p;
    }

    __m128i const space   = _mm_set1_epi8( ' ' );
    __m128i const newline = _mm_set1_epi8( '\n' );
    __m128i const bit4    = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
    __m128i const cr      = _mm_set1_epi8( '\r' );

    // Vector path: classify 16 bytes per iteration.
    while( end - p >= 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        __m128i const sp_or_nl = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, space ),
            _mm_cmpeq_epi8( chunk, newline ));

        // OR-ing in bit 2 maps both '\t' (0x09) and '\r' (0x0D) onto
        // '\r', so a single compare recognises the two of them.
        __m128i const folded   = _mm_or_si128( chunk, bit4 );
        __m128i const tab_or_cr = _mm_cmpeq_epi8( folded, cr );

        __m128i const is_ws = _mm_or_si128( sp_or_nl, tab_or_cr );

        // Invert the 16-bit mask: bit i is set when byte i is NOT
        // whitespace.
        int const non_ws = _mm_movemask_epi8( is_ws ) ^ 0xFFFF;

        if( non_ws != 0 )
        {
#if defined(__GNUC__) || defined(__clang__)
            std::size_t const skip = __builtin_ffs( non_ws ) - 1;
#else
            unsigned long index;
            _BitScanForward( &index, non_ws );
            std::size_t const skip = index;
#endif

            return p + skip;
        }

        p += 16;
    }

    // Scalar tail: fewer than 16 bytes remain.
    for( ; p != end; ++p )
    {
        if( *p != ' ' && *p != '\t' && *p != '\r' && *p != '\n' )
        {
            return p;
        }
    }

    return p;
}
480  

480  

/*

// slightly faster on msvc-14.2, slightly slower on clang-win

inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
{
    char const * p0 = p;

    while( n > 0 )
    {
        char ch = *p;

        if( ch == '\n' || ch == '\r' )
        {
            ++p;
            --n;
            continue;
        }

        if( ch != ' ' && ch != '\t' )
        {
            break;
        }

        ++p;
        --n;

        while( n >= 16 )
        {
            std::size_t n2 = count_leading( p, ch );

            p += n2;
            n -= n2;

            if( n2 < 16 )
            {
                break;
            }
        }
    }

    return p - p0;
}
*/
525  

525  

526  
#else
526  
#else
527  

527  

// Portable (non-SSE2) version.
//
// Skips leading JSON whitespace (space, '\n', '\r', '\t') in [p, end) and
// returns a pointer to the first byte that is not whitespace, or `end`
// when the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    while( p != end )
    {
        char const c = *p;
        if( c != ' ' && c != '\n' && c != '\r' && c != '\t' ) break;
        ++p;
    }

    return p;
}
539  

539  

540  
#endif
540  
#endif
541  

541  

542  
} // detail
542  
} // detail
543  
} // namespace json
543  
} // namespace json
544  
} // namespace boost
544  
} // namespace boost
545  

545  

546  
#endif
546  
#endif