1
1
#include " atoi.h"
2
+ #include " atoi_impl.h"
2
3
3
4
#include " zoo/swar/associative_iteration.h"
4
5
@@ -53,6 +54,21 @@ uint32_t calculateBase10(zoo::swar::SWAR<8, uint64_t> convertedToIntegers) noexc
53
54
return uint32_t (by10001base2to32.value () >> 32 );
54
55
}
55
56
57
+ uint64_t calculateBase10 (zoo::swar::SWAR<8 , __uint128_t > convertedToIntegers) noexcept {
58
+ auto by11base256 = convertedToIntegers.multiply (256 *10 + 1 );
59
+ auto bytePairs = zoo::swar::doublePrecision (by11base256).odd ;
60
+ auto by101base2to16 = bytePairs.multiply (1 + (100 << 16 ));
61
+ auto byteQuads = zoo::swar::doublePrecision (by101base2to16).odd ;
62
+ auto by10001base2to32 = byteQuads.multiply (1 + (10000ull << 32 ));
63
+ // Now, truly work with 128 bits: combine two 32 bit results, each
64
+ // corresponding to 8 bytes of inputs, into the the 64 bit result by
65
+ // scaling one by 10^8
66
+ auto byteOcts = zoo::swar::doublePrecision (by10001base2to32).odd ;
67
+ auto byHundredMillionBase2to64 =
68
+ byteOcts.multiply (1 + (__uint128_t (100'000'000 ) << 64 ));
69
+ return uint64_t (byHundredMillionBase2to64.value () >> 64 );
70
+ }
71
+
56
72
// Note: eight digits can represent from 0 to (10^8) - 1, the logarithm base 2
57
73
// of 10^8 is slightly less than 27, thus, only 27 bits are needed.
58
74
uint32_t lemire_as_zoo_swar (const char *chars) noexcept {
@@ -104,25 +120,6 @@ std::size_t leadingSpacesCountAligned(S bytes) noexcept {
104
120
return rv;
105
121
}
106
122
107
/// @brief Copies the whole aligned block that contains the given address
/// @tparam PtrT pointee type of the incoming pointer
/// @tparam Block type of the block; its alignment must equal its size
/// @param pointerInsideBlock address inside the block, possibly misaligned
/// @param b destination that receives sizeof(Block) bytes read from the
/// aligned base of the block
/// @return the aligned base of the block together with the distance, in
/// bytes, from that base to pointerInsideBlock
/// @note the copy starts at the aligned base, so bytes located before
/// pointerInsideBlock are read as well
template<typename PtrT, typename Block>
std::tuple<PtrT *, int>
blockAlignedLoad(PtrT *pointerInsideBlock, Block *b) {
    constexpr auto BlockSize = sizeof(Block);
    // rounding the address down to a multiple of the alignment only yields
    // the base of a whole block when both quantities coincide
    static_assert(alignof(Block) == BlockSize);

    const auto address = reinterpret_cast<uintptr_t>(pointerInsideBlock);
    const auto offsetInBlock = address % alignof(Block);
    auto *blockStart = reinterpret_cast<PtrT *>(address - offsetInBlock);

    memcpy(b, blockStart, BlockSize);
    return { blockStart, offsetInBlock };
}
125
-
126
123
std::size_t leadingSpacesCount (const char *p) noexcept {
127
124
using S = swar::SWAR<8 , uint64_t >;
128
125
S bytes;
@@ -173,11 +170,33 @@ auto leadingDigitsCount(const char *p) noexcept {
173
170
}
174
171
}
175
172
176
- int c_strToI (const char *str) noexcept {
177
- constexpr static std::array<int , 8 > LastFactor = {
178
- 1 , 10 , 100 , 1000 ,
179
- 10'000 , 100'000 , 1000'000 , 10'000'000
180
- };
173
+ namespace impl {
174
+
175
/// Compile-time parameters of the conversion, selected by the result type.
///
/// - NPositions: number of entries in the table of powers of ten
/// - PowersOf10Array: the type of that table
/// - DoublePrecision: unsigned integer used as the base type of the SWAR
///   block the conversion loads; it is twice as wide as the result
template<typename> struct ConversionTraits;

template<> struct ConversionTraits<int32_t> {
    constexpr static auto NPositions = 9; // from 10^0 to 10^8
    using PowersOf10Array = std::array<int32_t, NPositions>;
    using DoublePrecision = uint64_t;
};

template<> struct ConversionTraits<int64_t> {
    constexpr static auto NPositions = 17; // from 10^0 to 10^16
    using PowersOf10Array = std::array<int64_t, NPositions>;
    using DoublePrecision = __uint128_t;
};

/// Builds the table { 10^0, 10^1, ..., 10^(NPositions - 1) } for Result.
///
/// The function is constexpr so that the table can be a compile-time
/// constant, as the hand-written table it replaces was, instead of being
/// recomputed on every call.
///
/// @tparam Result a type for which ConversionTraits is specialized
/// @return the table, as a ConversionTraits<Result>::PowersOf10Array
template<typename Result>
constexpr auto PowersOf10Array() noexcept {
    using Traits = ConversionTraits<Result>;
    // aggregate initialization: element 0 is 10^0, the rest start at zero
    typename Traits::PowersOf10Array rv{1};
    // bounded by the array's own size to avoid comparing the index against
    // the signed NPositions
    for (std::size_t i = 1; i < rv.size(); ++i) {
        rv[i] = rv[i - 1] * 10;
    }
    return rv;
}
196
+
197
+ template <typename Return>
198
+ Return c_strToIntegral (const char *str) noexcept {
199
+ auto LastFactor = PowersOf10Array<Return>();
181
200
auto leadingSpaces = leadingSpacesCount (str);
182
201
auto s = str + leadingSpaces;
183
202
auto sign = 1 ;
@@ -187,14 +206,19 @@ int c_strToI(const char *str) noexcept {
187
206
case ' +' : ++s; break ;
188
207
default : ;
189
208
}
190
- using S = swar::SWAR<8 , uint64_t >;
209
+
210
+ using SWAR_BaseType = typename ConversionTraits<Return>::DoublePrecision;
211
+ constexpr auto
212
+ NBytes = sizeof (SWAR_BaseType),
213
+ NBitsPerByte = 8ul ; // 8 bits per byte
214
+ using S = swar::SWAR<NBitsPerByte, SWAR_BaseType>;
191
215
S bytes;
192
216
auto [base, misalignment] = blockAlignedLoad (s, &bytes.m_v );
193
- auto bitDisplacement = 8 * misalignment;
217
+ auto bitDisplacement = NBitsPerByte * misalignment;
194
218
constexpr static S
195
- AllZeroCharacter{meta::BitmaskMaker<uint64_t , ' 0' , 8 >::value},
219
+ AllZeroCharacter{meta::BitmaskMaker<SWAR_BaseType , ' 0' , NBitsPerByte >::value},
196
220
AllOn = ~S{0 };
197
- // blit the zero-characters to the misaligned part
221
+
198
222
auto mask = S{AllOn.value () << bitDisplacement};
199
223
auto misalignedEliminated = bytes & mask;
200
224
auto zeroCharactersIntroduced = AllZeroCharacter & ~mask;
@@ -210,23 +234,35 @@ int c_strToI(const char *str) noexcept {
210
234
auto nonDigitIndex = nonDigits.lsbIndex ();
211
235
auto asIntegers = bytes - AllZeroCharacter; // upper lanes garbage
212
236
auto integersInHighLanes =
213
- // allow complete clearing of the 8 bytes by doing 2 shifts,
214
- // since it is UB to shift 64 bits.
215
- asIntegers.shiftLanesLeft (7 - nonDigitIndex).shiftLanesLeft (1 );
237
+ // split the shift in two steps because if nonDigitIndex is
238
+ // zero, then you'd shift all bits, this would result in U.B.
239
+ // for a single shift
240
+ asIntegers.shiftLanesLeft (NBytes - 1 - nonDigitIndex)
241
+ .shiftLanesLeft (1 );
216
242
auto inBase10 = calculateBase10 (integersInHighLanes);
217
243
auto scaledAccumulator = accumulator * LastFactor[nonDigitIndex];
218
- return int ((scaledAccumulator + inBase10) * sign);
244
+ return Return ((scaledAccumulator + inBase10) * sign);
219
245
}
220
- // all 8 bytes are digits
246
+ // all bytes are digits
221
247
auto asIntegers = bytes - AllZeroCharacter;
222
- accumulator *= 100'000'000 ;
248
+ accumulator *= LastFactor. back () ;
223
249
auto inBase10 = calculateBase10 (asIntegers);
224
250
accumulator += inBase10;
225
- base += 8 ;
226
- memcpy (&bytes.m_v , base, 8 );
251
+ base += NBytes ;
252
+ memcpy (&bytes.m_v , base, NBytes );
227
253
}
228
254
}
229
255
256
+ }
257
+
258
+ int c_strToI (const char *str) noexcept {
259
+ return impl::c_strToIntegral<int >(str);
260
+ }
261
+
262
+ int64_t c_strToL (const char *str) noexcept {
263
+ return impl::c_strToIntegral<int64_t >(str);
264
+ }
265
+
230
266
// / \brief Helper function to fix the non-string part of block
231
267
template <typename S>
232
268
S adjustMisalignmentFor_strlen (S data, int misalignment) {
@@ -252,7 +288,7 @@ std::size_t c_strLength(const char *s) {
252
288
253
289
auto indexOfFirstTrue = [](auto bs) { return bs.lsbIndex (); };
254
290
255
- // Misalignment must be taken into account because a SWAR read is
291
+ // Misalignment must be taken into account because a SWAR read is
256
292
// speculative, it might read bytes outside of the actual string.
257
293
// It is safe to read within the page where the string occurs, and to
258
294
// guarantee that, simply make aligned reads because the size of the SWAR
0 commit comments