File Coverage

lib/Crypt/komihash.h
Criterion Covered Total %
statement 54 92 58.7
branch 17 36 47.2
condition n/a
subroutine n/a
pod n/a
total 71 128 55.4


line stmt bran cond sub pod time code
1             /**
2             * @file komihash.h
3             *
4             * @version 5.10
5             *
6             * @brief The inclusion file for the "komihash" 64-bit hash function,
7             * "komirand" 64-bit PRNG, and streamed "komihash".
8             *
9             * This function is named the way it is named is to honor the Komi Republic
10             * (located in Russia), native to the author.
11             *
12             * Description is available at https://github.com/avaneev/komihash
13             *
14             * E-mail: aleksey.vaneev@gmail.com or info@voxengo.com
15             *
16             * LICENSE:
17             *
18             * Copyright (c) 2021-2024 Aleksey Vaneev
19             *
20             * Permission is hereby granted, free of charge, to any person obtaining a
21             * copy of this software and associated documentation files (the "Software"),
22             * to deal in the Software without restriction, including without limitation
23             * the rights to use, copy, modify, merge, publish, distribute, sublicense,
24             * and/or sell copies of the Software, and to permit persons to whom the
25             * Software is furnished to do so, subject to the following conditions:
26             *
27             * The above copyright notice and this permission notice shall be included in
28             * all copies or substantial portions of the Software.
29             *
30             * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
31             * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
32             * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
33             * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
34             * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
35             * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
36             * DEALINGS IN THE SOFTWARE.
37             */
38            
39             #ifndef KOMIHASH_INCLUDED
40             #define KOMIHASH_INCLUDED
41            
42             #include
43             #include
44            
45             #define KOMIHASH_VER_STR "5.10" ///< KOMIHASH source code version string.
46            
47             /**
48             * @def KOMIHASH_LITTLE_ENDIAN
49             * @brief Endianness definition macro, can be used as a logical constant.
50             *
51             * Can be defined externally (e.g., =1, if endianness-correction and
52             * hash-value portability are unnecessary in any case, to reduce overhead).
53             */
54            
55             #if !defined( KOMIHASH_LITTLE_ENDIAN )
56             #if defined( __LITTLE_ENDIAN__ ) || defined( __LITTLE_ENDIAN ) || \
57             defined( _LITTLE_ENDIAN ) || defined( _WIN32 ) || defined( i386 ) || \
58             defined( __i386 ) || defined( __i386__ ) || defined( __x86_64__ ) || \
59             ( defined( __BYTE_ORDER__ ) && \
60             __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ )
61            
62             #define KOMIHASH_LITTLE_ENDIAN 1
63            
64             #elif defined( __BIG_ENDIAN__ ) || defined( __BIG_ENDIAN ) || \
65             defined( _BIG_ENDIAN ) || defined( __SYSC_ZARCH__ ) || \
66             defined( __zarch__ ) || defined( __s390x__ ) || defined( __sparc ) || \
67             defined( __sparc__ ) || ( defined( __BYTE_ORDER__ ) && \
68             __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ )
69            
70             #define KOMIHASH_LITTLE_ENDIAN 0
71            
72             #else // defined( __BIG_ENDIAN__ )
73            
74             #warning KOMIHASH: cannot determine endianness, assuming little-endian.
75            
76             #define KOMIHASH_LITTLE_ENDIAN 1
77            
78             #endif // defined( __BIG_ENDIAN__ )
79             #endif // !defined( KOMIHASH_LITTLE_ENDIAN )
80            
81             /**
82             * @def KOMIHASH_GCC_BUILTINS
83             * @brief Macro that denotes availability of GCC-style built-in functions.
84             */
85            
86             #if defined( __GNUC__ ) || defined( __clang__ ) || \
87             defined( __IBMC__ ) || defined( __IBMCPP__ ) || defined( __COMPCERT__ )
88            
89             #define KOMIHASH_GCC_BUILTINS
90            
91             #endif // GCC built-ins check
92            
93             /**
94             * @def KOMIHASH_EC32( v )
95             * @brief Macro that appies 32-bit byte-swapping, for endianness-correction.
96             * @param v Value to byte-swap.
97             */
98            
99             /**
100             * @def KOMIHASH_EC64( v )
101             * @brief Macro that appies 64-bit byte-swapping, for endianness-correction.
102             * @param v Value to byte-swap.
103             */
104            
105             #if KOMIHASH_LITTLE_ENDIAN
106            
107             #define KOMIHASH_EC32( v ) ( v )
108             #define KOMIHASH_EC64( v ) ( v )
109            
110             #else // KOMIHASH_LITTLE_ENDIAN
111            
112             #if defined( KOMIHASH_GCC_BUILTINS )
113            
114             #define KOMIHASH_EC32( v ) __builtin_bswap32( v )
115             #define KOMIHASH_EC64( v ) __builtin_bswap64( v )
116            
117             #elif defined( _MSC_VER )
118            
119             #include
120            
121             #define KOMIHASH_EC32( v ) _byteswap_ulong( v )
122             #define KOMIHASH_EC64( v ) _byteswap_uint64( v )
123            
124             #else // defined( _MSC_VER )
125            
126             #define KOMIHASH_EC32( v ) ( \
127             ( v & 0xFF000000 ) >> 24 | \
128             ( v & 0x00FF0000 ) >> 8 | \
129             ( v & 0x0000FF00 ) << 8 | \
130             ( v & 0x000000FF ) << 24 )
131            
132             #define KOMIHASH_EC64( v ) ( \
133             ( v & 0xFF00000000000000 ) >> 56 | \
134             ( v & 0x00FF000000000000 ) >> 40 | \
135             ( v & 0x0000FF0000000000 ) >> 24 | \
136             ( v & 0x000000FF00000000 ) >> 8 | \
137             ( v & 0x00000000FF000000 ) << 8 | \
138             ( v & 0x0000000000FF0000 ) << 24 | \
139             ( v & 0x000000000000FF00 ) << 40 | \
140             ( v & 0x00000000000000FF ) << 56 )
141            
142             #endif // defined( _MSC_VER )
143            
144             #endif // KOMIHASH_LITTLE_ENDIAN
145            
146             /**
147             * @def KOMIHASH_LIKELY( x )
148             * @brief Likelihood macro that is used for manually-guided
149             * micro-optimization.
150             * @param x Expression that is likely to be evaluated to "true".
151             */
152            
153             /**
154             * @def KOMIHASH_UNLIKELY( x )
155             * @brief Unlikelihood macro that is used for manually-guided
156             * micro-optimization.
157             * @param x Expression that is unlikely to be evaluated to "true".
158             */
159            
160             #if defined( KOMIHASH_GCC_BUILTINS )
161            
162             #define KOMIHASH_LIKELY( x ) __builtin_expect( x, 1 )
163             #define KOMIHASH_UNLIKELY( x ) __builtin_expect( x, 0 )
164            
165             #else // defined( KOMIHASH_GCC_BUILTINS )
166            
167             #define KOMIHASH_LIKELY( x ) ( x )
168             #define KOMIHASH_UNLIKELY( x ) ( x )
169            
170             #endif // defined( KOMIHASH_GCC_BUILTINS )
171            
172             /**
173             * @def KOMIHASH_PREFETCH( a )
174             * @brief Memory address prefetch macro, to preload some data into CPU cache.
175             *
176             * Temporal locality=2, in case a collision resolution would be necessary,
177             * or for a subsequent disk write.
178             *
179             * @param a Prefetch address.
180             */
181            
182             #if defined( KOMIHASH_GCC_BUILTINS ) && !defined( __COMPCERT__ )
183            
184             #define KOMIHASH_PREFETCH( a ) __builtin_prefetch( a, 0, 2 )
185            
186             #else // Prefetch macro
187            
188             #define KOMIHASH_PREFETCH( a )
189            
190             #endif // Prefetch macro
191            
192             /**
193             * @def KOMIHASH_PREFETCH_1
194             * @brief Compiler-dependent address prefetch macro, ordered position 1.
195             * @param a Prefetch address.
196             */
197            
198             /**
199             * @def KOMIHASH_PREFETCH_2
200             * @brief Compiler-dependent address prefetch macro, ordered position 2.
201             * @param a Prefetch address.
202             */
203            
204             #if defined( __clang__ )
205             #define KOMIHASH_PREFETCH_1( a ) KOMIHASH_PREFETCH( a )
206             #define KOMIHASH_PREFETCH_2( a )
207             #else // defined( __clang__ )
208             #define KOMIHASH_PREFETCH_1( a )
209             #define KOMIHASH_PREFETCH_2( a ) KOMIHASH_PREFETCH( a )
210             #endif // defined( __clang__ )
211            
212             /**
213             * @def KOMIHASH_INLINE
214             * @brief Macro to force code inlining.
215             */
216            
217             #if defined( KOMIHASH_GCC_BUILTINS )
218            
219             #define KOMIHASH_INLINE inline __attribute__((always_inline))
220            
221             #elif defined( _MSC_VER )
222            
223             #define KOMIHASH_INLINE inline __forceinline
224            
225             #else // defined( _MSC_VER )
226            
227             #define KOMIHASH_INLINE inline
228            
229             #endif // defined( _MSC_VER )
230            
231             /**
232             * @brief Load unsigned 32-bit value with endianness-correction.
233             *
234             * An auxiliary function that returns an unsigned 32-bit value created out of
235             * a sequence of bytes in memory. This function is used to convert endianness
236             * of in-memory 32-bit unsigned values, and to avoid unaligned memory
237             * accesses.
238             *
239             * @param p Pointer to 4 bytes in memory. Alignment is unimportant.
240             * @return Endianness-corrected 32-bit value from memory.
241             */
242            
243             static KOMIHASH_INLINE uint32_t kh_lu32ec( const uint8_t* const p )
244             {
245             uint32_t v;
246 68           memcpy( &v, p, 4 );
247            
248 68           return( KOMIHASH_EC32( v ));
249             }
250            
251             /**
252             * @brief Load unsigned 64-bit value with endianness-correction.
253             *
254             * An auxiliary function that returns an unsigned 64-bit value created out of
255             * a sequence of bytes in memory. This function is used to convert endianness
256             * of in-memory 64-bit unsigned values, and to avoid unaligned memory
257             * accesses.
258             *
259             * @param p Pointer to 8 bytes in memory. Alignment is unimportant.
260             * @return Endianness-corrected 64-bit value from memory.
261             */
262            
263             static KOMIHASH_INLINE uint64_t kh_lu64ec( const uint8_t* const p )
264             {
265             uint64_t v;
266 41           memcpy( &v, p, 8 );
267            
268 41           return( KOMIHASH_EC64( v ));
269             }
270            
271             /**
272             * @brief Load unsigned 64-bit value with padding (Msg-3 reads).
273             *
274             * Function builds an unsigned 64-bit value out of remaining bytes in a
275             * message, and pads it with the "final byte". This function can only be
276             * called if less than 8 bytes are left to read. The message should be "long",
277             * permitting `Msg[ -3 ]` reads.
278             *
279             * @param Msg Message pointer, alignment is unimportant.
280             * @param MsgLen Message's remaining length, in bytes; can be 0.
281             * @return Final byte-padded value from the message.
282             */
283            
284             static KOMIHASH_INLINE uint64_t kh_lpu64ec_l3( const uint8_t* const Msg,
285             const size_t MsgLen )
286             {
287 26           const int ml8 = (int) ( MsgLen * 8 );
288            
289 26           if( MsgLen < 4 )
290             {
291 11           const uint8_t* const Msg3 = Msg + MsgLen - 3;
292 11           const uint64_t m = (uint64_t) Msg3[ 0 ] | (uint64_t) Msg3[ 1 ] << 8 |
293 11           (uint64_t) Msg3[ 2 ] << 16;
294            
295 11           return( (uint64_t) 1 << ml8 | m >> ( 24 - ml8 ));
296             }
297            
298 30           const uint64_t mh = kh_lu32ec( Msg + MsgLen - 4 );
299 15           const uint64_t ml = kh_lu32ec( Msg );
300            
301 15           return( (uint64_t) 1 << ml8 | ml | ( mh >> ( 64 - ml8 )) << 32 );
302             }
303            
304             /**
305             * @brief Load unsigned 64-bit value with padding (non-zero message length).
306             *
307             * Function builds an unsigned 64-bit value out of remaining bytes in a
308             * message, and pads it with the "final byte". This function can only be
309             * called if less than 8 bytes are left to read. Can be used on "short"
310             * messages, but `MsgLen` should be greater than 0.
311             *
312             * @param Msg Message pointer, alignment is unimportant.
313             * @param MsgLen Message's remaining length, in bytes; cannot be 0.
314             * @return Final byte-padded value from the message.
315             */
316            
317             static KOMIHASH_INLINE uint64_t kh_lpu64ec_nz( const uint8_t* const Msg,
318             const size_t MsgLen )
319             {
320 21           const int ml8 = (int) ( MsgLen * 8 );
321            
322 21 100         if( MsgLen < 4 )
323             {
324 5           uint64_t m = Msg[ 0 ];
325            
326 5 100         if( MsgLen > 1 )
327             {
328 3           m |= (uint64_t) Msg[ 1 ] << 8;
329            
330 3 100         if( MsgLen > 2 )
331             {
332 2           m |= (uint64_t) Msg[ 2 ] << 16;
333             }
334             }
335            
336 5           return( (uint64_t) 1 << ml8 | m );
337             }
338            
339 32           const uint64_t mh = kh_lu32ec( Msg + MsgLen - 4 );
340 16           const uint64_t ml = kh_lu32ec( Msg );
341            
342 16           return( (uint64_t) 1 << ml8 | ml | ( mh >> ( 64 - ml8 )) << 32 );
343             }
344            
345             /**
346             * @brief Load unsigned 64-bit value with padding (Msg-4 reads).
347             *
348             * Function builds an unsigned 64-bit value out of remaining bytes in a
349             * message, and pads it with the "final byte". This function can only be
350             * called if less than 8 bytes are left to read. The message should be "long",
351             * permitting `Msg[ -4 ]` reads.
352             *
353             * @param Msg Message pointer, alignment is unimportant.
354             * @param MsgLen Message's remaining length, in bytes; can be 0.
355             * @return Final byte-padded value from the message.
356             */
357            
358             static KOMIHASH_INLINE uint64_t kh_lpu64ec_l4( const uint8_t* const Msg,
359             const size_t MsgLen )
360             {
361 7           const int ml8 = (int) ( MsgLen * 8 );
362            
363 0 0         if( MsgLen < 5 )
364             {
365 6           const uint64_t m = kh_lu32ec( Msg + MsgLen - 4 );
366            
367 6           return( (uint64_t) 1 << ml8 | m >> ( 32 - ml8 ));
368             }
369            
370 1           const uint64_t m = kh_lu64ec( Msg + MsgLen - 8 );
371            
372 1           return( (uint64_t) 1 << ml8 | m >> ( 64 - ml8 ));
373             }
374            
375             /**
376             * @fn void kh_m128( uint64_t u, uint64_t v, uint64_t* rl, uint64_t* rha )
377             * @brief 64-bit by 64-bit unsigned multiplication with result accumulation.
378             *
379             * @param u Multiplier 1.
380             * @param v Multiplier 2.
381             * @param[out] rl The lower half of the 128-bit result.
382             * @param[in,out] rha The accumulator to receive the higher half of the
383             * 128-bit result.
384             */
385            
386             /**
387             * @def KOMIHASH_EMULU( u, v )
388             * @brief Macro for 32-bit by 32-bit unsigned multiplication with 64-bit
389             * result.
390             * @param u Multiplier 1.
391             * @param v Multiplier 2.
392             */
393            
394             #if defined( _MSC_VER ) && ( defined( _M_ARM64 ) || \
395             ( defined( _M_X64 ) && defined( __INTEL_COMPILER )))
396            
397             #include
398            
399             static KOMIHASH_INLINE void kh_m128( const uint64_t u, const uint64_t v,
400             uint64_t* const rl, uint64_t* const rha )
401             {
402             *rl = u * v;
403             *rha += __umulh( u, v );
404             }
405            
406             #elif defined( _MSC_VER ) && ( defined( _M_X64 ) || defined( _M_IA64 ))
407            
408             #include
409             #pragma intrinsic(_umul128)
410            
411             static KOMIHASH_INLINE void kh_m128( const uint64_t u, const uint64_t v,
412             uint64_t* const rl, uint64_t* const rha )
413             {
414             uint64_t rh;
415             *rl = _umul128( u, v, &rh );
416             *rha += rh;
417             }
418            
419             #elif defined( __SIZEOF_INT128__ )
420            
421             static KOMIHASH_INLINE void kh_m128( const uint64_t u, const uint64_t v,
422             uint64_t* const rl, uint64_t* const rha )
423             {
424 169           const unsigned __int128 r = (unsigned __int128) u * v;
425            
426 169           *rha += (uint64_t) ( r >> 64 );
427 169           *rl = (uint64_t) r;
428 47           }
429            
430             #elif ( defined( __IBMC__ ) || defined( __IBMCPP__ )) && defined( __LP64__ )
431            
432             static KOMIHASH_INLINE void kh_m128( const uint64_t u, const uint64_t v,
433             uint64_t* const rl, uint64_t* const rha )
434             {
435             *rl = u * v;
436             *rha += __mulhdu( u, v );
437             }
438            
439             #else // defined( __IBMC__ )
440            
441             // _umul128() code for 32-bit systems, adapted from Hacker's Delight,
442             // Henry S. Warren, Jr.
443            
444             #if defined( _MSC_VER ) && !defined( __INTEL_COMPILER )
445            
446             #include
447             #pragma intrinsic(__emulu)
448            
449             #define KOMIHASH_EMULU( u, v ) __emulu( u, v )
450            
451             #else // defined( _MSC_VER ) && !defined( __INTEL_COMPILER )
452            
453             #define KOMIHASH_EMULU( u, v ) ( (uint64_t) ( u ) * ( v ))
454            
455             #endif // defined( _MSC_VER ) && !defined( __INTEL_COMPILER )
456            
457             static inline void kh_m128( const uint64_t u, const uint64_t v,
458             uint64_t* const rl, uint64_t* const rha )
459             {
460             *rl = u * v;
461            
462             const uint32_t u0 = (uint32_t) u;
463             const uint32_t v0 = (uint32_t) v;
464             const uint64_t w0 = KOMIHASH_EMULU( u0, v0 );
465             const uint32_t u1 = (uint32_t) ( u >> 32 );
466             const uint32_t v1 = (uint32_t) ( v >> 32 );
467             const uint64_t t = KOMIHASH_EMULU( u1, v0 ) + (uint32_t) ( w0 >> 32 );
468             const uint64_t w1 = KOMIHASH_EMULU( u0, v1 ) + (uint32_t) t;
469            
470             *rha += KOMIHASH_EMULU( u1, v1 ) + (uint32_t) ( w1 >> 32 ) +
471             (uint32_t) ( t >> 32 );
472             }
473            
474             #endif // defined( __IBMC__ )
475            
476             /**
477             * @def KOMIHASH_HASHROUND()
478             * @brief Macro for a common hashing round without input.
479             *
480             * The three instructions in this macro (multiply, add, and XOR) represent the
481             * simplest constantless PRNG, scalable to any even-sized state variables,
482             * with the `Seed1` being the PRNG output (2^64 PRNG period). It passes
483             * `PractRand` tests with rare non-systematic "unusual" evaluations.
484             *
485             * To make this PRNG reliable, self-starting, and eliminate a risk of
486             * stopping, the following variant can be used, which adds a "register
487             * checker-board", a source of raw entropy. The PRNG is available as the
488             * komirand() function. Not required for hashing (but works for it) since the
489             * input entropy is usually available in abundance during hashing.
490             *
491             * `Seed5 += 0xAAAAAAAAAAAAAAAA;`
492             *
493             * (the `0xAAAA...` constant should match register's size; essentially, it is
494             * a replication of the `10` bit-pair; it is not an arbitrary constant).
495             */
496            
497             #define KOMIHASH_HASHROUND() \
498             kh_m128( Seed1, Seed5, &Seed1, &Seed5 ); \
499             Seed1 ^= Seed5;
500            
501             /**
502             * @def KOMIHASH_HASH16( m )
503             * @brief Macro for a common hashing round with 16-byte input.
504             * @param m Message pointer, alignment is unimportant.
505             */
506            
507             #define KOMIHASH_HASH16( m ) \
508             kh_m128( Seed1 ^ kh_lu64ec( m ), \
509             Seed5 ^ kh_lu64ec( m + 8 ), &Seed1, &Seed5 ); \
510             Seed1 ^= Seed5;
511            
512             /**
513             * @def KOMIHASH_HASHFIN()
514             * @brief Macro for common hashing finalization round.
515             *
516             * The final hashing input is expected in the `r1h` and `r2h` temporary
517             * variables. The macro inserts the function return instruction.
518             */
519            
520             #define KOMIHASH_HASHFIN() \
521             kh_m128( r1h, r2h, &Seed1, &Seed5 ); \
522             Seed1 ^= Seed5; \
523             KOMIHASH_HASHROUND(); \
524             return( Seed1 );
525            
526             /**
527             * @def KOMIHASH_HASHLOOP64()
528             * @brief Macro for a common 64-byte full-performance hashing loop.
529             *
530             * Expects `Msg` and `MsgLen` values (greater than 63), requires initialized
531             * `Seed1-8` values.
532             *
533             * The "shifting" arrangement of `Seed1-4` XORs (below) does not increase
534             * individual `SeedN` PRNG period beyond 2^64, but reduces a chance of any
535             * occassional synchronization between PRNG lanes happening. Practically,
536             * `Seed1-4` together become a single "fused" 256-bit PRNG value, having 2^66
537             * summary PRNG period.
538             */
539            
540             #define KOMIHASH_HASHLOOP64() \
541             do \
542             { \
543             KOMIHASH_PREFETCH_1( Msg ); \
544             \
545             kh_m128( Seed1 ^ kh_lu64ec( Msg ), \
546             Seed5 ^ kh_lu64ec( Msg + 32 ), &Seed1, &Seed5 ); \
547             \
548             kh_m128( Seed2 ^ kh_lu64ec( Msg + 8 ), \
549             Seed6 ^ kh_lu64ec( Msg + 40 ), &Seed2, &Seed6 ); \
550             \
551             kh_m128( Seed3 ^ kh_lu64ec( Msg + 16 ), \
552             Seed7 ^ kh_lu64ec( Msg + 48 ), &Seed3, &Seed7 ); \
553             \
554             kh_m128( Seed4 ^ kh_lu64ec( Msg + 24 ), \
555             Seed8 ^ kh_lu64ec( Msg + 56 ), &Seed4, &Seed8 ); \
556             \
557             Msg += 64; \
558             MsgLen -= 64; \
559             \
560             KOMIHASH_PREFETCH_2( Msg ); \
561             \
562             Seed2 ^= Seed5; \
563             Seed3 ^= Seed6; \
564             Seed4 ^= Seed7; \
565             Seed1 ^= Seed8; \
566             \
567             } while( KOMIHASH_LIKELY( MsgLen > 63 ));
568            
569             /**
570             * @brief The hashing epilogue function (for internal use).
571             *
572             * @param Msg Pointer to the remaining part of the message.
573             * @param MsgLen Remaining part's length, can be 0.
574             * @param Seed1 Latest Seed1 value.
575             * @param Seed5 Latest Seed5 value.
576             * @return 64-bit hash value.
577             */
578            
579             static KOMIHASH_INLINE uint64_t komihash_epi( const uint8_t* Msg,
580             size_t MsgLen, uint64_t Seed1, uint64_t Seed5 )
581             {
582             uint64_t r1h, r2h;
583            
584 0           if( KOMIHASH_LIKELY( MsgLen > 31 ))
585             {
586 0           KOMIHASH_HASH16( Msg );
587 0           KOMIHASH_HASH16( Msg + 16 );
588            
589 0           Msg += 32;
590 0           MsgLen -= 32;
591             }
592            
593 0 0         if( MsgLen > 15 )
594             {
595 0           KOMIHASH_HASH16( Msg );
596            
597 0           Msg += 16;
598 0           MsgLen -= 16;
599             }
600            
601 0 0         if( MsgLen > 7 )
602             {
603 0 0         r2h = Seed5 ^ kh_lpu64ec_l4( Msg + 8, MsgLen - 8 );
604 0           r1h = Seed1 ^ kh_lu64ec( Msg );
605             }
606             else
607             {
608 0           r1h = Seed1 ^ kh_lpu64ec_l4( Msg, MsgLen );
609 0           r2h = Seed5;
610             }
611            
612 0           KOMIHASH_HASHFIN();
613             }
614            
615             /**
616             * @brief KOMIHASH 64-bit hash function.
617             *
618             * Produces and returns a 64-bit hash value of the specified message, string,
619             * or binary data block. Designed for 64-bit hash-table and hash-map uses.
620             * Produces identical hashes on both big- and little-endian systems.
621             *
622             * @param Msg0 The message to produce a hash from. The alignment of this
623             * pointer is unimportant. It is valid to pass 0 when `MsgLen` equals 0
624             * (assuming that compiler's implementation of the address prefetch permits
625             * the use of zero address).
626             * @param MsgLen Message's length, in bytes, can be zero.
627             * @param UseSeed Optional value, to use instead of the default seed. To use
628             * the default seed, set to 0. The UseSeed value can have any bit length and
629             * statistical quality, and is used only as an additional entropy source. May
630             * need endianness-correction via KOMIHASH_EC64(), if this value is shared
631             * between big- and little-endian systems.
632             * @return 64-bit hash of the input data. Should be endianness-corrected when
633             * this value is shared between big- and little-endian systems.
634             */
635            
636 54           static inline uint64_t komihash( const void* const Msg0, size_t MsgLen,
637             const uint64_t UseSeed )
638             {
639 54           const uint8_t* Msg = (const uint8_t*) Msg0;
640            
641             // The seeds are initialized to the first mantissa bits of PI.
642            
643 54           uint64_t Seed1 = 0x243F6A8885A308D3 ^ ( UseSeed & 0x5555555555555555 );
644 54           uint64_t Seed5 = 0x452821E638D01377 ^ ( UseSeed & 0xAAAAAAAAAAAAAAAA );
645             uint64_t r1h, r2h;
646            
647 54           KOMIHASH_PREFETCH( Msg );
648            
649 54           KOMIHASH_HASHROUND(); // Required for Perlin Noise.
650            
651 54 100         if( KOMIHASH_LIKELY( MsgLen < 16 ))
652             {
653 47           r1h = Seed1;
654 47           r2h = Seed5;
655            
656 47 100         if( MsgLen > 7 )
657             {
658             // The following two XOR instructions are equivalent to mixing a
659             // message with a cryptographic one-time-pad (bitwise modulo 2
660             // addition). Message's statistics and distribution are thus
661             // unimportant.
662            
663 52 100         r2h ^= kh_lpu64ec_l3( Msg + 8, MsgLen - 8 );
664 26           r1h ^= kh_lu64ec( Msg );
665             }
666             else
667 21 50         if( KOMIHASH_LIKELY( MsgLen != 0 ))
668             {
669 21           r1h ^= kh_lpu64ec_nz( Msg, MsgLen );
670             }
671            
672 47           KOMIHASH_HASHFIN();
673             }
674            
675 7 50         if( KOMIHASH_LIKELY( MsgLen < 32 ))
676             {
677 21           KOMIHASH_HASH16( Msg );
678            
679 7 50         if( MsgLen > 23 )
680             {
681 0 0         r2h = Seed5 ^ kh_lpu64ec_l4( Msg + 24, MsgLen - 24 );
682 0           r1h = Seed1 ^ kh_lu64ec( Msg + 16 );
683 0           KOMIHASH_HASHFIN();
684             }
685             else
686             {
687 7 100         r1h = Seed1 ^ kh_lpu64ec_l4( Msg + 16, MsgLen - 16 );
688 7           r2h = Seed5;
689 7           KOMIHASH_HASHFIN();
690             }
691             }
692            
693 0 0         if( KOMIHASH_LIKELY( MsgLen > 63 ))
694             {
695 0           uint64_t Seed2 = 0x13198A2E03707344 ^ Seed1;
696 0           uint64_t Seed3 = 0xA4093822299F31D0 ^ Seed1;
697 0           uint64_t Seed4 = 0x082EFA98EC4E6C89 ^ Seed1;
698 0           uint64_t Seed6 = 0xBE5466CF34E90C6C ^ Seed5;
699 0           uint64_t Seed7 = 0xC0AC29B7C97C50DD ^ Seed5;
700 0           uint64_t Seed8 = 0x3F84D5B5B5470917 ^ Seed5;
701            
702 0 0         KOMIHASH_HASHLOOP64();
703            
704 0           Seed5 ^= Seed6 ^ Seed7 ^ Seed8;
705 0           Seed1 ^= Seed2 ^ Seed3 ^ Seed4;
706             }
707            
708 0 0         return( komihash_epi( Msg, MsgLen, Seed1, Seed5 ));
709             }
710            
711             /**
712             * @brief KOMIRAND 64-bit pseudo-random number generator.
713             *
714             * Simple, reliable, self-starting yet efficient PRNG, with 2^64 period.
715             * 0.62 cycles/byte performance. Self-starts in 4 iterations, which is a
716             * suggested "warming up" initialization before using its output.
717             *
718             * @param[in,out] Seed1 Seed value 1. Can be initialized to any value
719             * (even 0). This is the usual "PRNG seed" value.
720             * @param[in,out] Seed2 Seed value 2, a supporting variable. Best initialized
721             * to the same value as `Seed1`.
722             * @return The next uniformly-random 64-bit value.
723             */
724            
725             static KOMIHASH_INLINE uint64_t komirand( uint64_t* const Seed1,
726             uint64_t* const Seed2 )
727             {
728 0           uint64_t s1 = *Seed1;
729 0           uint64_t s2 = *Seed2;
730            
731 0           kh_m128( s1, s2, &s1, &s2 );
732 0           s2 += 0xAAAAAAAAAAAAAAAA;
733 0           s1 ^= s2;
734            
735 0           *Seed2 = s2;
736 0           *Seed1 = s1;
737            
738 0           return( s1 );
739             }
740            
741             /**
742             * @def KOMIHASH_BUFSIZE
743             * @brief Streamed hashing's buffer size, in bytes.
744             *
745             * Must be a multiple of 64, and not less than 128. Can be defined externally.
746             */
747            
748             #if !defined( KOMIHASH_BUFSIZE )
749            
750             #define KOMIHASH_BUFSIZE 768
751            
752             #endif // !defined( KOMIHASH_BUFSIZE )
753            
754             /**
755             * @brief Context structure for the streamed "komihash" hashing.
756             *
757             * The komihash_init() function should be called to initalize the structure
758             * before hashing. Note that the default buffer size is modest, permitting
759             * placement of this structure on stack. `Seed[ 0 ]` is used as `UseSeed`
760             * value storage.
761             */
762            
763             typedef struct {
764             uint8_t pb[ 8 ]; ///< Buffer's padding bytes, to avoid OOB.
765             uint8_t Buf[ KOMIHASH_BUFSIZE ]; ///< Buffer.
766             uint64_t Seed[ 8 ]; ///< Hashing state variables.
767             size_t BufFill; ///< Buffer fill count (position), in bytes.
768             size_t IsHashing; ///< 0 or 1, equals 1 if the actual hashing was started.
769             } komihash_stream_t;
770            
771             /**
772             * @brief Function initializes the streamed "komihash" hashing session.
773             *
774             * @param[out] ctx Pointer to the context structure.
775             * @param UseSeed Optional value, to use instead of the default seed. To use
776             * the default seed, set to 0. The UseSeed value can have any bit length and
777             * statistical quality, and is used only as an additional entropy source. May
778             * need endianness-correction via KOMIHASH_EC64(), if this value is shared
779             * between big- and little-endian systems.
780             */
781            
782             static inline void komihash_stream_init( komihash_stream_t* const ctx,
783             const uint64_t UseSeed )
784             {
785             ctx -> Seed[ 0 ] = UseSeed;
786             ctx -> BufFill = 0;
787             ctx -> IsHashing = 0;
788             }
789            
790             /**
791             * @brief Function updates the streamed hashing state with a new input data.
792             *
793             * @param[in,out] ctx Pointer to the context structure. The structure must be
794             * initialized via the komihash_stream_init() function.
795             * @param Msg0 The next part of the whole message being hashed. The alignment
796             * of this pointer is unimportant. It is valid to pass 0 when `MsgLen` equals
797             * 0.
798             * @param MsgLen Message's length, in bytes, can be zero.
799             */
800            
801             static inline void komihash_stream_update( komihash_stream_t* const ctx,
802             const void* const Msg0, size_t MsgLen )
803             {
804             const uint8_t* Msg = (const uint8_t*) Msg0;
805            
806             const uint8_t* SwMsg = 0;
807             size_t SwMsgLen = 0;
808             size_t BufFill = ctx -> BufFill;
809            
810             if( BufFill + MsgLen >= KOMIHASH_BUFSIZE && BufFill != 0 )
811             {
812             const size_t CopyLen = KOMIHASH_BUFSIZE - BufFill;
813             memcpy( ctx -> Buf + BufFill, Msg, CopyLen );
814             BufFill = 0;
815            
816             SwMsg = Msg + CopyLen;
817             SwMsgLen = MsgLen - CopyLen;
818            
819             Msg = ctx -> Buf;
820             MsgLen = KOMIHASH_BUFSIZE;
821             }
822            
823             if( BufFill == 0 )
824             {
825             while( MsgLen > 127 )
826             {
827             uint64_t Seed1, Seed2, Seed3, Seed4;
828             uint64_t Seed5, Seed6, Seed7, Seed8;
829            
830             KOMIHASH_PREFETCH_2( Msg );
831            
832             if( ctx -> IsHashing )
833             {
834             Seed1 = ctx -> Seed[ 0 ];
835             Seed2 = ctx -> Seed[ 1 ];
836             Seed3 = ctx -> Seed[ 2 ];
837             Seed4 = ctx -> Seed[ 3 ];
838             Seed5 = ctx -> Seed[ 4 ];
839             Seed6 = ctx -> Seed[ 5 ];
840             Seed7 = ctx -> Seed[ 6 ];
841             Seed8 = ctx -> Seed[ 7 ];
842             }
843             else
844             {
845             ctx -> IsHashing = 1;
846            
847             const uint64_t UseSeed = ctx -> Seed[ 0 ];
848             Seed1 = 0x243F6A8885A308D3 ^ ( UseSeed & 0x5555555555555555 );
849             Seed5 = 0x452821E638D01377 ^ ( UseSeed & 0xAAAAAAAAAAAAAAAA );
850            
851             KOMIHASH_HASHROUND();
852            
853             Seed2 = 0x13198A2E03707344 ^ Seed1;
854             Seed3 = 0xA4093822299F31D0 ^ Seed1;
855             Seed4 = 0x082EFA98EC4E6C89 ^ Seed1;
856             Seed6 = 0xBE5466CF34E90C6C ^ Seed5;
857             Seed7 = 0xC0AC29B7C97C50DD ^ Seed5;
858             Seed8 = 0x3F84D5B5B5470917 ^ Seed5;
859             }
860            
861             KOMIHASH_HASHLOOP64();
862            
863             ctx -> Seed[ 0 ] = Seed1;
864             ctx -> Seed[ 1 ] = Seed2;
865             ctx -> Seed[ 2 ] = Seed3;
866             ctx -> Seed[ 3 ] = Seed4;
867             ctx -> Seed[ 4 ] = Seed5;
868             ctx -> Seed[ 5 ] = Seed6;
869             ctx -> Seed[ 6 ] = Seed7;
870             ctx -> Seed[ 7 ] = Seed8;
871            
872             if( SwMsgLen == 0 )
873             {
874             if( MsgLen != 0 )
875             {
876             break;
877             }
878            
879             ctx -> BufFill = 0;
880             return;
881             }
882            
883             Msg = SwMsg;
884             MsgLen = SwMsgLen;
885             SwMsgLen = 0;
886             }
887             }
888            
889             ctx -> BufFill = BufFill + MsgLen;
890             uint8_t* op = ctx -> Buf + BufFill;
891            
892             while( MsgLen != 0 )
893             {
894             *op = *Msg;
895             Msg++;
896             op++;
897             MsgLen--;
898             }
899             }
900            
901             /**
902             * @brief Function finalizes the streamed "komihash" hashing session.
903             *
904             * Returns the resulting hash value of the previously hashed data. This value
905             * is equal to the value returned by the komihash() function for the same
906             * provided data.
907             *
908             * Note that since this function is non-destructive to the context structure,
909             * the function can be used to obtain intermediate hashes of the data stream
910             * being hashed, and the hashing can then be resumed.
911             *
912             * @param[in] ctx Pointer to the context structure. The structure must be
913             * initialized via the komihash_stream_init() function.
914             * @return 64-bit hash value. Should be endianness-corrected when this value
915             * is shared between big- and little-endian systems.
916             */
917            
918             static inline uint64_t komihash_stream_final( komihash_stream_t* const ctx )
919             {
920             const uint8_t* Msg = ctx -> Buf;
921             size_t MsgLen = ctx -> BufFill;
922            
923             if( ctx -> IsHashing == 0 )
924             {
925             return( komihash( Msg, MsgLen, ctx -> Seed[ 0 ]));
926             }
927            
928             ctx -> pb[ 4 ] = 0;
929             ctx -> pb[ 5 ] = 0;
930             ctx -> pb[ 6 ] = 0;
931             ctx -> pb[ 7 ] = 0;
932            
933             uint64_t Seed1 = ctx -> Seed[ 0 ];
934             uint64_t Seed2 = ctx -> Seed[ 1 ];
935             uint64_t Seed3 = ctx -> Seed[ 2 ];
936             uint64_t Seed4 = ctx -> Seed[ 3 ];
937             uint64_t Seed5 = ctx -> Seed[ 4 ];
938             uint64_t Seed6 = ctx -> Seed[ 5 ];
939             uint64_t Seed7 = ctx -> Seed[ 6 ];
940             uint64_t Seed8 = ctx -> Seed[ 7 ];
941            
942             if( MsgLen > 63 )
943             {
944             KOMIHASH_HASHLOOP64();
945             }
946            
947             Seed5 ^= Seed6 ^ Seed7 ^ Seed8;
948             Seed1 ^= Seed2 ^ Seed3 ^ Seed4;
949            
950             return( komihash_epi( Msg, MsgLen, Seed1, Seed5 ));
951             }
952            
953             /**
954             * @brief FOR TESTING PURPOSES ONLY - use the komihash() function instead.
955             *
956             * @param Msg The message to produce a hash from.
957             * @param MsgLen Message's length, in bytes.
958             * @param UseSeed Seed to use.
959             * @return 64-bit hash value.
960             */
961            
962             static inline uint64_t komihash_stream_oneshot( const void* const Msg,
963             const size_t MsgLen, const uint64_t UseSeed )
964             {
965             komihash_stream_t ctx;
966            
967             komihash_stream_init( &ctx, UseSeed );
968             komihash_stream_update( &ctx, Msg, MsgLen );
969            
970             return( komihash_stream_final( &ctx ));
971             }
972            
973             #endif // KOMIHASH_INCLUDED