librsync  2.3.2
rabinkarp.c
1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2  *
3  * rabinkarp -- The RabinKarp rolling checksum.
4  *
5  * Copyright (C) 2019 by Donovan Baarda <abo@minkirri.apana.org.au>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation; either version 2.1 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21 #include "rabinkarp.h"
22 
23 /* Constant for RABINKARP_MULT^2. */
24 #define RABINKARP_MULT2 0xa5b71959U
25 
26 /* Macros for doing 16 bytes with 2 mults that can be done in parallel. Testing
27  showed this as a performance sweet spot vs 16x1, 8x2, 4x4 1x16 alternative
28  arrangements. */
29 #define PAR2X1(hash,buf,i) (RABINKARP_MULT2*(hash) + \
30  RABINKARP_MULT*buf[i] + \
31  buf[i+1])
32 #define PAR2X2(hash,buf,i) PAR2X1(PAR2X1(hash,buf,i),buf,i+2)
33 #define PAR2X4(hash,buf,i) PAR2X2(PAR2X2(hash,buf,i),buf,i+4)
34 #define PAR2X8(hash,buf) PAR2X4(PAR2X4(hash,buf,0),buf,8)
35 
36 /* Table of RABINKARP_MULT^(2^(i+1)) for power lookups. */
37 const static uint32_t RABINKARP_MULT_POW2[32] = {
38  0x08104225U,
39  0xa5b71959U,
40  0xf9c080f1U,
41  0x7c71e2e1U,
42  0x0bb409c1U,
43  0x4dc72381U,
44  0xd17a8701U,
45  0x96260e01U,
46  0x55101c01U,
47  0x2d303801U,
48  0x66a07001U,
49  0xfe40e001U,
50  0xc081c001U,
51  0x91038001U,
52  0x62070001U,
53  0xc40e0001U,
54  0x881c0001U,
55  0x10380001U,
56  0x20700001U,
57  0x40e00001U,
58  0x81c00001U,
59  0x03800001U,
60  0x07000001U,
61  0x0e000001U,
62  0x1c000001U,
63  0x38000001U,
64  0x70000001U,
65  0xe0000001U,
66  0xc0000001U,
67  0x80000001U,
68  0x00000001U,
69  0x00000001U
70 };
71 
72 /* Get the value of RABINKARP_MULT^n. */
73 static inline uint32_t rabinkarp_pow(uint32_t n)
74 {
75  const uint32_t *m = RABINKARP_MULT_POW2;
76  uint32_t ans = 1;
77  while (n) {
78  if (n & 1) {
79  ans *= *m;
80  }
81  m++;
82  n >>= 1;
83  }
84  return ans;
85 }
86 
87 void rabinkarp_update(rabinkarp_t *sum, const unsigned char *buf, size_t len)
88 {
89  size_t n = len;
90  uint32_t hash = sum->hash;
91 
92  while (n >= 16) {
93  hash = PAR2X8(hash, buf);
94  buf += 16;
95  n -= 16;
96  }
97  while (n) {
98  hash = RABINKARP_MULT * hash + *buf++;
99  n--;
100  }
101  sum->hash = hash;
102  sum->count += len;
103  sum->mult *= rabinkarp_pow((uint32_t)len);
104 }