hll.c (2631B)
1 #include <stdlib.h> 2 #include <errno.h> 3 #include <math.h> 4 #include <string.h> 5 6 #include <stdio.h> 7 8 #include "../MurmurHash3/MurmurHash3.h" 9 #include "hll.h" 10 11 static __inline uint8_t _hll_rank(uint32_t hash, uint8_t bits) { 12 uint8_t i; 13 14 for(i = 1; i <= 32 - bits; i++) { 15 if(hash & 1) 16 break; 17 18 hash >>= 1; 19 } 20 21 return i; 22 } 23 24 int hll_init(struct HLL *hll, uint8_t bits) { 25 if(bits < 4 || bits > 20) { 26 errno = ERANGE; 27 return -1; 28 } 29 30 hll->bits = bits; 31 hll->size = (size_t)1 << bits; 32 hll->registers = calloc(hll->size, 1); 33 34 return 0; 35 } 36 37 void hll_destroy(struct HLL *hll) { 38 free(hll->registers); 39 40 hll->registers = NULL; 41 } 42 43 static __inline void _hll_add_hash(struct HLL *hll, uint32_t hash) { 44 uint32_t index = hash >> (32 - hll->bits); 45 uint8_t rank = _hll_rank(hash, hll->bits); 46 47 if(rank > hll->registers[index]) { 48 hll->registers[index] = rank; 49 } 50 } 51 52 void hll_add(struct HLL *hll, const void *buf, size_t size) { 53 uint32_t hash = MurmurHash3_x86_32((const char *)buf, (uint32_t)size, 0x5f61767a); 54 55 _hll_add_hash(hll, hash); 56 } 57 58 double hll_count(const struct HLL *hll) { 59 double alpha_mm; 60 uint32_t i; 61 62 switch (hll->bits) { 63 case 4: 64 alpha_mm = 0.673; 65 break; 66 case 5: 67 alpha_mm = 0.697; 68 break; 69 case 6: 70 alpha_mm = 0.709; 71 break; 72 default: 73 alpha_mm = 0.7213 / (1.0 + 1.079 / (double)hll->size); 74 break; 75 } 76 77 alpha_mm *= ((double)hll->size * (double)hll->size); 78 79 double sum = 0; 80 for(i = 0; i < hll->size; i++) { 81 sum += 1.0 / (1 << hll->registers[i]); 82 } 83 84 double estimate = alpha_mm / sum; 85 86 if (estimate <= 5.0 / 2.0 * (double)hll->size) { 87 int zeros = 0; 88 89 for(i = 0; i < hll->size; i++) 90 zeros += (hll->registers[i] == 0); 91 92 if(zeros) 93 estimate = (double)hll->size * log((double)hll->size / zeros); 94 95 } else if (estimate > (1.0 / 30.0) * 4294967296.0) { 96 estimate = -4294967296.0 * log(1.0 - (estimate / 4294967296.0)); 97 } 98 99 return estimate; 100 } 101 102 int hll_merge(struct HLL *dst, const struct HLL *src) { 103 uint32_t i; 104 105 if(dst->bits != src->bits) { 106 errno = EINVAL; 107 return -1; 108 } 109 110 for(i = 0; i < dst->size; i++) { 111 if(src->registers[i] > dst->registers[i]) 112 dst->registers[i] = src->registers[i]; 113 } 114 115 return 0; 116 } 117 118 int hll_load(struct HLL *hll, const void *registers, size_t size) { 119 uint8_t bits = 0; 120 size_t s = size; 121 122 while(s) { 123 if(s & 1) 124 break; 125 126 bits++; 127 128 s >>= 1; 129 } 130 131 if(!bits || ((size_t)1 << bits) != size) { 132 errno = EINVAL; 133 return -1; 134 } 135 136 if(hll_init(hll, bits) == -1) 137 return -1; 138 139 memcpy(hll->registers, registers, size); 140 141 return 0; 142 } 143 144 extern uint32_t _hll_hash(const struct HLL *hll) { 145 return MurmurHash3_x86_32(hll->registers, (uint32_t)hll->size, 0); 146 }