logswan

Fast Web log analyzer using probabilistic data structures
Log | Files | Refs | README | LICENSE

commit 3820fef9982b92771764cb63ea2a0e918c4cdb33
parent 9e02b63978401972c38450a1dc15c9db19a5752c
Author: Frederic Cambus <fcambus@users.sourceforge.net>
Date:   Tue, 14 Jul 2015 00:05:35 +0200

Importing and linking HyperLogLog library

Diffstat:
MCMakeLists.txt | 5+++--
Adeps/MurmurHash3/MurmurHash3.c | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adeps/MurmurHash3/MurmurHash3.h | 8++++++++
Adeps/hll/hll.c | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adeps/hll/hll.h | 23+++++++++++++++++++++++
Ddeps/murmur3.c | 315-------------------------------------------------------------------------------
Ddeps/murmur3.h | 29-----------------------------
7 files changed, 241 insertions(+), 346 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt @@ -20,14 +20,15 @@ check_function_exists(strtonum HAVE_STRTONUM) find_library(LIB_GEOIP NAMES GeoIP REQUIRED) find_library(LIB_JANSSON NAMES jansson REQUIRED) -set (SRC src/logswan.c src/output.c src/parse.c deps/murmur3.c) +set (DEPS deps/hll/hll.c deps/MurmurHash3/MurmurHash3.c) +set (SRC src/logswan.c src/output.c src/parse.c) if(NOT HAVE_STRTONUM) set (SRC ${SRC} compat/strtonum.c) endif() add_definitions(-Wall -Wextra -Werror -std=c99 -pedantic) -add_executable(logswan ${SRC}) +add_executable(logswan ${SRC} ${DEPS}) target_link_libraries(logswan ${LIB_GEOIP} ${LIB_JANSSON}) diff --git a/deps/MurmurHash3/MurmurHash3.c b/deps/MurmurHash3/MurmurHash3.c @@ -0,0 +1,61 @@ +/*----------------------------------------------------------------------------- + MurmurHash3 was written by Austin Appleby, and is placed in the public + domain. The author hereby disclaims copyright to this source code. +*/ + +#include "MurmurHash3.h" + +#define ROTL32(x, r) ((x) << (r)) | ((x) >> (32 - (r))) + +uint32_t MurmurHash3_x86_32(const void *key, uint32_t len, uint32_t seed) { + const uint8_t *data = (const uint8_t *)key; + const int32_t nblocks = (int32_t)len / 4; + + uint32_t h1 = seed; + int i; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); + + for(i = -nblocks; i; i++) + { + uint32_t k1 = blocks[i]; + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + const uint8_t * tail = (const uint8_t *)(data + nblocks * 4); + + uint32_t k1 = 0; + + switch(len & 3) + { + case 3: + k1 ^= (uint32_t)tail[2] << 16; + case 2: + k1 ^= (uint32_t)tail[1] << 8; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + h1 ^= len; + + h1 ^= h1 >> 16; + h1 *= 0x85ebca6b; + h1 ^= h1 >> 13; + h1 *= 0xc2b2ae35; + h1 ^= h1 >> 16; + + return h1; +} diff --git a/deps/MurmurHash3/MurmurHash3.h b/deps/MurmurHash3/MurmurHash3.h @@ -0,0 +1,8 @@ +#ifndef _MURMURHASH3_H_ +#define _MURMURHASH3_H_ + +#include <stdint.h> + +uint32_t MurmurHash3_x86_32(const void * key, uint32_t len, uint32_t seed); + +#endif diff --git a/deps/hll/hll.c b/deps/hll/hll.c @@ -0,0 +1,146 @@ +#include <stdlib.h> +#include <errno.h> +#include <math.h> +#include <string.h> + +#include <stdio.h> + +#include "../MurmurHash3/MurmurHash3.h" +#include "hll.h" + +static __inline uint8_t _hll_rank(uint32_t hash, uint8_t bits) { + uint8_t i; + + for(i = 1; i <= 32 - bits; i++) { + if(hash & 1) + break; + + hash >>= 1; + } + + return i; +} + +int hll_init(struct HLL *hll, uint8_t bits) { + if(bits < 4 || bits > 20) { + errno = ERANGE; + return -1; + } + + hll->bits = bits; + hll->size = (size_t)1 << bits; + hll->registers = calloc(hll->size, 1); + + return 0; +} + +void hll_destroy(struct HLL *hll) { + free(hll->registers); + + hll->registers = NULL; +} + +static __inline void _hll_add_hash(struct HLL *hll, uint32_t hash) { + uint32_t index = hash >> (32 - hll->bits); + uint8_t rank = _hll_rank(hash, hll->bits); + + if(rank > hll->registers[index]) { + hll->registers[index] = rank; + } +} + +void hll_add(struct HLL *hll, const void *buf, size_t size) { + uint32_t hash = MurmurHash3_x86_32((const char *)buf, (uint32_t)size, 0x5f61767a); + + _hll_add_hash(hll, hash); +} + +double hll_count(const struct HLL *hll) { + double alpha_mm; + uint32_t i; + + switch (hll->bits) { + case 4: + alpha_mm = 0.673; + break; + case 5: + alpha_mm = 0.697; + break; + case 6: + alpha_mm = 0.709; + break; + default: + alpha_mm = 0.7213 / (1.0 + 1.079 / (double)hll->size); + break; + } + + alpha_mm *= ((double)hll->size * (double)hll->size); + + double sum = 0; + for(i = 0; i < hll->size; i++) { + sum += 1.0 / (1 << hll->registers[i]); + } + + double estimate = alpha_mm / sum; + + if (estimate <= 5.0 / 2.0 * (double)hll->size) { + int zeros = 0; + + for(i = 0; i < hll->size; i++) + zeros += (hll->registers[i] == 0); + + if(zeros) + estimate = (double)hll->size * log((double)hll->size / zeros); + + } else if (estimate > (1.0 / 30.0) * 4294967296.0) { + estimate = -4294967296.0 * log(1.0 - (estimate / 4294967296.0)); + } + + return estimate; +} + +int hll_merge(struct HLL *dst, const struct HLL *src) { + uint32_t i; + + if(dst->bits != src->bits) { + errno = EINVAL; + return -1; + } + + for(i = 0; i < dst->size; i++) { + if(src->registers[i] > dst->registers[i]) + dst->registers[i] = src->registers[i]; + } + + return 0; +} + +int hll_load(struct HLL *hll, const void *registers, size_t size) { + uint8_t bits = 0; + size_t s = size; + + while(s) { + if(s & 1) + break; + + bits++; + + s >>= 1; + } + + if(!bits || (1 << bits) != size) { + errno = EINVAL; + return -1; + } + + if(hll_init(hll, bits) == -1) + return -1; + + memcpy(hll->registers, registers, size); + + return 0; +} + +extern uint32_t _hll_hash(const struct HLL *hll) { + return MurmurHash3_x86_32(hll->registers, (uint32_t)hll->size, 0); +} diff --git a/deps/hll/hll.h b/deps/hll/hll.h @@ -0,0 +1,23 @@ +#ifndef AVZ_HLL_H +#define AVZ_HLL_H + +#include <sys/types.h> +#include <stdint.h> + +struct HLL { + uint8_t bits; + + size_t size; + uint8_t *registers; +}; + +extern int hll_init(struct HLL *hll, uint8_t bits); +extern int hll_load(struct HLL *hll, const void *registers, size_t size); +extern void hll_destroy(struct HLL *hll); +extern int hll_merge(struct HLL *dst, const struct HLL *src); +extern void hll_add(struct HLL *hll, const void *buf, size_t size); +extern double hll_count(const struct HLL *hll); + +extern uint32_t _hll_hash(const struct HLL *hll); + +#endif /* AVZ_HLL_H */ diff --git a/deps/murmur3.c b/deps/murmur3.c @@ -1,315 +0,0 @@ -//----------------------------------------------------------------------------- -// MurmurHash3 was written by Austin Appleby, and is placed in the public -// domain. The author hereby disclaims copyright to this source code. - -// Note - The x86 and x64 versions do _not_ produce the same results, as the -// algorithms are optimized for their respective platforms. You can still -// compile and run any of them on any platform, but your performance with the -// non-native version will be less than optimal. - -#include "murmur3.h" - -//----------------------------------------------------------------------------- -// Platform-specific functions and macros - -#ifdef __GNUC__ -#define FORCE_INLINE __attribute__((always_inline)) inline -#else -#define FORCE_INLINE inline -#endif - -static FORCE_INLINE uint32_t rotl32 ( uint32_t x, int8_t r ) -{ - return (x << r) | (x >> (32 - r)); -} - -static FORCE_INLINE uint64_t rotl64 ( uint64_t x, int8_t r ) -{ - return (x << r) | (x >> (64 - r)); -} - -#define ROTL32(x,y) rotl32(x,y) -#define ROTL64(x,y) rotl64(x,y) - -#define BIG_CONSTANT(x) (x##LLU) - -//----------------------------------------------------------------------------- -// Block read - if your platform needs to do endian-swapping or can only -// handle aligned reads, do the conversion here - -#define getblock(p, i) (p[i]) - -//----------------------------------------------------------------------------- -// Finalization mix - force all bits of a hash block to avalanche - -static FORCE_INLINE uint32_t fmix32 ( uint32_t h ) -{ - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; -} - -//---------- - -static FORCE_INLINE uint64_t fmix64 ( uint64_t k ) -{ - k ^= k >> 33; - k *= BIG_CONSTANT(0xff51afd7ed558ccd); - k ^= k >> 33; - k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); - k ^= k >> 33; - - return k; -} - -//----------------------------------------------------------------------------- - -void MurmurHash3_x86_32 ( const void * key, int len, - uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 4; - int i; - - uint32_t h1 = seed; - - uint32_t c1 = 0xcc9e2d51; - uint32_t c2 = 0x1b873593; - - //---------- - // body - - const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); - - for(i = -nblocks; i; i++) - { - uint32_t k1 = getblock(blocks,i); - - k1 *= c1; - k1 = ROTL32(k1,15); - k1 *= c2; - - h1 ^= k1; - h1 = ROTL32(h1,13); - h1 = h1*5+0xe6546b64; - } - - //---------- - // tail - - const uint8_t * tail = (const uint8_t*)(data + nblocks*4); - - uint32_t k1 = 0; - - switch(len & 3) - { - case 3: k1 ^= tail[2] << 16; - case 2: k1 ^= tail[1] << 8; - case 1: k1 ^= tail[0]; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; - - h1 = fmix32(h1); - - *(uint32_t*)out = h1; -} - -//----------------------------------------------------------------------------- - -void MurmurHash3_x86_128 ( const void * key, const int len, - uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 16; - int i; - - uint32_t h1 = seed; - uint32_t h2 = seed; - uint32_t h3 = seed; - uint32_t h4 = seed; - - uint32_t c1 = 0x239b961b; - uint32_t c2 = 0xab0e9789; - uint32_t c3 = 0x38b34ae5; - uint32_t c4 = 0xa1e38b93; - - //---------- - // body - - const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); - - for(i = -nblocks; i; i++) - { - uint32_t k1 = getblock(blocks,i*4+0); - uint32_t k2 = getblock(blocks,i*4+1); - uint32_t k3 = getblock(blocks,i*4+2); - uint32_t k4 = getblock(blocks,i*4+3); - - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - - h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; - - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - - h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; - - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - - h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; - - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - - h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; - } - - //---------- - // tail - - const uint8_t * tail = (const uint8_t*)(data + nblocks*16); - - uint32_t k1 = 0; - uint32_t k2 = 0; - uint32_t k3 = 0; - uint32_t k4 = 0; - - switch(len & 15) - { - case 15: k4 ^= tail[14] << 16; - case 14: k4 ^= tail[13] << 8; - case 13: k4 ^= tail[12] << 0; - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - - case 12: k3 ^= tail[11] << 24; - case 11: k3 ^= tail[10] << 16; - case 10: k3 ^= tail[ 9] << 8; - case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - - case 8: k2 ^= tail[ 7] << 24; - case 7: k2 ^= tail[ 6] << 16; - case 6: k2 ^= tail[ 5] << 8; - case 5: k2 ^= tail[ 4] << 0; - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - - case 4: k1 ^= tail[ 3] << 24; - case 3: k1 ^= tail[ 2] << 16; - case 2: k1 ^= tail[ 1] << 8; - case 1: k1 ^= tail[ 0] << 0; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - h1 = fmix32(h1); - h2 = fmix32(h2); - h3 = fmix32(h3); - h4 = fmix32(h4); - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - ((uint32_t*)out)[0] = h1; - ((uint32_t*)out)[1] = h2; - ((uint32_t*)out)[2] = h3; - ((uint32_t*)out)[3] = h4; -} - -//----------------------------------------------------------------------------- - -void MurmurHash3_x64_128 ( const void * key, const int len, - const uint32_t seed, void * out ) -{ - const uint8_t * data = (const uint8_t*)key; - const int nblocks = len / 16; - int i; - - uint64_t h1 = seed; - uint64_t h2 = seed; - - uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); - uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); - - //---------- - // body - - const uint64_t * blocks = (const uint64_t *)(data); - - for(i = 0; i < nblocks; i++) - { - uint64_t k1 = getblock(blocks,i*2+0); - uint64_t k2 = getblock(blocks,i*2+1); - - k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; - - h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; - - k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; - - h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; - } - - //---------- - // tail - - const uint8_t * tail = (const uint8_t*)(data + nblocks*16); - - uint64_t k1 = 0; - uint64_t k2 = 0; - - switch(len & 15) - { - case 15: k2 ^= (uint64_t)(tail[14]) << 48; - case 14: k2 ^= (uint64_t)(tail[13]) << 40; - case 13: k2 ^= (uint64_t)(tail[12]) << 32; - case 12: k2 ^= (uint64_t)(tail[11]) << 24; - case 11: k2 ^= (uint64_t)(tail[10]) << 16; - case 10: k2 ^= (uint64_t)(tail[ 9]) << 8; - case 9: k2 ^= (uint64_t)(tail[ 8]) << 0; - k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= (uint64_t)(tail[ 7]) << 56; - case 7: k1 ^= (uint64_t)(tail[ 6]) << 48; - case 6: k1 ^= (uint64_t)(tail[ 5]) << 40; - case 5: k1 ^= (uint64_t)(tail[ 4]) << 32; - case 4: k1 ^= (uint64_t)(tail[ 3]) << 24; - case 3: k1 ^= (uint64_t)(tail[ 2]) << 16; - case 2: k1 ^= (uint64_t)(tail[ 1]) << 8; - case 1: k1 ^= (uint64_t)(tail[ 0]) << 0; - k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= len; h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = fmix64(h1); - h2 = fmix64(h2); - - h1 += h2; - h2 += h1; - - ((uint64_t*)out)[0] = h1; - ((uint64_t*)out)[1] = h2; -} - -//----------------------------------------------------------------------------- - diff --git a/deps/murmur3.h b/deps/murmur3.h @@ -1,29 +0,0 @@ -//----------------------------------------------------------------------------- -// MurmurHash3 was written by Austin Appleby, and is placed in the -// public domain. The author hereby disclaims copyright to this source -// code. - -#ifndef _MURMURHASH3_H_ -#define _MURMURHASH3_H_ - -#include <stdint.h> - -#ifdef __cplusplus -extern "C" { -#endif - -//----------------------------------------------------------------------------- - -void MurmurHash3_x86_32 (const void *key, int len, uint32_t seed, void *out); - -void MurmurHash3_x86_128(const void *key, int len, uint32_t seed, void *out); - -void MurmurHash3_x64_128(const void *key, int len, uint32_t seed, void *out); - -//----------------------------------------------------------------------------- - -#ifdef __cplusplus -} -#endif - -#endif // _MURMURHASH3_H_