logswan

Fast Web log analyzer using probabilistic data structures
Log | Files | Refs | README | LICENSE

logswan.c (7177B)


      1 /*
      2  * Logswan 2.1.8
      3  * Copyright (c) 2015-2020, Frederic Cambus
      4  * https://www.logswan.org
      5  *
      6  * Created:      2015-05-31
      7  * Last Updated: 2020-08-06
      8  *
      9  * Logswan is released under the BSD 2-Clause license.
     10  * See LICENSE file for details.
     11  */
     12 
     13 #include <sys/socket.h>
     14 #include <sys/stat.h>
     15 #include <sys/time.h>
     16 #include <arpa/inet.h>
     17 #include <err.h>
     18 #include <getopt.h>
     19 #include <inttypes.h>
     20 #include <netinet/in.h>
     21 #include <stdbool.h>
     22 #include <stdlib.h>
     23 #include <stdint.h>
     24 #include <stdio.h>
     25 #include <string.h>
     26 #include <time.h>
     27 
     28 #ifdef HAVE_SECCOMP
     29 #include <sys/prctl.h>
     30 #include <linux/seccomp.h>
     31 #include "seccomp.h"
     32 #endif
     33 
     34 #include <maxminddb.h>
     35 
     36 #include "compat.h"
     37 #include "config.h"
     38 #include "continents.h"
     39 #include "countries.h"
     40 #include "hll.h"
     41 #include "output.h"
     42 #include "parse.h"
     43 
     44 bool geoip;
     45 MMDB_s geoip2;
     46 
     47 struct timespec begin, end, elapsed;
     48 
     49 char lineBuffer[LINE_LENGTH_MAX];
     50 
     51 struct results results;
     52 struct date parsedDate;
     53 struct logLine parsedLine;
     54 struct request parsedRequest;
     55 
     56 struct sockaddr_in ipv4;
     57 struct sockaddr_in6 ipv6;
     58 bool isIPv4, isIPv6;
     59 
     60 uint64_t bandwidth;
     61 uint32_t statusCode;
     62 uint32_t hour;
     63 uint32_t countryId;
     64 
     65 FILE *logFile;
     66 struct stat logFileStat;
     67 
     68 const char *errstr;
     69 
     70 int8_t getoptFlag;
     71 
     72 struct HLL uniqueIPv4, uniqueIPv6;
     73 char *intputFile;
     74 char *db = NULL;
     75 
     76 static void
     77 displayUsage()
     78 {
     79 	printf("USAGE: logswan [options] inputfile\n\n" \
     80 	    "Options are:\n\n" \
     81 	    "	-d Specify path to a GeoIP database\n" \
     82 	    "	-g Enable GeoIP lookups\n" \
     83 	    "	-h Display usage\n" \
     84 	    "	-v Display version\n");
     85 }
     86 
     87 int
     88 main(int argc, char *argv[])
     89 {
     90 	int gai_error, mmdb_error;
     91 	MMDB_lookup_result_s lookup;
     92 
     93 	if (pledge("stdio rpath", NULL) == -1) {
     94 		err(EXIT_FAILURE, "pledge");
     95 	}
     96 
     97 #ifdef HAVE_SECCOMP
     98 	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
     99 		perror("Can't initialize seccomp");
    100 		return EXIT_FAILURE;
    101 	}
    102 
    103 	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &logswan)) {
    104 		perror("Can't load seccomp filter");
    105 		return EXIT_FAILURE;
    106 	}
    107 #endif
    108 
    109 	hll_init(&uniqueIPv4, HLL_BITS);
    110 	hll_init(&uniqueIPv6, HLL_BITS);
    111 
    112 	while ((getoptFlag = getopt(argc, argv, "d:ghv")) != -1) {
    113 		switch (getoptFlag) {
    114 		case 'd':
    115 			db = optarg;
    116 			break;
    117 
    118 		case 'g':
    119 			geoip = true;
    120 			break;
    121 
    122 		case 'h':
    123 			displayUsage();
    124 			return EXIT_SUCCESS;
    125 
    126 		case 'v':
    127 			printf("%s\n", VERSION);
    128 			return EXIT_SUCCESS;
    129 		}
    130 	}
    131 
    132 	if (optind < argc) {
    133 		intputFile = argv[optind];
    134 	} else {
    135 		displayUsage();
    136 		return EXIT_SUCCESS;
    137 	}
    138 
    139 	argc -= optind;
    140 	argv += optind;
    141 
    142 	/* Starting timer */
    143 	clock_gettime(CLOCK_MONOTONIC, &begin);
    144 
    145 	/* Initializing GeoIP */
    146 	if (geoip) {
    147 		if (!db)
    148 			db = GEOIP2DIR GEOIP2DB;
    149 
    150 		if (MMDB_open(db, MMDB_MODE_MMAP, &geoip2) != MMDB_SUCCESS)
    151 			err(EXIT_FAILURE, "Can't open database (%s)", db);
    152 	}
    153 
    154 	/* Open log file */
    155 	if (!strcmp(intputFile, "-")) {
    156 		/* Read from standard input */
    157 		logFile = stdin;
    158 	} else {
    159 		/* Attempt to read from file */
    160 		if (!(logFile = fopen(intputFile, "r"))) {
    161 			perror("Can't open log file");
    162 			return EXIT_FAILURE;
    163 		}
    164 	}
    165 
    166 	/* Get log file size */
    167 	if (fstat(fileno(logFile), &logFileStat)) {
    168 		perror("Can't stat log file");
    169 		return EXIT_FAILURE;
    170 	}
    171 
    172 	results.fileName = intputFile;
    173 	results.fileSize = logFileStat.st_size;
    174 
    175 	while (fgets(lineBuffer, LINE_LENGTH_MAX, logFile)) {
    176 		/* Parse and tokenize line */
    177 		parseLine(&parsedLine, lineBuffer);
    178 
    179 		/* Detect if remote host is IPv4 or IPv6 */
    180 		if (parsedLine.remoteHost) { /* Do not feed NULL tokens to inet_pton */
    181 			if ((isIPv4 = inet_pton(AF_INET, parsedLine.remoteHost, &ipv4.sin_addr))) {
    182 				isIPv6 = false;
    183 			} else {
    184 				isIPv6 = inet_pton(AF_INET6, parsedLine.remoteHost, &ipv6.sin6_addr);
    185 
    186 				if (!isIPv6) {
    187 					results.invalidLines++;
    188 					continue;
    189 				}
    190 			}
    191 		} else {
    192 			/* Invalid line */
    193 			results.invalidLines++;
    194 			continue;
    195 		}
    196 
    197 		if (isIPv4) {
    198 			/* Increment hits counter */
    199 			results.hitsIPv4++;
    200 
    201 			/* Unique visitors */
    202 			hll_add(&uniqueIPv4, parsedLine.remoteHost, strlen(parsedLine.remoteHost));
    203 		}
    204 
    205 		if (isIPv6) {
    206 			/* Increment hits counter */
    207 			results.hitsIPv6++;
    208 
    209 			/* Unique visitors */
    210 			hll_add(&uniqueIPv6, parsedLine.remoteHost, strlen(parsedLine.remoteHost));
    211 		}
    212 
    213 		if (geoip) {
    214 			MMDB_entry_data_s entry_data;
    215 			memset(&entry_data, 0, sizeof(MMDB_entry_data_s));
    216 
    217 			lookup = MMDB_lookup_string(&geoip2, parsedLine.remoteHost, &gai_error, &mmdb_error);
    218 
    219 			MMDB_get_value(&lookup.entry, &entry_data, "country", "iso_code", NULL);
    220 
    221 			if (entry_data.has_data) {
    222 				/* Increment countries array */
    223 				for (size_t loop = 0; loop < COUNTRIES; loop++) {
    224 					if (!strncmp(countriesId[loop], entry_data.utf8_string, 2)) {
    225 						results.countries[loop]++;
    226 						break;
    227 					}
    228 				}
    229 			}
    230 
    231 			MMDB_get_value(&lookup.entry, &entry_data, "continent", "code", NULL);
    232 
    233 			if (entry_data.has_data) {
    234 				/* Increment continents array */
    235 				for (size_t loop = 0; loop < CONTINENTS; loop++) {
    236 					if (!strncmp(continentsId[loop], entry_data.utf8_string, 2)) {
    237 						results.continents[loop]++;
    238 						break;
    239 					}
    240 				}
    241 			}
    242 		}
    243 
    244 		/* Hourly distribution */
    245 		if (parsedLine.date) {
    246 			parseDate(&parsedDate, parsedLine.date);
    247 
    248 			if (parsedDate.hour) {
    249 				hour = strtonum(parsedDate.hour, 0, 23, &errstr);
    250 
    251 				if (!errstr) {
    252 					results.hours[hour]++;
    253 				}
    254 			}
    255 		}
    256 
    257 		/* Parse request */
    258 		if (parsedLine.request) {
    259 			parseRequest(&parsedRequest, parsedLine.request);
    260 
    261 			if (parsedRequest.method) {
    262 				for (size_t loop = 0; loop < METHODS; loop++) {
    263 					if (!strcmp(methodsNames[loop], parsedRequest.method)) {
    264 						results.methods[loop]++;
    265 						break;
    266 					}
    267 				}
    268 			}
    269 
    270 			if (parsedRequest.protocol) {
    271 				for (size_t loop = 0; loop < PROTOCOLS; loop++) {
    272 					if (!strcmp(protocolsNames[loop], parsedRequest.protocol)) {
    273 						results.protocols[loop]++;
    274 						break;
    275 					}
    276 				}
    277 			}
    278 		}
    279 
    280 		/* Count HTTP status codes occurrences */
    281 		if (parsedLine.statusCode) {
    282 			statusCode = strtonum(parsedLine.statusCode, 0, STATUS_CODE_MAX-1, &errstr);
    283 
    284 			if (!errstr) {
    285 				results.status[statusCode]++;
    286 			}
    287 		}
    288 
    289 		/* Increment bandwidth usage */
    290 		if (parsedLine.objectSize) {
    291 			bandwidth = strtonum(parsedLine.objectSize, 0, INT64_MAX, &errstr);
    292 
    293 			if (!errstr) {
    294 				results.bandwidth += bandwidth;
    295 			}
    296 		}
    297 	}
    298 
    299 	/* Counting hits and processed lines */
    300 	results.hits = results.hitsIPv4 + results.hitsIPv6;
    301 	results.processedLines = results.hits + results.invalidLines;
    302 
    303 	/* Counting unique visitors */
    304 	results.visitsIPv4 = hll_count(&uniqueIPv4);
    305 	results.visitsIPv6 = hll_count(&uniqueIPv6);
    306 	results.visits = results.visitsIPv4 + results.visitsIPv6;
    307 
    308 	/* Stopping timer */
    309 	clock_gettime(CLOCK_MONOTONIC, &end);
    310 
    311 	timespecsub(&end, &begin, &elapsed);
    312 	results.runtime = elapsed.tv_sec + elapsed.tv_nsec / 1E9;
    313 
    314 	/* Generate timestamp */
    315 	time_t now = time(NULL);
    316 	strftime(results.timeStamp, 20, "%Y-%m-%d %H:%M:%S", localtime(&now));
    317 
    318 	/* Printing results */
    319 	fprintf(stdout, "%s\n", output(&results));
    320 	fprintf(stderr, "Processed %" PRIu64 " lines in %f seconds.\n", results.processedLines, results.runtime);
    321 
    322 	/* Clean up */
    323 	fclose(logFile);
    324 
    325 	MMDB_close(&geoip2);
    326 
    327 	hll_destroy(&uniqueIPv4);
    328 	hll_destroy(&uniqueIPv6);
    329 
    330 	return EXIT_SUCCESS;
    331 }