logswan.c (7310B)
1 /* 2 * Logswan 2.1.13 3 * Copyright (c) 2015-2022, Frederic Cambus 4 * https://www.logswan.org 5 * 6 * Created: 2015-05-31 7 * Last Updated: 2022-02-26 8 * 9 * Logswan is released under the BSD 2-Clause license. 10 * See LICENSE file for details. 11 * 12 * SPDX-License-Identifier: BSD-2-Clause 13 */ 14 15 #include <sys/socket.h> 16 #include <sys/stat.h> 17 #include <sys/time.h> 18 #include <arpa/inet.h> 19 #include <err.h> 20 #include <getopt.h> 21 #include <inttypes.h> 22 #include <netinet/in.h> 23 #include <stdbool.h> 24 #include <stdlib.h> 25 #include <stdint.h> 26 #include <stdio.h> 27 #include <string.h> 28 #include <time.h> 29 30 #ifdef HAVE_SECCOMP 31 #include <sys/prctl.h> 32 #include <linux/seccomp.h> 33 #include "seccomp.h" 34 #endif 35 36 #include <maxminddb.h> 37 38 #include "compat.h" 39 #include "config.h" 40 #include "continents.h" 41 #include "countries.h" 42 #include "hll.h" 43 #include "output.h" 44 #include "parse.h" 45 46 static void 47 usage() 48 { 49 printf("logswan [-ghv] [-d db] logfile\n\n" 50 "The options are as follows:\n\n" 51 " -d db Specify path to a GeoIP database.\n" 52 " -g Enable GeoIP lookups.\n" 53 " -h Display usage.\n" 54 " -v Display version.\n"); 55 } 56 57 int 58 main(int argc, char *argv[]) 59 { 60 struct timespec begin, end, elapsed; 61 struct HLL unique_ipv4, unique_ipv6; 62 struct results results; 63 struct date parsed_date; 64 struct logline parsed_line; 65 struct request parsed_request; 66 struct stat logfile_stat; 67 68 struct sockaddr_in ipv4; 69 struct sockaddr_in6 ipv6; 70 71 uint64_t bandwidth; 72 uint32_t status_code; 73 uint32_t hour; 74 int gai_error, mmdb_error; 75 int opt; 76 77 const char *errstr; 78 char *linebuffer = NULL; 79 size_t linesize = 0; 80 char *input; 81 char *db = NULL; 82 83 bool geoip = false; 84 bool is_ipv4, is_ipv6; 85 86 MMDB_s geoip2; 87 MMDB_lookup_result_s lookup; 88 89 FILE *logfile; 90 91 if (pledge("stdio rpath", NULL) == -1) { 92 err(EXIT_FAILURE, "pledge"); 93 } 94 95 #ifdef HAVE_SECCOMP 96 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { 97 perror("Can't initialize seccomp"); 98 return EXIT_FAILURE; 99 } 100 101 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &logswan)) { 102 perror("Can't load seccomp filter"); 103 return EXIT_FAILURE; 104 } 105 #endif 106 107 while ((opt = getopt(argc, argv, "d:ghv")) != -1) { 108 switch (opt) { 109 case 'd': 110 db = optarg; 111 break; 112 113 case 'g': 114 geoip = true; 115 break; 116 117 case 'h': 118 usage(); 119 return EXIT_SUCCESS; 120 121 case 'v': 122 printf("%s\n", VERSION); 123 return EXIT_SUCCESS; 124 } 125 } 126 127 if (optind < argc) { 128 input = argv[optind]; 129 } else { 130 usage(); 131 return EXIT_SUCCESS; 132 } 133 134 hll_init(&unique_ipv4, HLL_BITS); 135 hll_init(&unique_ipv6, HLL_BITS); 136 137 /* Starting timer */ 138 clock_gettime(CLOCK_MONOTONIC, &begin); 139 140 /* Initializing GeoIP */ 141 if (geoip) { 142 if (!db) 143 db = GEOIP2DIR GEOIP2DB; 144 145 if (MMDB_open(db, MMDB_MODE_MMAP, &geoip2) != MMDB_SUCCESS) 146 err(EXIT_FAILURE, "Can't open database (%s)", db); 147 } 148 149 /* Open log file */ 150 if (!strcmp(input, "-")) { 151 /* Read from standard input */ 152 logfile = stdin; 153 } else { 154 /* Attempt to read from file */ 155 if (!(logfile = fopen(input, "r"))) { 156 perror("Can't open log file"); 157 return EXIT_FAILURE; 158 } 159 } 160 161 /* Get log file size */ 162 if (fstat(fileno(logfile), &logfile_stat)) { 163 perror("Can't stat log file"); 164 return EXIT_FAILURE; 165 } 166 167 memset(&results, 0, sizeof(struct results)); 168 results.file_name = input; 169 results.file_size = logfile_stat.st_size; 170 171 while (getline(&linebuffer, &linesize, logfile) != -1) { 172 /* Parse and tokenize line */ 173 parse_line(&parsed_line, linebuffer); 174 175 /* Detect if remote host is IPv4 or IPv6 */ 176 if (parsed_line.remote_host) { /* Do not feed NULL tokens to inet_pton */ 177 if ((is_ipv4 = inet_pton(AF_INET, parsed_line.remote_host, &ipv4.sin_addr))) { 178 is_ipv6 = false; 179 } else { 180 is_ipv6 = inet_pton(AF_INET6, parsed_line.remote_host, &ipv6.sin6_addr); 181 182 if (!is_ipv6) { 183 results.invalid_lines++; 184 continue; 185 } 186 } 187 } else { 188 /* Invalid line */ 189 results.invalid_lines++; 190 continue; 191 } 192 193 if (is_ipv4) { 194 /* Increment hits counter */ 195 results.hits_ipv4++; 196 197 /* Unique visitors */ 198 hll_add(&unique_ipv4, parsed_line.remote_host, strlen(parsed_line.remote_host)); 199 } 200 201 if (is_ipv6) { 202 /* Increment hits counter */ 203 results.hits_ipv6++; 204 205 /* Unique visitors */ 206 hll_add(&unique_ipv6, parsed_line.remote_host, strlen(parsed_line.remote_host)); 207 } 208 209 if (geoip) { 210 MMDB_entry_data_s entry_data; 211 memset(&entry_data, 0, sizeof(MMDB_entry_data_s)); 212 213 lookup = MMDB_lookup_string(&geoip2, parsed_line.remote_host, &gai_error, &mmdb_error); 214 215 MMDB_get_value(&lookup.entry, &entry_data, "country", "iso_code", NULL); 216 217 if (entry_data.has_data) { 218 /* Increment countries array */ 219 for (size_t loop = 0; loop < COUNTRIES; loop++) { 220 if (!strncmp(countries_id[loop], entry_data.utf8_string, 2)) { 221 results.countries[loop]++; 222 break; 223 } 224 } 225 } 226 227 MMDB_get_value(&lookup.entry, &entry_data, "continent", "code", NULL); 228 229 if (entry_data.has_data) { 230 /* Increment continents array */ 231 for (size_t loop = 0; loop < CONTINENTS; loop++) { 232 if (!strncmp(continents_id[loop], entry_data.utf8_string, 2)) { 233 results.continents[loop]++; 234 break; 235 } 236 } 237 } 238 } 239 240 /* Hourly distribution */ 241 if (parsed_line.date) { 242 parse_date(&parsed_date, parsed_line.date); 243 244 if (parsed_date.hour) { 245 hour = strtonum(parsed_date.hour, 0, 23, &errstr); 246 247 if (!errstr) { 248 results.hours[hour]++; 249 } 250 } 251 } 252 253 /* Parse request */ 254 if (parsed_line.request) { 255 parse_request(&parsed_request, parsed_line.request); 256 257 if (parsed_request.method) { 258 for (size_t loop = 0; loop < METHODS; loop++) { 259 if (!strcmp(methods_names[loop], parsed_request.method)) { 260 results.methods[loop]++; 261 break; 262 } 263 } 264 } 265 266 if (parsed_request.protocol) { 267 for (size_t loop = 0; loop < PROTOCOLS; loop++) { 268 if (!strcmp(protocols_names[loop], parsed_request.protocol)) { 269 results.protocols[loop]++; 270 break; 271 } 272 } 273 } 274 } 275 276 /* Count HTTP status codes occurrences */ 277 if (parsed_line.status_code) { 278 status_code = strtonum(parsed_line.status_code, 0, STATUS_CODE_MAX-1, &errstr); 279 280 if (!errstr) { 281 results.status[status_code]++; 282 } 283 } 284 285 /* Increment bandwidth usage */ 286 if (parsed_line.object_size) { 287 bandwidth = strtonum(parsed_line.object_size, 0, INT64_MAX, &errstr); 288 289 if (!errstr) { 290 results.bandwidth += bandwidth; 291 } 292 } 293 } 294 295 /* Counting hits and processed lines */ 296 results.hits = results.hits_ipv4 + results.hits_ipv6; 297 results.processed_lines = results.hits + results.invalid_lines; 298 299 /* Counting unique visitors */ 300 results.visits_ipv4 = hll_count(&unique_ipv4); 301 results.visits_ipv6 = hll_count(&unique_ipv6); 302 results.visits = results.visits_ipv4 + results.visits_ipv6; 303 304 /* Stopping timer */ 305 clock_gettime(CLOCK_MONOTONIC, &end); 306 307 timespecsub(&end, &begin, &elapsed); 308 results.runtime = elapsed.tv_sec + elapsed.tv_nsec / 1E9; 309 310 /* Generate timestamp */ 311 time_t now = time(NULL); 312 strftime(results.timestamp, 20, "%Y-%m-%d %H:%M:%S", localtime(&now)); 313 314 /* Printing results */ 315 fprintf(stdout, "%s\n", output(&results)); 316 fprintf(stderr, "Processed %" PRIu64 " lines in %f seconds.\n", results.processed_lines, results.runtime); 317 318 /* Clean up */ 319 free(linebuffer); 320 fclose(logfile); 321 322 if (geoip) 323 MMDB_close(&geoip2); 324 325 hll_destroy(&unique_ipv4); 326 hll_destroy(&unique_ipv6); 327 328 return EXIT_SUCCESS; 329 }