source: trunk/src/sh_log_parse_apache.c@ 484

Last change on this file since 484 was 481, checked in by katerina, 9 years ago

Enhancements and fixes for tickets #374, #375, #376, #377, #378, and #379.

File size: 11.1 KB
RevLine 
[183]1/**************************************
2 **
3 ** PARSER RULES
4 **
5 ** (a) must set record->host
6 ** (eventually to dummy value)
7 **
8 ** (b) must set record->prefix
9 ** (itoa(status))
10 **
11 **
12 **************************************/
13
14/* for strptime */
15#define _XOPEN_SOURCE 500
16
17#include "config_xor.h"
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/types.h>
23#include <time.h>
24
25#ifdef USE_LOGFILE_MONITOR
26
27#undef FIL__
28#define FIL__ _("sh_log_parse_apache.c")
29
30/* Debian/Ubuntu: libpcre3-dev */
[203]31#ifdef HAVE_PCRE_PCRE_H
32#include <pcre/pcre.h>
33#else
[183]34#include <pcre.h>
[203]35#endif
[183]36
37#include "samhain.h"
38#include "sh_pthread.h"
39#include "sh_log_check.h"
40#include "sh_utils.h"
41#include "sh_string.h"
42
43extern int flag_err_debug;
44
45struct sh_fileinfo_apache {
46 pcre * line_regex;
47 int * line_ovector; /* captured substrings */
48 int line_ovecnum; /* how many captured */
49
50 int pos_host;
51 int pos_status;
52 int pos_time;
[186]53 char * format_time;
[183]54};
55
/* Predefined format descriptions.  sh_eval_fileinfo_apache() substitutes
 * one of these (after passing it through _()) when the configured format
 * is given by name: "error", "common", or "combined".
 *
 * "%error" is a pseudo-token handled specially by the format evaluator;
 * the other two are lists of Apache LogFormat directives.
 */
static const char lf_error0[] = N_("%error");
static const char lf_common0[] = N_("%h %l %u %t \"%r\" %>s %b");
static const char lf_combined0[] = N_("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"");
59
/* These variables are never read. They exist only so that the addresses
 * of certain locals in sh_eval_fileinfo_apache() (&new, &f_time, &result)
 * can be stored in them, which keeps gcc from placing those locals in
 * registers and so avoids the 'clobbered by longjmp' warning.
 * And no, 'volatile' proved insufficient.
 */
void * sh_dummy_65_new = NULL;
void * sh_dummy_66_fti = NULL;
void * sh_dummy_67_ftr = NULL;
[183]68
69void * sh_eval_fileinfo_apache(char * str)
70{
71 struct sh_fileinfo_apache * result = NULL;
72 unsigned int i, quotes;
73 unsigned int nfields = 64;
74 size_t lengths[64];
75 char * new = NULL;
76 char ** splits;
77 char * token;
78 sh_string * re_string;
79 char * p;
80 volatile int p_host = -1;
81 volatile int p_status = -1;
82 volatile int p_time = -1;
[186]83 char * f_time = NULL;
[183]84 const char * error;
85 int erroffset;
86
87 /* Take the address to keep gcc from putting them into registers.
88 * Avoids the 'clobbered by longjmp' warning.
89 */
[481]90 sh_dummy_65_new = (void*) &new;
91 sh_dummy_66_fti = (void*) &f_time;
92 sh_dummy_67_ftr = (void*) &result;
[183]93
94 if (0 == strncmp("common", str, 6))
95 {
96 new = sh_util_strdup(_(lf_common0));
97 }
98 else if (0 == strncmp("combined", str, 8))
99 {
100 new = sh_util_strdup(_(lf_combined0));
101 }
[185]102 else if (0 == strncmp("error", str, 8))
103 {
104 new = sh_util_strdup(_(lf_error0));
105 }
[183]106 else
107 {
108 new = sh_util_strdup(str);
109 }
110
[481]111 if (flag_err_debug == S_TRUE)
[183]112 {
113 SH_MUTEX_LOCK(mutex_thread_nolog);
114 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
115 new,
116 _("eval_fileinfo"));
117 SH_MUTEX_UNLOCK(mutex_thread_nolog);
118 }
119
120 splits = split_array_ws(new, &nfields, lengths);
121
122 if (nfields < 1)
123 {
124 SH_FREE(splits);
125 SH_FREE(new);
126 return NULL;
127 }
128
129 /* Build the regex string re_string
130 */
131 re_string = sh_string_new(0);
132 sh_string_add_from_char(re_string, "^");
133
134 for (i = 0; i < nfields; ++i)
135 {
136
137 if (i > 0)
138 sh_string_add_from_char(re_string, " ");
139
140 if (splits[i][0] != '"')
141 quotes = 0;
142 else
143 quotes = 1;
144
145 if (quotes && lengths[i] > 1 && splits[i][lengths[i]-1] == '"')
146 {
147 splits[i][lengths[i]-1] = '\0'; /* cut trailing quote */
148 token = &(splits[i][1]);
149 } else {
150 token = splits[i];
151 }
152
153 if(quotes)
154 {
155 if(strcmp(token, "%r") == 0 ||
[326]156 strstr(token, _("{Referer}")) != NULL ||
157 strstr(token, _("{User-Agent}")) != NULL ||
158 strstr(token, _("{X-Forwarded-For}")) != NULL )
[183]159 {
[186]160 /*
161 p = "\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"";
[183]162 sh_string_add_from_char(re_string, p);
[186]163 */
164 sh_string_add_from_char(re_string, "\"([^");
165 sh_string_add_from_char(re_string, "\"\\\\");
166 sh_string_add_from_char(re_string, "]*");
167 sh_string_add_from_char(re_string, "(?:");
168 sh_string_add_from_char(re_string, "\\\\.");
169 sh_string_add_from_char(re_string, "[^\"");
170 sh_string_add_from_char(re_string, "\\\\]*");
171 sh_string_add_from_char(re_string, ")*)\"");
[183]172 }
173 else
174 {
[186]175 sh_string_add_from_char(re_string, "(");
176 sh_string_add_from_char(re_string, "\\S+");
177 sh_string_add_from_char(re_string, ")");
[183]178 }
179 }
[326]180 else if (token[0] == 'R' && token[1] == 'E' && token[2] == '{' && token[strlen(token)-1] == '}')
181 {
182 char * lb = strchr(token, '{');
183 char * rb = strrchr(token, '}');
184
185 if (lb && rb)
186 {
187 ++lb; *rb = '\0';
188 sh_string_add_from_char(re_string, lb);
189 }
190 }
[183]191 else if (token[0] == '%' && token[strlen(token)-1] == 't')
192 {
[186]193 char * lb = strchr(token, '{');
194 char * rb = strchr(token, '}');
195
196 sh_string_add_from_char(re_string, "\\[");
197 sh_string_add_from_char(re_string, "([^");
198 sh_string_add_from_char(re_string, "(\\]");
199 sh_string_add_from_char(re_string, "]+)");
200 sh_string_add_from_char(re_string, "\\]");
201
[183]202 p_time = i+1;
[186]203 if (lb && rb)
204 {
205 ++lb; *rb = '\0';
206 f_time = sh_util_strdup(lb);
207 }
208 else
209 {
210 f_time = sh_util_strdup(_("%d/%b/%Y:%T"));
211 }
[183]212 }
[185]213 else if (token[0] == '%' && token[1] == 'e' && 0 == strcmp(token, _("%error")))
214 {
[186]215 sh_string_add_from_char(re_string, "\\[");
216 sh_string_add_from_char(re_string, "([^");
217 sh_string_add_from_char(re_string, "]");
218 sh_string_add_from_char(re_string, "]+)");
219 sh_string_add_from_char(re_string, "\\]");
[185]220
[186]221 p_time = i+1; f_time = sh_util_strdup(_("%a %b %d %T %Y")); ++i;
[185]222 sh_string_add_from_char(re_string, " ");
[186]223
224 sh_string_add_from_char(re_string, "\\[");
225 sh_string_add_from_char(re_string, "([^");
226 sh_string_add_from_char(re_string, "]");
227 sh_string_add_from_char(re_string, "]+)");
228 sh_string_add_from_char(re_string, "\\]");
229
[185]230 p_status = i+1;
[186]231 sh_string_add_from_char(re_string, " ");
[185]232
[186]233 p = "(.+)";
[185]234 sh_string_add_from_char(re_string, p);
235
[186]236 nfields = 3;
237
[185]238 break;
239 }
[183]240 else
241 {
[186]242 sh_string_add_from_char(re_string, "(");
243 sh_string_add_from_char(re_string, "\\S+");
244 sh_string_add_from_char(re_string, ")");
[183]245 if (token[0] == '%' && token[strlen(token)-1] == 's')
246 p_status = i+1;
247 else if (token[0] == '%' && token[strlen(token)-1] == 'v')
248 p_host = i+1;
249 }
250 }
251 sh_string_add_from_char(re_string, "$");
252
[481]253 if (flag_err_debug == S_TRUE)
[183]254 {
255 SH_MUTEX_LOCK(mutex_thread_nolog);
256 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
257 sh_string_str(re_string),
258 _("eval_fileinfo"));
259 SH_MUTEX_UNLOCK(mutex_thread_nolog);
260 }
261
262 result = SH_ALLOC(sizeof(struct sh_fileinfo_apache));
263 result->line_regex = pcre_compile(sh_string_str(re_string), 0,
264 &error, &erroffset, NULL);
265 if (!(result->line_regex))
266 {
267 sh_string * msg = sh_string_new(0);
268 sh_string_add_from_char(msg, _("Bad regex: "));
269 sh_string_add_from_char(msg, sh_string_str(re_string));
270
271 SH_MUTEX_LOCK(mutex_thread_nolog);
272 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
273 sh_string_str(msg),
274 _("eval_fileinfo"));
275 SH_MUTEX_UNLOCK(mutex_thread_nolog);
276
277 SH_FREE(result);
278 SH_FREE(splits);
279 SH_FREE(new);
280 sh_string_destroy(&msg);
281 sh_string_destroy(&re_string);
282
283 return NULL;
284 }
285 sh_string_destroy(&re_string);
286
287 result->line_ovector = SH_ALLOC(sizeof(int) * (nfields+1) * 3);
288 result->line_ovecnum = nfields;
289 result->pos_host = p_host;
290 result->pos_status = p_status;
291 result->pos_time = p_time;
[185]292 result->format_time = f_time;
[183]293
294 SH_FREE(splits);
295 SH_FREE(new);
296 return (void*)result;
297}
298
299struct sh_logrecord * sh_parse_apache (sh_string * logline, void * fileinfo)
300{
301 static struct tm old_tm;
302 static time_t old_time;
303
304 char tstr[128];
305 char sstr[128];
[481]306 const char * hstr;
[183]307 int res;
308 const char **hstr_addr = (const char **) &hstr;
309
310 struct sh_fileinfo_apache * info = (struct sh_fileinfo_apache *) fileinfo;
311
[481]312 if (sh_string_len(logline) > 0 && flag_err_debug == S_TRUE)
[183]313 {
314 SH_MUTEX_LOCK(mutex_thread_nolog);
315 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
316 sh_string_str(logline),
317 _("sh_parse_apache"));
318 SH_MUTEX_UNLOCK(mutex_thread_nolog);
319 }
320
321 if (logline == NULL || info == NULL)
322 {
323 return NULL;
324 }
325
326 res = pcre_exec(info->line_regex, NULL,
327 sh_string_str(logline), (int)sh_string_len(logline), 0,
328 0, info->line_ovector, (3*(1+info->line_ovecnum)));
329
330 if (res == (1+info->line_ovecnum))
331 {
332 struct sh_logrecord * record;
333 time_t timestamp = 0;
334
335 if (info->pos_time > 0)
336 {
337 res = pcre_copy_substring(sh_string_str(logline),
338 info->line_ovector, res,
339 info->pos_time, tstr, sizeof(tstr));
340 if (res <= 0)
341 goto corrupt;
342 }
343 else
344 {
345 res = 0;
346 timestamp = 0;
[186]347 info->format_time = sh_util_strdup(_("%d/%b/%Y:%T"));
[183]348 sl_strlcpy(tstr, _("01/Jan/1970:00:00:00"), sizeof(tstr));
349 }
350
351 if (res > 0)
352 {
353 struct tm btime;
[186]354 char * ptr = NULL;
[357]355
356 memset(&btime, '\0', sizeof(struct tm));
357 btime.tm_isdst = -1;
[183]358
359 /* example: 01/Jun/2008:07:55:28 +0200 */
360
[186]361 ptr = /*@i@*/strptime(tstr, info->format_time, &btime);
[183]362
[186]363 if (ptr)
[183]364 {
365 timestamp = conv_timestamp(&btime, &old_tm, &old_time);
366 }
367 else
368 goto corrupt;
369 }
370
371 if (info->pos_status > 0)
372 {
373 res = pcre_copy_substring(sh_string_str(logline),
374 info->line_ovector, res,
375 info->pos_status, sstr, sizeof(sstr));
376 if (res <= 0)
377 goto corrupt;
378 }
379 else
380 {
[185]381 sl_strlcpy(sstr, _("000"), sizeof(sstr));
[183]382 }
383
384 if (info->pos_host > 0)
385 {
386 res = pcre_get_substring(sh_string_str(logline),
387 info->line_ovector, res,
388 info->pos_host, hstr_addr);
389 if (res <= 0)
390 goto corrupt;
391 }
392 else
393 {
394 hstr = NULL;
395 }
396
397 record = SH_ALLOC(sizeof(struct sh_logrecord));
398
399 record->timestamp = timestamp;
400 record->timestr = sh_string_new_from_lchar(tstr, strlen(tstr));
401
402 if (hstr)
403 record->host = sh_string_new_from_lchar(hstr, strlen(hstr));
404 else
405 record->host = sh_string_new_from_lchar(sh.host.name, strlen(sh.host.name));
406
407 record->message = sh_string_new_from_lchar(sh_string_str(logline),
408 sh_string_len(logline));
[276]409 record->pid = PID_INVALID;
[183]410
[481]411 pcre_free_substring(hstr);
[183]412 return record;
413 }
414 else
415 {
416 char msg[128];
417 sl_snprintf(msg, sizeof(msg), _("Incorrect number of captured subexpressions: %d vs %d"),
418 res, info->line_ovecnum);
419
420 SH_MUTEX_LOCK(mutex_thread_nolog);
421 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
422 msg,
423 _("sh_parse_apache"));
424 SH_MUTEX_UNLOCK(mutex_thread_nolog);
425 }
426
427 /* Corrupted logline */
428 corrupt:
429
430 {
431 sh_string * msg = sh_string_new(0);
432 sh_string_add_from_char(msg, _("Corrupt logline: "));
433 sh_string_add_from_char(msg, sh_string_str(logline));
434
435 SH_MUTEX_LOCK(mutex_thread_nolog);
436 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
437 sh_string_str(msg),
438 _("sh_parse_apache"));
439 SH_MUTEX_UNLOCK(mutex_thread_nolog);
440 sh_string_destroy(&msg);
441 }
442 return NULL;
443}
444
445/* USE_LOGFILE_MONITOR */
446#endif
Note: See TracBrowser for help on using the repository browser.