source: trunk/src/sh_log_parse_apache.c@ 350

Last change on this file since 350 was 326, checked in by katerina, 14 years ago

Fix for ticket #246: Add method to specify user-defined regex in APACHE log format

File size: 11.0 KB
Line 
1/**************************************
2 **
3 ** PARSER RULES
4 **
5 ** (a) must set record->host
6 ** (possibly to a dummy value)
7 **
8 ** (b) must set record->prefix
9 ** (itoa(status))
10 **
11 **
12 **************************************/
13
14/* for strptime */
15#define _XOPEN_SOURCE 500
16
17#include "config_xor.h"
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/types.h>
23#include <time.h>
24
25#ifdef USE_LOGFILE_MONITOR
26
27#undef FIL__
28#define FIL__ _("sh_log_parse_apache.c")
29
30/* Debian/Ubuntu: libpcre3-dev */
31#ifdef HAVE_PCRE_PCRE_H
32#include <pcre/pcre.h>
33#else
34#include <pcre.h>
35#endif
36
37#include "samhain.h"
38#include "sh_pthread.h"
39#include "sh_log_check.h"
40#include "sh_utils.h"
41#include "sh_string.h"
42
43extern int flag_err_debug;
44
45struct sh_fileinfo_apache {
46 pcre * line_regex;
47 int * line_ovector; /* captured substrings */
48 int line_ovecnum; /* how many captured */
49
50 int pos_host;
51 int pos_status;
52 int pos_time;
53 char * format_time;
54};
55
56static const char lf_error0[] = N_("%error");
57static const char lf_common0[] = N_("%h %l %u %t \"%r\" %>s %b");
58static const char lf_combined0[] = N_("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"");
59
/* These variables are not read anywhere. They exist only so that the
 * addresses of the locals 'new' and 'f_time' can be assigned to them,
 * which keeps gcc from placing those locals in registers and thereby
 * avoids the 'clobbered by longjmp' warning. And no, 'volatile' proved
 * insufficient.
 */
static void *sh_dummy_new = NULL;
static void *sh_dummy_fti = NULL;
67
68void * sh_eval_fileinfo_apache(char * str)
69{
70 struct sh_fileinfo_apache * result = NULL;
71 unsigned int i, quotes;
72 unsigned int nfields = 64;
73 size_t lengths[64];
74 char * new = NULL;
75 char ** splits;
76 char * token;
77 sh_string * re_string;
78 char * p;
79 volatile int p_host = -1;
80 volatile int p_status = -1;
81 volatile int p_time = -1;
82 char * f_time = NULL;
83 const char * error;
84 int erroffset;
85
86 /* Take the address to keep gcc from putting them into registers.
87 * Avoids the 'clobbered by longjmp' warning.
88 */
89 sh_dummy_new = (void*) &new;
90 sh_dummy_fti = (void*) &f_time;
91
92 if (0 == strncmp("common", str, 6))
93 {
94 new = sh_util_strdup(_(lf_common0));
95 }
96 else if (0 == strncmp("combined", str, 8))
97 {
98 new = sh_util_strdup(_(lf_combined0));
99 }
100 else if (0 == strncmp("error", str, 8))
101 {
102 new = sh_util_strdup(_(lf_error0));
103 }
104 else
105 {
106 new = sh_util_strdup(str);
107 }
108
109 if (flag_err_debug == SL_TRUE)
110 {
111 SH_MUTEX_LOCK(mutex_thread_nolog);
112 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
113 new,
114 _("eval_fileinfo"));
115 SH_MUTEX_UNLOCK(mutex_thread_nolog);
116 }
117
118 splits = split_array_ws(new, &nfields, lengths);
119
120 if (nfields < 1)
121 {
122 SH_FREE(splits);
123 SH_FREE(new);
124 return NULL;
125 }
126
127 /* Build the regex string re_string
128 */
129 re_string = sh_string_new(0);
130 sh_string_add_from_char(re_string, "^");
131
132 for (i = 0; i < nfields; ++i)
133 {
134
135 if (i > 0)
136 sh_string_add_from_char(re_string, " ");
137
138 if (splits[i][0] != '"')
139 quotes = 0;
140 else
141 quotes = 1;
142
143 if (quotes && lengths[i] > 1 && splits[i][lengths[i]-1] == '"')
144 {
145 splits[i][lengths[i]-1] = '\0'; /* cut trailing quote */
146 token = &(splits[i][1]);
147 } else {
148 token = splits[i];
149 }
150
151 if(quotes)
152 {
153 if(strcmp(token, "%r") == 0 ||
154 strstr(token, _("{Referer}")) != NULL ||
155 strstr(token, _("{User-Agent}")) != NULL ||
156 strstr(token, _("{X-Forwarded-For}")) != NULL )
157 {
158 /*
159 p = "\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"";
160 sh_string_add_from_char(re_string, p);
161 */
162 sh_string_add_from_char(re_string, "\"([^");
163 sh_string_add_from_char(re_string, "\"\\\\");
164 sh_string_add_from_char(re_string, "]*");
165 sh_string_add_from_char(re_string, "(?:");
166 sh_string_add_from_char(re_string, "\\\\.");
167 sh_string_add_from_char(re_string, "[^\"");
168 sh_string_add_from_char(re_string, "\\\\]*");
169 sh_string_add_from_char(re_string, ")*)\"");
170 }
171 else
172 {
173 sh_string_add_from_char(re_string, "(");
174 sh_string_add_from_char(re_string, "\\S+");
175 sh_string_add_from_char(re_string, ")");
176 }
177 }
178 else if (token[0] == 'R' && token[1] == 'E' && token[2] == '{' && token[strlen(token)-1] == '}')
179 {
180 char * lb = strchr(token, '{');
181 char * rb = strrchr(token, '}');
182
183 if (lb && rb)
184 {
185 ++lb; *rb = '\0';
186 sh_string_add_from_char(re_string, lb);
187 }
188 }
189 else if (token[0] == '%' && token[strlen(token)-1] == 't')
190 {
191 char * lb = strchr(token, '{');
192 char * rb = strchr(token, '}');
193
194 sh_string_add_from_char(re_string, "\\[");
195 sh_string_add_from_char(re_string, "([^");
196 sh_string_add_from_char(re_string, "(\\]");
197 sh_string_add_from_char(re_string, "]+)");
198 sh_string_add_from_char(re_string, "\\]");
199
200 p_time = i+1;
201 if (lb && rb)
202 {
203 ++lb; *rb = '\0';
204 f_time = sh_util_strdup(lb);
205 }
206 else
207 {
208 f_time = sh_util_strdup(_("%d/%b/%Y:%T"));
209 }
210 }
211 else if (token[0] == '%' && token[1] == 'e' && 0 == strcmp(token, _("%error")))
212 {
213 sh_string_add_from_char(re_string, "\\[");
214 sh_string_add_from_char(re_string, "([^");
215 sh_string_add_from_char(re_string, "]");
216 sh_string_add_from_char(re_string, "]+)");
217 sh_string_add_from_char(re_string, "\\]");
218
219 p_time = i+1; f_time = sh_util_strdup(_("%a %b %d %T %Y")); ++i;
220 sh_string_add_from_char(re_string, " ");
221
222 sh_string_add_from_char(re_string, "\\[");
223 sh_string_add_from_char(re_string, "([^");
224 sh_string_add_from_char(re_string, "]");
225 sh_string_add_from_char(re_string, "]+)");
226 sh_string_add_from_char(re_string, "\\]");
227
228 p_status = i+1;
229 sh_string_add_from_char(re_string, " ");
230
231 p = "(.+)";
232 sh_string_add_from_char(re_string, p);
233
234 nfields = 3;
235
236 break;
237 }
238 else
239 {
240 sh_string_add_from_char(re_string, "(");
241 sh_string_add_from_char(re_string, "\\S+");
242 sh_string_add_from_char(re_string, ")");
243 if (token[0] == '%' && token[strlen(token)-1] == 's')
244 p_status = i+1;
245 else if (token[0] == '%' && token[strlen(token)-1] == 'v')
246 p_host = i+1;
247 }
248 }
249 sh_string_add_from_char(re_string, "$");
250
251 if (flag_err_debug == SL_TRUE)
252 {
253 SH_MUTEX_LOCK(mutex_thread_nolog);
254 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
255 sh_string_str(re_string),
256 _("eval_fileinfo"));
257 SH_MUTEX_UNLOCK(mutex_thread_nolog);
258 }
259
260 result = SH_ALLOC(sizeof(struct sh_fileinfo_apache));
261 result->line_regex = pcre_compile(sh_string_str(re_string), 0,
262 &error, &erroffset, NULL);
263 if (!(result->line_regex))
264 {
265 sh_string * msg = sh_string_new(0);
266 sh_string_add_from_char(msg, _("Bad regex: "));
267 sh_string_add_from_char(msg, sh_string_str(re_string));
268
269 SH_MUTEX_LOCK(mutex_thread_nolog);
270 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
271 sh_string_str(msg),
272 _("eval_fileinfo"));
273 SH_MUTEX_UNLOCK(mutex_thread_nolog);
274
275 SH_FREE(result);
276 SH_FREE(splits);
277 SH_FREE(new);
278 sh_string_destroy(&msg);
279 sh_string_destroy(&re_string);
280
281 return NULL;
282 }
283 sh_string_destroy(&re_string);
284
285 result->line_ovector = SH_ALLOC(sizeof(int) * (nfields+1) * 3);
286 result->line_ovecnum = nfields;
287 result->pos_host = p_host;
288 result->pos_status = p_status;
289 result->pos_time = p_time;
290 result->format_time = f_time;
291
292 SH_FREE(splits);
293 SH_FREE(new);
294 return (void*)result;
295}
296
297struct sh_logrecord * sh_parse_apache (sh_string * logline, void * fileinfo)
298{
299 static struct tm old_tm;
300 static time_t old_time;
301
302 char tstr[128];
303 char sstr[128];
304 char * hstr;
305 int res;
306 const char **hstr_addr = (const char **) &hstr;
307
308 struct sh_fileinfo_apache * info = (struct sh_fileinfo_apache *) fileinfo;
309
310 if (sh_string_len(logline) > 0 && flag_err_debug == SL_TRUE)
311 {
312 SH_MUTEX_LOCK(mutex_thread_nolog);
313 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
314 sh_string_str(logline),
315 _("sh_parse_apache"));
316 SH_MUTEX_UNLOCK(mutex_thread_nolog);
317 }
318
319 if (logline == NULL || info == NULL)
320 {
321 return NULL;
322 }
323
324 res = pcre_exec(info->line_regex, NULL,
325 sh_string_str(logline), (int)sh_string_len(logline), 0,
326 0, info->line_ovector, (3*(1+info->line_ovecnum)));
327
328 if (res == (1+info->line_ovecnum))
329 {
330 struct sh_logrecord * record;
331 time_t timestamp = 0;
332
333 if (info->pos_time > 0)
334 {
335 res = pcre_copy_substring(sh_string_str(logline),
336 info->line_ovector, res,
337 info->pos_time, tstr, sizeof(tstr));
338 if (res <= 0)
339 goto corrupt;
340 }
341 else
342 {
343 res = 0;
344 timestamp = 0;
345 info->format_time = sh_util_strdup(_("%d/%b/%Y:%T"));
346 sl_strlcpy(tstr, _("01/Jan/1970:00:00:00"), sizeof(tstr));
347 }
348
349 if (res > 0)
350 {
351 struct tm btime;
352 char * ptr = NULL;
353
354 /* example: 01/Jun/2008:07:55:28 +0200 */
355
356 ptr = /*@i@*/strptime(tstr, info->format_time, &btime);
357
358 if (ptr)
359 {
360 timestamp = conv_timestamp(&btime, &old_tm, &old_time);
361 }
362 else
363 goto corrupt;
364 }
365
366 if (info->pos_status > 0)
367 {
368 res = pcre_copy_substring(sh_string_str(logline),
369 info->line_ovector, res,
370 info->pos_status, sstr, sizeof(sstr));
371 if (res <= 0)
372 goto corrupt;
373 }
374 else
375 {
376 sl_strlcpy(sstr, _("000"), sizeof(sstr));
377 }
378
379 if (info->pos_host > 0)
380 {
381 res = pcre_get_substring(sh_string_str(logline),
382 info->line_ovector, res,
383 info->pos_host, hstr_addr);
384 if (res <= 0)
385 goto corrupt;
386 }
387 else
388 {
389 hstr = NULL;
390 }
391
392 record = SH_ALLOC(sizeof(struct sh_logrecord));
393
394 record->timestamp = timestamp;
395 record->timestr = sh_string_new_from_lchar(tstr, strlen(tstr));
396
397 if (hstr)
398 record->host = sh_string_new_from_lchar(hstr, strlen(hstr));
399 else
400 record->host = sh_string_new_from_lchar(sh.host.name, strlen(sh.host.name));
401
402 record->message = sh_string_new_from_lchar(sh_string_str(logline),
403 sh_string_len(logline));
404 record->pid = PID_INVALID;
405
406 pcre_free(hstr);
407 return record;
408 }
409 else
410 {
411 char msg[128];
412 sl_snprintf(msg, sizeof(msg), _("Incorrect number of captured subexpressions: %d vs %d"),
413 res, info->line_ovecnum);
414
415 SH_MUTEX_LOCK(mutex_thread_nolog);
416 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
417 msg,
418 _("sh_parse_apache"));
419 SH_MUTEX_UNLOCK(mutex_thread_nolog);
420 }
421
422 /* Corrupted logline */
423 corrupt:
424
425 {
426 sh_string * msg = sh_string_new(0);
427 sh_string_add_from_char(msg, _("Corrupt logline: "));
428 sh_string_add_from_char(msg, sh_string_str(logline));
429
430 SH_MUTEX_LOCK(mutex_thread_nolog);
431 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
432 sh_string_str(msg),
433 _("sh_parse_apache"));
434 SH_MUTEX_UNLOCK(mutex_thread_nolog);
435 sh_string_destroy(&msg);
436 }
437 return NULL;
438}
439
440/* USE_LOGFILE_MONITOR */
441#endif
Note: See TracBrowser for help on using the repository browser.