source: trunk/src/sh_log_parse_apache.c@ 220

Last change on this file since 220 was 203, checked in by katerina, 16 years ago

Fix compile failures on RHEL3 (ticket #130) and FreeBSD7 amd64 (ticket #131).

File size: 10.7 KB
Line 
1/**************************************
2 **
3 ** PARSER RULES
4 **
5 ** (a) must set record->host
6 ** (if necessary, to a dummy value)
7 **
8 ** (b) must set record->prefix
9 ** (itoa(status))
10 **
11 **
12 **************************************/
13
14/* for strptime */
15#define _XOPEN_SOURCE 500
16
17#include "config_xor.h"
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/types.h>
23#include <time.h>
24
25#ifdef USE_LOGFILE_MONITOR
26
27#undef FIL__
28#define FIL__ _("sh_log_parse_apache.c")
29
30/* Debian/Ubuntu: libpcre3-dev */
31#ifdef HAVE_PCRE_PCRE_H
32#include <pcre/pcre.h>
33#else
34#include <pcre.h>
35#endif
36
37#include "samhain.h"
38#include "sh_pthread.h"
39#include "sh_log_check.h"
40#include "sh_utils.h"
41#include "sh_string.h"
42
43extern int flag_err_debug;
44
45struct sh_fileinfo_apache {
46 pcre * line_regex;
47 int * line_ovector; /* captured substrings */
48 int line_ovecnum; /* how many captured */
49
50 int pos_host;
51 int pos_status;
52 int pos_time;
53 char * format_time;
54};
55
56static const char lf_error0[] = N_("%error");
57static const char lf_common0[] = N_("%h %l %u %t \"%r\" %>s %b");
58static const char lf_combined0[] = N_("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"");
59
60/* This variable is not used anywhere. It only exist
61 * to assign &new to them, which keeps gcc from
62 * putting it into a register, and avoids the 'clobbered
63 * by longjmp' warning. And no, 'volatile' proved insufficient.
64 */
65static void * sh_dummy_new = NULL;
66static void * sh_dummy_fti = NULL;
67
68void * sh_eval_fileinfo_apache(char * str)
69{
70 struct sh_fileinfo_apache * result = NULL;
71 unsigned int i, quotes;
72 unsigned int nfields = 64;
73 size_t lengths[64];
74 char * new = NULL;
75 char ** splits;
76 char * token;
77 sh_string * re_string;
78 char * p;
79 volatile int p_host = -1;
80 volatile int p_status = -1;
81 volatile int p_time = -1;
82 char * f_time = NULL;
83 const char * error;
84 int erroffset;
85
86 /* Take the address to keep gcc from putting them into registers.
87 * Avoids the 'clobbered by longjmp' warning.
88 */
89 sh_dummy_new = (void*) &new;
90 sh_dummy_fti = (void*) &f_time;
91
92 if (0 == strncmp("common", str, 6))
93 {
94 new = sh_util_strdup(_(lf_common0));
95 }
96 else if (0 == strncmp("combined", str, 8))
97 {
98 new = sh_util_strdup(_(lf_combined0));
99 }
100 else if (0 == strncmp("error", str, 8))
101 {
102 new = sh_util_strdup(_(lf_error0));
103 }
104 else
105 {
106 new = sh_util_strdup(str);
107 }
108
109 if (flag_err_debug == SL_TRUE)
110 {
111 SH_MUTEX_LOCK(mutex_thread_nolog);
112 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
113 new,
114 _("eval_fileinfo"));
115 SH_MUTEX_UNLOCK(mutex_thread_nolog);
116 }
117
118 splits = split_array_ws(new, &nfields, lengths);
119
120 if (nfields < 1)
121 {
122 SH_FREE(splits);
123 SH_FREE(new);
124 return NULL;
125 }
126
127 /* Build the regex string re_string
128 */
129 re_string = sh_string_new(0);
130 sh_string_add_from_char(re_string, "^");
131
132 for (i = 0; i < nfields; ++i)
133 {
134
135 if (i > 0)
136 sh_string_add_from_char(re_string, " ");
137
138 if (splits[i][0] != '"')
139 quotes = 0;
140 else
141 quotes = 1;
142
143 if (quotes && lengths[i] > 1 && splits[i][lengths[i]-1] == '"')
144 {
145 splits[i][lengths[i]-1] = '\0'; /* cut trailing quote */
146 token = &(splits[i][1]);
147 } else {
148 token = splits[i];
149 }
150
151 if(quotes)
152 {
153 if(strcmp(token, "%r") == 0 ||
154 strstr(token, _("{Referer}")) == 0 ||
155 strstr(token, _("{User-Agent}")) == 0)
156 {
157 /*
158 p = "\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"";
159 sh_string_add_from_char(re_string, p);
160 */
161 sh_string_add_from_char(re_string, "\"([^");
162 sh_string_add_from_char(re_string, "\"\\\\");
163 sh_string_add_from_char(re_string, "]*");
164 sh_string_add_from_char(re_string, "(?:");
165 sh_string_add_from_char(re_string, "\\\\.");
166 sh_string_add_from_char(re_string, "[^\"");
167 sh_string_add_from_char(re_string, "\\\\]*");
168 sh_string_add_from_char(re_string, ")*)\"");
169 }
170 else
171 {
172 sh_string_add_from_char(re_string, "(");
173 sh_string_add_from_char(re_string, "\\S+");
174 sh_string_add_from_char(re_string, ")");
175 }
176 }
177 else if (token[0] == '%' && token[strlen(token)-1] == 't')
178 {
179 char * lb = strchr(token, '{');
180 char * rb = strchr(token, '}');
181
182 sh_string_add_from_char(re_string, "\\[");
183 sh_string_add_from_char(re_string, "([^");
184 sh_string_add_from_char(re_string, "(\\]");
185 sh_string_add_from_char(re_string, "]+)");
186 sh_string_add_from_char(re_string, "\\]");
187
188 p_time = i+1;
189 if (lb && rb)
190 {
191 ++lb; *rb = '\0';
192 f_time = sh_util_strdup(lb);
193 }
194 else
195 {
196 f_time = sh_util_strdup(_("%d/%b/%Y:%T"));
197 }
198 }
199 else if (token[0] == '%' && token[1] == 'e' && 0 == strcmp(token, _("%error")))
200 {
201 sh_string_add_from_char(re_string, "\\[");
202 sh_string_add_from_char(re_string, "([^");
203 sh_string_add_from_char(re_string, "]");
204 sh_string_add_from_char(re_string, "]+)");
205 sh_string_add_from_char(re_string, "\\]");
206
207 p_time = i+1; f_time = sh_util_strdup(_("%a %b %d %T %Y")); ++i;
208 sh_string_add_from_char(re_string, " ");
209
210 sh_string_add_from_char(re_string, "\\[");
211 sh_string_add_from_char(re_string, "([^");
212 sh_string_add_from_char(re_string, "]");
213 sh_string_add_from_char(re_string, "]+)");
214 sh_string_add_from_char(re_string, "\\]");
215
216 p_status = i+1;
217 sh_string_add_from_char(re_string, " ");
218
219 p = "(.+)";
220 sh_string_add_from_char(re_string, p);
221
222 nfields = 3;
223
224 break;
225 }
226 else
227 {
228 sh_string_add_from_char(re_string, "(");
229 sh_string_add_from_char(re_string, "\\S+");
230 sh_string_add_from_char(re_string, ")");
231 if (token[0] == '%' && token[strlen(token)-1] == 's')
232 p_status = i+1;
233 else if (token[0] == '%' && token[strlen(token)-1] == 'v')
234 p_host = i+1;
235 }
236 }
237 sh_string_add_from_char(re_string, "$");
238
239 if (flag_err_debug == SL_TRUE)
240 {
241 SH_MUTEX_LOCK(mutex_thread_nolog);
242 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
243 sh_string_str(re_string),
244 _("eval_fileinfo"));
245 SH_MUTEX_UNLOCK(mutex_thread_nolog);
246 }
247
248 result = SH_ALLOC(sizeof(struct sh_fileinfo_apache));
249 result->line_regex = pcre_compile(sh_string_str(re_string), 0,
250 &error, &erroffset, NULL);
251 if (!(result->line_regex))
252 {
253 sh_string * msg = sh_string_new(0);
254 sh_string_add_from_char(msg, _("Bad regex: "));
255 sh_string_add_from_char(msg, sh_string_str(re_string));
256
257 SH_MUTEX_LOCK(mutex_thread_nolog);
258 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
259 sh_string_str(msg),
260 _("eval_fileinfo"));
261 SH_MUTEX_UNLOCK(mutex_thread_nolog);
262
263 SH_FREE(result);
264 SH_FREE(splits);
265 SH_FREE(new);
266 sh_string_destroy(&msg);
267 sh_string_destroy(&re_string);
268
269 return NULL;
270 }
271 sh_string_destroy(&re_string);
272
273 result->line_ovector = SH_ALLOC(sizeof(int) * (nfields+1) * 3);
274 result->line_ovecnum = nfields;
275 result->pos_host = p_host;
276 result->pos_status = p_status;
277 result->pos_time = p_time;
278 result->format_time = f_time;
279
280 SH_FREE(splits);
281 SH_FREE(new);
282 return (void*)result;
283}
284
285struct sh_logrecord * sh_parse_apache (sh_string * logline, void * fileinfo)
286{
287 static struct tm old_tm;
288 static time_t old_time;
289
290 char tstr[128];
291 char sstr[128];
292 char * hstr;
293 int res;
294 const char **hstr_addr = (const char **) &hstr;
295
296 struct sh_fileinfo_apache * info = (struct sh_fileinfo_apache *) fileinfo;
297
298 if (sh_string_len(logline) > 0 && flag_err_debug == SL_TRUE)
299 {
300 SH_MUTEX_LOCK(mutex_thread_nolog);
301 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
302 sh_string_str(logline),
303 _("sh_parse_apache"));
304 SH_MUTEX_UNLOCK(mutex_thread_nolog);
305 }
306
307 if (logline == NULL || info == NULL)
308 {
309 return NULL;
310 }
311
312 res = pcre_exec(info->line_regex, NULL,
313 sh_string_str(logline), (int)sh_string_len(logline), 0,
314 0, info->line_ovector, (3*(1+info->line_ovecnum)));
315
316 if (res == (1+info->line_ovecnum))
317 {
318 struct sh_logrecord * record;
319 time_t timestamp = 0;
320
321 if (info->pos_time > 0)
322 {
323 res = pcre_copy_substring(sh_string_str(logline),
324 info->line_ovector, res,
325 info->pos_time, tstr, sizeof(tstr));
326 if (res <= 0)
327 goto corrupt;
328 }
329 else
330 {
331 res = 0;
332 timestamp = 0;
333 info->format_time = sh_util_strdup(_("%d/%b/%Y:%T"));
334 sl_strlcpy(tstr, _("01/Jan/1970:00:00:00"), sizeof(tstr));
335 }
336
337 if (res > 0)
338 {
339 struct tm btime;
340 char * ptr = NULL;
341
342 /* example: 01/Jun/2008:07:55:28 +0200 */
343
344 ptr = /*@i@*/strptime(tstr, info->format_time, &btime);
345
346 if (ptr)
347 {
348 timestamp = conv_timestamp(&btime, &old_tm, &old_time);
349 }
350 else
351 goto corrupt;
352 }
353
354 if (info->pos_status > 0)
355 {
356 res = pcre_copy_substring(sh_string_str(logline),
357 info->line_ovector, res,
358 info->pos_status, sstr, sizeof(sstr));
359 if (res <= 0)
360 goto corrupt;
361 }
362 else
363 {
364 sl_strlcpy(sstr, _("000"), sizeof(sstr));
365 }
366
367 if (info->pos_host > 0)
368 {
369 res = pcre_get_substring(sh_string_str(logline),
370 info->line_ovector, res,
371 info->pos_host, hstr_addr);
372 if (res <= 0)
373 goto corrupt;
374 }
375 else
376 {
377 hstr = NULL;
378 }
379
380 record = SH_ALLOC(sizeof(struct sh_logrecord));
381
382 record->timestamp = timestamp;
383 record->timestr = sh_string_new_from_lchar(tstr, strlen(tstr));
384
385 if (hstr)
386 record->host = sh_string_new_from_lchar(hstr, strlen(hstr));
387 else
388 record->host = sh_string_new_from_lchar(sh.host.name, strlen(sh.host.name));
389
390 record->message = sh_string_new_from_lchar(sh_string_str(logline),
391 sh_string_len(logline));
392 record->pid = 0;
393
394 pcre_free(hstr);
395 return record;
396 }
397 else
398 {
399 char msg[128];
400 sl_snprintf(msg, sizeof(msg), _("Incorrect number of captured subexpressions: %d vs %d"),
401 res, info->line_ovecnum);
402
403 SH_MUTEX_LOCK(mutex_thread_nolog);
404 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
405 msg,
406 _("sh_parse_apache"));
407 SH_MUTEX_UNLOCK(mutex_thread_nolog);
408 }
409
410 /* Corrupted logline */
411 corrupt:
412
413 {
414 sh_string * msg = sh_string_new(0);
415 sh_string_add_from_char(msg, _("Corrupt logline: "));
416 sh_string_add_from_char(msg, sh_string_str(logline));
417
418 SH_MUTEX_LOCK(mutex_thread_nolog);
419 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
420 sh_string_str(msg),
421 _("sh_parse_apache"));
422 SH_MUTEX_UNLOCK(mutex_thread_nolog);
423 sh_string_destroy(&msg);
424 }
425 return NULL;
426}
427
428/* USE_LOGFILE_MONITOR */
429#endif
Note: See TracBrowser for help on using the repository browser.