source: trunk/src/sh_log_parse_apache.c@ 187

Last change on this file since 187 was 186, checked in by katerina, 16 years ago

More fixes for log monitoring, and documentation update.

File size: 10.6 KB
Line 
1/**************************************
2 **
3 ** PARSER RULES
4 **
5 ** (a) must set record->host
6 ** (eventually to dummy value)
7 **
8 ** (b) must set record->prefix
9 ** (itoa(status))
10 **
11 **
12 **************************************/
13
14/* for strptime */
15#define _XOPEN_SOURCE 500
16
17#include "config_xor.h"
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/types.h>
23#include <time.h>
24
25#ifdef USE_LOGFILE_MONITOR
26
27#undef FIL__
28#define FIL__ _("sh_log_parse_apache.c")
29
30/* Debian/Ubuntu: libpcre3-dev */
31#include <pcre.h>
32
33#include "samhain.h"
34#include "sh_pthread.h"
35#include "sh_log_check.h"
36#include "sh_utils.h"
37#include "sh_string.h"
38
39extern int flag_err_debug;
40
41struct sh_fileinfo_apache {
42 pcre * line_regex;
43 int * line_ovector; /* captured substrings */
44 int line_ovecnum; /* how many captured */
45
46 int pos_host;
47 int pos_status;
48 int pos_time;
49 char * format_time;
50};
51
52static const char lf_error0[] = N_("%error");
53static const char lf_common0[] = N_("%h %l %u %t \"%r\" %>s %b");
54static const char lf_combined0[] = N_("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"");
55
56/* This variable is not used anywhere. It only exist
57 * to assign &new to them, which keeps gcc from
58 * putting it into a register, and avoids the 'clobbered
59 * by longjmp' warning. And no, 'volatile' proved insufficient.
60 */
61static void * sh_dummy_new = NULL;
62static void * sh_dummy_fti = NULL;
63
64void * sh_eval_fileinfo_apache(char * str)
65{
66 struct sh_fileinfo_apache * result = NULL;
67 unsigned int i, quotes;
68 unsigned int nfields = 64;
69 size_t lengths[64];
70 char * new = NULL;
71 char ** splits;
72 char * token;
73 sh_string * re_string;
74 char * p;
75 volatile int p_host = -1;
76 volatile int p_status = -1;
77 volatile int p_time = -1;
78 char * f_time = NULL;
79 const char * error;
80 int erroffset;
81
82 /* Take the address to keep gcc from putting them into registers.
83 * Avoids the 'clobbered by longjmp' warning.
84 */
85 sh_dummy_new = (void*) &new;
86 sh_dummy_fti = (void*) &f_time;
87
88 if (0 == strncmp("common", str, 6))
89 {
90 new = sh_util_strdup(_(lf_common0));
91 }
92 else if (0 == strncmp("combined", str, 8))
93 {
94 new = sh_util_strdup(_(lf_combined0));
95 }
96 else if (0 == strncmp("error", str, 8))
97 {
98 new = sh_util_strdup(_(lf_error0));
99 }
100 else
101 {
102 new = sh_util_strdup(str);
103 }
104
105 if (flag_err_debug == SL_TRUE)
106 {
107 SH_MUTEX_LOCK(mutex_thread_nolog);
108 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
109 new,
110 _("eval_fileinfo"));
111 SH_MUTEX_UNLOCK(mutex_thread_nolog);
112 }
113
114 splits = split_array_ws(new, &nfields, lengths);
115
116 if (nfields < 1)
117 {
118 SH_FREE(splits);
119 SH_FREE(new);
120 return NULL;
121 }
122
123 /* Build the regex string re_string
124 */
125 re_string = sh_string_new(0);
126 sh_string_add_from_char(re_string, "^");
127
128 for (i = 0; i < nfields; ++i)
129 {
130
131 if (i > 0)
132 sh_string_add_from_char(re_string, " ");
133
134 if (splits[i][0] != '"')
135 quotes = 0;
136 else
137 quotes = 1;
138
139 if (quotes && lengths[i] > 1 && splits[i][lengths[i]-1] == '"')
140 {
141 splits[i][lengths[i]-1] = '\0'; /* cut trailing quote */
142 token = &(splits[i][1]);
143 } else {
144 token = splits[i];
145 }
146
147 if(quotes)
148 {
149 if(strcmp(token, "%r") == 0 ||
150 strstr(token, _("{Referer}")) == 0 ||
151 strstr(token, _("{User-Agent}")) == 0)
152 {
153 /*
154 p = "\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"";
155 sh_string_add_from_char(re_string, p);
156 */
157 sh_string_add_from_char(re_string, "\"([^");
158 sh_string_add_from_char(re_string, "\"\\\\");
159 sh_string_add_from_char(re_string, "]*");
160 sh_string_add_from_char(re_string, "(?:");
161 sh_string_add_from_char(re_string, "\\\\.");
162 sh_string_add_from_char(re_string, "[^\"");
163 sh_string_add_from_char(re_string, "\\\\]*");
164 sh_string_add_from_char(re_string, ")*)\"");
165 }
166 else
167 {
168 sh_string_add_from_char(re_string, "(");
169 sh_string_add_from_char(re_string, "\\S+");
170 sh_string_add_from_char(re_string, ")");
171 }
172 }
173 else if (token[0] == '%' && token[strlen(token)-1] == 't')
174 {
175 char * lb = strchr(token, '{');
176 char * rb = strchr(token, '}');
177
178 sh_string_add_from_char(re_string, "\\[");
179 sh_string_add_from_char(re_string, "([^");
180 sh_string_add_from_char(re_string, "(\\]");
181 sh_string_add_from_char(re_string, "]+)");
182 sh_string_add_from_char(re_string, "\\]");
183
184 p_time = i+1;
185 if (lb && rb)
186 {
187 ++lb; *rb = '\0';
188 f_time = sh_util_strdup(lb);
189 }
190 else
191 {
192 f_time = sh_util_strdup(_("%d/%b/%Y:%T"));
193 }
194 }
195 else if (token[0] == '%' && token[1] == 'e' && 0 == strcmp(token, _("%error")))
196 {
197 sh_string_add_from_char(re_string, "\\[");
198 sh_string_add_from_char(re_string, "([^");
199 sh_string_add_from_char(re_string, "]");
200 sh_string_add_from_char(re_string, "]+)");
201 sh_string_add_from_char(re_string, "\\]");
202
203 p_time = i+1; f_time = sh_util_strdup(_("%a %b %d %T %Y")); ++i;
204 sh_string_add_from_char(re_string, " ");
205
206 sh_string_add_from_char(re_string, "\\[");
207 sh_string_add_from_char(re_string, "([^");
208 sh_string_add_from_char(re_string, "]");
209 sh_string_add_from_char(re_string, "]+)");
210 sh_string_add_from_char(re_string, "\\]");
211
212 p_status = i+1;
213 sh_string_add_from_char(re_string, " ");
214
215 p = "(.+)";
216 sh_string_add_from_char(re_string, p);
217
218 nfields = 3;
219
220 break;
221 }
222 else
223 {
224 sh_string_add_from_char(re_string, "(");
225 sh_string_add_from_char(re_string, "\\S+");
226 sh_string_add_from_char(re_string, ")");
227 if (token[0] == '%' && token[strlen(token)-1] == 's')
228 p_status = i+1;
229 else if (token[0] == '%' && token[strlen(token)-1] == 'v')
230 p_host = i+1;
231 }
232 }
233 sh_string_add_from_char(re_string, "$");
234
235 if (flag_err_debug == SL_TRUE)
236 {
237 SH_MUTEX_LOCK(mutex_thread_nolog);
238 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
239 sh_string_str(re_string),
240 _("eval_fileinfo"));
241 SH_MUTEX_UNLOCK(mutex_thread_nolog);
242 }
243
244 result = SH_ALLOC(sizeof(struct sh_fileinfo_apache));
245 result->line_regex = pcre_compile(sh_string_str(re_string), 0,
246 &error, &erroffset, NULL);
247 if (!(result->line_regex))
248 {
249 sh_string * msg = sh_string_new(0);
250 sh_string_add_from_char(msg, _("Bad regex: "));
251 sh_string_add_from_char(msg, sh_string_str(re_string));
252
253 SH_MUTEX_LOCK(mutex_thread_nolog);
254 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
255 sh_string_str(msg),
256 _("eval_fileinfo"));
257 SH_MUTEX_UNLOCK(mutex_thread_nolog);
258
259 SH_FREE(result);
260 SH_FREE(splits);
261 SH_FREE(new);
262 sh_string_destroy(&msg);
263 sh_string_destroy(&re_string);
264
265 return NULL;
266 }
267 sh_string_destroy(&re_string);
268
269 result->line_ovector = SH_ALLOC(sizeof(int) * (nfields+1) * 3);
270 result->line_ovecnum = nfields;
271 result->pos_host = p_host;
272 result->pos_status = p_status;
273 result->pos_time = p_time;
274 result->format_time = f_time;
275
276 SH_FREE(splits);
277 SH_FREE(new);
278 return (void*)result;
279}
280
281struct sh_logrecord * sh_parse_apache (sh_string * logline, void * fileinfo)
282{
283 static struct tm old_tm;
284 static time_t old_time;
285
286 char tstr[128];
287 char sstr[128];
288 char * hstr;
289 int res;
290 const char **hstr_addr = (const char **) &hstr;
291
292 struct sh_fileinfo_apache * info = (struct sh_fileinfo_apache *) fileinfo;
293
294 if (sh_string_len(logline) > 0 && flag_err_debug == SL_TRUE)
295 {
296 SH_MUTEX_LOCK(mutex_thread_nolog);
297 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
298 sh_string_str(logline),
299 _("sh_parse_apache"));
300 SH_MUTEX_UNLOCK(mutex_thread_nolog);
301 }
302
303 if (logline == NULL || info == NULL)
304 {
305 return NULL;
306 }
307
308 res = pcre_exec(info->line_regex, NULL,
309 sh_string_str(logline), (int)sh_string_len(logline), 0,
310 0, info->line_ovector, (3*(1+info->line_ovecnum)));
311
312 if (res == (1+info->line_ovecnum))
313 {
314 struct sh_logrecord * record;
315 time_t timestamp = 0;
316
317 if (info->pos_time > 0)
318 {
319 res = pcre_copy_substring(sh_string_str(logline),
320 info->line_ovector, res,
321 info->pos_time, tstr, sizeof(tstr));
322 if (res <= 0)
323 goto corrupt;
324 }
325 else
326 {
327 res = 0;
328 timestamp = 0;
329 info->format_time = sh_util_strdup(_("%d/%b/%Y:%T"));
330 sl_strlcpy(tstr, _("01/Jan/1970:00:00:00"), sizeof(tstr));
331 }
332
333 if (res > 0)
334 {
335 struct tm btime;
336 char * ptr = NULL;
337
338 /* example: 01/Jun/2008:07:55:28 +0200 */
339
340 ptr = /*@i@*/strptime(tstr, info->format_time, &btime);
341
342 if (ptr)
343 {
344 timestamp = conv_timestamp(&btime, &old_tm, &old_time);
345 }
346 else
347 goto corrupt;
348 }
349
350 if (info->pos_status > 0)
351 {
352 res = pcre_copy_substring(sh_string_str(logline),
353 info->line_ovector, res,
354 info->pos_status, sstr, sizeof(sstr));
355 if (res <= 0)
356 goto corrupt;
357 }
358 else
359 {
360 sl_strlcpy(sstr, _("000"), sizeof(sstr));
361 }
362
363 if (info->pos_host > 0)
364 {
365 res = pcre_get_substring(sh_string_str(logline),
366 info->line_ovector, res,
367 info->pos_host, hstr_addr);
368 if (res <= 0)
369 goto corrupt;
370 }
371 else
372 {
373 hstr = NULL;
374 }
375
376 record = SH_ALLOC(sizeof(struct sh_logrecord));
377
378 record->timestamp = timestamp;
379 record->timestr = sh_string_new_from_lchar(tstr, strlen(tstr));
380
381 if (hstr)
382 record->host = sh_string_new_from_lchar(hstr, strlen(hstr));
383 else
384 record->host = sh_string_new_from_lchar(sh.host.name, strlen(sh.host.name));
385
386 record->message = sh_string_new_from_lchar(sh_string_str(logline),
387 sh_string_len(logline));
388 record->pid = 0;
389
390 pcre_free(hstr);
391 return record;
392 }
393 else
394 {
395 char msg[128];
396 sl_snprintf(msg, sizeof(msg), _("Incorrect number of captured subexpressions: %d vs %d"),
397 res, info->line_ovecnum);
398
399 SH_MUTEX_LOCK(mutex_thread_nolog);
400 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
401 msg,
402 _("sh_parse_apache"));
403 SH_MUTEX_UNLOCK(mutex_thread_nolog);
404 }
405
406 /* Corrupted logline */
407 corrupt:
408
409 {
410 sh_string * msg = sh_string_new(0);
411 sh_string_add_from_char(msg, _("Corrupt logline: "));
412 sh_string_add_from_char(msg, sh_string_str(logline));
413
414 SH_MUTEX_LOCK(mutex_thread_nolog);
415 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
416 sh_string_str(msg),
417 _("sh_parse_apache"));
418 SH_MUTEX_UNLOCK(mutex_thread_nolog);
419 sh_string_destroy(&msg);
420 }
421 return NULL;
422}
423
424/* USE_LOGFILE_MONITOR */
425#endif
Note: See TracBrowser for help on using the repository browser.