source: trunk/src/sh_log_parse_apache.c@ 589

Last change on this file since 589 was 588, checked in by katerina, 24 hours ago

Fix for ticket #476 (move logfile monitoring module from PCRE to PCRE2).

File size: 11.5 KB
Line 
1/**************************************
2 **
3 ** PARSER RULES
4 **
5 ** (a) must set record->host
6 ** (eventually to dummy value)
7 **
8 ** (b) must set record->prefix
9 ** (itoa(status))
10 **
11 **
12 **************************************/
13
14/* for strptime */
15#define _XOPEN_SOURCE 500
16
17#include "config_xor.h"
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/types.h>
23#include <time.h>
24
25#ifdef USE_LOGFILE_MONITOR
26
27#undef FIL__
28#define FIL__ _("sh_log_parse_apache.c")
29
30/* Debian/Ubuntu: libpcre2-dev */
31#define PCRE2_CODE_UNIT_WIDTH 8
32#ifdef HAVE_PCRE2_PCRE2_H
33#include <pcre2/pcre2.h>
34#else
35#include <pcre2.h>
36#endif
37
38#include "samhain.h"
39#include "sh_pthread.h"
40#include "sh_log_check.h"
41#include "sh_utils.h"
42#include "sh_string.h"
43
44extern int flag_err_debug;
45
46struct sh_fileinfo_apache {
47 pcre2_code * line_regex;
48 PCRE2_SIZE * line_ovector; /* captured substrings */
49 int line_ovecnum; /* how many captured */
50
51 int pos_host;
52 int pos_status;
53 int pos_time;
54 char * format_time;
55};
56
57static const char lf_error0[] = N_("%error");
58static const char lf_common0[] = N_("%h %l %u %t \"%r\" %>s %b");
59static const char lf_combined0[] = N_("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"");
60
/* These variables are not used anywhere. They exist only so that
 * the address of a local variable (&new and friends) can be assigned
 * to them, which keeps gcc from putting that local into a register
 * and avoids the 'clobbered by longjmp' warning.
 * And no, 'volatile' proved insufficient.
 */
void *sh_dummy_65_new = NULL;
void *sh_dummy_66_fti = NULL;
void *sh_dummy_67_ftr = NULL;
69
70void * sh_eval_fileinfo_apache(char * str)
71{
72 struct sh_fileinfo_apache * result = NULL;
73 unsigned int i, quotes;
74 unsigned int nfields = 64;
75 size_t lengths[64];
76 char * new = NULL;
77 char ** splits;
78 char * token;
79 sh_string * re_string;
80 char * p;
81 volatile int p_host = -1;
82 volatile int p_status = -1;
83 volatile int p_time = -1;
84 char * f_time = NULL;
85 int error;
86 size_t erroffset;
87
88 /* Take the address to keep gcc from putting them into registers.
89 * Avoids the 'clobbered by longjmp' warning.
90 */
91 sh_dummy_65_new = (void*) &new;
92 sh_dummy_66_fti = (void*) &f_time;
93 sh_dummy_67_ftr = (void*) &result;
94
95 if (0 == strncmp("common", str, 6))
96 {
97 new = sh_util_strdup(_(lf_common0));
98 }
99 else if (0 == strncmp("combined", str, 8))
100 {
101 new = sh_util_strdup(_(lf_combined0));
102 }
103 else if (0 == strncmp("error", str, 8))
104 {
105 new = sh_util_strdup(_(lf_error0));
106 }
107 else
108 {
109 new = sh_util_strdup(str);
110 }
111
112 if (flag_err_debug == S_TRUE)
113 {
114 SH_MUTEX_LOCK(mutex_thread_nolog);
115 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
116 new,
117 _("eval_fileinfo"));
118 SH_MUTEX_UNLOCK(mutex_thread_nolog);
119 }
120
121 splits = split_array_ws(new, &nfields, lengths);
122
123 if (nfields < 1)
124 {
125 SH_FREE(splits);
126 SH_FREE(new);
127 return NULL;
128 }
129
130 /* Build the regex string re_string
131 */
132 re_string = sh_string_new(0);
133 sh_string_add_from_char(re_string, "^");
134
135 for (i = 0; i < nfields; ++i)
136 {
137
138 if (i > 0)
139 sh_string_add_from_char(re_string, " ");
140
141 if (splits[i][0] != '"')
142 quotes = 0;
143 else
144 quotes = 1;
145
146 if (quotes && lengths[i] > 1 && splits[i][lengths[i]-1] == '"')
147 {
148 splits[i][lengths[i]-1] = '\0'; /* cut trailing quote */
149 token = &(splits[i][1]);
150 } else {
151 token = splits[i];
152 }
153
154 if(quotes)
155 {
156 if(strcmp(token, "%r") == 0 ||
157 strstr(token, _("{Referer}")) != NULL ||
158 strstr(token, _("{User-Agent}")) != NULL ||
159 strstr(token, _("{X-Forwarded-For}")) != NULL )
160 {
161 /*
162 p = "\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"";
163 sh_string_add_from_char(re_string, p);
164 */
165 sh_string_add_from_char(re_string, "\"([^");
166 sh_string_add_from_char(re_string, "\"\\\\");
167 sh_string_add_from_char(re_string, "]*");
168 sh_string_add_from_char(re_string, "(?:");
169 sh_string_add_from_char(re_string, "\\\\.");
170 sh_string_add_from_char(re_string, "[^\"");
171 sh_string_add_from_char(re_string, "\\\\]*");
172 sh_string_add_from_char(re_string, ")*)\"");
173 }
174 else
175 {
176 sh_string_add_from_char(re_string, "(");
177 sh_string_add_from_char(re_string, "\\S+");
178 sh_string_add_from_char(re_string, ")");
179 }
180 }
181 else if (token[0] == 'R' && token[1] == 'E' && token[2] == '{' && token[strlen(token)-1] == '}')
182 {
183 char * lb = strchr(token, '{');
184 char * rb = strrchr(token, '}');
185
186 if (lb && rb)
187 {
188 ++lb; *rb = '\0';
189 sh_string_add_from_char(re_string, lb);
190 }
191 }
192 else if (token[0] == '%' && token[strlen(token)-1] == 't')
193 {
194 char * lb = strchr(token, '{');
195 char * rb = strchr(token, '}');
196
197 sh_string_add_from_char(re_string, "\\[");
198 sh_string_add_from_char(re_string, "([^");
199 sh_string_add_from_char(re_string, "(\\]");
200 sh_string_add_from_char(re_string, "]+)");
201 sh_string_add_from_char(re_string, "\\]");
202
203 p_time = i+1;
204 if (lb && rb)
205 {
206 ++lb; *rb = '\0';
207 f_time = sh_util_strdup(lb);
208 }
209 else
210 {
211 f_time = sh_util_strdup(_("%d/%b/%Y:%T"));
212 }
213 }
214 else if (token[0] == '%' && token[1] == 'e' && 0 == strcmp(token, _("%error")))
215 {
216 sh_string_add_from_char(re_string, "\\[");
217 sh_string_add_from_char(re_string, "([^");
218 sh_string_add_from_char(re_string, "]");
219 sh_string_add_from_char(re_string, "]+)");
220 sh_string_add_from_char(re_string, "\\]");
221
222 p_time = i+1; f_time = sh_util_strdup(_("%a %b %d %T %Y")); ++i;
223 sh_string_add_from_char(re_string, " ");
224
225 sh_string_add_from_char(re_string, "\\[");
226 sh_string_add_from_char(re_string, "([^");
227 sh_string_add_from_char(re_string, "]");
228 sh_string_add_from_char(re_string, "]+)");
229 sh_string_add_from_char(re_string, "\\]");
230
231 p_status = i+1;
232 sh_string_add_from_char(re_string, " ");
233
234 p = "(.+)";
235 sh_string_add_from_char(re_string, p);
236
237 nfields = 3;
238
239 break;
240 }
241 else
242 {
243 sh_string_add_from_char(re_string, "(");
244 sh_string_add_from_char(re_string, "\\S+");
245 sh_string_add_from_char(re_string, ")");
246 if (token[0] == '%' && token[strlen(token)-1] == 's')
247 p_status = i+1;
248 else if (token[0] == '%' && token[strlen(token)-1] == 'v')
249 p_host = i+1;
250 }
251 }
252 sh_string_add_from_char(re_string, "$");
253
254 if (flag_err_debug == S_TRUE)
255 {
256 SH_MUTEX_LOCK(mutex_thread_nolog);
257 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
258 sh_string_str(re_string),
259 _("eval_fileinfo"));
260 SH_MUTEX_UNLOCK(mutex_thread_nolog);
261 }
262
263 result = SH_ALLOC(sizeof(struct sh_fileinfo_apache));
264 result->line_regex = pcre2_compile((PCRE2_SPTR8)sh_string_str(re_string), sh_string_len(re_string), 0,
265 &error, &erroffset, NULL);
266 if (!(result->line_regex))
267 {
268 sh_string * msg = sh_string_new(0);
269 sh_string_add_from_char(msg, _("Bad regex: "));
270 sh_string_add_from_char(msg, sh_string_str(re_string));
271
272 SH_MUTEX_LOCK(mutex_thread_nolog);
273 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
274 sh_string_str(msg),
275 _("eval_fileinfo"));
276 SH_MUTEX_UNLOCK(mutex_thread_nolog);
277
278 SH_FREE(result);
279 SH_FREE(splits);
280 SH_FREE(new);
281 sh_string_destroy(&msg);
282 sh_string_destroy(&re_string);
283
284 return NULL;
285 }
286 sh_string_destroy(&re_string);
287
288 result->line_ovector = NULL;
289 result->line_ovecnum = nfields;
290 result->pos_host = p_host;
291 result->pos_status = p_status;
292 result->pos_time = p_time;
293 result->format_time = f_time;
294
295 SH_FREE(splits);
296 SH_FREE(new);
297 return (void*)result;
298}
299
300struct sh_logrecord * sh_parse_apache (sh_string * logline, void * fileinfo)
301{
302 static struct tm old_tm;
303 static time_t old_time;
304
305 char tstr[128];
306 char sstr[128];
307 char * hstr;
308 int res;
309 unsigned char **hstr_addr = (unsigned char **) &hstr;
310 size_t hstr_len;
311
312 struct sh_fileinfo_apache * info = (struct sh_fileinfo_apache *) fileinfo;
313
314 pcre2_match_data * match_data = NULL;
315
316 if (sh_string_len(logline) > 0 && flag_err_debug == S_TRUE)
317 {
318 SH_MUTEX_LOCK(mutex_thread_nolog);
319 sh_error_handle(SH_ERR_ALL, FIL__, __LINE__, 0, MSG_E_SUBGEN,
320 sh_string_str(logline),
321 _("sh_parse_apache"));
322 SH_MUTEX_UNLOCK(mutex_thread_nolog);
323 }
324
325 if (logline == NULL || info == NULL)
326 {
327 return NULL;
328 }
329
330 match_data = pcre2_match_data_create_from_pattern(info->line_regex, NULL);
331
332 res = pcre2_match(info->line_regex,
333 (PCRE2_SPTR8)sh_string_str(logline), (int)sh_string_len(logline), 0,
334 0, match_data, NULL);
335
336 if (res == 1+info->line_ovecnum) /* successful match */
337 {
338 struct sh_logrecord * record;
339 time_t timestamp = 0;
340 size_t size;
341
342 info->line_ovector = pcre2_get_ovector_pointer(match_data);
343
344 if (info->pos_time > 0)
345 {
346 size = sizeof(tstr);
347 res = pcre2_substring_copy_bynumber(match_data, info->pos_time,
348 (PCRE2_UCHAR8 *)tstr, &size);
349 if (res != 0)
350 goto corrupt;
351 }
352 else
353 {
354 res = -1;
355 timestamp = 0;
356 info->format_time = sh_util_strdup(_("%d/%b/%Y:%T"));
357 sl_strlcpy(tstr, _("01/Jan/1970:00:00:00"), sizeof(tstr));
358 }
359
360 if (res == 0)
361 {
362 struct tm btime;
363 char * ptr = NULL;
364
365 memset(&btime, 0, sizeof(struct tm));
366 btime.tm_isdst = -1;
367
368 /* example: 01/Jun/2008:07:55:28 +0200 */
369
370 ptr = /*@i@*/strptime(tstr, info->format_time, &btime);
371
372 if (ptr)
373 {
374 timestamp = conv_timestamp(&btime, &old_tm, &old_time);
375 }
376 else
377 goto corrupt;
378 }
379
380 if (info->pos_status > 0)
381 {
382 size = sizeof(sstr);
383 res = pcre2_substring_copy_bynumber(match_data, info->pos_status,
384 (PCRE2_UCHAR8 *)sstr, &size);
385 if (res != 0)
386 goto corrupt;
387 }
388 else
389 {
390 sl_strlcpy(sstr, _("000"), sizeof(sstr));
391 }
392
393 if (info->pos_host > 0)
394 {
395 res = pcre2_substring_get_bynumber(match_data, info->pos_host,
396 hstr_addr, &hstr_len);
397 if (res != 0)
398 goto corrupt;
399 }
400 else
401 {
402 hstr = NULL;
403 }
404
405 record = SH_ALLOC(sizeof(struct sh_logrecord));
406
407 record->timestamp = timestamp;
408 record->timestr = sh_string_new_from_lchar(tstr, strlen(tstr));
409
410 if (hstr)
411 record->host = sh_string_new_from_lchar(hstr, hstr_len);
412 else
413 record->host = sh_string_new_from_lchar(sh.host.name, strlen(sh.host.name));
414
415 record->message = sh_string_new_from_lchar(sh_string_str(logline),
416 sh_string_len(logline));
417 record->pid = PID_INVALID;
418
419 /* does nothing if hstr == NULL */
420 pcre2_substring_free((PCRE2_UCHAR8 *)hstr);
421
422 pcre2_match_data_free(match_data);
423 return record;
424 }
425 else
426 {
427 char msg[128];
428 sl_snprintf(msg, sizeof(msg), _("Matching error: %d"), res);
429
430 SH_MUTEX_LOCK(mutex_thread_nolog);
431 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
432 msg,
433 _("sh_parse_apache"));
434 SH_MUTEX_UNLOCK(mutex_thread_nolog);
435 }
436
437 /* Corrupted logline */
438 corrupt:
439
440 {
441 sh_string * msg = sh_string_new(0);
442 sh_string_add_from_char(msg, _("Corrupt logline: "));
443 sh_string_add_from_char(msg, sh_string_str(logline));
444
445 SH_MUTEX_LOCK(mutex_thread_nolog);
446 sh_error_handle(SH_ERR_ERR, FIL__, __LINE__, 0, MSG_E_SUBGEN,
447 sh_string_str(msg),
448 _("sh_parse_apache"));
449 SH_MUTEX_UNLOCK(mutex_thread_nolog);
450 sh_string_destroy(&msg);
451 }
452 pcre2_match_data_free(match_data);
453 return NULL;
454}
455
456/* USE_LOGFILE_MONITOR */
457#endif
Note: See TracBrowser for help on using the repository browser.