121 lines
4.4 KiB
Go
121 lines
4.4 KiB
Go
package logparsingpipeline
|
||
|
||
import (
|
||
"errors"
|
||
"fmt"
|
||
"regexp"
|
||
"strings"
|
||
)
|
||
|
||
// ctimeRegex maps each strptime directive supported by the time parser to a
// regular expression fragment matching the text that directive stands for.
//
// The fragments are used for defining if conditions on time parsing operators
// so they do not spam collector logs when encountering values that can't be
// parsed.
//
// Based on ctimeSubstitutes defined in https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/internal/coreinternal/timeutils/internal/ctimefmt/ctimefmt.go#L22
//
// TODO(Raj): Maybe make the expressions tighter.
var ctimeRegex = map[string]string{
	// %Y - Year, zero-padded (0001, 0002, ..., 2019, 2020, ..., 9999)
	"%Y": "[0-9]{4}",
	// %y - Year, last two digits, zero-padded (01, ..., 99)
	"%y": "[0-9]{2}",
	// %m - Month as a decimal number (01, 02, ..., 12)
	"%m": "[0-9]{2}",
	// %o - Month as a space-padded number ( 1, 2, ..., 12)
	// NOTE(review): this pattern requires a literal underscore, not a space.
	// Go time layouts have no space-padded month verb, so it is unclear what
	// the parser actually accepts for %o - confirm before changing it.
	"%o": "_[0-9]",
	// %q - Month as an unpadded number (1, 2, ..., 12); one or two digits.
	"%q": "[0-9]{1,2}",
	// %b, %h - Abbreviated month name (Jan, Feb, ...)
	"%b": "[a-zA-Z]*?",
	"%h": "[a-zA-Z]*?",
	// %B - Full month name (January, February, ...)
	"%B": "[a-zA-Z]*?",
	// %d - Day of the month, zero-padded (01, 02, ..., 31)
	"%d": "[0-9]{2}",
	// %e - Day of the month, space-padded ( 1, 2, ..., 31); the padding is a
	// space, not the underscore used by Go's "_2" layout syntax.
	"%e": " ?[0-9]{1,2}",
	// %g - Day of the month, unpadded (1, 2, ..., 31); one or two digits.
	"%g": "[0-9]{1,2}",
	// %a - Abbreviated weekday name (Sun, Mon, ...)
	"%a": "[a-zA-Z]*?",
	// %A - Full weekday name (Sunday, Monday, ...)
	"%A": "[a-zA-Z]*?",
	// %H - Hour (24-hour clock) as a zero-padded decimal number (00, ..., 24)
	"%H": "[0-9]{2}",
	// %l - Hour (12-hour clock: 0, ..., 12); one or two digits. The repetition
	// must be written {1,2}: {1-2} is not a quantifier and matches literally.
	"%l": "[0-9]{1,2}",
	// %I - Hour (12-hour clock) as a zero-padded decimal number (00, ..., 12)
	"%I": "[0-9]{2}",
	// %p - Locale's equivalent of either AM or PM
	"%p": "(AM|PM)",
	// %P - Locale's equivalent of either am or pm
	"%P": "(am|pm)",
	// %M - Minute, zero-padded (00, 01, ..., 59)
	"%M": "[0-9]{2}",
	// %S - Second as a zero-padded decimal number (00, 01, ..., 59)
	"%S": "[0-9]{2}",
	// %L - Millisecond as a decimal number, zero-padded on the left (000, 001, ..., 999)
	"%L": "[0-9]*?",
	// %f - Microsecond as a decimal number, zero-padded on the left (000000, ..., 999999)
	"%f": "[0-9]*?",
	// %s - Nanosecond as a decimal number, zero-padded on the left
	"%s": "[0-9]*?",
	// %Z - Timezone name or abbreviation or empty (UTC, EST, CST)
	"%Z": "[a-zA-Z]*?",
	// %z - UTC offset in the form ±HHMM[SS[.ffffff]] or empty (+0000, -0400)
	"%z": "[-+][0-9]*?",
	// %w - Weekday as a decimal number, where 0 is Sunday and 6 is Saturday.
	// NOTE(review): the pattern is a sign followed by digits, identical to %z,
	// which does not look like a weekday number - confirm against upstream.
	"%w": "[-+][0-9]*?",
	// %i, %j, %k - signed numeric forms: a sign followed by digits, by NN:NN,
	// and by NN:NN:NN respectively (presumably UTC offset variants - confirm
	// against upstream).
	"%i": "[-+][0-9]*?",
	"%j": "[-+][0-9]{2}:[0-9]{2}",
	"%k": "[-+][0-9]{2}:[0-9]{2}:[0-9]{2}",
	// %D, %x - Short date written as NN/NN/NNNN. Note the pattern expects a
	// four-digit year, not the two-digit %y form.
	"%D": "[0-9]{2}/[0-9]{2}/[0-9]{4}",
	"%x": "[0-9]{2}/[0-9]{2}/[0-9]{4}",
	// %F - Short YYYY-MM-DD date, equivalent to %Y-%m-%d
	"%F": "[0-9]{4}-[0-9]{2}-[0-9]{2}",
	// %T, %X - ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S
	"%T": "[0-9]{2}:[0-9]{2}:[0-9]{2}",
	"%X": "[0-9]{2}:[0-9]{2}:[0-9]{2}",
	// %r - 12-hour clock time (02:55:02 pm)
	"%r": "[0-9]{2}:[0-9]{2}:[0-9]{2} (am|pm)",
	// %R - 24-hour HH:MM time, equivalent to %H:%M
	"%R": "[0-9]{2}:[0-9]{2}",
	// %n - New-line character ('\n')
	"%n": "\n",
	// %t - Horizontal-tab character ('\t')
	"%t": "\t",
	// %% - A % sign
	"%%": "%",
	// %c - Date and time representation (Mon Jan 02 15:04:05 2006)
	"%c": "[a-zA-Z]{3} [a-zA-Z]{3} [0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}",
}
|
||
|
||
func RegexForStrptimeLayout(layout string) (string, error) {
|
||
layoutRegex := layout
|
||
for _, regexSpecialChar := range []string{
|
||
".", "+", "*", "?", "^", "$", "(", ")", "[", "]", "{", "}", "|", `\`,
|
||
} {
|
||
layoutRegex = strings.ReplaceAll(layoutRegex, regexSpecialChar, `\`+regexSpecialChar)
|
||
}
|
||
|
||
var errs []error
|
||
replaceStrptimeDirectiveWithRegex := func(directive string) string {
|
||
if regex, ok := ctimeRegex[directive]; ok {
|
||
return regex
|
||
}
|
||
errs = append(errs, errors.New("unsupported ctimefmt directive: "+directive))
|
||
return ""
|
||
}
|
||
|
||
strptimeDirectiveRegexp := regexp.MustCompile(`%.`)
|
||
layoutRegex = strptimeDirectiveRegexp.ReplaceAllStringFunc(layoutRegex, replaceStrptimeDirectiveWithRegex)
|
||
if len(errs) != 0 {
|
||
return "", fmt.Errorf("couldn't generate regex for ctime format: %v", errs)
|
||
}
|
||
|
||
return layoutRegex, nil
|
||
}
|