121 lines
4.4 KiB
Go
121 lines
4.4 KiB
Go
|
package logparsingpipeline
|
|||
|
|
|||
|
import (
|
|||
|
"errors"
|
|||
|
"fmt"
|
|||
|
"regexp"
|
|||
|
"strings"
|
|||
|
)
|
|||
|
|
|||
|
// Regex for strptime format placeholders supported by the time parser.
|
|||
|
// Used for defining if conditions on time parsing operators so they do not
|
|||
|
// spam collector logs when encountering values that can't be parsed.
|
|||
|
//
|
|||
|
// Based on ctimeSubstitutes defined in https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/internal/coreinternal/timeutils/internal/ctimefmt/ctimefmt.go#L22
|
|||
|
//
|
|||
|
// TODO(Raj): Maybe make the expressions tighter.
|
|||
|
var ctimeRegex = map[string]string{
|
|||
|
// %Y - Year, zero-padded (0001, 0002, ..., 2019, 2020, ..., 9999)
|
|||
|
"%Y": "[0-9]{4}",
|
|||
|
// %y - Year, last two digits, zero-padded (01, ..., 99)
|
|||
|
"%y": "[0-9]{2}",
|
|||
|
// %m - Month as a decimal number (01, 02, ..., 12)
|
|||
|
"%m": "[0-9]{2}",
|
|||
|
// %o - Month as a space-padded number ( 1, 2, ..., 12)
|
|||
|
"%o": "_[0-9]",
|
|||
|
// %q - Month as a unpadded number (1,2,...,12)
|
|||
|
"%q": "[0-9]",
|
|||
|
// %b, %h - Abbreviated month name (Jan, Feb, ...)
|
|||
|
"%b": "[a-zA-Z]*?",
|
|||
|
"%h": "[a-zA-Z]*?",
|
|||
|
// %B - Full month name (January, February, ...)
|
|||
|
"%B": "[a-zA-Z]*?",
|
|||
|
// %d - Day of the month, zero-padded (01, 02, ..., 31)
|
|||
|
"%d": "[0-9]{2}",
|
|||
|
// %e - Day of the month, space-padded ( 1, 2, ..., 31)
|
|||
|
"%e": "_[0-9]",
|
|||
|
// %g - Day of the month, unpadded (1,2,...,31)
|
|||
|
"%g": "[0-9]",
|
|||
|
// %a - Abbreviated weekday name (Sun, Mon, ...)
|
|||
|
"%a": "[a-zA-Z]*?",
|
|||
|
// %A - Full weekday name (Sunday, Monday, ...)
|
|||
|
"%A": "[a-zA-Z]*?",
|
|||
|
// %H - Hour (24-hour clock) as a zero-padded decimal number (00, ..., 24)
|
|||
|
"%H": "[0-9]{2}",
|
|||
|
// %l - Hour (12-hour clock: 0, ..., 12)
|
|||
|
"%l": "[0-9]{1-2}",
|
|||
|
// %I - Hour (12-hour clock) as a zero-padded decimal number (00, ..., 12)
|
|||
|
"%I": "[0-9]{2}",
|
|||
|
// %p - Locale’s equivalent of either AM or PM
|
|||
|
"%p": "(AM|PM)",
|
|||
|
// %P - Locale’s equivalent of either am or pm
|
|||
|
"%P": "(am|pm)",
|
|||
|
// %M - Minute, zero-padded (00, 01, ..., 59)
|
|||
|
"%M": "[0-9]{2}",
|
|||
|
// %S - Second as a zero-padded decimal number (00, 01, ..., 59)
|
|||
|
"%S": "[0-9]{2}",
|
|||
|
// %L - Millisecond as a decimal number, zero-padded on the left (000, 001, ..., 999)
|
|||
|
"%L": "[0-9]*?",
|
|||
|
// %f - Microsecond as a decimal number, zero-padded on the left (000000, ..., 999999)
|
|||
|
"%f": "[0-9]*?",
|
|||
|
// %s - Nanosecond as a decimal number, zero-padded on the left (000000, ..., 999999)
|
|||
|
"%s": "[0-9]*?",
|
|||
|
// %Z - Timezone name or abbreviation or empty (UTC, EST, CST)
|
|||
|
"%Z": "[a-zA-Z]*?",
|
|||
|
// %z - UTC offset in the form ±HHMM[SS[.ffffff]] or empty(+0000, -0400)
|
|||
|
"%z": "[-+][0-9]*?",
|
|||
|
// Weekday as a decimal number, where 0 is Sunday and 6 is Saturday.
|
|||
|
"%w": "[-+][0-9]*?",
|
|||
|
"%i": "[-+][0-9]*?",
|
|||
|
"%j": "[-+][0-9]{2}:[0-9]{2}",
|
|||
|
"%k": "[-+][0-9]{2}:[0-9]{2}:[0-9]{2}",
|
|||
|
// %D, %x - Short MM/DD/YY date, equivalent to %m/%d/%y
|
|||
|
"%D": "[0-9]{2}/[0-9]{2}/[0-9]{4}",
|
|||
|
// %D, %x - Short MM/DD/YY date, equivalent to %m/%d/%y
|
|||
|
"%x": "[0-9]{2}/[0-9]{2}/[0-9]{4}",
|
|||
|
// %F - Short YYYY-MM-DD date, equivalent to %Y-%m-%d
|
|||
|
"%F": "[0-9]{4}-[0-9]{2}-[0-9]{2}",
|
|||
|
// %T, %X - ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S
|
|||
|
"%T": "[0-9]{2}:[0-9]{2}:[0-9]{2}",
|
|||
|
// %T, %X - ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S
|
|||
|
"%X": "[0-9]{2}:[0-9]{2}:[0-9]{2}",
|
|||
|
// %r - 12-hour clock time (02:55:02 pm)
|
|||
|
"%r": "[0-9]{2}:[0-9]{2}:[0-9]{2} (am|pm)",
|
|||
|
// %R - 24-hour HH:MM time, equivalent to %H:%M
|
|||
|
"%R": "[0-9]{2}:[0-9]{2}",
|
|||
|
// %n - New-line character ('\n')
|
|||
|
"%n": "\n",
|
|||
|
// %t - Horizontal-tab character ('\t')
|
|||
|
"%t": "\t",
|
|||
|
// %% - A % sign
|
|||
|
"%%": "%",
|
|||
|
// %c - Date and time representation (Mon Jan 02 15:04:05 2006)
|
|||
|
"%c": "[a-zA-Z]{3} [a-zA-Z]{3} [0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}",
|
|||
|
}
|
|||
|
|
|||
|
func RegexForStrptimeLayout(layout string) (string, error) {
|
|||
|
layoutRegex := layout
|
|||
|
for _, regexSpecialChar := range []string{
|
|||
|
".", "+", "*", "?", "^", "$", "(", ")", "[", "]", "{", "}", "|", `\`,
|
|||
|
} {
|
|||
|
layoutRegex = strings.ReplaceAll(layoutRegex, regexSpecialChar, `\`+regexSpecialChar)
|
|||
|
}
|
|||
|
|
|||
|
var errs []error
|
|||
|
replaceStrptimeDirectiveWithRegex := func(directive string) string {
|
|||
|
if regex, ok := ctimeRegex[directive]; ok {
|
|||
|
return regex
|
|||
|
}
|
|||
|
errs = append(errs, errors.New("unsupported ctimefmt directive: "+directive))
|
|||
|
return ""
|
|||
|
}
|
|||
|
|
|||
|
strptimeDirectiveRegexp := regexp.MustCompile(`%.`)
|
|||
|
layoutRegex = strptimeDirectiveRegexp.ReplaceAllStringFunc(layoutRegex, replaceStrptimeDirectiveWithRegex)
|
|||
|
if len(errs) != 0 {
|
|||
|
return "", fmt.Errorf("couldn't generate regex for ctime format: %v", errs)
|
|||
|
}
|
|||
|
|
|||
|
return layoutRegex, nil
|
|||
|
}
|