package logparsingpipeline import ( "errors" "fmt" "regexp" "strings" ) // Regex for strptime format placeholders supported by the time parser. // Used for defining if conditions on time parsing operators so they do not // spam collector logs when encountering values that can't be parsed. // // Based on ctimeSubstitutes defined in https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/internal/coreinternal/timeutils/internal/ctimefmt/ctimefmt.go#L22 // // TODO(Raj): Maybe make the expressions tighter. var ctimeRegex = map[string]string{ // %Y - Year, zero-padded (0001, 0002, ..., 2019, 2020, ..., 9999) "%Y": "[0-9]{4}", // %y - Year, last two digits, zero-padded (01, ..., 99) "%y": "[0-9]{2}", // %m - Month as a decimal number (01, 02, ..., 12) "%m": "[0-9]{2}", // %o - Month as a space-padded number ( 1, 2, ..., 12) "%o": "_[0-9]", // %q - Month as a unpadded number (1,2,...,12) "%q": "[0-9]", // %b, %h - Abbreviated month name (Jan, Feb, ...) "%b": "[a-zA-Z]*?", "%h": "[a-zA-Z]*?", // %B - Full month name (January, February, ...) "%B": "[a-zA-Z]*?", // %d - Day of the month, zero-padded (01, 02, ..., 31) "%d": "[0-9]{2}", // %e - Day of the month, space-padded ( 1, 2, ..., 31) "%e": "_[0-9]", // %g - Day of the month, unpadded (1,2,...,31) "%g": "[0-9]", // %a - Abbreviated weekday name (Sun, Mon, ...) "%a": "[a-zA-Z]*?", // %A - Full weekday name (Sunday, Monday, ...) "%A": "[a-zA-Z]*?", // %H - Hour (24-hour clock) as a zero-padded decimal number (00, ..., 24) "%H": "[0-9]{2}", // %l - Hour (12-hour clock: 0, ..., 12) "%l": "[0-9]{1-2}", // %I - Hour (12-hour clock) as a zero-padded decimal number (00, ..., 12) "%I": "[0-9]{2}", // %p - Locale’s equivalent of either AM or PM "%p": "(AM|PM)", // %P - Locale’s equivalent of either am or pm "%P": "(am|pm)", // %M - Minute, zero-padded (00, 01, ..., 59) "%M": "[0-9]{2}", // %S - Second as a zero-padded decimal number (00, 01, ..., 59) "%S": "[0-9]{2}", // %L - Millisecond as a decimal number, zero-padded on the left (000, 001, ..., 999) "%L": "[0-9]*?", // %f - Microsecond as a decimal number, zero-padded on the left (000000, ..., 999999) "%f": "[0-9]*?", // %s - Nanosecond as a decimal number, zero-padded on the left (000000, ..., 999999) "%s": "[0-9]*?", // %Z - Timezone name or abbreviation or empty (UTC, EST, CST) "%Z": "[a-zA-Z]*?", // %z - UTC offset in the form ±HHMM[SS[.ffffff]] or empty(+0000, -0400) "%z": "[-+][0-9]*?", // Weekday as a decimal number, where 0 is Sunday and 6 is Saturday. "%w": "[-+][0-9]*?", "%i": "[-+][0-9]*?", "%j": "[-+][0-9]{2}:[0-9]{2}", "%k": "[-+][0-9]{2}:[0-9]{2}:[0-9]{2}", // %D, %x - Short MM/DD/YY date, equivalent to %m/%d/%y "%D": "[0-9]{2}/[0-9]{2}/[0-9]{4}", // %D, %x - Short MM/DD/YY date, equivalent to %m/%d/%y "%x": "[0-9]{2}/[0-9]{2}/[0-9]{4}", // %F - Short YYYY-MM-DD date, equivalent to %Y-%m-%d "%F": "[0-9]{4}-[0-9]{2}-[0-9]{2}", // %T, %X - ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S "%T": "[0-9]{2}:[0-9]{2}:[0-9]{2}", // %T, %X - ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S "%X": "[0-9]{2}:[0-9]{2}:[0-9]{2}", // %r - 12-hour clock time (02:55:02 pm) "%r": "[0-9]{2}:[0-9]{2}:[0-9]{2} (am|pm)", // %R - 24-hour HH:MM time, equivalent to %H:%M "%R": "[0-9]{2}:[0-9]{2}", // %n - New-line character ('\n') "%n": "\n", // %t - Horizontal-tab character ('\t') "%t": "\t", // %% - A % sign "%%": "%", // %c - Date and time representation (Mon Jan 02 15:04:05 2006) "%c": "[a-zA-Z]{3} [a-zA-Z]{3} [0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}", } func RegexForStrptimeLayout(layout string) (string, error) { layoutRegex := layout for _, regexSpecialChar := range []string{ ".", "+", "*", "?", "^", "$", "(", ")", "[", "]", "{", "}", "|", `\`, } { layoutRegex = strings.ReplaceAll(layoutRegex, regexSpecialChar, `\`+regexSpecialChar) } var errs []error replaceStrptimeDirectiveWithRegex := func(directive string) string { if regex, ok := ctimeRegex[directive]; ok { return regex } errs = append(errs, errors.New("unsupported ctimefmt directive: "+directive)) return "" } strptimeDirectiveRegexp := regexp.MustCompile(`%.`) layoutRegex = strptimeDirectiveRegexp.ReplaceAllStringFunc(layoutRegex, replaceStrptimeDirectiveWithRegex) if len(errs) != 0 { return "", fmt.Errorf("couldn't generate regex for ctime format: %v", errs) } return layoutRegex, nil }