logs-analyzer/signoz/pkg/query-service/app/logparsingpipeline/time_parser.go
2024-09-02 22:47:30 +03:00

121 lines
4.4 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package logparsingpipeline
import (
"errors"
"fmt"
"regexp"
"strings"
)
// ctimeRegex maps each strptime format directive supported by the time parser
// to a regex fragment describing the text that directive is expected to match.
//
// The fragments are used for defining `if` conditions on time parsing
// operators so they do not spam collector logs when encountering values that
// can't be parsed.
//
// Based on ctimeSubstitutes defined in https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/internal/coreinternal/timeutils/internal/ctimefmt/ctimefmt.go#L22
//
// TODO(Raj): Maybe make the expressions tighter.
var ctimeRegex = map[string]string{
	// %Y - Year, zero-padded (0001, 0002, ..., 2019, 2020, ..., 9999)
	"%Y": "[0-9]{4}",
	// %y - Year, last two digits, zero-padded (01, ..., 99)
	"%y": "[0-9]{2}",
	// %m - Month as a decimal number (01, 02, ..., 12)
	"%m": "[0-9]{2}",
	// %o - Month as a space-padded number ( 1, 2, ..., 12)
	// NOTE(review): this pattern matches a literal underscore followed by a
	// single digit, not a space-padded number - confirm this is intended.
	"%o": "_[0-9]",
	// %q - Month as an unpadded number (1,2,...,12)
	"%q": "[0-9]",
	// %b, %h - Abbreviated month name (Jan, Feb, ...)
	"%b": "[a-zA-Z]*?",
	"%h": "[a-zA-Z]*?",
	// %B - Full month name (January, February, ...)
	"%B": "[a-zA-Z]*?",
	// %d - Day of the month, zero-padded (01, 02, ..., 31)
	"%d": "[0-9]{2}",
	// %e - Day of the month, space-padded ( 1, 2, ..., 31)
	// NOTE(review): same literal-underscore pattern as %o - confirm.
	"%e": "_[0-9]",
	// %g - Day of the month, unpadded (1,2,...,31)
	"%g": "[0-9]",
	// %a - Abbreviated weekday name (Sun, Mon, ...)
	"%a": "[a-zA-Z]*?",
	// %A - Full weekday name (Sunday, Monday, ...)
	"%A": "[a-zA-Z]*?",
	// %H - Hour (24-hour clock) as a zero-padded decimal number (00, ..., 24)
	"%H": "[0-9]{2}",
	// %l - Hour (12-hour clock: 0, ..., 12), one or two digits.
	// The repetition must be spelled `{1,2}`: Go's regexp treats `{1-2}` as
	// literal text, which would never match an actual hour value.
	"%l": "[0-9]{1,2}",
	// %I - Hour (12-hour clock) as a zero-padded decimal number (00, ..., 12)
	"%I": "[0-9]{2}",
	// %p - Locale's equivalent of either AM or PM
	"%p": "(AM|PM)",
	// %P - Locale's equivalent of either am or pm
	"%P": "(am|pm)",
	// %M - Minute, zero-padded (00, 01, ..., 59)
	"%M": "[0-9]{2}",
	// %S - Second as a zero-padded decimal number (00, 01, ..., 59)
	"%S": "[0-9]{2}",
	// %L - Millisecond as a decimal number, zero-padded on the left (000, 001, ..., 999)
	"%L": "[0-9]*?",
	// %f - Microsecond as a decimal number, zero-padded on the left (000000, ..., 999999)
	"%f": "[0-9]*?",
	// %s - Nanosecond as a decimal number, zero-padded on the left
	"%s": "[0-9]*?",
	// %Z - Timezone name or abbreviation or empty (UTC, EST, CST)
	"%Z": "[a-zA-Z]*?",
	// %z - UTC offset in the form ±HHMM[SS[.ffffff]] or empty (+0000, -0400)
	"%z": "[-+][0-9]*?",
	// %w, %i, %j, %k - signed numeric patterns.
	// NOTE(review): the inherited comment described %w as "weekday as a
	// decimal number, where 0 is Sunday and 6 is Saturday", but these four
	// patterns all have the shape of a signed UTC offset (digits only,
	// HH:MM, HH:MM:SS) - confirm against ctimefmt.
	"%w": "[-+][0-9]*?",
	"%i": "[-+][0-9]*?",
	"%j": "[-+][0-9]{2}:[0-9]{2}",
	"%k": "[-+][0-9]{2}:[0-9]{2}:[0-9]{2}",
	// %D, %x - Short MM/DD/YY date, equivalent to %m/%d/%y
	// NOTE(review): the patterns below expect a 4-digit year - confirm which
	// of the comment or the pattern reflects the parser.
	"%D": "[0-9]{2}/[0-9]{2}/[0-9]{4}",
	"%x": "[0-9]{2}/[0-9]{2}/[0-9]{4}",
	// %F - Short YYYY-MM-DD date, equivalent to %Y-%m-%d
	"%F": "[0-9]{4}-[0-9]{2}-[0-9]{2}",
	// %T, %X - ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S
	"%T": "[0-9]{2}:[0-9]{2}:[0-9]{2}",
	"%X": "[0-9]{2}:[0-9]{2}:[0-9]{2}",
	// %r - 12-hour clock time (02:55:02 pm)
	"%r": "[0-9]{2}:[0-9]{2}:[0-9]{2} (am|pm)",
	// %R - 24-hour HH:MM time, equivalent to %H:%M
	"%R": "[0-9]{2}:[0-9]{2}",
	// %n - New-line character ('\n')
	"%n": "\n",
	// %t - Horizontal-tab character ('\t')
	"%t": "\t",
	// %% - A % sign
	"%%": "%",
	// %c - Date and time representation (Mon Jan 02 15:04:05 2006)
	"%c": "[a-zA-Z]{3} [a-zA-Z]{3} [0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} [0-9]{4}",
}
// RegexForStrptimeLayout builds a regex source string from a strptime layout.
//
// Regex metacharacters in the layout are backslash-prefixed first, then every
// two-character `%x` sequence is replaced by its pattern from ctimeRegex.
//
// It returns an empty string and a non-nil error listing every `%x` sequence
// that has no entry in ctimeRegex; otherwise it returns the generated pattern
// and a nil error.
func RegexForStrptimeLayout(layout string) (string, error) {
	// Metacharacters are escaped one at a time, in exactly this order. The
	// backslash comes last, so the backslashes inserted by the earlier passes
	// are themselves doubled (a layout "." ends up as `\\.` in the result).
	// NOTE(review): presumably the result is embedded in a quoted expression
	// string that un-escapes `\\` again - confirm against callers before
	// changing the order.
	escaped := layout
	for _, meta := range `.+*?^$()[]{}|\` {
		ch := string(meta)
		escaped = strings.ReplaceAll(escaped, ch, `\`+ch)
	}

	// A directive is '%' followed by any single character other than newline.
	directivePattern := regexp.MustCompile(`%.`)

	var (
		out         strings.Builder
		unsupported []error
		cursor      int
	)
	for _, span := range directivePattern.FindAllStringIndex(escaped, -1) {
		begin, end := span[0], span[1]

		// Copy the literal text between the previous directive and this one.
		out.WriteString(escaped[cursor:begin])
		cursor = end

		directive := escaped[begin:end]
		fragment, known := ctimeRegex[directive]
		if !known {
			unsupported = append(unsupported, errors.New("unsupported ctimefmt directive: "+directive))
			continue
		}
		out.WriteString(fragment)
	}
	// Copy whatever literal text follows the last directive.
	out.WriteString(escaped[cursor:])

	if len(unsupported) > 0 {
		return "", fmt.Errorf("couldn't generate regex for ctime format: %v", unsupported)
	}
	return out.String(), nil
}