logs-analyzer/signoz/pkg/query-service/app/logparsingpipeline/pipelineBuilder.go
2024-09-02 22:47:30 +03:00

387 lines
12 KiB
Go

package logparsingpipeline
import (
"fmt"
"slices"
"strings"
"github.com/antonmedv/expr"
"github.com/antonmedv/expr/ast"
"github.com/antonmedv/expr/parser"
"github.com/pkg/errors"
"go.signoz.io/signoz/pkg/query-service/constants"
"go.signoz.io/signoz/pkg/query-service/queryBuilderToExpr"
)
const (
NOOP = "noop"
)
func CollectorConfProcessorName(p Pipeline) string {
return constants.LogsPPLPfx + p.Alias
}
func PreparePipelineProcessor(pipelines []Pipeline) (map[string]interface{}, []string, error) {
processors := map[string]interface{}{}
names := []string{}
for pipelineIdx, v := range pipelines {
if !v.Enabled {
continue
}
operators, err := getOperators(v.Config)
if err != nil {
return nil, nil, errors.Wrap(err, "failed to prepare operators")
}
if len(operators) == 0 {
continue
}
filterExpr, err := queryBuilderToExpr.Parse(v.Filter)
if err != nil {
return nil, nil, errors.Wrap(err, "failed to parse pipeline filter")
}
router := []PipelineOperator{
{
ID: "router_signoz",
Type: "router",
Routes: &[]Route{
{
Output: v.Config[0].ID,
Expr: filterExpr,
},
},
Default: NOOP,
},
}
v.Config = append(router, operators...)
// noop operator is needed as the default operator so that logs are not dropped
noop := PipelineOperator{
ID: NOOP,
Type: NOOP,
}
v.Config = append(v.Config, noop)
processor := Processor{
Operators: v.Config,
}
name := CollectorConfProcessorName(v)
// Ensure name is unique
if _, nameExists := processors[name]; nameExists {
name = fmt.Sprintf("%s-%d", name, pipelineIdx)
}
processors[name] = processor
names = append(names, name)
}
return processors, names, nil
}
func getOperators(ops []PipelineOperator) ([]PipelineOperator, error) {
filteredOp := []PipelineOperator{}
for i, operator := range ops {
if operator.Enabled {
if len(filteredOp) > 0 {
filteredOp[len(filteredOp)-1].Output = operator.ID
}
if operator.Type == "regex_parser" {
parseFromNotNilCheck, err := fieldNotNilCheck(operator.ParseFrom)
if err != nil {
return nil, fmt.Errorf(
"couldn't generate nil check for parseFrom of regex op %s: %w", operator.Name, err,
)
}
operator.If = fmt.Sprintf(
`%s && %s matches "%s"`,
parseFromNotNilCheck,
operator.ParseFrom,
strings.ReplaceAll(
strings.ReplaceAll(operator.Regex, `\`, `\\`),
`"`, `\"`,
),
)
} else if operator.Type == "grok_parser" {
parseFromNotNilCheck, err := fieldNotNilCheck(operator.ParseFrom)
if err != nil {
return nil, fmt.Errorf(
"couldn't generate nil check for parseFrom of grok op %s: %w", operator.Name, err,
)
}
operator.If = parseFromNotNilCheck
} else if operator.Type == "json_parser" {
parseFromNotNilCheck, err := fieldNotNilCheck(operator.ParseFrom)
if err != nil {
return nil, fmt.Errorf(
"couldn't generate nil check for parseFrom of json parser op %s: %w", operator.Name, err,
)
}
operator.If = fmt.Sprintf(
`%s && %s matches "^\\s*{.*}\\s*$"`, parseFromNotNilCheck, operator.ParseFrom,
)
} else if operator.Type == "add" {
if strings.HasPrefix(operator.Value, "EXPR(") && strings.HasSuffix(operator.Value, ")") {
expression := strings.TrimSuffix(strings.TrimPrefix(operator.Value, "EXPR("), ")")
fieldsNotNilCheck, err := fieldsReferencedInExprNotNilCheck(expression)
if err != nil {
return nil, fmt.Errorf(
"could'nt generate nil check for fields referenced in value expr of add operator %s: %w",
operator.Name, err,
)
}
if fieldsNotNilCheck != "" {
operator.If = fieldsNotNilCheck
}
}
} else if operator.Type == "move" || operator.Type == "copy" {
fromNotNilCheck, err := fieldNotNilCheck(operator.From)
if err != nil {
return nil, fmt.Errorf(
"couldn't generate nil check for From field of %s op %s: %w", operator.Type, operator.Name, err,
)
}
operator.If = fromNotNilCheck
} else if operator.Type == "remove" {
fieldNotNilCheck, err := fieldNotNilCheck(operator.Field)
if err != nil {
return nil, fmt.Errorf(
"couldn't generate nil check for field to be removed by op %s: %w", operator.Name, err,
)
}
operator.If = fieldNotNilCheck
} else if operator.Type == "trace_parser" {
cleanTraceParser(&operator)
} else if operator.Type == "time_parser" {
parseFromNotNilCheck, err := fieldNotNilCheck(operator.ParseFrom)
if err != nil {
return nil, fmt.Errorf(
"couldn't generate nil check for parseFrom of time parser op %s: %w", operator.Name, err,
)
}
operator.If = parseFromNotNilCheck
if operator.LayoutType == "strptime" {
regex, err := RegexForStrptimeLayout(operator.Layout)
if err != nil {
return nil, fmt.Errorf(
"couldn't generate layout regex for time_parser %s: %w", operator.Name, err,
)
}
operator.If = fmt.Sprintf(
`%s && %s matches "%s"`, operator.If, operator.ParseFrom, regex,
)
} else if operator.LayoutType == "epoch" {
valueRegex := `^\\s*[0-9]+\\s*$`
if strings.Contains(operator.Layout, ".") {
valueRegex = `^\\s*[0-9]+\\.[0-9]+\\s*$`
}
operator.If = fmt.Sprintf(
`%s && string(%s) matches "%s"`, operator.If, operator.ParseFrom, valueRegex,
)
}
// TODO(Raj): Maybe add support for gotime too eventually
} else if operator.Type == "severity_parser" {
parseFromNotNilCheck, err := fieldNotNilCheck(operator.ParseFrom)
if err != nil {
return nil, fmt.Errorf(
"couldn't generate nil check for parseFrom of severity parser %s: %w", operator.Name, err,
)
}
operator.If = fmt.Sprintf(
`%s && ( type(%s) == "string" || ( type(%s) in ["int", "float"] && %s == float(int(%s)) ) )`,
parseFromNotNilCheck, operator.ParseFrom, operator.ParseFrom, operator.ParseFrom, operator.ParseFrom,
)
}
filteredOp = append(filteredOp, operator)
} else if i == len(ops)-1 && len(filteredOp) != 0 {
filteredOp[len(filteredOp)-1].Output = ""
}
}
return filteredOp, nil
}
func cleanTraceParser(operator *PipelineOperator) {
if operator.TraceId != nil && len(operator.TraceId.ParseFrom) < 1 {
operator.TraceId = nil
}
if operator.SpanId != nil && len(operator.SpanId.ParseFrom) < 1 {
operator.SpanId = nil
}
if operator.TraceFlags != nil && len(operator.TraceFlags.ParseFrom) < 1 {
operator.TraceFlags = nil
}
}
// Generates an expression checking that `fieldPath` has a non-nil value in a log record.
func fieldNotNilCheck(fieldPath string) (string, error) {
_, err := expr.Compile(fieldPath)
if err != nil {
return "", fmt.Errorf("invalid fieldPath %s: %w", fieldPath, err)
}
// helper for turning `.` into `?.` in field paths.
// Eg: a.b?.c.d -> a?.b?.c?.d
optionalChainedPath := func(path string) string {
return strings.ReplaceAll(
strings.ReplaceAll(path, "?.", "."), ".", "?.",
)
}
// Optional chaining before membership ops is not supported by expr.
// Eg: The field `attributes.test["a.b"].value["c.d"].e` can't be checked using
// the nil check `attributes.test?.["a.b"]?.value?.["c.d"]?.e != nil`
// This needs to be worked around by checking that the target of membership op is not nil first.
// Eg: attributes.test != nil && attributes.test["a.b"]?.value != nil && attributes.test["a.b"].value["c.d"]?.e != nil
// Split once from the right to include the rightmost membership op and everything after it.
// Eg: `attributes.test["a.b"].value["c.d"].e` would result in `attributes.test["a.b"].value` and `["c.d"].e`
parts := rSplitAfterN(fieldPath, "[", 2)
if len(parts) < 2 {
// there is no [] access in fieldPath
return fmt.Sprintf("%s != nil", optionalChainedPath(fieldPath)), nil
}
// recursively generate nil check for target of the rightmost membership op (attributes.test["a.b"].value)
// should come out to be (attributes.test != nil && attributes.test["a.b"]?.value != nil)
collectionNotNilCheck, err := fieldNotNilCheck(parts[0])
if err != nil {
return "", fmt.Errorf("couldn't generate nil check for %s: %w", parts[0], err)
}
// generate nil check for entire path.
suffixParts := strings.SplitAfter(parts[1], "]") // ["c.d"], ".e"
fullPath := parts[0] + suffixParts[0]
if len(suffixParts) > 1 {
// attributes.test["a.b"].value["c.d"]?.e
fullPath += optionalChainedPath(suffixParts[1])
}
fullPathCheck := fmt.Sprintf("%s != nil", fullPath)
// If the membership op is for array/slice indexing, add check ensuring array is long enough
// attributes.test[3] -> len(attributes.test) > 3 && attributes.test[3] != nil
if !(strings.Contains(suffixParts[0], "'") || strings.Contains(suffixParts[0], `"`)) {
fullPathCheck = fmt.Sprintf(
"len(%s) > %s && %s",
parts[0], suffixParts[0][1:len(suffixParts[0])-1], fullPathCheck,
)
}
// If prefix is `attributes` or `resource` there is no need to add a nil check for
// the prefix since all log records have non nil `attributes` and `resource` fields.
if slices.Contains([]string{"attributes", "resource"}, parts[0]) {
return fullPathCheck, nil
}
return fmt.Sprintf("%s && %s", collectionNotNilCheck, fullPathCheck), nil
}
// Split `str` after `sep` from the right to create up to `n` parts.
// rSplitAfterN("a.b.c.d", ".", 3) -> ["a.b", ".c", ".d"]
func rSplitAfterN(str string, sep string, n int) []string {
reversedStr := reverseString(str)
parts := strings.SplitAfterN(reversedStr, sep, n)
slices.Reverse(parts)
result := []string{}
for _, p := range parts {
result = append(result, reverseString(p))
}
return result
}
func reverseString(s string) string {
r := []rune(s)
for i := 0; i < len(r)/2; i++ {
j := len(s) - 1 - i
r[i], r[j] = r[j], r[i]
}
return string(r)
}
// Generate expression for checking that all fields referenced in `expr` have a non nil value in log record.
// Eg: `attributes.x + len(resource.y)` will return the expression `attributes.x != nil && resource.y != nil`
func fieldsReferencedInExprNotNilCheck(expr string) (string, error) {
referencedFields, err := logFieldsReferencedInExpr(expr)
if err != nil {
return "", fmt.Errorf("couldn't extract log fields referenced in expr %s: %w", expr, err)
}
// Generating nil check for deepest fields takes care of their prefixes too.
// Eg: `attributes.test.value + len(attributes.test)` needs a nil check only for `attributes.test.value`
deepestFieldRefs := []string{}
for _, field := range referencedFields {
isPrefixOfAnotherReferencedField := slices.ContainsFunc(
referencedFields, func(e string) bool {
return len(e) > len(field) && strings.HasPrefix(e, field)
},
)
if !isPrefixOfAnotherReferencedField {
deepestFieldRefs = append(deepestFieldRefs, field)
}
}
fieldExprChecks := []string{}
for _, field := range deepestFieldRefs {
checkExpr, err := fieldNotNilCheck(field)
if err != nil {
return "", fmt.Errorf("could not create nil check for %s: %w", field, err)
}
fieldExprChecks = append(fieldExprChecks, fmt.Sprintf("(%s)", checkExpr))
}
return strings.Join(fieldExprChecks, " && "), nil
}
// Expr AST visitor for extracting referenced log fields
// See more at https://github.com/expr-lang/expr/blob/master/ast/visitor.go
type logFieldsInExprExtractor struct {
referencedFields []string
}
func (v *logFieldsInExprExtractor) Visit(node *ast.Node) {
if n, ok := (*node).(*ast.MemberNode); ok {
memberRef := n.String()
// coalesce ops end up as MemberNode right now for some reason.
// ignore such member nodes.
if strings.Contains(memberRef, "??") {
return
}
if strings.HasPrefix(memberRef, "attributes") || strings.HasPrefix(memberRef, "resource") {
v.referencedFields = append(v.referencedFields, memberRef)
}
}
}
func logFieldsReferencedInExpr(expr string) ([]string, error) {
// parse abstract syntax tree for expr
exprAst, err := parser.Parse(expr)
if err != nil {
return nil, fmt.Errorf("could not parse expr: %w", err)
}
// walk ast for expr to collect all member references.
v := &logFieldsInExprExtractor{}
ast.Walk(&exprAst.Node, v)
return v.referencedFields, nil
}