logs-analyzer/signoz/pkg/query-service/app/clickhouseReader/reader.go

package clickhouseReader
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"fmt"
"io"
"math"
"math/rand"
"net/http"
"os"
"reflect"
"regexp"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/go-kit/log"
"github.com/go-kit/log/level"
"github.com/google/uuid"
"github.com/mailru/easyjson"
"github.com/oklog/oklog/pkg/group"
"github.com/pkg/errors"
"github.com/prometheus/common/promlog"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/storage/remote"
"github.com/prometheus/prometheus/util/stats"
"github.com/ClickHouse/clickhouse-go/v2"
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
"github.com/jmoiron/sqlx"
promModel "github.com/prometheus/common/model"
"go.uber.org/zap"
"go.signoz.io/signoz/pkg/query-service/app/dashboards"
"go.signoz.io/signoz/pkg/query-service/app/explorer"
"go.signoz.io/signoz/pkg/query-service/app/logs"
"go.signoz.io/signoz/pkg/query-service/app/services"
"go.signoz.io/signoz/pkg/query-service/auth"
"go.signoz.io/signoz/pkg/query-service/common"
"go.signoz.io/signoz/pkg/query-service/constants"
"go.signoz.io/signoz/pkg/query-service/dao"
chErrors "go.signoz.io/signoz/pkg/query-service/errors"
am "go.signoz.io/signoz/pkg/query-service/integrations/alertManager"
"go.signoz.io/signoz/pkg/query-service/interfaces"
"go.signoz.io/signoz/pkg/query-service/model"
v3 "go.signoz.io/signoz/pkg/query-service/model/v3"
"go.signoz.io/signoz/pkg/query-service/telemetry"
"go.signoz.io/signoz/pkg/query-service/utils"
)
const (
primaryNamespace = "clickhouse"
archiveNamespace = "clickhouse-archive"
signozTraceDBName = "signoz_traces"
signozDurationMVTable = "distributed_durationSort"
signozUsageExplorerTable = "distributed_usage_explorer"
signozSpansTable = "distributed_signoz_spans"
signozErrorIndexTable = "distributed_signoz_error_index_v2"
signozTraceTableName = "distributed_signoz_index_v2"
signozTraceLocalTableName = "signoz_index_v2"
signozMetricDBName = "signoz_metrics"
signozSampleLocalTableName = "samples_v4"
signozSampleTableName = "distributed_samples_v4"
signozTSLocalTableNameV4 = "time_series_v4"
signozTSTableNameV4 = "distributed_time_series_v4"
signozTSLocalTableNameV46Hrs = "time_series_v4_6hrs"
signozTSTableNameV46Hrs = "distributed_time_series_v4_6hrs"
signozTSLocalTableNameV41Day = "time_series_v4_1day"
signozTSTableNameV41Day = "distributed_time_series_v4_1day"
minTimespanForProgressiveSearch = time.Hour
minTimespanForProgressiveSearchMargin = time.Minute
maxProgressiveSteps = 4
charset = "abcdefghijklmnopqrstuvwxyz" +
"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
)
var (
ErrNoOperationsTable = errors.New("no operations table supplied")
ErrNoIndexTable = errors.New("no index table supplied")
ErrStartTimeRequired = errors.New("start time is required for search queries")
seededRand *rand.Rand = rand.New(
rand.NewSource(time.Now().UnixNano()))
)
// ClickHouseReader reads spans and other telemetry data from ClickHouse
type ClickHouseReader struct {
db clickhouse.Conn
localDB *sqlx.DB
TraceDB string
operationsTable string
durationTable string
indexTable string
errorTable string
usageExplorerTable string
SpansTable string
spanAttributeTable string
spanAttributesKeysTable string
dependencyGraphTable string
topLevelOperationsTable string
logsDB string
logsTable string
logsLocalTable string
logsAttributeKeys string
logsResourceKeys string
logsTagAttributeTable string
queryEngine *promql.Engine
remoteStorage *remote.Storage
fanoutStorage *storage.Storage
promConfigFile string
promConfig *config.Config
alertManager am.Manager
featureFlags interfaces.FeatureLookup
liveTailRefreshSeconds int
cluster string
}
// NewReader returns a ClickHouseReader for the database
func NewReader(
localDB *sqlx.DB,
configFile string,
featureFlag interfaces.FeatureLookup,
maxIdleConns int,
maxOpenConns int,
dialTimeout time.Duration,
cluster string,
) *ClickHouseReader {
datasource := os.Getenv("ClickHouseUrl")
options := NewOptions(datasource, maxIdleConns, maxOpenConns, dialTimeout, primaryNamespace, archiveNamespace)
db, err := initialize(options)
if err != nil {
zap.L().Fatal("failed to initialize ClickHouse", zap.Error(err))
}
return NewReaderFromClickhouseConnection(db, options, localDB, configFile, featureFlag, cluster)
}
func NewReaderFromClickhouseConnection(
db driver.Conn,
options *Options,
localDB *sqlx.DB,
configFile string,
featureFlag interfaces.FeatureLookup,
cluster string,
) *ClickHouseReader {
alertManager, err := am.New("")
if err != nil {
zap.L().Error("failed to initialize alert manager", zap.Error(err))
zap.L().Error("check if the alert manager URL is correctly set and valid")
os.Exit(1)
}
regex := os.Getenv("ClickHouseOptimizeReadInOrderRegex")
var regexCompiled *regexp.Regexp
if regex != "" {
regexCompiled, err = regexp.Compile(regex)
if err != nil {
zap.L().Error("Incorrect regex for ClickHouseOptimizeReadInOrderRegex")
os.Exit(1)
}
}
wrap := clickhouseConnWrapper{
conn: db,
settings: ClickhouseQuerySettings{
MaxExecutionTimeLeaf: os.Getenv("ClickHouseMaxExecutionTimeLeaf"),
TimeoutBeforeCheckingExecutionSpeed: os.Getenv("ClickHouseTimeoutBeforeCheckingExecutionSpeed"),
MaxBytesToRead: os.Getenv("ClickHouseMaxBytesToRead"),
OptimizeReadInOrderRegex: os.Getenv("ClickHouseOptimizeReadInOrderRegex"),
OptimizeReadInOrderRegexCompiled: regexCompiled,
},
}
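// The settings above are read from environment variables and carried on the
// connection wrapper so they can be applied to outgoing queries; the wrapper
// type itself is defined elsewhere in this package.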
return &ClickHouseReader{
db: wrap,
localDB: localDB,
TraceDB: options.primary.TraceDB,
alertManager: alertManager,
operationsTable: options.primary.OperationsTable,
indexTable: options.primary.IndexTable,
errorTable: options.primary.ErrorTable,
usageExplorerTable: options.primary.UsageExplorerTable,
durationTable: options.primary.DurationTable,
SpansTable: options.primary.SpansTable,
spanAttributeTable: options.primary.SpanAttributeTable,
spanAttributesKeysTable: options.primary.SpanAttributeKeysTable,
dependencyGraphTable: options.primary.DependencyGraphTable,
topLevelOperationsTable: options.primary.TopLevelOperationsTable,
logsDB: options.primary.LogsDB,
logsTable: options.primary.LogsTable,
logsLocalTable: options.primary.LogsLocalTable,
logsAttributeKeys: options.primary.LogsAttributeKeysTable,
logsResourceKeys: options.primary.LogsResourceKeysTable,
logsTagAttributeTable: options.primary.LogsTagAttributeTable,
liveTailRefreshSeconds: options.primary.LiveTailRefreshSeconds,
promConfigFile: configFile,
featureFlags: featureFlag,
cluster: cluster,
}
}
func (r *ClickHouseReader) Start(readerReady chan bool) {
logLevel := promlog.AllowedLevel{}
logLevel.Set("debug")
allowedFormat := promlog.AllowedFormat{}
allowedFormat.Set("logfmt")
promlogConfig := promlog.Config{
Level: &logLevel,
Format: &allowedFormat,
}
logger := promlog.New(&promlogConfig)
startTime := func() (int64, error) {
return int64(promModel.Latest), nil
}
remoteStorage := remote.NewStorage(
log.With(logger, "component", "remote"),
nil,
startTime,
"",
time.Duration(1*time.Minute),
nil,
)
cfg := struct {
configFile string
localStoragePath string
lookbackDelta promModel.Duration
webTimeout promModel.Duration
queryTimeout promModel.Duration
queryConcurrency int
queryMaxSamples int
RemoteFlushDeadline promModel.Duration
prometheusURL string
logLevel promlog.AllowedLevel
}{
configFile: r.promConfigFile,
}
fanoutStorage := storage.NewFanout(logger, remoteStorage)
opts := promql.EngineOpts{
Logger: log.With(logger, "component", "query engine"),
Reg: nil,
MaxSamples: 50000000,
Timeout: time.Duration(2 * time.Minute),
ActiveQueryTracker: promql.NewActiveQueryTracker(
"",
20,
log.With(logger, "component", "activeQueryTracker"),
),
}
queryEngine := promql.NewEngine(opts)
reloaders := []func(cfg *config.Config) error{
remoteStorage.ApplyConfig,
}
// sync.Once is used to make sure we can close the channel at different execution stages (SIGTERM or when the config is loaded).
type closeOnce struct {
C chan struct{}
once sync.Once
Close func()
}
// Wait until the server is ready to handle reloading.
reloadReady := &closeOnce{
C: make(chan struct{}),
}
reloadReady.Close = func() {
reloadReady.once.Do(func() {
close(reloadReady.C)
})
}
var g group.Group
{
// Initial configuration loading.
cancel := make(chan struct{})
g.Add(
func() error {
var err error
r.promConfig, err = reloadConfig(cfg.configFile, logger, reloaders...)
if err != nil {
return fmt.Errorf("error loading config from %q: %s", cfg.configFile, err)
}
reloadReady.Close()
<-cancel
return nil
},
func(err error) {
close(cancel)
},
)
}
r.queryEngine = queryEngine
r.remoteStorage = remoteStorage
r.fanoutStorage = &fanoutStorage
readerReady <- true
if err := g.Run(); err != nil {
level.Error(logger).Log("err", err)
os.Exit(1)
}
}
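// Illustrative usage (a minimal sketch; the concrete argument values and the
// featureFlags/localDB variables are assumptions, not taken from this file):
//
//	reader := NewReader(localDB, "/etc/prometheus/config.yml", featureFlags, 50, 100, 5*time.Second, "cluster")
//	ready := make(chan bool)
//	go reader.Start(ready)
//	<-ready // the Prometheus query engine and remote storage are set up past this point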
func (r *ClickHouseReader) GetQueryEngine() *promql.Engine {
return r.queryEngine
}
func (r *ClickHouseReader) GetFanoutStorage() *storage.Storage {
return r.fanoutStorage
}
func reloadConfig(filename string, logger log.Logger, rls ...func(*config.Config) error) (promConfig *config.Config, err error) {
level.Info(logger).Log("msg", "Loading configuration file", "filename", filename)
conf, err := config.LoadFile(filename, false, false, logger)
if err != nil {
return nil, fmt.Errorf("couldn't load configuration (--config.file=%q): %v", filename, err)
}
failed := false
for _, rl := range rls {
if err := rl(conf); err != nil {
level.Error(logger).Log("msg", "Failed to apply configuration", "err", err)
failed = true
}
}
if failed {
return nil, fmt.Errorf("one or more errors occurred while applying the new configuration (--config.file=%q)", filename)
}
level.Info(logger).Log("msg", "Completed loading of configuration file", "filename", filename)
return conf, nil
}
func initialize(options *Options) (clickhouse.Conn, error) {
db, err := connect(options.getPrimary())
if err != nil {
return nil, fmt.Errorf("error connecting to primary db: %v", err)
}
return db, nil
}
func connect(cfg *namespaceConfig) (clickhouse.Conn, error) {
if cfg.Encoding != EncodingJSON && cfg.Encoding != EncodingProto {
return nil, fmt.Errorf("unknown encoding %q, supported: %q, %q", cfg.Encoding, EncodingJSON, EncodingProto)
}
return cfg.Connector(cfg)
}
func (r *ClickHouseReader) GetConn() clickhouse.Conn {
return r.db
}
func (r *ClickHouseReader) LoadChannel(channel *model.ChannelItem) *model.ApiError {
receiver := &am.Receiver{}
if err := json.Unmarshal([]byte(channel.Data), receiver); err != nil { // Parse []byte to go struct pointer
return &model.ApiError{Typ: model.ErrorBadData, Err: err}
}
response, err := http.Post(constants.GetAlertManagerApiPrefix()+"v1/receivers", "application/json", bytes.NewBuffer([]byte(channel.Data)))
if err != nil {
zap.L().Error("Error in getting response of API call to alertmanager/v1/receivers", zap.Error(err))
return &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
if response.StatusCode > 299 {
responseData, _ := io.ReadAll(response.Body)
err := fmt.Errorf("error in getting 2xx response in API call to alertmanager/v1/receivers")
zap.L().Error("Error in getting 2xx response in API call to alertmanager/v1/receivers", zap.String("Status", response.Status), zap.String("Data", string(responseData)))
return &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
return nil
}
func (r *ClickHouseReader) GetChannel(id string) (*model.ChannelItem, *model.ApiError) {
idInt, _ := strconv.Atoi(id)
channel := model.ChannelItem{}
query := "SELECT id, created_at, updated_at, name, type, data data FROM notification_channels WHERE id=? "
stmt, err := r.localDB.Preparex(query)
if err != nil {
zap.L().Error("Error in preparing sql query for GetChannel", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
err = stmt.Get(&channel, idInt)
if err != nil {
zap.L().Error("Error in getting channel with id", zap.Int("id", idInt), zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
return &channel, nil
}
func (r *ClickHouseReader) DeleteChannel(id string) *model.ApiError {
idInt, _ := strconv.Atoi(id)
channelToDelete, apiErrorObj := r.GetChannel(id)
if apiErrorObj != nil {
return apiErrorObj
}
tx, err := r.localDB.Begin()
if err != nil {
return &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
{
stmt, err := tx.Prepare(`DELETE FROM notification_channels WHERE id=$1;`)
if err != nil {
zap.L().Error("Error in preparing statement for INSERT to notification_channels", zap.Error(err))
tx.Rollback()
return &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
defer stmt.Close()
if _, err := stmt.Exec(idInt); err != nil {
zap.L().Error("Error in Executing prepared statement for INSERT to notification_channels", zap.Error(err))
tx.Rollback() // return an error too, we may want to wrap them
return &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
}
apiError := r.alertManager.DeleteRoute(channelToDelete.Name)
if apiError != nil {
tx.Rollback()
return apiError
}
err = tx.Commit()
if err != nil {
zap.L().Error("Error in committing transaction for DELETE command to notification_channels", zap.Error(err))
return &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
return nil
}
func (r *ClickHouseReader) GetChannels() (*[]model.ChannelItem, *model.ApiError) {
channels := []model.ChannelItem{}
query := "SELECT id, created_at, updated_at, name, type, data data FROM notification_channels"
err := r.localDB.Select(&channels, query)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
return &channels, nil
}
func getChannelType(receiver *am.Receiver) string {
if receiver.EmailConfigs != nil {
return "email"
}
if receiver.OpsGenieConfigs != nil {
return "opsgenie"
}
if receiver.PagerdutyConfigs != nil {
return "pagerduty"
}
if receiver.PushoverConfigs != nil {
return "pushover"
}
if receiver.SNSConfigs != nil {
return "sns"
}
if receiver.SlackConfigs != nil {
return "slack"
}
if receiver.VictorOpsConfigs != nil {
return "victorops"
}
if receiver.WebhookConfigs != nil {
return "webhook"
}
if receiver.WechatConfigs != nil {
return "wechat"
}
if receiver.MSTeamsConfigs != nil {
return "msteams"
}
return ""
}
func (r *ClickHouseReader) EditChannel(receiver *am.Receiver, id string) (*am.Receiver, *model.ApiError) {
idInt, _ := strconv.Atoi(id)
channel, apiErrObj := r.GetChannel(id)
if apiErrObj != nil {
return nil, apiErrObj
}
if channel.Name != receiver.Name {
return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("channel name cannot be changed")}
}
tx, err := r.localDB.Begin()
if err != nil {
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
channel_type := getChannelType(receiver)
// check if channel type is supported in the current user plan
if err := r.featureFlags.CheckFeature(fmt.Sprintf("ALERT_CHANNEL_%s", strings.ToUpper(channel_type))); err != nil {
zap.L().Warn("an unsupported feature was blocked", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("unsupported feature. please upgrade your plan to access this feature")}
}
receiverString, _ := json.Marshal(receiver)
{
stmt, err := tx.Prepare(`UPDATE notification_channels SET updated_at=$1, type=$2, data=$3 WHERE id=$4;`)
if err != nil {
zap.L().Error("Error in preparing statement for UPDATE to notification_channels", zap.Error(err))
tx.Rollback()
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
defer stmt.Close()
if _, err := stmt.Exec(time.Now(), channel_type, string(receiverString), idInt); err != nil {
zap.L().Error("Error in Executing prepared statement for UPDATE to notification_channels", zap.Error(err))
tx.Rollback() // return an error too, we may want to wrap them
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
}
apiError := r.alertManager.EditRoute(receiver)
if apiError != nil {
tx.Rollback()
return nil, apiError
}
err = tx.Commit()
if err != nil {
zap.L().Error("Error in committing transaction for INSERT to notification_channels", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
return receiver, nil
}
func (r *ClickHouseReader) CreateChannel(receiver *am.Receiver) (*am.Receiver, *model.ApiError) {
channel_type := getChannelType(receiver)
// check if channel type is supported in the current user plan
if err := r.featureFlags.CheckFeature(fmt.Sprintf("ALERT_CHANNEL_%s", strings.ToUpper(channel_type))); err != nil {
zap.L().Warn("an unsupported feature was blocked", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("unsupported feature. please upgrade your plan to access this feature")}
}
receiverString, _ := json.Marshal(receiver)
tx, err := r.localDB.Begin()
if err != nil {
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
{
stmt, err := tx.Prepare(`INSERT INTO notification_channels (created_at, updated_at, name, type, data) VALUES($1,$2,$3,$4,$5);`)
if err != nil {
zap.L().Error("Error in preparing statement for INSERT to notification_channels", zap.Error(err))
tx.Rollback()
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
defer stmt.Close()
if _, err := stmt.Exec(time.Now(), time.Now(), receiver.Name, channel_type, string(receiverString)); err != nil {
zap.L().Error("Error in Executing prepared statement for INSERT to notification_channels", zap.Error(err))
tx.Rollback() // return an error too, we may want to wrap them
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
}
apiError := r.alertManager.AddRoute(receiver)
if apiError != nil {
tx.Rollback()
return nil, apiError
}
err = tx.Commit()
if err != nil {
zap.L().Error("Error in committing transaction for INSERT to notification_channels", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
return receiver, nil
}
func (r *ClickHouseReader) GetInstantQueryMetricsResult(ctx context.Context, queryParams *model.InstantQueryMetricsParams) (*promql.Result, *stats.QueryStats, *model.ApiError) {
qry, err := r.queryEngine.NewInstantQuery(ctx, r.remoteStorage, nil, queryParams.Query, queryParams.Time)
if err != nil {
return nil, nil, &model.ApiError{Typ: model.ErrorBadData, Err: err}
}
res := qry.Exec(ctx)
// Optional stats field in response if parameter "stats" is not empty.
var qs stats.QueryStats
if queryParams.Stats != "" {
qs = stats.NewQueryStats(qry.Stats())
}
qry.Close()
return res, &qs, nil
}
func (r *ClickHouseReader) GetQueryRangeResult(ctx context.Context, query *model.QueryRangeParams) (*promql.Result, *stats.QueryStats, *model.ApiError) {
qry, err := r.queryEngine.NewRangeQuery(ctx, r.remoteStorage, nil, query.Query, query.Start, query.End, query.Step)
if err != nil {
return nil, nil, &model.ApiError{Typ: model.ErrorBadData, Err: err}
}
res := qry.Exec(ctx)
// Optional stats field in response if parameter "stats" is not empty.
var qs stats.QueryStats
if query.Stats != "" {
qs = stats.NewQueryStats(qry.Stats())
}
qry.Close()
return res, &qs, nil
}
func (r *ClickHouseReader) GetServicesList(ctx context.Context) (*[]string, error) {
services := []string{}
query := fmt.Sprintf(`SELECT DISTINCT serviceName FROM %s.%s WHERE toDate(timestamp) > now() - INTERVAL 1 DAY`, r.TraceDB, r.indexTable)
rows, err := r.db.Query(ctx, query)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, fmt.Errorf("error in processing sql query")
}
defer rows.Close()
for rows.Next() {
var serviceName string
if err := rows.Scan(&serviceName); err != nil {
return &services, err
}
services = append(services, serviceName)
}
return &services, nil
}
func (r *ClickHouseReader) GetTopLevelOperations(ctx context.Context, skipConfig *model.SkipConfig, start, end time.Time, services []string) (*map[string][]string, *model.ApiError) {
start = start.In(time.UTC)
// The `top_level_operations` that have `time` >= start
operations := map[string][]string{}
// We can't use the `end` because the `top_level_operations` table has the most recent instances of the operations
// We can only use the `start` time to filter the operations
query := fmt.Sprintf(`SELECT name, serviceName, max(time) as ts FROM %s.%s WHERE time >= @start`, r.TraceDB, r.topLevelOperationsTable)
if len(services) > 0 {
query += ` AND serviceName IN @services`
}
query += ` GROUP BY name, serviceName ORDER BY ts DESC LIMIT 5000`
rows, err := r.db.Query(ctx, query, clickhouse.Named("start", start), clickhouse.Named("services", services))
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
defer rows.Close()
for rows.Next() {
var name, serviceName string
var t time.Time
if err := rows.Scan(&name, &serviceName, &t); err != nil {
return nil, &model.ApiError{Typ: model.ErrorInternal, Err: fmt.Errorf("error in reading data")}
}
if _, ok := operations[serviceName]; !ok {
operations[serviceName] = []string{"overflow_operation"}
}
if skipConfig.ShouldSkip(serviceName, name) {
continue
}
operations[serviceName] = append(operations[serviceName], name)
}
return &operations, nil
}
func (r *ClickHouseReader) GetServices(ctx context.Context, queryParams *model.GetServicesParams, skipConfig *model.SkipConfig) (*[]model.ServiceItem, *model.ApiError) {
if r.indexTable == "" {
return nil, &model.ApiError{Typ: model.ErrorExec, Err: ErrNoIndexTable}
}
topLevelOps, apiErr := r.GetTopLevelOperations(ctx, skipConfig, *queryParams.Start, *queryParams.End, nil)
if apiErr != nil {
return nil, apiErr
}
serviceItems := []model.ServiceItem{}
var wg sync.WaitGroup
// limit the number of concurrent queries to not overload the clickhouse server
sem := make(chan struct{}, 10)
var mtx sync.RWMutex
for svc, ops := range *topLevelOps {
sem <- struct{}{}
wg.Add(1)
go func(svc string, ops []string) {
defer wg.Done()
defer func() { <-sem }()
var serviceItem model.ServiceItem
var numErrors uint64
// Even if the total number of operations within the time range is small while the
// number of top-level operations is high, we want to warn the user about a possible
// issue with the instrumentation
serviceItem.DataWarning = model.DataWarning{
TopLevelOps: (*topLevelOps)[svc],
}
// default max_query_size = 262144
// Let's assume the average size of the item in `ops` is 50 bytes
// We can have 262144/50 = 5242 items in the `ops` array
// Although we could make it as big as ~5k, we cap the number of items
// in the `ops` array at 1500
ops = ops[:int(math.Min(1500, float64(len(ops))))]
query := fmt.Sprintf(
`SELECT
quantile(0.99)(durationNano) as p99,
avg(durationNano) as avgDuration,
count(*) as numCalls
FROM %s.%s
WHERE serviceName = @serviceName AND name In @names AND timestamp>= @start AND timestamp<= @end`,
r.TraceDB, r.indexTable,
)
errorQuery := fmt.Sprintf(
`SELECT
count(*) as numErrors
FROM %s.%s
WHERE serviceName = @serviceName AND name In @names AND timestamp>= @start AND timestamp<= @end AND statusCode=2`,
r.TraceDB, r.indexTable,
)
args := []interface{}{}
args = append(args,
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
clickhouse.Named("serviceName", svc),
clickhouse.Named("names", ops),
)
// create TagQuery from TagQueryParams
tags := createTagQueryFromTagQueryParams(queryParams.Tags)
subQuery, argsSubQuery, errStatus := buildQueryWithTagParams(ctx, tags)
query += subQuery
args = append(args, argsSubQuery...)
if errStatus != nil {
zap.L().Error("Error in processing sql query", zap.Error(errStatus))
return
}
err := r.db.QueryRow(
ctx,
query,
args...,
).ScanStruct(&serviceItem)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return
}
if serviceItem.NumCalls == 0 {
return
}
subQuery, argsSubQuery, errStatus = buildQueryWithTagParams(ctx, tags)
if errStatus != nil {
zap.L().Error("Error building query with tag params", zap.Error(errStatus))
return
}
errorQuery += subQuery
args = append(args, argsSubQuery...)
err = r.db.QueryRow(ctx, errorQuery, args...).Scan(&numErrors)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return
}
serviceItem.ServiceName = svc
serviceItem.NumErrors = numErrors
mtx.Lock()
serviceItems = append(serviceItems, serviceItem)
mtx.Unlock()
}(svc, ops)
}
wg.Wait()
for idx := range serviceItems {
serviceItems[idx].CallRate = float64(serviceItems[idx].NumCalls) / float64(queryParams.Period)
serviceItems[idx].ErrorRate = float64(serviceItems[idx].NumErrors) * 100 / float64(serviceItems[idx].NumCalls)
}
return &serviceItems, nil
}
func (r *ClickHouseReader) GetServiceOverview(ctx context.Context, queryParams *model.GetServiceOverviewParams, skipConfig *model.SkipConfig) (*[]model.ServiceOverviewItem, *model.ApiError) {
topLevelOps, apiErr := r.GetTopLevelOperations(ctx, skipConfig, *queryParams.Start, *queryParams.End, nil)
if apiErr != nil {
return nil, apiErr
}
ops, ok := (*topLevelOps)[queryParams.ServiceName]
if !ok {
return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("service not found")}
}
namedArgs := []interface{}{
clickhouse.Named("interval", strconv.Itoa(int(queryParams.StepSeconds/60))),
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
clickhouse.Named("serviceName", queryParams.ServiceName),
clickhouse.Named("names", ops),
}
serviceOverviewItems := []model.ServiceOverviewItem{}
query := fmt.Sprintf(`
SELECT
toStartOfInterval(timestamp, INTERVAL @interval minute) as time,
quantile(0.99)(durationNano) as p99,
quantile(0.95)(durationNano) as p95,
quantile(0.50)(durationNano) as p50,
count(*) as numCalls
FROM %s.%s
WHERE serviceName = @serviceName AND name In @names AND timestamp>= @start AND timestamp<= @end`,
r.TraceDB, r.indexTable,
)
args := []interface{}{}
args = append(args, namedArgs...)
// create TagQuery from TagQueryParams
tags := createTagQueryFromTagQueryParams(queryParams.Tags)
subQuery, argsSubQuery, errStatus := buildQueryWithTagParams(ctx, tags)
query += subQuery
args = append(args, argsSubQuery...)
if errStatus != nil {
return nil, errStatus
}
query += " GROUP BY time ORDER BY time DESC"
err := r.db.Select(ctx, &serviceOverviewItems, query, args...)
zap.L().Debug("running query", zap.String("query", query))
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
serviceErrorItems := []model.ServiceErrorItem{}
query = fmt.Sprintf(`
SELECT
toStartOfInterval(timestamp, INTERVAL @interval minute) as time,
count(*) as numErrors
FROM %s.%s
WHERE serviceName = @serviceName AND name In @names AND timestamp>= @start AND timestamp<= @end AND statusCode=2`,
r.TraceDB, r.indexTable,
)
args = []interface{}{}
args = append(args, namedArgs...)
subQuery, argsSubQuery, errStatus = buildQueryWithTagParams(ctx, tags)
query += subQuery
args = append(args, argsSubQuery...)
if errStatus != nil {
return nil, errStatus
}
query += " GROUP BY time ORDER BY time DESC"
err = r.db.Select(ctx, &serviceErrorItems, query, args...)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
m := make(map[int64]int)
for j := range serviceErrorItems {
m[int64(serviceErrorItems[j].Time.UnixNano())] = int(serviceErrorItems[j].NumErrors)
}
for i := range serviceOverviewItems {
serviceOverviewItems[i].Timestamp = int64(serviceOverviewItems[i].Time.UnixNano())
if val, ok := m[serviceOverviewItems[i].Timestamp]; ok {
serviceOverviewItems[i].NumErrors = uint64(val)
}
serviceOverviewItems[i].ErrorRate = float64(serviceOverviewItems[i].NumErrors) * 100 / float64(serviceOverviewItems[i].NumCalls)
serviceOverviewItems[i].CallRate = float64(serviceOverviewItems[i].NumCalls) / float64(queryParams.StepSeconds)
}
return &serviceOverviewItems, nil
}
func buildFilterArrayQuery(ctx context.Context, excludeMap map[string]struct{}, params []string, filter string, query *string, args []interface{}) []interface{} {
for i, e := range params {
filterKey := filter + String(5)
if i == 0 && i == len(params)-1 {
if _, ok := excludeMap[filter]; ok {
*query += fmt.Sprintf(" AND NOT (%s=@%s)", filter, filterKey)
} else {
*query += fmt.Sprintf(" AND (%s=@%s)", filter, filterKey)
}
} else if i == 0 && i != len(params)-1 {
if _, ok := excludeMap[filter]; ok {
*query += fmt.Sprintf(" AND NOT (%s=@%s", filter, filterKey)
} else {
*query += fmt.Sprintf(" AND (%s=@%s", filter, filterKey)
}
} else if i != 0 && i == len(params)-1 {
*query += fmt.Sprintf(" OR %s=@%s)", filter, filterKey)
} else {
*query += fmt.Sprintf(" OR %s=@%s", filter, filterKey)
}
args = append(args, clickhouse.Named(filterKey, e))
}
return args
}
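// For illustration (parameter suffixes are random five-letter strings, so the
// names below are hypothetical): with filter = "serviceName" and
// params = []string{"frontend", "checkout"}, the fragment appended to *query
// resembles
//
//	AND (serviceName=@serviceNameabcde OR serviceName=@serviceNamefghij)
//
// and args gains one clickhouse.Named value per parameter. If the filter key is
// present in excludeMap, the group is written as AND NOT (...) instead.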
func (r *ClickHouseReader) GetSpanFilters(ctx context.Context, queryParams *model.SpanFilterParams) (*model.SpanFiltersResponse, *model.ApiError) {
var query string
excludeMap := make(map[string]struct{})
for _, e := range queryParams.Exclude {
if e == constants.OperationRequest {
excludeMap[constants.OperationDB] = struct{}{}
continue
}
excludeMap[e] = struct{}{}
}
args := []interface{}{clickhouse.Named("timestampL", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), clickhouse.Named("timestampU", strconv.FormatInt(queryParams.End.UnixNano(), 10))}
if len(queryParams.TraceID) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.TraceID, constants.TraceID, &query, args)
}
if len(queryParams.ServiceName) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ServiceName, constants.ServiceName, &query, args)
}
if len(queryParams.HttpRoute) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpRoute, constants.HttpRoute, &query, args)
}
if len(queryParams.HttpHost) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpHost, constants.HttpHost, &query, args)
}
if len(queryParams.HttpMethod) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpMethod, constants.HttpMethod, &query, args)
}
if len(queryParams.HttpUrl) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpUrl, constants.HttpUrl, &query, args)
}
if len(queryParams.Operation) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.Operation, constants.OperationDB, &query, args)
}
if len(queryParams.RPCMethod) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.RPCMethod, constants.RPCMethod, &query, args)
}
if len(queryParams.ResponseStatusCode) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ResponseStatusCode, constants.ResponseStatusCode, &query, args)
}
if len(queryParams.MinDuration) != 0 {
query = query + " AND durationNano >= @durationNanoMin"
args = append(args, clickhouse.Named("durationNanoMin", queryParams.MinDuration))
}
if len(queryParams.MaxDuration) != 0 {
query = query + " AND durationNano <= @durationNanoMax"
args = append(args, clickhouse.Named("durationNanoMax", queryParams.MaxDuration))
}
if len(queryParams.SpanKind) != 0 {
query = query + " AND kind = @kind"
args = append(args, clickhouse.Named("kind", queryParams.SpanKind))
}
query = getStatusFilters(query, queryParams.Status, excludeMap)
traceFilterReponse := model.SpanFiltersResponse{
Status: map[string]uint64{},
Duration: map[string]uint64{},
ServiceName: map[string]uint64{},
Operation: map[string]uint64{},
ResponseStatusCode: map[string]uint64{},
RPCMethod: map[string]uint64{},
HttpMethod: map[string]uint64{},
HttpUrl: map[string]uint64{},
HttpRoute: map[string]uint64{},
HttpHost: map[string]uint64{},
}
for _, e := range queryParams.GetFilters {
switch e {
case constants.TraceID:
continue
case constants.ServiceName:
finalQuery := fmt.Sprintf("SELECT serviceName, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.indexTable)
finalQuery += query
finalQuery += " GROUP BY serviceName"
var dBResponse []model.DBResponseServiceName
err := r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
for _, service := range dBResponse {
if service.ServiceName != "" {
traceFilterReponse.ServiceName[service.ServiceName] = service.Count
}
}
case constants.HttpRoute:
finalQuery := fmt.Sprintf("SELECT httpRoute, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.indexTable)
finalQuery += query
finalQuery += " GROUP BY httpRoute"
var dBResponse []model.DBResponseHttpRoute
err := r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
for _, service := range dBResponse {
if service.HttpRoute != "" {
traceFilterReponse.HttpRoute[service.HttpRoute] = service.Count
}
}
case constants.HttpUrl:
finalQuery := fmt.Sprintf("SELECT httpUrl, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.indexTable)
finalQuery += query
finalQuery += " GROUP BY httpUrl"
var dBResponse []model.DBResponseHttpUrl
err := r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
for _, service := range dBResponse {
if service.HttpUrl != "" {
traceFilterReponse.HttpUrl[service.HttpUrl] = service.Count
}
}
case constants.HttpMethod:
finalQuery := fmt.Sprintf("SELECT httpMethod, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.indexTable)
finalQuery += query
finalQuery += " GROUP BY httpMethod"
var dBResponse []model.DBResponseHttpMethod
err := r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
for _, service := range dBResponse {
if service.HttpMethod != "" {
traceFilterReponse.HttpMethod[service.HttpMethod] = service.Count
}
}
case constants.HttpHost:
finalQuery := fmt.Sprintf("SELECT httpHost, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.indexTable)
finalQuery += query
finalQuery += " GROUP BY httpHost"
var dBResponse []model.DBResponseHttpHost
err := r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
for _, service := range dBResponse {
if service.HttpHost != "" {
traceFilterReponse.HttpHost[service.HttpHost] = service.Count
}
}
case constants.OperationRequest:
finalQuery := fmt.Sprintf("SELECT name, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.indexTable)
finalQuery += query
finalQuery += " GROUP BY name"
var dBResponse []model.DBResponseOperation
err := r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
for _, service := range dBResponse {
if service.Operation != "" {
traceFilterReponse.Operation[service.Operation] = service.Count
}
}
case constants.Status:
finalQuery := fmt.Sprintf("SELECT COUNT(*) as numTotal FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU AND hasError = true", r.TraceDB, r.indexTable)
finalQuery += query
var dBResponse []model.DBResponseTotal
err := r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
finalQuery2 := fmt.Sprintf("SELECT COUNT(*) as numTotal FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU AND hasError = false", r.TraceDB, r.indexTable)
finalQuery2 += query
var dBResponse2 []model.DBResponseTotal
err = r.db.Select(ctx, &dBResponse2, finalQuery2, args...)
zap.L().Info(finalQuery2)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
if len(dBResponse) > 0 && len(dBResponse2) > 0 {
traceFilterReponse.Status = map[string]uint64{"ok": dBResponse2[0].NumTotal, "error": dBResponse[0].NumTotal}
} else if len(dBResponse) > 0 {
traceFilterReponse.Status = map[string]uint64{"ok": 0, "error": dBResponse[0].NumTotal}
} else if len(dBResponse2) > 0 {
traceFilterReponse.Status = map[string]uint64{"ok": dBResponse2[0].NumTotal, "error": 0}
} else {
traceFilterReponse.Status = map[string]uint64{"ok": 0, "error": 0}
}
case constants.Duration:
err := r.featureFlags.CheckFeature(constants.DurationSort)
durationSortEnabled := err == nil
finalQuery := ""
if !durationSortEnabled {
// if duration sort is not enabled, we need to get the min and max duration from the index table
finalQuery = fmt.Sprintf("SELECT min(durationNano) as min, max(durationNano) as max FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.indexTable)
finalQuery += query
var dBResponse []model.DBResponseMinMax
err = r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
if len(dBResponse) > 0 {
traceFilterReponse.Duration = map[string]uint64{"minDuration": dBResponse[0].Min, "maxDuration": dBResponse[0].Max}
}
} else {
// when duration sort is enabled, we need to get the min and max duration from the duration table
finalQuery = fmt.Sprintf("SELECT durationNano as numTotal FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.durationTable)
finalQuery += query
finalQuery += " ORDER BY durationNano LIMIT 1"
var dBResponse []model.DBResponseTotal
err = r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
finalQuery = fmt.Sprintf("SELECT durationNano as numTotal FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.durationTable)
finalQuery += query
finalQuery += " ORDER BY durationNano DESC LIMIT 1"
var dBResponse2 []model.DBResponseTotal
err = r.db.Select(ctx, &dBResponse2, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
if len(dBResponse) > 0 {
traceFilterReponse.Duration["minDuration"] = dBResponse[0].NumTotal
}
if len(dBResponse2) > 0 {
traceFilterReponse.Duration["maxDuration"] = dBResponse2[0].NumTotal
}
}
case constants.RPCMethod:
finalQuery := fmt.Sprintf("SELECT rpcMethod, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.indexTable)
finalQuery += query
finalQuery += " GROUP BY rpcMethod"
var dBResponse []model.DBResponseRPCMethod
err := r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
for _, service := range dBResponse {
if service.RPCMethod != "" {
traceFilterReponse.RPCMethod[service.RPCMethod] = service.Count
}
}
case constants.ResponseStatusCode:
finalQuery := fmt.Sprintf("SELECT responseStatusCode, count() as count FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.indexTable)
finalQuery += query
finalQuery += " GROUP BY responseStatusCode"
var dBResponse []model.DBResponseStatusCodeMethod
err := r.db.Select(ctx, &dBResponse, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query: %s", err)}
}
for _, service := range dBResponse {
if service.ResponseStatusCode != "" {
traceFilterReponse.ResponseStatusCode[service.ResponseStatusCode] = service.Count
}
}
default:
return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("filter type: %s not supported", e)}
}
}
return &traceFilterReponse, nil
}
func getStatusFilters(query string, statusParams []string, excludeMap map[string]struct{}) string {
// status can take only two values; selecting both is equivalent to selecting neither
if _, ok := excludeMap["status"]; ok {
if len(statusParams) == 1 {
if statusParams[0] == "error" {
query += " AND hasError = false"
} else if statusParams[0] == "ok" {
query += " AND hasError = true"
}
}
} else if len(statusParams) == 1 {
if statusParams[0] == "error" {
query += " AND hasError = true"
} else if statusParams[0] == "ok" {
query += " AND hasError = false"
}
}
return query
}
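// Example: statusParams = []string{"error"} appends " AND hasError = true" and
// statusParams = []string{"ok"} appends " AND hasError = false"; if "status" is
// in excludeMap the conditions are inverted, and selecting both (or neither)
// leaves the query unchanged.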
func (r *ClickHouseReader) GetFilteredSpans(ctx context.Context, queryParams *model.GetFilteredSpansParams) (*model.GetFilterSpansResponse, *model.ApiError) {
queryTable := fmt.Sprintf("%s.%s", r.TraceDB, r.indexTable)
excludeMap := make(map[string]struct{})
for _, e := range queryParams.Exclude {
if e == constants.OperationRequest {
excludeMap[constants.OperationDB] = struct{}{}
continue
}
excludeMap[e] = struct{}{}
}
var query string
args := []interface{}{clickhouse.Named("timestampL", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), clickhouse.Named("timestampU", strconv.FormatInt(queryParams.End.UnixNano(), 10))}
if len(queryParams.TraceID) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.TraceID, constants.TraceID, &query, args)
}
if len(queryParams.ServiceName) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ServiceName, constants.ServiceName, &query, args)
}
if len(queryParams.HttpRoute) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpRoute, constants.HttpRoute, &query, args)
}
if len(queryParams.HttpHost) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpHost, constants.HttpHost, &query, args)
}
if len(queryParams.HttpMethod) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpMethod, constants.HttpMethod, &query, args)
}
if len(queryParams.HttpUrl) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpUrl, constants.HttpUrl, &query, args)
}
if len(queryParams.Operation) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.Operation, constants.OperationDB, &query, args)
}
if len(queryParams.RPCMethod) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.RPCMethod, constants.RPCMethod, &query, args)
}
if len(queryParams.ResponseStatusCode) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ResponseStatusCode, constants.ResponseStatusCode, &query, args)
}
if len(queryParams.MinDuration) != 0 {
query = query + " AND durationNano >= @durationNanoMin"
args = append(args, clickhouse.Named("durationNanoMin", queryParams.MinDuration))
}
if len(queryParams.MaxDuration) != 0 {
query = query + " AND durationNano <= @durationNanoMax"
args = append(args, clickhouse.Named("durationNanoMax", queryParams.MaxDuration))
}
query = getStatusFilters(query, queryParams.Status, excludeMap)
if len(queryParams.SpanKind) != 0 {
query = query + " AND kind = @kind"
args = append(args, clickhouse.Named("kind", queryParams.SpanKind))
}
// create TagQuery from TagQueryParams
tags := createTagQueryFromTagQueryParams(queryParams.Tags)
subQuery, argsSubQuery, errStatus := buildQueryWithTagParams(ctx, tags)
query += subQuery
args = append(args, argsSubQuery...)
if errStatus != nil {
return nil, errStatus
}
if len(queryParams.OrderParam) != 0 {
if queryParams.OrderParam == constants.Duration {
queryTable = fmt.Sprintf("%s.%s", r.TraceDB, r.durationTable)
if queryParams.Order == constants.Descending {
query = query + " ORDER BY durationNano DESC"
}
if queryParams.Order == constants.Ascending {
query = query + " ORDER BY durationNano ASC"
}
} else if queryParams.OrderParam == constants.Timestamp {
projectionOptQuery := "SET allow_experimental_projection_optimization = 1"
err := r.db.Exec(ctx, projectionOptQuery)
zap.L().Info(projectionOptQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if queryParams.Order == constants.Descending {
query = query + " ORDER BY timestamp DESC"
}
if queryParams.Order == constants.Ascending {
query = query + " ORDER BY timestamp ASC"
}
}
}
if queryParams.Limit > 0 {
query = query + " LIMIT @limit"
args = append(args, clickhouse.Named("limit", queryParams.Limit))
}
if queryParams.Offset > 0 {
query = query + " OFFSET @offset"
args = append(args, clickhouse.Named("offset", queryParams.Offset))
}
var getFilterSpansResponseItems []model.GetFilterSpansResponseItem
baseQuery := fmt.Sprintf("SELECT timestamp, spanID, traceID, serviceName, name, durationNano, httpMethod, rpcMethod, responseStatusCode FROM %s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryTable)
baseQuery += query
err := r.db.Select(ctx, &getFilterSpansResponseItems, baseQuery, args...)
zap.L().Info(baseQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
// Fill status and method
for i, e := range getFilterSpansResponseItems {
if e.RPCMethod != "" {
getFilterSpansResponseItems[i].Method = e.RPCMethod
} else {
getFilterSpansResponseItems[i].Method = e.HttpMethod
}
}
getFilterSpansResponse := model.GetFilterSpansResponse{
Spans: getFilterSpansResponseItems,
TotalSpans: 1000,
}
return &getFilterSpansResponse, nil
}
func createTagQueryFromTagQueryParams(queryParams []model.TagQueryParam) []model.TagQuery {
tags := []model.TagQuery{}
for _, tag := range queryParams {
if len(tag.StringValues) > 0 {
tags = append(tags, model.NewTagQueryString(tag))
}
if len(tag.NumberValues) > 0 {
tags = append(tags, model.NewTagQueryNumber(tag))
}
if len(tag.BoolValues) > 0 {
tags = append(tags, model.NewTagQueryBool(tag))
}
}
return tags
}
func StringWithCharset(length int, charset string) string {
b := make([]byte, length)
for i := range b {
b[i] = charset[seededRand.Intn(len(charset))]
}
return string(b)
}
func String(length int) string {
return StringWithCharset(length, charset)
}
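// String and StringWithCharset produce short random suffixes (e.g. five letters)
// so that repeated filters in a single query each bind to a uniquely named
// parameter such as serviceNameabcde; see buildFilterArrayQuery and the tag
// operator helpers below.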
func buildQueryWithTagParams(ctx context.Context, tags []model.TagQuery) (string, []interface{}, *model.ApiError) {
query := ""
var args []interface{}
for _, item := range tags {
var subQuery string
var argsSubQuery []interface{}
tagMapType := item.GetTagMapColumn()
switch item.GetOperator() {
case model.EqualOperator:
subQuery, argsSubQuery = addArithmeticOperator(item, tagMapType, "=")
case model.NotEqualOperator:
subQuery, argsSubQuery = addArithmeticOperator(item, tagMapType, "!=")
case model.LessThanOperator:
subQuery, argsSubQuery = addArithmeticOperator(item, tagMapType, "<")
case model.GreaterThanOperator:
subQuery, argsSubQuery = addArithmeticOperator(item, tagMapType, ">")
case model.InOperator:
subQuery, argsSubQuery = addInOperator(item, tagMapType, false)
case model.NotInOperator:
subQuery, argsSubQuery = addInOperator(item, tagMapType, true)
case model.LessThanEqualOperator:
subQuery, argsSubQuery = addArithmeticOperator(item, tagMapType, "<=")
case model.GreaterThanEqualOperator:
subQuery, argsSubQuery = addArithmeticOperator(item, tagMapType, ">=")
case model.ContainsOperator:
subQuery, argsSubQuery = addContainsOperator(item, tagMapType, false)
case model.NotContainsOperator:
subQuery, argsSubQuery = addContainsOperator(item, tagMapType, true)
case model.StartsWithOperator:
subQuery, argsSubQuery = addStartsWithOperator(item, tagMapType, false)
case model.NotStartsWithOperator:
subQuery, argsSubQuery = addStartsWithOperator(item, tagMapType, true)
case model.ExistsOperator:
subQuery, argsSubQuery = addExistsOperator(item, tagMapType, false)
case model.NotExistsOperator:
subQuery, argsSubQuery = addExistsOperator(item, tagMapType, true)
default:
return "", nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("filter operator %s not supported", item.GetOperator())}
}
query += subQuery
args = append(args, argsSubQuery...)
}
return query, args, nil
}
func addInOperator(item model.TagQuery, tagMapType string, not bool) (string, []interface{}) {
values := item.GetValues()
args := []interface{}{}
notStr := ""
if not {
notStr = "NOT"
}
tagValuePair := []string{}
for _, value := range values {
tagKey := "inTagKey" + String(5)
tagValue := "inTagValue" + String(5)
tagValuePair = append(tagValuePair, fmt.Sprintf("%s[@%s] = @%s", tagMapType, tagKey, tagValue))
args = append(args, clickhouse.Named(tagKey, item.GetKey()))
args = append(args, clickhouse.Named(tagValue, value))
}
return fmt.Sprintf(" AND %s (%s)", notStr, strings.Join(tagValuePair, " OR ")), args
}
func addContainsOperator(item model.TagQuery, tagMapType string, not bool) (string, []interface{}) {
values := item.GetValues()
args := []interface{}{}
notStr := ""
if not {
notStr = "NOT"
}
tagValuePair := []string{}
for _, value := range values {
tagKey := "containsTagKey" + String(5)
tagValue := "containsTagValue" + String(5)
tagValuePair = append(tagValuePair, fmt.Sprintf("%s[@%s] ILIKE @%s", tagMapType, tagKey, tagValue))
args = append(args, clickhouse.Named(tagKey, item.GetKey()))
args = append(args, clickhouse.Named(tagValue, "%"+fmt.Sprintf("%v", value)+"%"))
}
return fmt.Sprintf(" AND %s (%s)", notStr, strings.Join(tagValuePair, " OR ")), args
}
func addStartsWithOperator(item model.TagQuery, tagMapType string, not bool) (string, []interface{}) {
values := item.GetValues()
args := []interface{}{}
notStr := ""
if not {
notStr = "NOT"
}
tagValuePair := []string{}
for _, value := range values {
tagKey := "startsWithTagKey" + String(5)
tagValue := "startsWithTagValue" + String(5)
tagValuePair = append(tagValuePair, fmt.Sprintf("%s[@%s] ILIKE @%s", tagMapType, tagKey, tagValue))
args = append(args, clickhouse.Named(tagKey, item.GetKey()))
args = append(args, clickhouse.Named(tagValue, "%"+fmt.Sprintf("%v", value)+"%"))
}
return fmt.Sprintf(" AND %s (%s)", notStr, strings.Join(tagValuePair, " OR ")), args
}
func addArithmeticOperator(item model.TagQuery, tagMapType string, operator string) (string, []interface{}) {
values := item.GetValues()
args := []interface{}{}
tagValuePair := []string{}
for _, value := range values {
tagKey := "arithmeticTagKey" + String(5)
tagValue := "arithmeticTagValue" + String(5)
tagValuePair = append(tagValuePair, fmt.Sprintf("%s[@%s] %s @%s", tagMapType, tagKey, operator, tagValue))
args = append(args, clickhouse.Named(tagKey, item.GetKey()))
args = append(args, clickhouse.Named(tagValue, value))
}
return fmt.Sprintf(" AND (%s)", strings.Join(tagValuePair, " OR ")), args
}
func addExistsOperator(item model.TagQuery, tagMapType string, not bool) (string, []interface{}) {
values := item.GetValues()
notStr := ""
if not {
notStr = "NOT"
}
args := []interface{}{}
tagOperatorPair := []string{}
for range values {
tagKey := "existsTagKey" + String(5)
tagOperatorPair = append(tagOperatorPair, fmt.Sprintf("mapContains(%s, @%s)", tagMapType, tagKey))
args = append(args, clickhouse.Named(tagKey, item.GetKey()))
}
return fmt.Sprintf(" AND %s (%s)", notStr, strings.Join(tagOperatorPair, " OR ")), args
}
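// Illustrative output (suffixes are random; the key and values are hypothetical):
// an IN query on key "http.status_code" with values "200" and "404" against
// stringTagMap yields a fragment like
//
//	AND (stringTagMap[@inTagKeyabcde] = @inTagValueabcde OR stringTagMap[@inTagKeyfghij] = @inTagValuefghij)
//
// The NOT variants prefix the group with NOT, and the exists operators use
// mapContains(stringTagMap, @existsTagKey...) instead of a comparison.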
func (r *ClickHouseReader) GetTagFilters(ctx context.Context, queryParams *model.TagFilterParams) (*model.TagFilters, *model.ApiError) {
excludeMap := make(map[string]struct{})
for _, e := range queryParams.Exclude {
if e == constants.OperationRequest {
excludeMap[constants.OperationDB] = struct{}{}
continue
}
excludeMap[e] = struct{}{}
}
var query string
args := []interface{}{clickhouse.Named("timestampL", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), clickhouse.Named("timestampU", strconv.FormatInt(queryParams.End.UnixNano(), 10))}
if len(queryParams.TraceID) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.TraceID, constants.TraceID, &query, args)
}
if len(queryParams.ServiceName) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ServiceName, constants.ServiceName, &query, args)
}
if len(queryParams.HttpRoute) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpRoute, constants.HttpRoute, &query, args)
}
if len(queryParams.HttpHost) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpHost, constants.HttpHost, &query, args)
}
if len(queryParams.HttpMethod) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpMethod, constants.HttpMethod, &query, args)
}
if len(queryParams.HttpUrl) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpUrl, constants.HttpUrl, &query, args)
}
if len(queryParams.Operation) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.Operation, constants.OperationDB, &query, args)
}
if len(queryParams.RPCMethod) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.RPCMethod, constants.RPCMethod, &query, args)
}
if len(queryParams.ResponseStatusCode) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ResponseStatusCode, constants.ResponseStatusCode, &query, args)
}
if len(queryParams.MinDuration) != 0 {
query = query + " AND durationNano >= @durationNanoMin"
args = append(args, clickhouse.Named("durationNanoMin", queryParams.MinDuration))
}
if len(queryParams.MaxDuration) != 0 {
query = query + " AND durationNano <= @durationNanoMax"
args = append(args, clickhouse.Named("durationNanoMax", queryParams.MaxDuration))
}
if len(queryParams.SpanKind) != 0 {
query = query + " AND kind = @kind"
args = append(args, clickhouse.Named("kind", queryParams.SpanKind))
}
query = getStatusFilters(query, queryParams.Status, excludeMap)
tagFilters := []model.TagFilters{}
// Alternative finalQuery := fmt.Sprintf(`SELECT DISTINCT arrayJoin(tagMap.keys) as tagKeys FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU`, r.TraceDB, r.indexTable)
finalQuery := fmt.Sprintf(`SELECT groupUniqArrayArray(mapKeys(stringTagMap)) as stringTagKeys, groupUniqArrayArray(mapKeys(numberTagMap)) as numberTagKeys, groupUniqArrayArray(mapKeys(boolTagMap)) as boolTagKeys FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU`, r.TraceDB, r.indexTable)
finalQuery += query
err := r.db.Select(ctx, &tagFilters, finalQuery, args...)
zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
tagFiltersResult := model.TagFilters{
StringTagKeys: make([]string, 0),
NumberTagKeys: make([]string, 0),
BoolTagKeys: make([]string, 0),
}
if len(tagFilters) != 0 {
tagFiltersResult.StringTagKeys = excludeTags(ctx, tagFilters[0].StringTagKeys)
tagFiltersResult.NumberTagKeys = excludeTags(ctx, tagFilters[0].NumberTagKeys)
tagFiltersResult.BoolTagKeys = excludeTags(ctx, tagFilters[0].BoolTagKeys)
}
return &tagFiltersResult, nil
}
func excludeTags(ctx context.Context, tags []string) []string {
excludedTagsMap := map[string]bool{
"http.code": true,
"http.route": true,
"http.method": true,
"http.url": true,
"http.status_code": true,
"http.host": true,
"messaging.system": true,
"messaging.operation": true,
"error": true,
"service.name": true,
}
newTags := make([]string, 0)
for _, tag := range tags {
_, ok := excludedTagsMap[tag]
if !ok {
newTags = append(newTags, tag)
}
}
return newTags
}
func (r *ClickHouseReader) GetTagValues(ctx context.Context, queryParams *model.TagFilterParams) (*model.TagValues, *model.ApiError) {
if queryParams.TagKey.Type == model.TagTypeNumber {
return &model.TagValues{
NumberTagValues: make([]float64, 0),
StringTagValues: make([]string, 0),
BoolTagValues: make([]bool, 0),
}, nil
} else if queryParams.TagKey.Type == model.TagTypeBool {
return &model.TagValues{
NumberTagValues: make([]float64, 0),
StringTagValues: make([]string, 0),
BoolTagValues: []bool{true, false},
}, nil
}
excludeMap := make(map[string]struct{})
for _, e := range queryParams.Exclude {
if e == constants.OperationRequest {
excludeMap[constants.OperationDB] = struct{}{}
continue
}
excludeMap[e] = struct{}{}
}
var query string
args := []interface{}{clickhouse.Named("timestampL", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), clickhouse.Named("timestampU", strconv.FormatInt(queryParams.End.UnixNano(), 10))}
if len(queryParams.TraceID) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.TraceID, constants.TraceID, &query, args)
}
if len(queryParams.ServiceName) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ServiceName, constants.ServiceName, &query, args)
}
if len(queryParams.HttpRoute) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpRoute, constants.HttpRoute, &query, args)
}
if len(queryParams.HttpHost) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpHost, constants.HttpHost, &query, args)
}
if len(queryParams.HttpMethod) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpMethod, constants.HttpMethod, &query, args)
}
if len(queryParams.HttpUrl) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpUrl, constants.HttpUrl, &query, args)
}
if len(queryParams.Operation) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.Operation, constants.OperationDB, &query, args)
}
if len(queryParams.MinDuration) != 0 {
query = query + " AND durationNano >= @durationNanoMin"
args = append(args, clickhouse.Named("durationNanoMin", queryParams.MinDuration))
}
if len(queryParams.MaxDuration) != 0 {
query = query + " AND durationNano <= @durationNanoMax"
args = append(args, clickhouse.Named("durationNanoMax", queryParams.MaxDuration))
}
if len(queryParams.SpanKind) != 0 {
query = query + " AND kind = @kind"
args = append(args, clickhouse.Named("kind", queryParams.SpanKind))
}
query = getStatusFilters(query, queryParams.Status, excludeMap)
tagValues := []model.TagValues{}
finalQuery := fmt.Sprintf(`SELECT groupArray(DISTINCT stringTagMap[@key]) as stringTagValues FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU`, r.TraceDB, r.indexTable)
finalQuery += query
finalQuery += " LIMIT @limit"
args = append(args, clickhouse.Named("key", queryParams.TagKey.Key))
args = append(args, clickhouse.Named("limit", queryParams.Limit))
err := r.db.Select(ctx, &tagValues, finalQuery, args...)
	zap.L().Info(finalQuery)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
cleanedTagValues := model.TagValues{
StringTagValues: []string{},
NumberTagValues: []float64{},
BoolTagValues: []bool{},
}
if len(tagValues) == 0 {
return &cleanedTagValues, nil
}
for _, e := range tagValues[0].StringTagValues {
if e != "" {
cleanedTagValues.StringTagValues = append(cleanedTagValues.StringTagValues, e)
}
}
return &cleanedTagValues, nil
}
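// GetTopOperations returns, for a service and time range, every operation name with its
// p50/p95/p99 latency, call count and error count, ordered by p99 descending and
// optionally limited.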
func (r *ClickHouseReader) GetTopOperations(ctx context.Context, queryParams *model.GetTopOperationsParams) (*[]model.TopOperationsItem, *model.ApiError) {
namedArgs := []interface{}{
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
clickhouse.Named("serviceName", queryParams.ServiceName),
}
var topOperationsItems []model.TopOperationsItem
query := fmt.Sprintf(`
SELECT
quantile(0.5)(durationNano) as p50,
quantile(0.95)(durationNano) as p95,
quantile(0.99)(durationNano) as p99,
COUNT(*) as numCalls,
countIf(statusCode=2) as errorCount,
name
FROM %s.%s
WHERE serviceName = @serviceName AND timestamp>= @start AND timestamp<= @end`,
r.TraceDB, r.indexTable,
)
args := []interface{}{}
args = append(args, namedArgs...)
// create TagQuery from TagQueryParams
tags := createTagQueryFromTagQueryParams(queryParams.Tags)
subQuery, argsSubQuery, errStatus := buildQueryWithTagParams(ctx, tags)
query += subQuery
args = append(args, argsSubQuery...)
if errStatus != nil {
return nil, errStatus
}
query += " GROUP BY name ORDER BY p99 DESC"
if queryParams.Limit > 0 {
query += " LIMIT @limit"
args = append(args, clickhouse.Named("limit", queryParams.Limit))
}
err := r.db.Select(ctx, &topOperationsItems, query, args...)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if topOperationsItems == nil {
topOperationsItems = []model.TopOperationsItem{}
}
return &topOperationsItems, nil
}
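// GetUsage returns span counts from the usage explorer table bucketed by the requested
// interval (in hours), optionally restricted to a single service.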
func (r *ClickHouseReader) GetUsage(ctx context.Context, queryParams *model.GetUsageParams) (*[]model.UsageItem, error) {
var usageItems []model.UsageItem
namedArgs := []interface{}{
clickhouse.Named("interval", queryParams.StepHour),
clickhouse.Named("start", strconv.FormatInt(queryParams.Start.UnixNano(), 10)),
clickhouse.Named("end", strconv.FormatInt(queryParams.End.UnixNano(), 10)),
}
var query string
if len(queryParams.ServiceName) != 0 {
namedArgs = append(namedArgs, clickhouse.Named("serviceName", queryParams.ServiceName))
query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL @interval HOUR) as time, sum(count) as count FROM %s.%s WHERE service_name=@serviceName AND timestamp>=@start AND timestamp<=@end GROUP BY time ORDER BY time ASC", r.TraceDB, r.usageExplorerTable)
} else {
query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL @interval HOUR) as time, sum(count) as count FROM %s.%s WHERE timestamp>=@start AND timestamp<=@end GROUP BY time ORDER BY time ASC", r.TraceDB, r.usageExplorerTable)
}
err := r.db.Select(ctx, &usageItems, query, namedArgs...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, fmt.Errorf("error in processing sql query")
}
for i := range usageItems {
usageItems[i].Timestamp = uint64(usageItems[i].Time.UnixNano())
}
if usageItems == nil {
usageItems = []model.UsageItem{}
}
return &usageItems, nil
}
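// SearchTraces loads all spans of a trace from the spans table. Traces larger than
// params.MaxSpansInTrace are rejected; if the span count exceeds params.SpansRenderLimit
// and the SmartTraceDetail feature is enabled, the supplied smartTraceAlgorithm callback
// trims the result around the selected span.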
func (r *ClickHouseReader) SearchTraces(ctx context.Context, params *model.SearchTracesParams,
smartTraceAlgorithm func(payload []model.SearchSpanResponseItem, targetSpanId string,
levelUp int, levelDown int, spanLimit int) ([]model.SearchSpansResult, error)) (*[]model.SearchSpansResult, error) {
var countSpans uint64
countQuery := fmt.Sprintf("SELECT count() as count from %s.%s WHERE traceID=$1", r.TraceDB, r.SpansTable)
err := r.db.QueryRow(ctx, countQuery, params.TraceID).Scan(&countSpans)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, fmt.Errorf("error in processing sql query")
}
if countSpans > uint64(params.MaxSpansInTrace) {
zap.L().Error("Max spans allowed in a trace limit reached", zap.Int("MaxSpansInTrace", params.MaxSpansInTrace),
zap.Uint64("Count", countSpans))
userEmail, err := auth.GetEmailFromJwt(ctx)
if err == nil {
data := map[string]interface{}{
"traceSize": countSpans,
"maxSpansInTraceLimit": params.MaxSpansInTrace,
}
telemetry.GetInstance().SendEvent(telemetry.TELEMETRY_EVENT_MAX_SPANS_ALLOWED_LIMIT_REACHED, data, userEmail, true, false)
}
return nil, fmt.Errorf("max spans allowed in trace limit reached, please contact support for more details")
}
userEmail, err := auth.GetEmailFromJwt(ctx)
if err == nil {
data := map[string]interface{}{
"traceSize": countSpans,
}
telemetry.GetInstance().SendEvent(telemetry.TELEMETRY_EVENT_TRACE_DETAIL_API, data, userEmail, true, false)
}
var startTime, endTime, durationNano uint64
var searchScanResponses []model.SearchSpanDBResponseItem
query := fmt.Sprintf("SELECT timestamp, traceID, model FROM %s.%s WHERE traceID=$1", r.TraceDB, r.SpansTable)
start := time.Now()
err = r.db.Select(ctx, &searchScanResponses, query, params.TraceID)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, fmt.Errorf("error in processing sql query")
}
end := time.Now()
zap.L().Debug("getTraceSQLQuery took: ", zap.Duration("duration", end.Sub(start)))
searchSpansResult := []model.SearchSpansResult{{
Columns: []string{"__time", "SpanId", "TraceId", "ServiceName", "Name", "Kind", "DurationNano", "TagsKeys", "TagsValues", "References", "Events", "HasError", "StatusMessage", "StatusCodeString", "SpanKind"},
Events: make([][]interface{}, len(searchScanResponses)),
IsSubTree: false,
},
}
searchSpanResponses := []model.SearchSpanResponseItem{}
start = time.Now()
for _, item := range searchScanResponses {
		var jsonItem model.SearchSpanResponseItem
		if err := easyjson.Unmarshal([]byte(item.Model), &jsonItem); err != nil {
			zap.L().Error("Error in unmarshalling span model", zap.Error(err))
			continue
		}
jsonItem.TimeUnixNano = uint64(item.Timestamp.UnixNano() / 1000000)
searchSpanResponses = append(searchSpanResponses, jsonItem)
if startTime == 0 || jsonItem.TimeUnixNano < startTime {
startTime = jsonItem.TimeUnixNano
}
if endTime == 0 || jsonItem.TimeUnixNano > endTime {
endTime = jsonItem.TimeUnixNano
}
if durationNano == 0 || uint64(jsonItem.DurationNano) > durationNano {
durationNano = uint64(jsonItem.DurationNano)
}
}
end = time.Now()
zap.L().Debug("getTraceSQLQuery unmarshal took: ", zap.Duration("duration", end.Sub(start)))
err = r.featureFlags.CheckFeature(model.SmartTraceDetail)
smartAlgoEnabled := err == nil
if len(searchScanResponses) > params.SpansRenderLimit && smartAlgoEnabled {
start = time.Now()
searchSpansResult, err = smartTraceAlgorithm(searchSpanResponses, params.SpanID, params.LevelUp, params.LevelDown, params.SpansRenderLimit)
if err != nil {
return nil, err
}
end = time.Now()
zap.L().Debug("smartTraceAlgo took: ", zap.Duration("duration", end.Sub(start)))
userEmail, err := auth.GetEmailFromJwt(ctx)
if err == nil {
data := map[string]interface{}{
"traceSize": len(searchScanResponses),
"spansRenderLimit": params.SpansRenderLimit,
}
telemetry.GetInstance().SendEvent(telemetry.TELEMETRY_EVENT_LARGE_TRACE_OPENED, data, userEmail, true, false)
}
} else {
for i, item := range searchSpanResponses {
spanEvents := item.GetValues()
searchSpansResult[0].Events[i] = spanEvents
}
}
searchSpansResult[0].StartTimestampMillis = startTime - (durationNano / 1000000)
searchSpansResult[0].EndTimestampMillis = endTime + (durationNano / 1000000)
return &searchSpansResult, nil
}
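// GetDependencyGraph returns parent->child service edges with latency quantiles (p50-p99),
// call count, call rate and error rate computed from the pre-aggregated dependency graph table.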
func (r *ClickHouseReader) GetDependencyGraph(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) {
response := []model.ServiceMapDependencyResponseItem{}
args := []interface{}{}
args = append(args,
clickhouse.Named("start", uint64(queryParams.Start.Unix())),
clickhouse.Named("end", uint64(queryParams.End.Unix())),
clickhouse.Named("duration", uint64(queryParams.End.Unix()-queryParams.Start.Unix())),
)
query := fmt.Sprintf(`
WITH
quantilesMergeState(0.5, 0.75, 0.9, 0.95, 0.99)(duration_quantiles_state) AS duration_quantiles_state,
finalizeAggregation(duration_quantiles_state) AS result
SELECT
src as parent,
dest as child,
result[1] AS p50,
result[2] AS p75,
result[3] AS p90,
result[4] AS p95,
result[5] AS p99,
sum(total_count) as callCount,
sum(total_count)/ @duration AS callRate,
sum(error_count)/sum(total_count) * 100 as errorRate
FROM %s.%s
WHERE toUInt64(toDateTime(timestamp)) >= @start AND toUInt64(toDateTime(timestamp)) <= @end`,
r.TraceDB, r.dependencyGraphTable,
)
tags := createTagQueryFromTagQueryParams(queryParams.Tags)
filterQuery, filterArgs := services.BuildServiceMapQuery(tags)
query += filterQuery + " GROUP BY src, dest;"
args = append(args, filterArgs...)
zap.L().Debug("GetDependencyGraph query", zap.String("query", query), zap.Any("args", args))
err := r.db.Select(ctx, &response, query, args...)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, fmt.Errorf("error in processing sql query %w", err)
}
return &response, nil
}
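// GetFilteredSpansAggregates returns a time series of span aggregates (duration percentiles,
// min/max/avg/sum or call counts) bucketed by StepSeconds, optionally grouped by a known
// column or by a tag expressed as "key.(string|number|bool)".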
func (r *ClickHouseReader) GetFilteredSpansAggregates(ctx context.Context, queryParams *model.GetFilteredSpanAggregatesParams) (*model.GetFilteredSpansAggregatesResponse, *model.ApiError) {
excludeMap := make(map[string]struct{})
for _, e := range queryParams.Exclude {
if e == constants.OperationRequest {
excludeMap[constants.OperationDB] = struct{}{}
continue
}
excludeMap[e] = struct{}{}
}
SpanAggregatesDBResponseItems := []model.SpanAggregatesDBResponseItem{}
aggregation_query := ""
if queryParams.Dimension == "duration" {
switch queryParams.AggregationOption {
case "p50":
aggregation_query = " quantile(0.50)(durationNano) as float64Value "
case "p95":
aggregation_query = " quantile(0.95)(durationNano) as float64Value "
case "p90":
aggregation_query = " quantile(0.90)(durationNano) as float64Value "
case "p99":
aggregation_query = " quantile(0.99)(durationNano) as float64Value "
case "max":
aggregation_query = " max(durationNano) as value "
case "min":
aggregation_query = " min(durationNano) as value "
case "avg":
aggregation_query = " avg(durationNano) as float64Value "
case "sum":
aggregation_query = " sum(durationNano) as value "
default:
return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("aggregate type: %s not supported", queryParams.AggregationOption)}
}
} else if queryParams.Dimension == "calls" {
aggregation_query = " count(*) as value "
}
args := []interface{}{clickhouse.Named("timestampL", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), clickhouse.Named("timestampU", strconv.FormatInt(queryParams.End.UnixNano(), 10))}
var query string
var customStr []string
_, columnExists := constants.GroupByColMap[queryParams.GroupBy]
// Using %s for groupBy params as it can be a custom column and custom columns are not supported by clickhouse-go yet:
// issue link: https://github.com/ClickHouse/clickhouse-go/issues/870
if queryParams.GroupBy != "" && columnExists {
query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d minute) as time, %s as groupBy, %s FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryParams.StepSeconds/60, queryParams.GroupBy, aggregation_query, r.TraceDB, r.indexTable)
args = append(args, clickhouse.Named("groupByVar", queryParams.GroupBy))
} else if queryParams.GroupBy != "" {
customStr = strings.Split(queryParams.GroupBy, ".(")
if len(customStr) < 2 {
return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("GroupBy: %s not supported", queryParams.GroupBy)}
}
if customStr[1] == string(model.TagTypeString)+")" {
query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d minute) as time, stringTagMap['%s'] as groupBy, %s FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryParams.StepSeconds/60, customStr[0], aggregation_query, r.TraceDB, r.indexTable)
} else if customStr[1] == string(model.TagTypeNumber)+")" {
query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d minute) as time, toString(numberTagMap['%s']) as groupBy, %s FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryParams.StepSeconds/60, customStr[0], aggregation_query, r.TraceDB, r.indexTable)
} else if customStr[1] == string(model.TagTypeBool)+")" {
query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d minute) as time, toString(boolTagMap['%s']) as groupBy, %s FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryParams.StepSeconds/60, customStr[0], aggregation_query, r.TraceDB, r.indexTable)
} else {
// return error for unsupported group by
return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("GroupBy: %s not supported", queryParams.GroupBy)}
}
} else {
query = fmt.Sprintf("SELECT toStartOfInterval(timestamp, INTERVAL %d minute) as time, %s FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", queryParams.StepSeconds/60, aggregation_query, r.TraceDB, r.indexTable)
}
if len(queryParams.TraceID) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.TraceID, constants.TraceID, &query, args)
}
if len(queryParams.ServiceName) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ServiceName, constants.ServiceName, &query, args)
}
if len(queryParams.HttpRoute) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpRoute, constants.HttpRoute, &query, args)
}
if len(queryParams.HttpHost) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpHost, constants.HttpHost, &query, args)
}
if len(queryParams.HttpMethod) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpMethod, constants.HttpMethod, &query, args)
}
if len(queryParams.HttpUrl) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.HttpUrl, constants.HttpUrl, &query, args)
}
if len(queryParams.Operation) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.Operation, constants.OperationDB, &query, args)
}
if len(queryParams.RPCMethod) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.RPCMethod, constants.RPCMethod, &query, args)
}
if len(queryParams.ResponseStatusCode) > 0 {
args = buildFilterArrayQuery(ctx, excludeMap, queryParams.ResponseStatusCode, constants.ResponseStatusCode, &query, args)
}
if len(queryParams.MinDuration) != 0 {
query = query + " AND durationNano >= @durationNanoMin"
args = append(args, clickhouse.Named("durationNanoMin", queryParams.MinDuration))
}
if len(queryParams.MaxDuration) != 0 {
query = query + " AND durationNano <= @durationNanoMax"
args = append(args, clickhouse.Named("durationNanoMax", queryParams.MaxDuration))
}
query = getStatusFilters(query, queryParams.Status, excludeMap)
if len(queryParams.SpanKind) != 0 {
query = query + " AND kind = @kind"
args = append(args, clickhouse.Named("kind", queryParams.SpanKind))
}
// create TagQuery from TagQueryParams
tags := createTagQueryFromTagQueryParams(queryParams.Tags)
subQuery, argsSubQuery, errStatus := buildQueryWithTagParams(ctx, tags)
query += subQuery
args = append(args, argsSubQuery...)
if errStatus != nil {
return nil, errStatus
}
if queryParams.GroupBy != "" && columnExists {
query = query + fmt.Sprintf(" GROUP BY time, %s as groupBy ORDER BY time", queryParams.GroupBy)
} else if queryParams.GroupBy != "" {
if customStr[1] == string(model.TagTypeString)+")" {
query = query + fmt.Sprintf(" GROUP BY time, stringTagMap['%s'] as groupBy ORDER BY time", customStr[0])
} else if customStr[1] == string(model.TagTypeNumber)+")" {
query = query + fmt.Sprintf(" GROUP BY time, toString(numberTagMap['%s']) as groupBy ORDER BY time", customStr[0])
} else if customStr[1] == string(model.TagTypeBool)+")" {
query = query + fmt.Sprintf(" GROUP BY time, toString(boolTagMap['%s']) as groupBy ORDER BY time", customStr[0])
}
} else {
query = query + " GROUP BY time ORDER BY time"
}
err := r.db.Select(ctx, &SpanAggregatesDBResponseItems, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
GetFilteredSpansAggregatesResponse := model.GetFilteredSpansAggregatesResponse{
Items: map[int64]model.SpanAggregatesResponseItem{},
}
for i := range SpanAggregatesDBResponseItems {
if SpanAggregatesDBResponseItems[i].Value == 0 {
SpanAggregatesDBResponseItems[i].Value = uint64(SpanAggregatesDBResponseItems[i].Float64Value)
}
SpanAggregatesDBResponseItems[i].Timestamp = int64(SpanAggregatesDBResponseItems[i].Time.UnixNano())
SpanAggregatesDBResponseItems[i].FloatValue = float32(SpanAggregatesDBResponseItems[i].Value)
if queryParams.AggregationOption == "rate_per_sec" {
SpanAggregatesDBResponseItems[i].FloatValue = float32(SpanAggregatesDBResponseItems[i].Value) / float32(queryParams.StepSeconds)
}
if responseElement, ok := GetFilteredSpansAggregatesResponse.Items[SpanAggregatesDBResponseItems[i].Timestamp]; !ok {
if queryParams.GroupBy != "" && SpanAggregatesDBResponseItems[i].GroupBy != "" {
GetFilteredSpansAggregatesResponse.Items[SpanAggregatesDBResponseItems[i].Timestamp] = model.SpanAggregatesResponseItem{
Timestamp: SpanAggregatesDBResponseItems[i].Timestamp,
GroupBy: map[string]float32{SpanAggregatesDBResponseItems[i].GroupBy: SpanAggregatesDBResponseItems[i].FloatValue},
}
} else if queryParams.GroupBy == "" {
GetFilteredSpansAggregatesResponse.Items[SpanAggregatesDBResponseItems[i].Timestamp] = model.SpanAggregatesResponseItem{
Timestamp: SpanAggregatesDBResponseItems[i].Timestamp,
Value: SpanAggregatesDBResponseItems[i].FloatValue,
}
}
} else {
if queryParams.GroupBy != "" && SpanAggregatesDBResponseItems[i].GroupBy != "" {
responseElement.GroupBy[SpanAggregatesDBResponseItems[i].GroupBy] = SpanAggregatesDBResponseItems[i].FloatValue
}
GetFilteredSpansAggregatesResponse.Items[SpanAggregatesDBResponseItems[i].Timestamp] = responseElement
}
}
return &GetFilteredSpansAggregatesResponse, nil
}
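// getLocalTableName maps a distributed table reference of the form "db.distributed_<name>"
// to its local counterpart "db.<name>".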
func getLocalTableName(tableName string) string {
tableNameSplit := strings.Split(tableName, ".")
return tableNameSplit[0] + "." + strings.Split(tableNameSplit[1], "distributed_")[1]
}
// SetTTL sets the TTL for traces or metrics or logs tables.
// This is an async API which creates goroutines to set TTL.
// Status of the TTL update is tracked in the ttl_status table in the sqlite db.
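// For traces, the generated DDL looks like (illustrative placeholders):
//
//	ALTER TABLE <db>.<local_table> ON CLUSTER <cluster> MODIFY TTL
//	    toDateTime(timestamp) + INTERVAL <seconds> SECOND DELETE [, ... TO VOLUME '<volume>']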
func (r *ClickHouseReader) SetTTL(ctx context.Context,
params *model.TTLParams) (*model.SetTTLResponseItem, *model.ApiError) {
// Keep only latest 100 transactions/requests
r.deleteTtlTransactions(ctx, 100)
// uuid is used as transaction id
uuidWithHyphen := uuid.New()
uuid := strings.Replace(uuidWithHyphen.String(), "-", "", -1)
coldStorageDuration := -1
if len(params.ColdStorageVolume) > 0 {
coldStorageDuration = int(params.ToColdStorageDuration)
}
switch params.Type {
case constants.TraceTTL:
tableNameArray := []string{signozTraceDBName + "." + signozTraceTableName, signozTraceDBName + "." + signozDurationMVTable, signozTraceDBName + "." + signozSpansTable, signozTraceDBName + "." + signozErrorIndexTable, signozTraceDBName + "." + signozUsageExplorerTable, signozTraceDBName + "." + defaultDependencyGraphTable}
for _, tableName := range tableNameArray {
tableName := getLocalTableName(tableName)
statusItem, err := r.checkTTLStatusItem(ctx, tableName)
if err != nil {
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing ttl_status check sql query")}
}
if statusItem.Status == constants.StatusPending {
return nil, &model.ApiError{Typ: model.ErrorConflict, Err: fmt.Errorf("TTL is already running")}
}
}
for _, tableName := range tableNameArray {
tableName := getLocalTableName(tableName)
// TODO: DB queries should be implemented with transactional statements but currently clickhouse doesn't support them. Issue: https://github.com/ClickHouse/ClickHouse/issues/22086
go func(tableName string) {
_, dbErr := r.localDB.Exec("INSERT INTO ttl_status (transaction_id, created_at, updated_at, table_name, ttl, status, cold_storage_ttl) VALUES (?, ?, ?, ?, ?, ?, ?)", uuid, time.Now(), time.Now(), tableName, params.DelDuration, constants.StatusPending, coldStorageDuration)
if dbErr != nil {
zap.L().Error("Error in inserting to ttl_status table", zap.Error(dbErr))
return
}
req := fmt.Sprintf(
"ALTER TABLE %v ON CLUSTER %s MODIFY TTL toDateTime(timestamp) + INTERVAL %v SECOND DELETE",
tableName, r.cluster, params.DelDuration)
if len(params.ColdStorageVolume) > 0 {
req += fmt.Sprintf(", toDateTime(timestamp) + INTERVAL %v SECOND TO VOLUME '%s'",
params.ToColdStorageDuration, params.ColdStorageVolume)
}
err := r.setColdStorage(context.Background(), tableName, params.ColdStorageVolume)
if err != nil {
zap.L().Error("Error in setting cold storage", zap.Error(err))
statusItem, err := r.checkTTLStatusItem(ctx, tableName)
if err == nil {
_, dbErr := r.localDB.Exec("UPDATE ttl_status SET updated_at = ?, status = ? WHERE id = ?", time.Now(), constants.StatusFailed, statusItem.Id)
if dbErr != nil {
zap.L().Error("Error in processing ttl_status update sql query", zap.Error(dbErr))
return
}
}
return
}
req += " SETTINGS distributed_ddl_task_timeout = -1;"
zap.L().Error("Executing TTL request: ", zap.String("request", req))
statusItem, _ := r.checkTTLStatusItem(ctx, tableName)
if err := r.db.Exec(context.Background(), req); err != nil {
zap.L().Error("Error in executing set TTL query", zap.Error(err))
_, dbErr := r.localDB.Exec("UPDATE ttl_status SET updated_at = ?, status = ? WHERE id = ?", time.Now(), constants.StatusFailed, statusItem.Id)
if dbErr != nil {
zap.L().Error("Error in processing ttl_status update sql query", zap.Error(dbErr))
return
}
return
}
_, dbErr = r.localDB.Exec("UPDATE ttl_status SET updated_at = ?, status = ? WHERE id = ?", time.Now(), constants.StatusSuccess, statusItem.Id)
if dbErr != nil {
zap.L().Error("Error in processing ttl_status update sql query", zap.Error(dbErr))
return
}
}(tableName)
}
case constants.MetricsTTL:
tableNames := []string{signozMetricDBName + "." + signozSampleLocalTableName, signozMetricDBName + "." + signozTSLocalTableNameV4, signozMetricDBName + "." + signozTSLocalTableNameV46Hrs, signozMetricDBName + "." + signozTSLocalTableNameV41Day}
for _, tableName := range tableNames {
statusItem, err := r.checkTTLStatusItem(ctx, tableName)
if err != nil {
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing ttl_status check sql query")}
}
if statusItem.Status == constants.StatusPending {
return nil, &model.ApiError{Typ: model.ErrorConflict, Err: fmt.Errorf("TTL is already running")}
}
}
metricTTL := func(tableName string) {
_, dbErr := r.localDB.Exec("INSERT INTO ttl_status (transaction_id, created_at, updated_at, table_name, ttl, status, cold_storage_ttl) VALUES (?, ?, ?, ?, ?, ?, ?)", uuid, time.Now(), time.Now(), tableName, params.DelDuration, constants.StatusPending, coldStorageDuration)
if dbErr != nil {
zap.L().Error("Error in inserting to ttl_status table", zap.Error(dbErr))
return
}
timeColumn := "timestamp_ms"
if strings.Contains(tableName, "v4") {
timeColumn = "unix_milli"
}
req := fmt.Sprintf(
"ALTER TABLE %v ON CLUSTER %s MODIFY TTL toDateTime(toUInt32(%s / 1000), 'UTC') + "+
"INTERVAL %v SECOND DELETE", tableName, r.cluster, timeColumn, params.DelDuration)
if len(params.ColdStorageVolume) > 0 {
req += fmt.Sprintf(", toDateTime(toUInt32(%s / 1000), 'UTC')"+
" + INTERVAL %v SECOND TO VOLUME '%s'",
timeColumn, params.ToColdStorageDuration, params.ColdStorageVolume)
}
err := r.setColdStorage(context.Background(), tableName, params.ColdStorageVolume)
if err != nil {
zap.L().Error("Error in setting cold storage", zap.Error(err))
statusItem, err := r.checkTTLStatusItem(ctx, tableName)
if err == nil {
_, dbErr := r.localDB.Exec("UPDATE ttl_status SET updated_at = ?, status = ? WHERE id = ?", time.Now(), constants.StatusFailed, statusItem.Id)
if dbErr != nil {
zap.L().Error("Error in processing ttl_status update sql query", zap.Error(dbErr))
return
}
}
return
}
req += " SETTINGS distributed_ddl_task_timeout = -1"
zap.L().Info("Executing TTL request: ", zap.String("request", req))
statusItem, _ := r.checkTTLStatusItem(ctx, tableName)
if err := r.db.Exec(ctx, req); err != nil {
zap.L().Error("error while setting ttl.", zap.Error(err))
_, dbErr := r.localDB.Exec("UPDATE ttl_status SET updated_at = ?, status = ? WHERE id = ?", time.Now(), constants.StatusFailed, statusItem.Id)
if dbErr != nil {
zap.L().Error("Error in processing ttl_status update sql query", zap.Error(dbErr))
return
}
return
}
_, dbErr = r.localDB.Exec("UPDATE ttl_status SET updated_at = ?, status = ? WHERE id = ?", time.Now(), constants.StatusSuccess, statusItem.Id)
if dbErr != nil {
zap.L().Error("Error in processing ttl_status update sql query", zap.Error(dbErr))
return
}
}
for _, tableName := range tableNames {
go metricTTL(tableName)
}
case constants.LogsTTL:
tableName := r.logsDB + "." + r.logsLocalTable
statusItem, err := r.checkTTLStatusItem(ctx, tableName)
if err != nil {
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing ttl_status check sql query")}
}
if statusItem.Status == constants.StatusPending {
return nil, &model.ApiError{Typ: model.ErrorConflict, Err: fmt.Errorf("TTL is already running")}
}
go func(tableName string) {
_, dbErr := r.localDB.Exec("INSERT INTO ttl_status (transaction_id, created_at, updated_at, table_name, ttl, status, cold_storage_ttl) VALUES (?, ?, ?, ?, ?, ?, ?)", uuid, time.Now(), time.Now(), tableName, params.DelDuration, constants.StatusPending, coldStorageDuration)
if dbErr != nil {
zap.L().Error("error in inserting to ttl_status table", zap.Error(dbErr))
return
}
req := fmt.Sprintf(
"ALTER TABLE %v ON CLUSTER %s MODIFY TTL toDateTime(timestamp / 1000000000) + "+
"INTERVAL %v SECOND DELETE", tableName, r.cluster, params.DelDuration)
if len(params.ColdStorageVolume) > 0 {
req += fmt.Sprintf(", toDateTime(timestamp / 1000000000)"+
" + INTERVAL %v SECOND TO VOLUME '%s'",
params.ToColdStorageDuration, params.ColdStorageVolume)
}
err := r.setColdStorage(context.Background(), tableName, params.ColdStorageVolume)
if err != nil {
zap.L().Error("error in setting cold storage", zap.Error(err))
statusItem, err := r.checkTTLStatusItem(ctx, tableName)
if err == nil {
_, dbErr := r.localDB.Exec("UPDATE ttl_status SET updated_at = ?, status = ? WHERE id = ?", time.Now(), constants.StatusFailed, statusItem.Id)
if dbErr != nil {
zap.L().Error("Error in processing ttl_status update sql query", zap.Error(dbErr))
return
}
}
return
}
req += " SETTINGS distributed_ddl_task_timeout = -1"
zap.L().Info("Executing TTL request: ", zap.String("request", req))
statusItem, _ := r.checkTTLStatusItem(ctx, tableName)
if err := r.db.Exec(ctx, req); err != nil {
zap.L().Error("error while setting ttl", zap.Error(err))
_, dbErr := r.localDB.Exec("UPDATE ttl_status SET updated_at = ?, status = ? WHERE id = ?", time.Now(), constants.StatusFailed, statusItem.Id)
if dbErr != nil {
zap.L().Error("Error in processing ttl_status update sql query", zap.Error(dbErr))
return
}
return
}
_, dbErr = r.localDB.Exec("UPDATE ttl_status SET updated_at = ?, status = ? WHERE id = ?", time.Now(), constants.StatusSuccess, statusItem.Id)
if dbErr != nil {
zap.L().Error("Error in processing ttl_status update sql query", zap.Error(dbErr))
return
}
}(tableName)
default:
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error while setting ttl. ttl type should be <metrics|traces>, got %v",
params.Type)}
}
return &model.SetTTLResponseItem{Message: "move ttl has been successfully set up"}, nil
}
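// deleteTtlTransactions keeps only the most recent numberOfTransactionsStore transactions
// in the ttl_status table and deletes the rest.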
func (r *ClickHouseReader) deleteTtlTransactions(ctx context.Context, numberOfTransactionsStore int) {
_, err := r.localDB.Exec("DELETE FROM ttl_status WHERE transaction_id NOT IN (SELECT distinct transaction_id FROM ttl_status ORDER BY created_at DESC LIMIT ?)", numberOfTransactionsStore)
if err != nil {
zap.L().Error("Error in processing ttl_status delete sql query", zap.Error(err))
}
}
// checkTTLStatusItem checks if ttl_status table has an entry for the given table name
func (r *ClickHouseReader) checkTTLStatusItem(ctx context.Context, tableName string) (model.TTLStatusItem, *model.ApiError) {
statusItem := []model.TTLStatusItem{}
query := `SELECT id, status, ttl, cold_storage_ttl FROM ttl_status WHERE table_name = ? ORDER BY created_at DESC`
zap.L().Info("checkTTLStatusItem query", zap.String("query", query), zap.String("tableName", tableName))
stmt, err := r.localDB.Preparex(query)
if err != nil {
zap.L().Error("Error preparing query for checkTTLStatusItem", zap.Error(err))
return model.TTLStatusItem{}, &model.ApiError{Typ: model.ErrorInternal, Err: err}
}
err = stmt.Select(&statusItem, tableName)
if len(statusItem) == 0 {
return model.TTLStatusItem{}, nil
}
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return model.TTLStatusItem{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing ttl_status check sql query")}
}
return statusItem[0], nil
}
// setTTLQueryStatus derives the overall status of the last TTL update for the given tables from the ttl_status table
func (r *ClickHouseReader) setTTLQueryStatus(ctx context.Context, tableNameArray []string) (string, *model.ApiError) {
failFlag := false
status := constants.StatusSuccess
for _, tableName := range tableNameArray {
statusItem, err := r.checkTTLStatusItem(ctx, tableName)
emptyStatusStruct := model.TTLStatusItem{}
if statusItem == emptyStatusStruct {
return "", nil
}
if err != nil {
return "", &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing ttl_status check sql query")}
}
if statusItem.Status == constants.StatusPending && statusItem.UpdatedAt.Unix()-time.Now().Unix() < 3600 {
status = constants.StatusPending
return status, nil
}
if statusItem.Status == constants.StatusFailed {
failFlag = true
}
}
if failFlag {
status = constants.StatusFailed
}
return status, nil
}
func (r *ClickHouseReader) setColdStorage(ctx context.Context, tableName string, coldStorageVolume string) *model.ApiError {
	// Set the storage policy for the required table. If it is already set, setting it
	// again is not a problem.
if len(coldStorageVolume) > 0 {
policyReq := fmt.Sprintf("ALTER TABLE %s ON CLUSTER %s MODIFY SETTING storage_policy='tiered'", tableName, r.cluster)
zap.L().Info("Executing Storage policy request: ", zap.String("request", policyReq))
if err := r.db.Exec(ctx, policyReq); err != nil {
zap.L().Error("error while setting storage policy", zap.Error(err))
return &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error while setting storage policy. Err=%v", err)}
}
}
return nil
}
// GetDisks returns a list of disks {name, type} configured in clickhouse DB.
func (r *ClickHouseReader) GetDisks(ctx context.Context) (*[]model.DiskItem, *model.ApiError) {
diskItems := []model.DiskItem{}
query := "SELECT name,type FROM system.disks"
if err := r.db.Select(ctx, &diskItems, query); err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error while getting disks. Err=%v", err)}
}
return &diskItems, nil
}
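// getLocalTableNameArray maps each "db.distributed_<name>" entry to its local
// "db.<name>" counterpart; see getLocalTableName for the single-table variant.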
func getLocalTableNameArray(tableNames []string) []string {
var localTableNames []string
for _, name := range tableNames {
tableNameSplit := strings.Split(name, ".")
localTableNames = append(localTableNames, tableNameSplit[0]+"."+strings.Split(tableNameSplit[1], "distributed_")[1])
}
return localTableNames
}
// GetTTL returns the current TTL, the expected TTL and the status of the last SetTTL request for metrics, traces or logs tables.
func (r *ClickHouseReader) GetTTL(ctx context.Context, ttlParams *model.GetTTLParams) (*model.GetTTLResponseItem, *model.ApiError) {
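	// parseTTL extracts the delete TTL and the move-to-volume TTL (both in hours) from a
	// table's engine_full definition, matching clauses of the form "toIntervalSecond(<n>)"
	// and "toIntervalSecond(<n>) TO VOLUME".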
parseTTL := func(queryResp string) (int, int) {
zap.L().Info("Parsing TTL from: ", zap.String("queryResp", queryResp))
deleteTTLExp := regexp.MustCompile(`toIntervalSecond\(([0-9]*)\)`)
moveTTLExp := regexp.MustCompile(`toIntervalSecond\(([0-9]*)\) TO VOLUME`)
var delTTL, moveTTL int = -1, -1
m := deleteTTLExp.FindStringSubmatch(queryResp)
if len(m) > 1 {
seconds_int, err := strconv.Atoi(m[1])
if err != nil {
return -1, -1
}
delTTL = seconds_int / 3600
}
m = moveTTLExp.FindStringSubmatch(queryResp)
if len(m) > 1 {
seconds_int, err := strconv.Atoi(m[1])
if err != nil {
return -1, -1
}
moveTTL = seconds_int / 3600
}
return delTTL, moveTTL
}
getMetricsTTL := func() (*model.DBResponseTTL, *model.ApiError) {
var dbResp []model.DBResponseTTL
query := fmt.Sprintf("SELECT engine_full FROM system.tables WHERE name='%v'", signozSampleLocalTableName)
err := r.db.Select(ctx, &dbResp, query)
if err != nil {
zap.L().Error("error while getting ttl", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error while getting ttl. Err=%v", err)}
}
if len(dbResp) == 0 {
return nil, nil
} else {
return &dbResp[0], nil
}
}
getTracesTTL := func() (*model.DBResponseTTL, *model.ApiError) {
var dbResp []model.DBResponseTTL
query := fmt.Sprintf("SELECT engine_full FROM system.tables WHERE name='%v' AND database='%v'", signozTraceLocalTableName, signozTraceDBName)
err := r.db.Select(ctx, &dbResp, query)
if err != nil {
zap.L().Error("error while getting ttl", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error while getting ttl. Err=%v", err)}
}
if len(dbResp) == 0 {
return nil, nil
} else {
return &dbResp[0], nil
}
}
getLogsTTL := func() (*model.DBResponseTTL, *model.ApiError) {
var dbResp []model.DBResponseTTL
query := fmt.Sprintf("SELECT engine_full FROM system.tables WHERE name='%v' AND database='%v'", r.logsLocalTable, r.logsDB)
err := r.db.Select(ctx, &dbResp, query)
if err != nil {
zap.L().Error("error while getting ttl", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error while getting ttl. Err=%v", err)}
}
if len(dbResp) == 0 {
return nil, nil
} else {
return &dbResp[0], nil
}
}
switch ttlParams.Type {
case constants.TraceTTL:
tableNameArray := []string{signozTraceDBName + "." + signozTraceTableName, signozTraceDBName + "." + signozDurationMVTable, signozTraceDBName + "." + signozSpansTable, signozTraceDBName + "." + signozErrorIndexTable, signozTraceDBName + "." + signozUsageExplorerTable, signozTraceDBName + "." + defaultDependencyGraphTable}
tableNameArray = getLocalTableNameArray(tableNameArray)
status, err := r.setTTLQueryStatus(ctx, tableNameArray)
if err != nil {
return nil, err
}
dbResp, err := getTracesTTL()
if err != nil {
return nil, err
}
ttlQuery, err := r.checkTTLStatusItem(ctx, tableNameArray[0])
if err != nil {
return nil, err
}
ttlQuery.TTL = ttlQuery.TTL / 3600 // convert to hours
if ttlQuery.ColdStorageTtl != -1 {
ttlQuery.ColdStorageTtl = ttlQuery.ColdStorageTtl / 3600 // convert to hours
}
delTTL, moveTTL := parseTTL(dbResp.EngineFull)
return &model.GetTTLResponseItem{TracesTime: delTTL, TracesMoveTime: moveTTL, ExpectedTracesTime: ttlQuery.TTL, ExpectedTracesMoveTime: ttlQuery.ColdStorageTtl, Status: status}, nil
case constants.MetricsTTL:
tableNameArray := []string{signozMetricDBName + "." + signozSampleTableName}
tableNameArray = getLocalTableNameArray(tableNameArray)
status, err := r.setTTLQueryStatus(ctx, tableNameArray)
if err != nil {
return nil, err
}
dbResp, err := getMetricsTTL()
if err != nil {
return nil, err
}
ttlQuery, err := r.checkTTLStatusItem(ctx, tableNameArray[0])
if err != nil {
return nil, err
}
ttlQuery.TTL = ttlQuery.TTL / 3600 // convert to hours
if ttlQuery.ColdStorageTtl != -1 {
ttlQuery.ColdStorageTtl = ttlQuery.ColdStorageTtl / 3600 // convert to hours
}
delTTL, moveTTL := parseTTL(dbResp.EngineFull)
return &model.GetTTLResponseItem{MetricsTime: delTTL, MetricsMoveTime: moveTTL, ExpectedMetricsTime: ttlQuery.TTL, ExpectedMetricsMoveTime: ttlQuery.ColdStorageTtl, Status: status}, nil
case constants.LogsTTL:
tableNameArray := []string{r.logsDB + "." + r.logsTable}
tableNameArray = getLocalTableNameArray(tableNameArray)
status, err := r.setTTLQueryStatus(ctx, tableNameArray)
if err != nil {
return nil, err
}
dbResp, err := getLogsTTL()
if err != nil {
return nil, err
}
ttlQuery, err := r.checkTTLStatusItem(ctx, tableNameArray[0])
if err != nil {
return nil, err
}
ttlQuery.TTL = ttlQuery.TTL / 3600 // convert to hours
if ttlQuery.ColdStorageTtl != -1 {
ttlQuery.ColdStorageTtl = ttlQuery.ColdStorageTtl / 3600 // convert to hours
}
delTTL, moveTTL := parseTTL(dbResp.EngineFull)
return &model.GetTTLResponseItem{LogsTime: delTTL, LogsMoveTime: moveTTL, ExpectedLogsTime: ttlQuery.TTL, ExpectedLogsMoveTime: ttlQuery.ColdStorageTtl, Status: status}, nil
default:
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error while getting ttl. ttl type should be metrics|traces, got %v",
ttlParams.Type)}
}
}
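// ListErrors returns exception groups (grouped by groupID) seen in the given time range,
// with first/last seen timestamps and occurrence counts, filtered by service name,
// exception type and tags, with optional ordering and pagination.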
func (r *ClickHouseReader) ListErrors(ctx context.Context, queryParams *model.ListErrorsParams) (*[]model.Error, *model.ApiError) {
var getErrorResponses []model.Error
query := "SELECT any(exceptionMessage) as exceptionMessage, count() AS exceptionCount, min(timestamp) as firstSeen, max(timestamp) as lastSeen, groupID"
if len(queryParams.ServiceName) != 0 {
query = query + ", serviceName"
} else {
query = query + ", any(serviceName) as serviceName"
}
if len(queryParams.ExceptionType) != 0 {
query = query + ", exceptionType"
} else {
query = query + ", any(exceptionType) as exceptionType"
}
query += fmt.Sprintf(" FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("timestampL", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), clickhouse.Named("timestampU", strconv.FormatInt(queryParams.End.UnixNano(), 10))}
if len(queryParams.ServiceName) != 0 {
query = query + " AND serviceName ilike @serviceName"
args = append(args, clickhouse.Named("serviceName", "%"+queryParams.ServiceName+"%"))
}
if len(queryParams.ExceptionType) != 0 {
query = query + " AND exceptionType ilike @exceptionType"
args = append(args, clickhouse.Named("exceptionType", "%"+queryParams.ExceptionType+"%"))
}
// create TagQuery from TagQueryParams
tags := createTagQueryFromTagQueryParams(queryParams.Tags)
subQuery, argsSubQuery, errStatus := buildQueryWithTagParams(ctx, tags)
query += subQuery
args = append(args, argsSubQuery...)
if errStatus != nil {
zap.L().Error("Error in processing tags", zap.Error(errStatus))
return nil, errStatus
}
query = query + " GROUP BY groupID"
if len(queryParams.ServiceName) != 0 {
query = query + ", serviceName"
}
if len(queryParams.ExceptionType) != 0 {
query = query + ", exceptionType"
}
if len(queryParams.OrderParam) != 0 {
if queryParams.Order == constants.Descending {
query = query + " ORDER BY " + queryParams.OrderParam + " DESC"
} else if queryParams.Order == constants.Ascending {
query = query + " ORDER BY " + queryParams.OrderParam + " ASC"
}
}
if queryParams.Limit > 0 {
query = query + " LIMIT @limit"
args = append(args, clickhouse.Named("limit", queryParams.Limit))
}
if queryParams.Offset > 0 {
query = query + " OFFSET @offset"
args = append(args, clickhouse.Named("offset", queryParams.Offset))
}
err := r.db.Select(ctx, &getErrorResponses, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
return &getErrorResponses, nil
}
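// CountErrors returns the number of distinct exception groups matching the given
// filters in the time range.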
func (r *ClickHouseReader) CountErrors(ctx context.Context, queryParams *model.CountErrorsParams) (uint64, *model.ApiError) {
var errorCount uint64
query := fmt.Sprintf("SELECT count(distinct(groupID)) FROM %s.%s WHERE timestamp >= @timestampL AND timestamp <= @timestampU", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("timestampL", strconv.FormatInt(queryParams.Start.UnixNano(), 10)), clickhouse.Named("timestampU", strconv.FormatInt(queryParams.End.UnixNano(), 10))}
if len(queryParams.ServiceName) != 0 {
query = query + " AND serviceName ilike @serviceName"
args = append(args, clickhouse.Named("serviceName", "%"+queryParams.ServiceName+"%"))
}
if len(queryParams.ExceptionType) != 0 {
query = query + " AND exceptionType ilike @exceptionType"
args = append(args, clickhouse.Named("exceptionType", "%"+queryParams.ExceptionType+"%"))
}
// create TagQuery from TagQueryParams
tags := createTagQueryFromTagQueryParams(queryParams.Tags)
subQuery, argsSubQuery, errStatus := buildQueryWithTagParams(ctx, tags)
query += subQuery
args = append(args, argsSubQuery...)
if errStatus != nil {
zap.L().Error("Error in processing tags", zap.Error(errStatus))
return 0, errStatus
}
err := r.db.QueryRow(ctx, query, args...).Scan(&errorCount)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return 0, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
return errorCount, nil
}
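// GetErrorFromErrorID fetches a single error/exception record by errorID, groupID
// and exact timestamp.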
func (r *ClickHouseReader) GetErrorFromErrorID(ctx context.Context, queryParams *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) {
if queryParams.ErrorID == "" {
zap.L().Error("errorId missing from params")
return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("ErrorID missing from params")}
}
var getErrorWithSpanReponse []model.ErrorWithSpan
query := fmt.Sprintf("SELECT errorID, exceptionType, exceptionStacktrace, exceptionEscaped, exceptionMessage, timestamp, spanID, traceID, serviceName, groupID FROM %s.%s WHERE timestamp = @timestamp AND groupID = @groupID AND errorID = @errorID LIMIT 1", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))}
err := r.db.Select(ctx, &getErrorWithSpanReponse, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if len(getErrorWithSpanReponse) > 0 {
return &getErrorWithSpanReponse[0], nil
} else {
return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("Error/Exception not found")}
}
}
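// GetErrorFromGroupID fetches one representative error/exception record for a groupID
// at the given timestamp.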
func (r *ClickHouseReader) GetErrorFromGroupID(ctx context.Context, queryParams *model.GetErrorParams) (*model.ErrorWithSpan, *model.ApiError) {
var getErrorWithSpanReponse []model.ErrorWithSpan
query := fmt.Sprintf("SELECT errorID, exceptionType, exceptionStacktrace, exceptionEscaped, exceptionMessage, timestamp, spanID, traceID, serviceName, groupID FROM %s.%s WHERE timestamp = @timestamp AND groupID = @groupID LIMIT 1", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))}
err := r.db.Select(ctx, &getErrorWithSpanReponse, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if len(getErrorWithSpanReponse) > 0 {
return &getErrorWithSpanReponse[0], nil
} else {
return nil, &model.ApiError{Typ: model.ErrorNotFound, Err: fmt.Errorf("Error/Exception not found")}
}
}
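// GetNextPrevErrorIDs resolves the neighbouring error IDs (next and previous by timestamp)
// for navigating within an error group.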
func (r *ClickHouseReader) GetNextPrevErrorIDs(ctx context.Context, queryParams *model.GetErrorParams) (*model.NextPrevErrorIDs, *model.ApiError) {
if queryParams.ErrorID == "" {
zap.L().Error("errorId missing from params")
return nil, &model.ApiError{Typ: model.ErrorBadData, Err: fmt.Errorf("ErrorID missing from params")}
}
var err *model.ApiError
getNextPrevErrorIDsResponse := model.NextPrevErrorIDs{
GroupID: queryParams.GroupID,
}
getNextPrevErrorIDsResponse.NextErrorID, getNextPrevErrorIDsResponse.NextTimestamp, err = r.getNextErrorID(ctx, queryParams)
if err != nil {
zap.L().Error("Unable to get next error ID due to err: ", zap.Error(err))
return nil, err
}
getNextPrevErrorIDsResponse.PrevErrorID, getNextPrevErrorIDsResponse.PrevTimestamp, err = r.getPrevErrorID(ctx, queryParams)
if err != nil {
zap.L().Error("Unable to get prev error ID due to err: ", zap.Error(err))
return nil, err
}
return &getNextPrevErrorIDsResponse, nil
}
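// getNextErrorID returns the error that follows the given one within its group, ordered
// by timestamp and falling back to errorID ordering when timestamps collide.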
func (r *ClickHouseReader) getNextErrorID(ctx context.Context, queryParams *model.GetErrorParams) (string, time.Time, *model.ApiError) {
var getNextErrorIDReponse []model.NextPrevErrorIDsDBResponse
query := fmt.Sprintf("SELECT errorID as nextErrorID, timestamp as nextTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp >= @timestamp AND errorID != @errorID ORDER BY timestamp ASC LIMIT 2", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))}
err := r.db.Select(ctx, &getNextErrorIDReponse, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if len(getNextErrorIDReponse) == 0 {
zap.L().Info("NextErrorID not found")
return "", time.Time{}, nil
} else if len(getNextErrorIDReponse) == 1 {
zap.L().Info("NextErrorID found")
return getNextErrorIDReponse[0].NextErrorID, getNextErrorIDReponse[0].NextTimestamp, nil
} else {
if getNextErrorIDReponse[0].Timestamp.UnixNano() == getNextErrorIDReponse[1].Timestamp.UnixNano() {
var getNextErrorIDReponse []model.NextPrevErrorIDsDBResponse
query := fmt.Sprintf("SELECT errorID as nextErrorID, timestamp as nextTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp = @timestamp AND errorID > @errorID ORDER BY errorID ASC LIMIT 1", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))}
err := r.db.Select(ctx, &getNextErrorIDReponse, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if len(getNextErrorIDReponse) == 0 {
var getNextErrorIDReponse []model.NextPrevErrorIDsDBResponse
query := fmt.Sprintf("SELECT errorID as nextErrorID, timestamp as nextTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp > @timestamp ORDER BY timestamp ASC LIMIT 1", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))}
err := r.db.Select(ctx, &getNextErrorIDReponse, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if len(getNextErrorIDReponse) == 0 {
zap.L().Info("NextErrorID not found")
return "", time.Time{}, nil
} else {
zap.L().Info("NextErrorID found")
return getNextErrorIDReponse[0].NextErrorID, getNextErrorIDReponse[0].NextTimestamp, nil
}
} else {
zap.L().Info("NextErrorID found")
return getNextErrorIDReponse[0].NextErrorID, getNextErrorIDReponse[0].NextTimestamp, nil
}
} else {
zap.L().Info("NextErrorID found")
return getNextErrorIDReponse[0].NextErrorID, getNextErrorIDReponse[0].NextTimestamp, nil
}
}
}
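// getPrevErrorID returns the error that precedes the given one within its group, ordered
// by timestamp (descending) and falling back to errorID ordering when timestamps collide.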
func (r *ClickHouseReader) getPrevErrorID(ctx context.Context, queryParams *model.GetErrorParams) (string, time.Time, *model.ApiError) {
var getPrevErrorIDReponse []model.NextPrevErrorIDsDBResponse
query := fmt.Sprintf("SELECT errorID as prevErrorID, timestamp as prevTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp <= @timestamp AND errorID != @errorID ORDER BY timestamp DESC LIMIT 2", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))}
err := r.db.Select(ctx, &getPrevErrorIDReponse, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if len(getPrevErrorIDReponse) == 0 {
zap.L().Info("PrevErrorID not found")
return "", time.Time{}, nil
} else if len(getPrevErrorIDReponse) == 1 {
zap.L().Info("PrevErrorID found")
return getPrevErrorIDReponse[0].PrevErrorID, getPrevErrorIDReponse[0].PrevTimestamp, nil
} else {
if getPrevErrorIDReponse[0].Timestamp.UnixNano() == getPrevErrorIDReponse[1].Timestamp.UnixNano() {
var getPrevErrorIDReponse []model.NextPrevErrorIDsDBResponse
query := fmt.Sprintf("SELECT errorID as prevErrorID, timestamp as prevTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp = @timestamp AND errorID < @errorID ORDER BY errorID DESC LIMIT 1", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))}
err := r.db.Select(ctx, &getPrevErrorIDReponse, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if len(getPrevErrorIDReponse) == 0 {
var getPrevErrorIDReponse []model.NextPrevErrorIDsDBResponse
query := fmt.Sprintf("SELECT errorID as prevErrorID, timestamp as prevTimestamp FROM %s.%s WHERE groupID = @groupID AND timestamp < @timestamp ORDER BY timestamp DESC LIMIT 1", r.TraceDB, r.errorTable)
args := []interface{}{clickhouse.Named("errorID", queryParams.ErrorID), clickhouse.Named("groupID", queryParams.GroupID), clickhouse.Named("timestamp", strconv.FormatInt(queryParams.Timestamp.UnixNano(), 10))}
err := r.db.Select(ctx, &getPrevErrorIDReponse, query, args...)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return "", time.Time{}, &model.ApiError{Typ: model.ErrorExec, Err: fmt.Errorf("error in processing sql query")}
}
if len(getPrevErrorIDReponse) == 0 {
zap.L().Info("PrevErrorID not found")
return "", time.Time{}, nil
} else {
zap.L().Info("PrevErrorID found")
return getPrevErrorIDReponse[0].PrevErrorID, getPrevErrorIDReponse[0].PrevTimestamp, nil
}
} else {
zap.L().Info("PrevErrorID found")
return getPrevErrorIDReponse[0].PrevErrorID, getPrevErrorIDReponse[0].PrevTimestamp, nil
}
} else {
zap.L().Info("PrevErrorID found")
return getPrevErrorIDReponse[0].PrevErrorID, getPrevErrorIDReponse[0].PrevTimestamp, nil
}
}
}
func (r *ClickHouseReader) GetMetricResultEE(ctx context.Context, query string) ([]*model.Series, string, error) {
zap.L().Error("GetMetricResultEE is not implemented for opensource version")
return nil, "", fmt.Errorf("GetMetricResultEE is not implemented for opensource version")
}
// GetMetricResult runs the query and returns list of time series
func (r *ClickHouseReader) GetMetricResult(ctx context.Context, query string) ([]*model.Series, error) {
defer utils.Elapsed("GetMetricResult", nil)()
zap.L().Info("Executing metric result query: ", zap.String("query", query))
rows, err := r.db.Query(ctx, query)
if err != nil {
zap.L().Error("Error in processing query", zap.Error(err))
return nil, err
}
var (
columnTypes = rows.ColumnTypes()
columnNames = rows.Columns()
vars = make([]interface{}, len(columnTypes))
)
for i := range columnTypes {
vars[i] = reflect.New(columnTypes[i].ScanType()).Interface()
}
	// when a group by is applied, each combination in the cartesian product of
	// attribute values is a separate series; each item in metricPointsMap
	// represents a unique series.
metricPointsMap := make(map[string][]model.MetricPoint)
// attribute key-value pairs for each group selection
attributesMap := make(map[string]map[string]string)
defer rows.Close()
for rows.Next() {
if err := rows.Scan(vars...); err != nil {
return nil, err
}
var groupBy []string
var metricPoint model.MetricPoint
groupAttributes := make(map[string]string)
		// Assuming that the end result row contains a timestamp, a value and optional labels.
		// Label keys and values are both strings.
for idx, v := range vars {
colName := columnNames[idx]
switch v := v.(type) {
case *string:
// special case for returning all labels
if colName == "fullLabels" {
var metric map[string]string
err := json.Unmarshal([]byte(*v), &metric)
if err != nil {
return nil, err
}
for key, val := range metric {
groupBy = append(groupBy, val)
groupAttributes[key] = val
}
} else {
groupBy = append(groupBy, *v)
groupAttributes[colName] = *v
}
case *time.Time:
metricPoint.Timestamp = v.UnixMilli()
case *float64:
metricPoint.Value = *v
case **float64:
			// ClickHouse returns **float64 (a nullable float) when the column is
			// derived from an expression such as SELECT count(*) / SELECT count(*)
floatVal := *v
if floatVal != nil {
metricPoint.Value = *floatVal
}
case *float32:
float32Val := float32(*v)
metricPoint.Value = float64(float32Val)
case *uint8, *uint64, *uint16, *uint32:
if _, ok := constants.ReservedColumnTargetAliases[colName]; ok {
metricPoint.Value = float64(reflect.ValueOf(v).Elem().Uint())
} else {
groupBy = append(groupBy, fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Uint()))
groupAttributes[colName] = fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Uint())
}
case *int8, *int16, *int32, *int64:
if _, ok := constants.ReservedColumnTargetAliases[colName]; ok {
metricPoint.Value = float64(reflect.ValueOf(v).Elem().Int())
} else {
groupBy = append(groupBy, fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Int()))
groupAttributes[colName] = fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Int())
}
default:
zap.L().Error("invalid var found in metric builder query result", zap.Any("v", v), zap.String("colName", colName))
}
}
sort.Strings(groupBy)
key := strings.Join(groupBy, "")
attributesMap[key] = groupAttributes
metricPointsMap[key] = append(metricPointsMap[key], metricPoint)
}
var seriesList []*model.Series
for key := range metricPointsMap {
points := metricPointsMap[key]
// the first point in each series could be invalid since the
// aggregations are applied with a point from the previous series
if len(points) > 1 {
points = points[1:]
}
attributes := attributesMap[key]
series := model.Series{Labels: attributes, Points: points}
seriesList = append(seriesList, &series)
}
return seriesList, nil
}
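// GetTotalSpans returns the total number of spans stored in the traces index table.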
func (r *ClickHouseReader) GetTotalSpans(ctx context.Context) (uint64, error) {
var totalSpans uint64
queryStr := fmt.Sprintf("SELECT count() from %s.%s;", signozTraceDBName, signozTraceTableName)
err := r.db.QueryRow(ctx, queryStr).Scan(&totalSpans)
return totalSpans, err
}
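// GetSpansInLastHeartBeatInterval returns the number of spans received within the given interval.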
func (r *ClickHouseReader) GetSpansInLastHeartBeatInterval(ctx context.Context, interval time.Duration) (uint64, error) {
var spansInLastHeartBeatInterval uint64
queryStr := fmt.Sprintf("SELECT count() from %s.%s where timestamp > toUnixTimestamp(now()-toIntervalMinute(%d));", signozTraceDBName, signozSpansTable, int(interval.Minutes()))
err := r.db.QueryRow(ctx, queryStr).Scan(&spansInLastHeartBeatInterval)
return spansInLastHeartBeatInterval, err
}
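// GetTotalLogs returns the total number of log records stored in the logs table.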
func (r *ClickHouseReader) GetTotalLogs(ctx context.Context) (uint64, error) {
var totalLogs uint64
queryStr := fmt.Sprintf("SELECT count() from %s.%s;", r.logsDB, r.logsTable)
err := r.db.QueryRow(ctx, queryStr).Scan(&totalLogs)
return totalLogs, err
}
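// FetchTemporality returns the set of temporalities (delta, cumulative, etc.) observed for each of the given metric names.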
func (r *ClickHouseReader) FetchTemporality(ctx context.Context, metricNames []string) (map[string]map[v3.Temporality]bool, error) {
metricNameToTemporality := make(map[string]map[v3.Temporality]bool)
query := fmt.Sprintf(`SELECT DISTINCT metric_name, temporality FROM %s.%s WHERE metric_name IN $1`, signozMetricDBName, signozTSTableNameV41Day)
rows, err := r.db.Query(ctx, query, metricNames)
if err != nil {
return nil, err
}
defer rows.Close()
for rows.Next() {
var metricName, temporality string
err := rows.Scan(&metricName, &temporality)
if err != nil {
return nil, err
}
if _, ok := metricNameToTemporality[metricName]; !ok {
metricNameToTemporality[metricName] = make(map[v3.Temporality]bool)
}
metricNameToTemporality[metricName][v3.Temporality(temporality)] = true
}
return metricNameToTemporality, nil
}
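// GetTimeSeriesInfo returns the total and the largest per-metric time series counts, excluding internal signoz_ metrics.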
func (r *ClickHouseReader) GetTimeSeriesInfo(ctx context.Context) (map[string]interface{}, error) {
queryStr := fmt.Sprintf("SELECT countDistinct(fingerprint) as count from %s.%s where metric_name not like 'signoz_%%' group by metric_name order by count desc;", signozMetricDBName, signozTSTableNameV41Day)
rows, err := r.db.Query(ctx, queryStr)
if err != nil {
return nil, err
}
defer rows.Close()
var totalTS, maxTS uint64
count := 0
for rows.Next() {
var value uint64
rows.Scan(&value)
totalTS += value
if count == 0 {
maxTS = value
}
count += 1
}
timeSeriesData := map[string]interface{}{}
timeSeriesData["totalTS"] = totalTS
timeSeriesData["maxTS"] = maxTS
return timeSeriesData, nil
}
func (r *ClickHouseReader) GetSamplesInfoInLastHeartBeatInterval(ctx context.Context, interval time.Duration) (uint64, error) {
var totalSamples uint64
queryStr := fmt.Sprintf("select count() from %s.%s where metric_name not like 'signoz_%%' and unix_milli > toUnixTimestamp(now()-toIntervalMinute(%d))*1000;", signozMetricDBName, signozSampleTableName, int(interval.Minutes()))
err := r.db.QueryRow(ctx, queryStr).Scan(&totalSamples)
return totalSamples, err
}
func (r *ClickHouseReader) GetTotalSamples(ctx context.Context) (uint64, error) {
var totalSamples uint64
queryStr := fmt.Sprintf("select count() from %s.%s where metric_name not like 'signoz_%%';", signozMetricDBName, signozSampleTableName)
err := r.db.QueryRow(ctx, queryStr).Scan(&totalSamples)
return totalSamples, err
}
func (r *ClickHouseReader) GetDistributedInfoInLastHeartBeatInterval(ctx context.Context) (map[string]interface{}, error) {
clusterInfo := []model.ClusterInfo{}
queryStr := `SELECT shard_num, shard_weight, replica_num, errors_count, slowdowns_count, estimated_recovery_time FROM system.clusters where cluster='cluster';`
if err := r.db.Select(ctx, &clusterInfo, queryStr); err != nil {
return nil, err
}
if len(clusterInfo) == 1 {
return clusterInfo[0].GetMapFromStruct(), nil
}
return nil, nil
}
func (r *ClickHouseReader) GetLogsInfoInLastHeartBeatInterval(ctx context.Context, interval time.Duration) (uint64, error) {
var totalLogLines uint64
queryStr := fmt.Sprintf("select count() from %s.%s where timestamp > toUnixTimestamp(now()-toIntervalMinute(%d))*1000000000;", r.logsDB, r.logsTable, int(interval.Minutes()))
err := r.db.QueryRow(ctx, queryStr).Scan(&totalLogLines)
return totalLogLines, err
}
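// GetTagsInfoInLastHeartBeatInterval collects the service names, deployment environments and SDK languages seen in spans within the given interval.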
func (r *ClickHouseReader) GetTagsInfoInLastHeartBeatInterval(ctx context.Context, interval time.Duration) (*model.TagsInfo, error) {
queryStr := fmt.Sprintf(`select serviceName, stringTagMap['deployment.environment'] as env,
stringTagMap['telemetry.sdk.language'] as language from %s.%s
where timestamp > toUnixTimestamp(now()-toIntervalMinute(%d))
group by serviceName, env, language;`, r.TraceDB, r.indexTable, int(interval.Minutes()))
tagTelemetryDataList := []model.TagTelemetryData{}
err := r.db.Select(ctx, &tagTelemetryDataList, queryStr)
if err != nil {
zap.L().Error("Error in processing sql query: ", zap.Error(err))
return nil, err
}
tagsInfo := model.TagsInfo{
Languages: make(map[string]interface{}),
Services: make(map[string]interface{}),
}
for _, tagTelemetryData := range tagTelemetryDataList {
if len(tagTelemetryData.ServiceName) != 0 && strings.Contains(tagTelemetryData.ServiceName, "prod") {
tagsInfo.Env = tagTelemetryData.ServiceName
}
if len(tagTelemetryData.Env) != 0 && strings.Contains(tagTelemetryData.Env, "prod") {
tagsInfo.Env = tagTelemetryData.Env
}
if len(tagTelemetryData.Language) != 0 {
tagsInfo.Languages[tagTelemetryData.Language] = struct{}{}
}
if len(tagTelemetryData.ServiceName) != 0 {
tagsInfo.Services[tagTelemetryData.ServiceName] = struct{}{}
}
}
return &tagsInfo, nil
}
// remove this after some time; it drops the underscore-duplicated variants of dotted field names
func removeUnderscoreDuplicateFields(fields []model.LogField) []model.LogField {
lookup := map[string]model.LogField{}
for _, v := range fields {
lookup[v.Name+v.DataType] = v
}
for k := range lookup {
if strings.Contains(k, ".") {
delete(lookup, strings.ReplaceAll(k, ".", "_"))
}
}
updatedFields := []model.LogField{}
for _, v := range lookup {
updatedFields = append(updatedFields, v)
}
return updatedFields
}
// GetDashboardsInfo returns analytics data for dashboards
func (r *ClickHouseReader) GetDashboardsInfo(ctx context.Context) (*model.DashboardsInfo, error) {
dashboardsInfo := model.DashboardsInfo{}
// fetch dashboards from dashboard db
query := "SELECT data FROM dashboards"
var dashboardsData []dashboards.Dashboard
err := r.localDB.Select(&dashboardsData, query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return &dashboardsInfo, err
}
totalDashboardsWithPanelAndName := 0
for _, dashboard := range dashboardsData {
if isDashboardWithPanelAndName(dashboard.Data) {
totalDashboardsWithPanelAndName++
}
// accumulate panel counts across all dashboards instead of
// keeping only the counts from the last dashboard
panelCounts := countPanelsInDashboard(dashboard.Data)
dashboardsInfo.LogsBasedPanels += panelCounts.LogsBasedPanels
dashboardsInfo.TracesBasedPanels += panelCounts.TracesBasedPanels
dashboardsInfo.MetricBasedPanels += panelCounts.MetricBasedPanels
}
dashboardsInfo.TotalDashboards = len(dashboardsData)
dashboardsInfo.TotalDashboardsWithPanelAndName = totalDashboardsWithPanelAndName
return &dashboardsInfo, nil
}
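// isDashboardWithPanelAndName reports whether a dashboard has a non-default title and at least one widget.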
func isDashboardWithPanelAndName(data map[string]interface{}) bool {
isDashboardName := false
isDashboardWithPanelAndName := false
if data != nil && data["title"] != nil && data["widgets"] != nil {
title, ok := data["title"].(string)
if ok && title != "Sample Title" {
isDashboardName = true
}
widgets, ok := data["widgets"]
if ok && isDashboardName {
data, ok := widgets.([]interface{})
if ok && len(data) > 0 {
isDashboardWithPanelAndName = true
}
}
}
return isDashboardWithPanelAndName
}
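// countPanelsInDashboard counts the builder-based panels in a dashboard, grouped by data source (logs, traces, metrics).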
func countPanelsInDashboard(data map[string]interface{}) model.DashboardsInfo {
var logsPanelCount, tracesPanelCount, metricsPanelCount int
// totalPanels := 0
if data != nil && data["widgets"] != nil {
widgets, ok := data["widgets"]
if ok {
data, ok := widgets.([]interface{})
if ok {
for _, widget := range data {
sData, ok := widget.(map[string]interface{})
if ok && sData["query"] != nil {
// totalPanels++
query, ok := sData["query"].(map[string]interface{})
if ok && query["queryType"] == "builder" && query["builder"] != nil {
builderData, ok := query["builder"].(map[string]interface{})
if ok && builderData["queryData"] != nil {
builderQueryData, ok := builderData["queryData"].([]interface{})
if ok {
for _, queryData := range builderQueryData {
data, ok := queryData.(map[string]interface{})
if ok {
if data["dataSource"] == "traces" {
tracesPanelCount++
} else if data["dataSource"] == "metrics" {
metricsPanelCount++
} else if data["dataSource"] == "logs" {
logsPanelCount++
}
}
}
}
}
}
}
}
}
}
}
return model.DashboardsInfo{
LogsBasedPanels: logsPanelCount,
TracesBasedPanels: tracesPanelCount,
MetricBasedPanels: metricsPanelCount,
}
}
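// GetSavedViewsInfo returns counts of saved explorer views, split by source page (traces or logs).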
func (r *ClickHouseReader) GetSavedViewsInfo(ctx context.Context) (*model.SavedViewsInfo, error) {
savedViewsInfo := model.SavedViewsInfo{}
savedViews, err := explorer.GetViews()
if err != nil {
zap.L().Debug("Error in fetching saved views info", zap.Error(err))
return &savedViewsInfo, err
}
savedViewsInfo.TotalSavedViews = len(savedViews)
for _, view := range savedViews {
if view.SourcePage == "traces" {
savedViewsInfo.TracesSavedViews += 1
} else if view.SourcePage == "logs" {
savedViewsInfo.LogsSavedViews += 1
}
}
return &savedViewsInfo, nil
}
func (r *ClickHouseReader) GetUsers(ctx context.Context) ([]model.UserPayload, error) {
users, apiErr := dao.DB().GetUsers(ctx)
if apiErr != nil {
return nil, apiErr.Err
}
return users, nil
}
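// GetLogFields returns the log attribute and resource keys, split into selected (materialized) and interesting fields.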
func (r *ClickHouseReader) GetLogFields(ctx context.Context) (*model.GetFieldsResponse, *model.ApiError) {
// response will contain top level fields from the otel log model
response := model.GetFieldsResponse{
Selected: constants.StaticSelectedLogFields,
Interesting: []model.LogField{},
}
// get attribute keys
attributes := []model.LogField{}
query := fmt.Sprintf("SELECT DISTINCT name, datatype from %s.%s group by name, datatype", r.logsDB, r.logsAttributeKeys)
err := r.db.Select(ctx, &attributes, query)
if err != nil {
return nil, &model.ApiError{Err: err, Typ: model.ErrorInternal}
}
// get resource keys
resources := []model.LogField{}
query = fmt.Sprintf("SELECT DISTINCT name, datatype from %s.%s group by name, datatype", r.logsDB, r.logsResourceKeys)
err = r.db.Select(ctx, &resources, query)
if err != nil {
return nil, &model.ApiError{Err: err, Typ: model.ErrorInternal}
}
// remove this code after some time
attributes = removeUnderscoreDuplicateFields(attributes)
resources = removeUnderscoreDuplicateFields(resources)
statements := []model.ShowCreateTableStatement{}
query = fmt.Sprintf("SHOW CREATE TABLE %s.%s", r.logsDB, r.logsLocalTable)
err = r.db.Select(ctx, &statements, query)
if err != nil {
return nil, &model.ApiError{Err: err, Typ: model.ErrorInternal}
}
extractSelectedAndInterestingFields(statements[0].Statement, constants.Attributes, &attributes, &response)
extractSelectedAndInterestingFields(statements[0].Statement, constants.Resources, &resources, &response)
return &response, nil
}
func extractSelectedAndInterestingFields(tableStatement string, fieldType string, fields *[]model.LogField, response *model.GetFieldsResponse) {
for _, field := range *fields {
field.Type = fieldType
// all static fields are assumed to be selected as we don't allow changing them
if isSelectedField(tableStatement, field) {
response.Selected = append(response.Selected, field)
} else {
response.Interesting = append(response.Interesting, field)
}
}
}
func isSelectedField(tableStatement string, field model.LogField) bool {
// for attributes and resources, a field is considered selected if a materialized column exists for it
// TODO: handle partially applied changes, e.g. the index was removed but the materialized column is still present
name := utils.GetClickhouseColumnName(field.Type, field.DataType, field.Name)
return strings.Contains(tableStatement, name)
}
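// UpdateLogField materializes a selected log attribute as a column (along with an _exists column and a skip index). Unselecting a materialized field is not allowed.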
func (r *ClickHouseReader) UpdateLogField(ctx context.Context, field *model.UpdateField) *model.ApiError {
// don't allow updating static fields
if field.Type == constants.Static {
err := errors.New("cannot update static fields")
return &model.ApiError{Err: err, Typ: model.ErrorBadData}
}
colname := utils.GetClickhouseColumnName(field.Type, field.DataType, field.Name)
// if a field is selected it means that the field needs to be indexed
if field.Selected {
keyColName := fmt.Sprintf("%s_%s_key", field.Type, strings.ToLower(field.DataType))
valueColName := fmt.Sprintf("%s_%s_value", field.Type, strings.ToLower(field.DataType))
// create materialized column
for _, table := range []string{r.logsLocalTable, r.logsTable} {
q := "ALTER TABLE %s.%s ON CLUSTER %s ADD COLUMN IF NOT EXISTS %s %s DEFAULT %s[indexOf(%s, '%s')] CODEC(ZSTD(1))"
query := fmt.Sprintf(q,
r.logsDB, table,
r.cluster,
colname, field.DataType,
valueColName,
keyColName,
field.Name,
)
err := r.db.Exec(ctx, query)
if err != nil {
return &model.ApiError{Err: err, Typ: model.ErrorInternal}
}
query = fmt.Sprintf("ALTER TABLE %s.%s ON CLUSTER %s ADD COLUMN IF NOT EXISTS %s_exists` bool DEFAULT if(indexOf(%s, '%s') != 0, true, false) CODEC(ZSTD(1))",
r.logsDB, table,
r.cluster,
strings.TrimSuffix(colname, "`"),
keyColName,
field.Name,
)
err = r.db.Exec(ctx, query)
if err != nil {
return &model.ApiError{Err: err, Typ: model.ErrorInternal}
}
}
// create the index
if strings.ToLower(field.DataType) == "bool" {
// there is no point in creating an index for bool attributes as the cardinality is just 2
return nil
}
if field.IndexType == "" {
field.IndexType = constants.DefaultLogSkipIndexType
}
if field.IndexGranularity == 0 {
field.IndexGranularity = constants.DefaultLogSkipIndexGranularity
}
query := fmt.Sprintf("ALTER TABLE %s.%s ON CLUSTER %s ADD INDEX IF NOT EXISTS %s_idx` (%s) TYPE %s GRANULARITY %d",
r.logsDB, r.logsLocalTable,
r.cluster,
strings.TrimSuffix(colname, "`"),
colname,
field.IndexType,
field.IndexGranularity,
)
err := r.db.Exec(ctx, query)
if err != nil {
return &model.ApiError{Err: err, Typ: model.ErrorInternal}
}
} else {
// We do not allow deleting a materialized column
// For more details please check https://github.com/SigNoz/signoz/issues/4566
return model.ForbiddenError(errors.New("Removing a selected field is not allowed, please reach out to support."))
// Delete the index first
// query := fmt.Sprintf("ALTER TABLE %s.%s ON CLUSTER %s DROP INDEX IF EXISTS %s_idx`", r.logsDB, r.logsLocalTable, r.cluster, strings.TrimSuffix(colname, "`"))
// err := r.db.Exec(ctx, query)
// if err != nil {
// return &model.ApiError{Err: err, Typ: model.ErrorInternal}
// }
// for _, table := range []string{r.logsTable, r.logsLocalTable} {
// // drop materialized column from logs table
// query := "ALTER TABLE %s.%s ON CLUSTER %s DROP COLUMN IF EXISTS %s "
// err := r.db.Exec(ctx, fmt.Sprintf(query,
// r.logsDB, table,
// r.cluster,
// colname,
// ),
// )
// if err != nil {
// return &model.ApiError{Err: err, Typ: model.ErrorInternal}
// }
// // drop exists column on logs table
// query = "ALTER TABLE %s.%s ON CLUSTER %s DROP COLUMN IF EXISTS %s_exists` "
// err = r.db.Exec(ctx, fmt.Sprintf(query,
// r.logsDB, table,
// r.cluster,
// strings.TrimSuffix(colname, "`"),
// ),
// )
// if err != nil {
// return &model.ApiError{Err: err, Typ: model.ErrorInternal}
// }
// }
}
return nil
}
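// GetLogs returns log lines matching the filter params, honouring order, limit and previous-page pagination.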
func (r *ClickHouseReader) GetLogs(ctx context.Context, params *model.LogsFilterParams) (*[]model.SignozLog, *model.ApiError) {
response := []model.SignozLog{}
fields, apiErr := r.GetLogFields(ctx)
if apiErr != nil {
return nil, apiErr
}
isPaginatePrev := logs.CheckIfPrevousPaginateAndModifyOrder(params)
filterSql, lenFilters, err := logs.GenerateSQLWhere(fields, params)
if err != nil {
return nil, &model.ApiError{Err: err, Typ: model.ErrorBadData}
}
data := map[string]interface{}{
"lenFilters": lenFilters,
}
if lenFilters != 0 {
userEmail, err := auth.GetEmailFromJwt(ctx)
if err == nil {
telemetry.GetInstance().SendEvent(telemetry.TELEMETRY_EVENT_LOGS_FILTERS, data, userEmail, true, false)
}
}
query := fmt.Sprintf("%s from %s.%s", constants.LogsSQLSelect, r.logsDB, r.logsTable)
if filterSql != "" {
query = fmt.Sprintf("%s where %s", query, filterSql)
}
query = fmt.Sprintf("%s order by %s %s limit %d", query, params.OrderBy, params.Order, params.Limit)
err = r.db.Select(ctx, &response, query)
if err != nil {
return nil, &model.ApiError{Err: err, Typ: model.ErrorInternal}
}
if isPaginatePrev {
// reverse the results from the db
for i, j := 0, len(response)-1; i < j; i, j = i+1, j-1 {
response[i], response[j] = response[j], response[i]
}
}
return &response, nil
}
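// TailLogs streams newly ingested logs matching the client filter over the client's channel, polling ClickHouse every liveTailRefreshSeconds until the context is cancelled.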
func (r *ClickHouseReader) TailLogs(ctx context.Context, client *model.LogsTailClient) {
fields, apiErr := r.GetLogFields(ctx)
if apiErr != nil {
client.Error <- apiErr.Err
return
}
filterSql, lenFilters, err := logs.GenerateSQLWhere(fields, &model.LogsFilterParams{
Query: client.Filter.Query,
})
data := map[string]interface{}{
"lenFilters": lenFilters,
}
if lenFilters != 0 {
userEmail, err := auth.GetEmailFromJwt(ctx)
if err == nil {
telemetry.GetInstance().SendEvent(telemetry.TELEMETRY_EVENT_LOGS_FILTERS, data, userEmail, true, false)
}
}
if err != nil {
client.Error <- err
return
}
query := fmt.Sprintf("%s from %s.%s", constants.LogsSQLSelect, r.logsDB, r.logsTable)
tsStart := uint64(time.Now().UnixNano())
if client.Filter.TimestampStart != 0 {
tsStart = client.Filter.TimestampStart
}
var idStart string
if client.Filter.IdGt != "" {
idStart = client.Filter.IdGt
}
ticker := time.NewTicker(time.Duration(r.liveTailRefreshSeconds) * time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
done := true
client.Done <- &done
zap.L().Debug("closing go routine : " + client.Name)
return
case <-ticker.C:
// fetch only the newest 100 logs; anything older is not useful for live tail
tmpQuery := fmt.Sprintf("%s where timestamp >='%d'", query, tsStart)
if filterSql != "" {
tmpQuery = fmt.Sprintf("%s and %s", tmpQuery, filterSql)
}
if idStart != "" {
tmpQuery = fmt.Sprintf("%s and id > '%s'", tmpQuery, idStart)
}
tmpQuery = fmt.Sprintf("%s order by timestamp desc, id desc limit 100", tmpQuery)
response := []model.SignozLog{}
err := r.db.Select(ctx, &response, tmpQuery)
if err != nil {
zap.L().Error("Error while getting logs", zap.Error(err))
client.Error <- err
return
}
for i := len(response) - 1; i >= 0; i-- {
select {
case <-ctx.Done():
done := true
client.Done <- &done
zap.L().Debug("closing go routine while sending logs : " + client.Name)
return
default:
client.Logs <- &response[i]
if i == 0 {
tsStart = response[i].Timestamp
idStart = response[i].ID
}
}
}
}
}
}
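// AggregateLogs computes time-bucketed aggregates (count by default) over the logs matching the filter, optionally grouped by a single attribute.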
func (r *ClickHouseReader) AggregateLogs(ctx context.Context, params *model.LogsAggregateParams) (*model.GetLogsAggregatesResponse, *model.ApiError) {
logAggregatesDBResponseItems := []model.LogsAggregatesDBResponseItem{}
function := "toFloat64(count()) as value"
if params.Function != "" {
function = fmt.Sprintf("toFloat64(%s) as value", params.Function)
}
fields, apiErr := r.GetLogFields(ctx)
if apiErr != nil {
return nil, apiErr
}
filterSql, lenFilters, err := logs.GenerateSQLWhere(fields, &model.LogsFilterParams{
Query: params.Query,
})
if err != nil {
return nil, &model.ApiError{Err: err, Typ: model.ErrorBadData}
}
data := map[string]interface{}{
"lenFilters": lenFilters,
}
if lenFilters != 0 {
userEmail, err := auth.GetEmailFromJwt(ctx)
if err == nil {
telemetry.GetInstance().SendEvent(telemetry.TELEMETRY_EVENT_LOGS_FILTERS, data, userEmail, true, false)
}
}
query := ""
if params.GroupBy != "" {
query = fmt.Sprintf("SELECT toInt64(toUnixTimestamp(toStartOfInterval(toDateTime(timestamp/1000000000), INTERVAL %d minute))*1000000000) as ts_start_interval, toString(%s) as groupBy, "+
"%s "+
"FROM %s.%s WHERE (timestamp >= '%d' AND timestamp <= '%d' )",
params.StepSeconds/60, params.GroupBy, function, r.logsDB, r.logsTable, params.TimestampStart, params.TimestampEnd)
} else {
query = fmt.Sprintf("SELECT toInt64(toUnixTimestamp(toStartOfInterval(toDateTime(timestamp/1000000000), INTERVAL %d minute))*1000000000) as ts_start_interval, "+
"%s "+
"FROM %s.%s WHERE (timestamp >= '%d' AND timestamp <= '%d' )",
params.StepSeconds/60, function, r.logsDB, r.logsTable, params.TimestampStart, params.TimestampEnd)
}
if filterSql != "" {
query = fmt.Sprintf("%s AND ( %s ) ", query, filterSql)
}
if params.GroupBy != "" {
query = fmt.Sprintf("%s GROUP BY ts_start_interval, toString(%s) as groupBy ORDER BY ts_start_interval", query, params.GroupBy)
} else {
query = fmt.Sprintf("%s GROUP BY ts_start_interval ORDER BY ts_start_interval", query)
}
err = r.db.Select(ctx, &logAggregatesDBResponseItems, query)
if err != nil {
return nil, &model.ApiError{Err: err, Typ: model.ErrorInternal}
}
aggregateResponse := model.GetLogsAggregatesResponse{
Items: make(map[int64]model.LogsAggregatesResponseItem),
}
for i := range logAggregatesDBResponseItems {
if elem, ok := aggregateResponse.Items[int64(logAggregatesDBResponseItems[i].Timestamp)]; ok {
if params.GroupBy != "" && logAggregatesDBResponseItems[i].GroupBy != "" {
elem.GroupBy[logAggregatesDBResponseItems[i].GroupBy] = logAggregatesDBResponseItems[i].Value
}
aggregateResponse.Items[logAggregatesDBResponseItems[i].Timestamp] = elem
} else {
if params.GroupBy != "" && logAggregatesDBResponseItems[i].GroupBy != "" {
aggregateResponse.Items[logAggregatesDBResponseItems[i].Timestamp] = model.LogsAggregatesResponseItem{
Timestamp: logAggregatesDBResponseItems[i].Timestamp,
GroupBy: map[string]interface{}{logAggregatesDBResponseItems[i].GroupBy: logAggregatesDBResponseItems[i].Value},
}
} else if params.GroupBy == "" {
aggregateResponse.Items[logAggregatesDBResponseItems[i].Timestamp] = model.LogsAggregatesResponseItem{
Timestamp: logAggregatesDBResponseItems[i].Timestamp,
Value: logAggregatesDBResponseItems[i].Value,
}
}
}
}
return &aggregateResponse, nil
}
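// QueryDashboardVars runs a dashboard-variable query and returns the scanned values as the variable options.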
func (r *ClickHouseReader) QueryDashboardVars(ctx context.Context, query string) (*model.DashboardVar, error) {
var result model.DashboardVar
rows, err := r.db.Query(ctx, query)
zap.L().Info(query)
if err != nil {
zap.L().Error("Error in processing sql query", zap.Error(err))
return nil, err
}
var (
columnTypes = rows.ColumnTypes()
vars = make([]interface{}, len(columnTypes))
)
for i := range columnTypes {
vars[i] = reflect.New(columnTypes[i].ScanType()).Interface()
}
defer rows.Close()
for rows.Next() {
if err := rows.Scan(vars...); err != nil {
return nil, err
}
for _, v := range vars {
switch v := v.(type) {
case *string, *int8, *int16, *int32, *int64, *uint8, *uint16, *uint32, *uint64, *float32, *float64, *time.Time, *bool:
result.VariableValues = append(result.VariableValues, reflect.ValueOf(v).Elem().Interface())
default:
return nil, fmt.Errorf("unsupported value type encountered")
}
}
}
return &result, nil
}
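// GetMetricAggregateAttributes returns the metric names (with type and temporality) matching the search text for the metrics query builder.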
func (r *ClickHouseReader) GetMetricAggregateAttributes(ctx context.Context, req *v3.AggregateAttributeRequest) (*v3.AggregateAttributeResponse, error) {
var query string
var err error
var rows driver.Rows
var response v3.AggregateAttributeResponse
query = fmt.Sprintf("SELECT metric_name, type, is_monotonic, temporality FROM %s.%s WHERE metric_name ILIKE $1 GROUP BY metric_name, type, is_monotonic, temporality", signozMetricDBName, signozTSTableNameV41Day)
if req.Limit != 0 {
query = query + fmt.Sprintf(" LIMIT %d;", req.Limit)
}
rows, err = r.db.Query(ctx, query, fmt.Sprintf("%%%s%%", req.SearchText))
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
seen := make(map[string]struct{})
var metricName, typ, temporality string
var isMonotonic bool
for rows.Next() {
if err := rows.Scan(&metricName, &typ, &isMonotonic, &temporality); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
// Non-monotonic cumulative sums are treated as gauges
if typ == "Sum" && !isMonotonic && temporality == string(v3.Cumulative) {
typ = "Gauge"
}
// unlike the `tag`/`resource` types used for traces/logs, `Type` here is the metric type
key := v3.AttributeKey{
Key: metricName,
DataType: v3.AttributeKeyDataTypeFloat64,
Type: v3.AttributeKeyType(typ),
IsColumn: true,
}
// remove duplicates
if _, ok := seen[metricName+typ]; ok {
continue
}
seen[metricName+typ] = struct{}{}
response.AttributeKeys = append(response.AttributeKeys, key)
}
return &response, nil
}
func (r *ClickHouseReader) GetMetricAttributeKeys(ctx context.Context, req *v3.FilterAttributeKeyRequest) (*v3.FilterAttributeKeyResponse, error) {
var query string
var err error
var rows driver.Rows
var response v3.FilterAttributeKeyResponse
// skip the internal attributes, i.e. attributes starting with __
query = fmt.Sprintf("SELECT arrayJoin(tagKeys) AS distinctTagKey FROM (SELECT JSONExtractKeys(labels) AS tagKeys FROM %s.%s WHERE metric_name=$1 AND unix_milli >= $2 GROUP BY tagKeys) WHERE distinctTagKey ILIKE $3 AND distinctTagKey NOT LIKE '\\_\\_%%' GROUP BY distinctTagKey", signozMetricDBName, signozTSTableNameV41Day)
if req.Limit != 0 {
query = query + fmt.Sprintf(" LIMIT %d;", req.Limit)
}
rows, err = r.db.Query(ctx, query, req.AggregateAttribute, common.PastDayRoundOff(), fmt.Sprintf("%%%s%%", req.SearchText))
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
var attributeKey string
for rows.Next() {
if err := rows.Scan(&attributeKey); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
key := v3.AttributeKey{
Key: attributeKey,
DataType: v3.AttributeKeyDataTypeString, // https://github.com/OpenObservability/OpenMetrics/blob/main/proto/openmetrics_data_model.proto#L64-L72.
Type: v3.AttributeKeyTypeTag,
IsColumn: false,
}
response.AttributeKeys = append(response.AttributeKeys, key)
}
return &response, nil
}
func (r *ClickHouseReader) GetMetricAttributeValues(ctx context.Context, req *v3.FilterAttributeValueRequest) (*v3.FilterAttributeValueResponse, error) {
var query string
var err error
var rows driver.Rows
var attributeValues v3.FilterAttributeValueResponse
query = fmt.Sprintf("SELECT JSONExtractString(labels, $1) AS tagValue FROM %s.%s WHERE metric_name=$2 AND JSONExtractString(labels, $3) ILIKE $4 AND unix_milli >= $5 GROUP BY tagValue", signozMetricDBName, signozTSTableNameV41Day)
if req.Limit != 0 {
query = query + fmt.Sprintf(" LIMIT %d;", req.Limit)
}
rows, err = r.db.Query(ctx, query, req.FilterAttributeKey, req.AggregateAttribute, req.FilterAttributeKey, fmt.Sprintf("%%%s%%", req.SearchText), common.PastDayRoundOff())
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
var attributeValue string
for rows.Next() {
if err := rows.Scan(&attributeValue); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
// https://github.com/OpenObservability/OpenMetrics/blob/main/proto/openmetrics_data_model.proto#L64-L72
// this may change in the future if we use OTLP as the data model
attributeValues.StringAttributeValues = append(attributeValues.StringAttributeValues, attributeValue)
}
return &attributeValues, nil
}
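// GetMetricMetadata returns the temporality, description, type, unit and monotonicity of a metric, along with its histogram bucket bounds (le values) for the given service.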
func (r *ClickHouseReader) GetMetricMetadata(ctx context.Context, metricName, serviceName string) (*v3.MetricMetadataResponse, error) {
unixMilli := common.PastDayRoundOff()
// Note: metric metadata should be accessible regardless of the time range selection
// our standard retention period is 30 days, so we are querying the table v4_1_day to reduce the
// amount of data scanned
query := fmt.Sprintf("SELECT temporality, description, type, unit, is_monotonic from %s.%s WHERE metric_name=$1 AND unix_milli >= $2 GROUP BY temporality, description, type, unit, is_monotonic", signozMetricDBName, signozTSTableNameV41Day)
rows, err := r.db.Query(ctx, query, metricName, unixMilli)
if err != nil {
zap.L().Error("Error while fetching metric metadata", zap.Error(err))
return nil, fmt.Errorf("error while fetching metric metadata: %s", err.Error())
}
defer rows.Close()
var deltaExists, isMonotonic bool
var temporality, description, metricType, unit string
for rows.Next() {
if err := rows.Scan(&temporality, &description, &metricType, &unit, &isMonotonic); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
if temporality == string(v3.Delta) {
deltaExists = true
}
}
query = fmt.Sprintf("SELECT JSONExtractString(labels, 'le') as le from %s.%s WHERE metric_name=$1 AND unix_milli >= $2 AND type = 'Histogram' AND JSONExtractString(labels, 'service_name') = $3 GROUP BY le ORDER BY le", signozMetricDBName, signozTSTableNameV41Day)
rows, err = r.db.Query(ctx, query, metricName, unixMilli, serviceName)
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
var leFloat64 []float64
for rows.Next() {
var leStr string
if err := rows.Scan(&leStr); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
le, err := strconv.ParseFloat(leStr, 64)
// ignore the error and continue if the value is not a float
// ideally this should not happen but we have seen ClickHouse
// returning empty string for some values
if err != nil {
zap.L().Error("error while parsing le value", zap.Error(err))
continue
}
if math.IsInf(le, 0) {
continue
}
leFloat64 = append(leFloat64, le)
}
return &v3.MetricMetadataResponse{
Delta: deltaExists,
Le: leFloat64,
Description: description,
Unit: unit,
Type: metricType,
IsMonotonic: isMonotonic,
Temporality: temporality,
}, nil
}
func (r *ClickHouseReader) GetLatestReceivedMetric(
ctx context.Context, metricNames []string,
) (*model.MetricStatus, *model.ApiError) {
if len(metricNames) < 1 {
return nil, nil
}
quotedMetricNames := []string{}
for _, m := range metricNames {
quotedMetricNames = append(quotedMetricNames, fmt.Sprintf(`'%s'`, m))
}
commaSeparatedMetricNames := strings.Join(quotedMetricNames, ", ")
query := fmt.Sprintf(`
SELECT metric_name, labels, unix_milli
from %s.%s
where metric_name in (
%s
)
order by unix_milli desc
limit 1
`, signozMetricDBName, signozTSTableNameV4, commaSeparatedMetricNames,
)
rows, err := r.db.Query(ctx, query)
if err != nil {
return nil, model.InternalError(fmt.Errorf(
"couldn't query clickhouse for received metrics status: %w", err,
))
}
defer rows.Close()
var result *model.MetricStatus
if rows.Next() {
result = &model.MetricStatus{}
var labelsJson string
err := rows.Scan(
&result.MetricName,
&labelsJson,
&result.LastReceivedTsMillis,
)
if err != nil {
return nil, model.InternalError(fmt.Errorf(
"couldn't scan metric status row: %w", err,
))
}
err = json.Unmarshal([]byte(labelsJson), &result.LastReceivedLabels)
if err != nil {
return nil, model.InternalError(fmt.Errorf(
"couldn't unmarshal metric labels json: %w", err,
))
}
}
return result, nil
}
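// isColumn reports whether the given attribute is already materialized as a column in the provided CREATE TABLE statement.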
func isColumn(tableStatement, attrType, field, datType string) bool {
// attrType will be `resource` or `tag`; the `tag` type maps to the `attribute` column prefix
name := utils.GetClickhouseColumnName(attrType, datType, field)
return strings.Contains(tableStatement, fmt.Sprintf("%s ", name))
}
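// GetLogAggregateAttributes returns the log attribute keys that are valid aggregation targets for the given operator (numeric types only for math operators).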
func (r *ClickHouseReader) GetLogAggregateAttributes(ctx context.Context, req *v3.AggregateAttributeRequest) (*v3.AggregateAttributeResponse, error) {
var query string
var err error
var rows driver.Rows
var response v3.AggregateAttributeResponse
var stringAllowed bool
where := ""
switch req.Operator {
case
v3.AggregateOperatorCountDistinct,
v3.AggregateOperatorCount:
where = "tagKey ILIKE $1"
stringAllowed = true
case
v3.AggregateOperatorRateSum,
v3.AggregateOperatorRateMax,
v3.AggregateOperatorRateAvg,
v3.AggregateOperatorRate,
v3.AggregateOperatorRateMin,
v3.AggregateOperatorP05,
v3.AggregateOperatorP10,
v3.AggregateOperatorP20,
v3.AggregateOperatorP25,
v3.AggregateOperatorP50,
v3.AggregateOperatorP75,
v3.AggregateOperatorP90,
v3.AggregateOperatorP95,
v3.AggregateOperatorP99,
v3.AggregateOperatorAvg,
v3.AggregateOperatorSum,
v3.AggregateOperatorMin,
v3.AggregateOperatorMax:
where = "tagKey ILIKE $1 AND (tagDataType='int64' or tagDataType='float64')"
stringAllowed = false
case
v3.AggregateOperatorNoOp:
return &v3.AggregateAttributeResponse{}, nil
default:
return nil, fmt.Errorf("unsupported aggregate operator")
}
query = fmt.Sprintf("SELECT DISTINCT(tagKey), tagType, tagDataType from %s.%s WHERE %s limit $2", r.logsDB, r.logsTagAttributeTable, where)
rows, err = r.db.Query(ctx, query, fmt.Sprintf("%%%s%%", req.SearchText), req.Limit)
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
statements := []model.ShowCreateTableStatement{}
query = fmt.Sprintf("SHOW CREATE TABLE %s.%s", r.logsDB, r.logsLocalTable)
err = r.db.Select(ctx, &statements, query)
if err != nil {
return nil, fmt.Errorf("error while fetching logs schema: %s", err.Error())
}
var tagKey string
var dataType string
var attType string
for rows.Next() {
if err := rows.Scan(&tagKey, &attType, &dataType); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
key := v3.AttributeKey{
Key: tagKey,
DataType: v3.AttributeKeyDataType(dataType),
Type: v3.AttributeKeyType(attType),
IsColumn: isColumn(statements[0].Statement, attType, tagKey, dataType),
}
response.AttributeKeys = append(response.AttributeKeys, key)
}
// add the static top-level fields that match the operator and search text
for _, field := range constants.StaticFieldsLogsV3 {
if (!stringAllowed && field.DataType == v3.AttributeKeyDataTypeString) || (v3.AttributeKey{} == field) {
continue
} else if len(req.SearchText) == 0 || strings.Contains(field.Key, req.SearchText) {
response.AttributeKeys = append(response.AttributeKeys, field)
}
}
return &response, nil
}
func (r *ClickHouseReader) GetLogAttributeKeys(ctx context.Context, req *v3.FilterAttributeKeyRequest) (*v3.FilterAttributeKeyResponse, error) {
var query string
var err error
var rows driver.Rows
var response v3.FilterAttributeKeyResponse
if len(req.SearchText) != 0 {
query = fmt.Sprintf("select distinct tagKey, tagType, tagDataType from %s.%s where tagKey ILIKE $1 limit $2", r.logsDB, r.logsTagAttributeTable)
rows, err = r.db.Query(ctx, query, fmt.Sprintf("%%%s%%", req.SearchText), req.Limit)
} else {
query = fmt.Sprintf("select distinct tagKey, tagType, tagDataType from %s.%s limit $1", r.logsDB, r.logsTagAttributeTable)
rows, err = r.db.Query(ctx, query, req.Limit)
}
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
statements := []model.ShowCreateTableStatement{}
query = fmt.Sprintf("SHOW CREATE TABLE %s.%s", r.logsDB, r.logsLocalTable)
err = r.db.Select(ctx, &statements, query)
if err != nil {
return nil, fmt.Errorf("error while fetching logs schema: %s", err.Error())
}
var attributeKey string
var attributeDataType string
var tagType string
for rows.Next() {
if err := rows.Scan(&attributeKey, &tagType, &attributeDataType); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
key := v3.AttributeKey{
Key: attributeKey,
DataType: v3.AttributeKeyDataType(attributeDataType),
Type: v3.AttributeKeyType(tagType),
IsColumn: isColumn(statements[0].Statement, tagType, attributeKey, attributeDataType),
}
response.AttributeKeys = append(response.AttributeKeys, key)
}
// add the static top-level fields that match the search text
for _, f := range constants.StaticFieldsLogsV3 {
if (v3.AttributeKey{} == f) {
continue
}
if len(req.SearchText) == 0 || strings.Contains(f.Key, req.SearchText) {
response.AttributeKeys = append(response.AttributeKeys, f)
}
}
return &response, nil
}
func (r *ClickHouseReader) GetLogAttributeValues(ctx context.Context, req *v3.FilterAttributeValueRequest) (*v3.FilterAttributeValueResponse, error) {
var err error
var filterValueColumn string
var rows driver.Rows
var attributeValues v3.FilterAttributeValueResponse
// if dataType or tagType is not present return empty response
if len(req.FilterAttributeKeyDataType) == 0 || len(req.TagType) == 0 {
// also check if it is not a top level key
if _, ok := constants.StaticFieldsLogsV3[req.FilterAttributeKey]; !ok {
return &v3.FilterAttributeValueResponse{}, nil
}
}
// ignore autocomplete request for body
if req.FilterAttributeKey == "body" {
return &v3.FilterAttributeValueResponse{}, nil
}
// if data type is bool, return true and false
if req.FilterAttributeKeyDataType == v3.AttributeKeyDataTypeBool {
return &v3.FilterAttributeValueResponse{
BoolAttributeValues: []bool{true, false},
}, nil
}
query := "select distinct"
switch req.FilterAttributeKeyDataType {
case v3.AttributeKeyDataTypeInt64:
filterValueColumn = "int64TagValue"
case v3.AttributeKeyDataTypeFloat64:
filterValueColumn = "float64TagValue"
case v3.AttributeKeyDataTypeString:
filterValueColumn = "stringTagValue"
}
searchText := fmt.Sprintf("%%%s%%", req.SearchText)
// check if the tagKey is a topLevelColumn
if _, ok := constants.StaticFieldsLogsV3[req.FilterAttributeKey]; ok {
// query the column for the last 48 hours
filterValueColumnWhere := req.FilterAttributeKey
selectKey := req.FilterAttributeKey
if req.FilterAttributeKeyDataType != v3.AttributeKeyDataTypeString {
filterValueColumnWhere = fmt.Sprintf("toString(%s)", req.FilterAttributeKey)
selectKey = fmt.Sprintf("toInt64(%s)", req.FilterAttributeKey)
}
// prepare the query and run
if len(req.SearchText) != 0 {
query = fmt.Sprintf("select distinct %s from %s.%s where timestamp >= toInt64(toUnixTimestamp(now() - INTERVAL 48 HOUR)*1000000000) and %s ILIKE $1 limit $2", selectKey, r.logsDB, r.logsTable, filterValueColumnWhere)
rows, err = r.db.Query(ctx, query, searchText, req.Limit)
} else {
query = fmt.Sprintf("select distinct %s from %s.%s where timestamp >= toInt64(toUnixTimestamp(now() - INTERVAL 48 HOUR)*1000000000) limit $1", selectKey, r.logsDB, r.logsTable)
rows, err = r.db.Query(ctx, query, req.Limit)
}
} else if len(req.SearchText) != 0 {
filterValueColumnWhere := filterValueColumn
if req.FilterAttributeKeyDataType != v3.AttributeKeyDataTypeString {
filterValueColumnWhere = fmt.Sprintf("toString(%s)", filterValueColumn)
}
query = fmt.Sprintf("select distinct %s from %s.%s where tagKey=$1 and %s ILIKE $2 and tagType=$3 limit $4", filterValueColumn, r.logsDB, r.logsTagAttributeTable, filterValueColumnWhere)
rows, err = r.db.Query(ctx, query, req.FilterAttributeKey, searchText, req.TagType, req.Limit)
} else {
query = fmt.Sprintf("select distinct %s from %s.%s where tagKey=$1 and tagType=$2 limit $3", filterValueColumn, r.logsDB, r.logsTagAttributeTable)
rows, err = r.db.Query(ctx, query, req.FilterAttributeKey, req.TagType, req.Limit)
}
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
var strAttributeValue string
var float64AttributeValue sql.NullFloat64
var int64AttributeValue sql.NullInt64
for rows.Next() {
switch req.FilterAttributeKeyDataType {
case v3.AttributeKeyDataTypeInt64:
if err := rows.Scan(&int64AttributeValue); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
if int64AttributeValue.Valid {
attributeValues.NumberAttributeValues = append(attributeValues.NumberAttributeValues, int64AttributeValue.Int64)
}
case v3.AttributeKeyDataTypeFloat64:
if err := rows.Scan(&float64AttributeValue); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
if float64AttributeValue.Valid {
attributeValues.NumberAttributeValues = append(attributeValues.NumberAttributeValues, float64AttributeValue.Float64)
}
case v3.AttributeKeyDataTypeString:
if err := rows.Scan(&strAttributeValue); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
attributeValues.StringAttributeValues = append(attributeValues.StringAttributeValues, strAttributeValue)
}
}
return &attributeValues, nil
}
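// readRow scans a single query result row into its group-by values, label attributes and, when present, a numeric point.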
func readRow(vars []interface{}, columnNames []string, countOfNumberCols int) ([]string, map[string]string, []map[string]string, *v3.Point) {
// Each row will have a value and a timestamp, and an optional list of label values
// example: {Timestamp: ..., Value: ...}
// The timestamp may also not be present in cases where the time series is reduced to a single value
var point v3.Point
// groupBy is a container to hold label values for the current point
// example: ["frontend", "/fetch"]
var groupBy []string
var groupAttributesArray []map[string]string
// groupAttributes is a container to hold the key-value pairs for the current
// metric point.
// example: {"serviceName": "frontend", "operation": "/fetch"}
groupAttributes := make(map[string]string)
isValidPoint := false
for idx, v := range vars {
colName := columnNames[idx]
switch v := v.(type) {
case *string:
// special case for returning all labels in metrics datasource
if colName == "fullLabels" {
var metric map[string]string
err := json.Unmarshal([]byte(*v), &metric)
if err != nil {
zap.L().Error("unexpected error encountered", zap.Error(err))
}
for key, val := range metric {
groupBy = append(groupBy, val)
if _, ok := groupAttributes[key]; !ok {
groupAttributesArray = append(groupAttributesArray, map[string]string{key: val})
}
groupAttributes[key] = val
}
} else {
groupBy = append(groupBy, *v)
if _, ok := groupAttributes[colName]; !ok {
groupAttributesArray = append(groupAttributesArray, map[string]string{colName: *v})
}
groupAttributes[colName] = *v
}
case *time.Time:
point.Timestamp = v.UnixMilli()
case *float64, *float32:
if _, ok := constants.ReservedColumnTargetAliases[colName]; ok || countOfNumberCols == 1 {
isValidPoint = true
point.Value = float64(reflect.ValueOf(v).Elem().Float())
} else {
groupBy = append(groupBy, fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Float()))
if _, ok := groupAttributes[colName]; !ok {
groupAttributesArray = append(groupAttributesArray, map[string]string{colName: fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Float())})
}
groupAttributes[colName] = fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Float())
}
case **float64, **float32:
val := reflect.ValueOf(v)
if val.IsValid() && !val.IsNil() && !val.Elem().IsNil() {
value := reflect.ValueOf(v).Elem().Elem().Float()
if _, ok := constants.ReservedColumnTargetAliases[colName]; ok || countOfNumberCols == 1 {
isValidPoint = true
point.Value = value
} else {
groupBy = append(groupBy, fmt.Sprintf("%v", value))
if _, ok := groupAttributes[colName]; !ok {
groupAttributesArray = append(groupAttributesArray, map[string]string{colName: fmt.Sprintf("%v", value)})
}
groupAttributes[colName] = fmt.Sprintf("%v", value)
}
}
case *uint, *uint8, *uint64, *uint16, *uint32:
if _, ok := constants.ReservedColumnTargetAliases[colName]; ok || countOfNumberCols == 1 {
isValidPoint = true
point.Value = float64(reflect.ValueOf(v).Elem().Uint())
} else {
groupBy = append(groupBy, fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Uint()))
if _, ok := groupAttributes[colName]; !ok {
groupAttributesArray = append(groupAttributesArray, map[string]string{colName: fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Uint())})
}
groupAttributes[colName] = fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Uint())
}
case **uint, **uint8, **uint64, **uint16, **uint32:
val := reflect.ValueOf(v)
if val.IsValid() && !val.IsNil() && !val.Elem().IsNil() {
value := reflect.ValueOf(v).Elem().Elem().Uint()
if _, ok := constants.ReservedColumnTargetAliases[colName]; ok || countOfNumberCols == 1 {
isValidPoint = true
point.Value = float64(value)
} else {
groupBy = append(groupBy, fmt.Sprintf("%v", value))
if _, ok := groupAttributes[colName]; !ok {
groupAttributesArray = append(groupAttributesArray, map[string]string{colName: fmt.Sprintf("%v", value)})
}
groupAttributes[colName] = fmt.Sprintf("%v", value)
}
}
case *int, *int8, *int16, *int32, *int64:
if _, ok := constants.ReservedColumnTargetAliases[colName]; ok || countOfNumberCols == 1 {
isValidPoint = true
point.Value = float64(reflect.ValueOf(v).Elem().Int())
} else {
groupBy = append(groupBy, fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Int()))
if _, ok := groupAttributes[colName]; !ok {
groupAttributesArray = append(groupAttributesArray, map[string]string{colName: fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Int())})
}
groupAttributes[colName] = fmt.Sprintf("%v", reflect.ValueOf(v).Elem().Int())
}
case **int, **int8, **int16, **int32, **int64:
val := reflect.ValueOf(v)
if val.IsValid() && !val.IsNil() && !val.Elem().IsNil() {
value := reflect.ValueOf(v).Elem().Elem().Int()
if _, ok := constants.ReservedColumnTargetAliases[colName]; ok || countOfNumberCols == 1 {
isValidPoint = true
point.Value = float64(value)
} else {
groupBy = append(groupBy, fmt.Sprintf("%v", value))
if _, ok := groupAttributes[colName]; !ok {
groupAttributesArray = append(groupAttributesArray, map[string]string{colName: fmt.Sprintf("%v", value)})
}
groupAttributes[colName] = fmt.Sprintf("%v", value)
}
}
case *bool:
groupBy = append(groupBy, fmt.Sprintf("%v", *v))
if _, ok := groupAttributes[colName]; !ok {
groupAttributesArray = append(groupAttributesArray, map[string]string{colName: fmt.Sprintf("%v", *v)})
}
groupAttributes[colName] = fmt.Sprintf("%v", *v)
default:
zap.L().Error("unsupported var type found in query builder query result", zap.Any("v", v), zap.String("colName", colName))
}
}
if isValidPoint {
return groupBy, groupAttributes, groupAttributesArray, &point
}
return groupBy, groupAttributes, groupAttributesArray, nil
}
func readRowsForTimeSeriesResult(rows driver.Rows, vars []interface{}, columnNames []string, countOfNumberCols int) ([]*v3.Series, error) {
// when groupBy is applied, each combination of cartesian product
// of attribute values is a separate series. Each item in seriesToPoints
// represents a unique series where the key is the sorted attribute values joined
// together and the value is the list of points for that series
// For instance, group by (serviceName, operation)
// with two services and three operations in each will result in (maximum of) 6 series
// ("frontend", "order") x ("/fetch", "/fetch/{Id}", "/order")
//
// ("frontend", "/fetch")
// ("frontend", "/fetch/{Id}")
// ("frontend", "/order")
// ("order", "/fetch")
// ("order", "/fetch/{Id}")
// ("order", "/order")
seriesToPoints := make(map[string][]v3.Point)
var keys []string
// seriesToAttrs is a mapping of key to a map of attribute key to attribute value
// for each series. This is used to populate the series' attributes
// For instance, for the above example, the seriesToAttrs will be
// {
// "frontend,/fetch": {"serviceName": "frontend", "operation": "/fetch"},
// "frontend,/fetch/{Id}": {"serviceName": "frontend", "operation": "/fetch/{Id}"},
// "frontend,/order": {"serviceName": "frontend", "operation": "/order"},
// "order,/fetch": {"serviceName": "order", "operation": "/fetch"},
// "order,/fetch/{Id}": {"serviceName": "order", "operation": "/fetch/{Id}"},
// "order,/order": {"serviceName": "order", "operation": "/order"},
// }
seriesToAttrs := make(map[string]map[string]string)
labelsArray := make(map[string][]map[string]string)
for rows.Next() {
if err := rows.Scan(vars...); err != nil {
return nil, err
}
groupBy, groupAttributes, groupAttributesArray, metricPoint := readRow(vars, columnNames, countOfNumberCols)
// skip the point if the value is NaN or Inf
// are they ever useful enough to be returned?
if metricPoint != nil && (math.IsNaN(metricPoint.Value) || math.IsInf(metricPoint.Value, 0)) {
continue
}
sort.Strings(groupBy)
key := strings.Join(groupBy, "")
if _, exists := seriesToAttrs[key]; !exists {
keys = append(keys, key)
}
seriesToAttrs[key] = groupAttributes
labelsArray[key] = groupAttributesArray
if metricPoint != nil {
seriesToPoints[key] = append(seriesToPoints[key], *metricPoint)
}
}
var seriesList []*v3.Series
for _, key := range keys {
points := seriesToPoints[key]
series := v3.Series{Labels: seriesToAttrs[key], Points: points, LabelsArray: labelsArray[key]}
seriesList = append(seriesList, &series)
}
return seriesList, getPersonalisedError(rows.Err())
}
func logCommentKVs(ctx context.Context) map[string]string {
kv := ctx.Value(common.LogCommentKey)
if kv == nil {
return nil
}
logCommentKVs, ok := kv.(map[string]string)
if !ok {
return nil
}
return logCommentKVs
}
// GetTimeSeriesResultV3 runs the query and returns list of time series
func (r *ClickHouseReader) GetTimeSeriesResultV3(ctx context.Context, query string) ([]*v3.Series, error) {
ctxArgs := map[string]interface{}{"query": query}
for k, v := range logCommentKVs(ctx) {
ctxArgs[k] = v
}
defer utils.Elapsed("GetTimeSeriesResultV3", ctxArgs)()
rows, err := r.db.Query(ctx, query)
if err != nil {
zap.L().Error("error while reading time series result", zap.Error(err))
return nil, err
}
defer rows.Close()
var (
columnTypes = rows.ColumnTypes()
columnNames = rows.Columns()
vars = make([]interface{}, len(columnTypes))
)
var countOfNumberCols int
for i := range columnTypes {
vars[i] = reflect.New(columnTypes[i].ScanType()).Interface()
switch columnTypes[i].ScanType().Kind() {
case reflect.Float32,
reflect.Float64,
reflect.Uint,
reflect.Uint8,
reflect.Uint16,
reflect.Uint32,
reflect.Uint64,
reflect.Int,
reflect.Int8,
reflect.Int16,
reflect.Int32,
reflect.Int64:
countOfNumberCols++
}
}
return readRowsForTimeSeriesResult(rows, vars, columnNames, countOfNumberCols)
}
// GetListResultV3 runs the query and returns list of rows
func (r *ClickHouseReader) GetListResultV3(ctx context.Context, query string) ([]*v3.Row, error) {
ctxArgs := map[string]interface{}{"query": query}
for k, v := range logCommentKVs(ctx) {
ctxArgs[k] = v
}
defer utils.Elapsed("GetListResultV3", ctxArgs)()
rows, err := r.db.Query(ctx, query)
if err != nil {
zap.L().Error("error while reading time series result", zap.Error(err))
return nil, err
}
defer rows.Close()
var (
columnTypes = rows.ColumnTypes()
columnNames = rows.Columns()
)
var rowList []*v3.Row
for rows.Next() {
var vars = make([]interface{}, len(columnTypes))
for i := range columnTypes {
vars[i] = reflect.New(columnTypes[i].ScanType()).Interface()
}
if err := rows.Scan(vars...); err != nil {
return nil, err
}
row := map[string]interface{}{}
var t time.Time
for idx, v := range vars {
if columnNames[idx] == "timestamp" {
t = time.Unix(0, int64(*v.(*uint64)))
} else if columnNames[idx] == "timestamp_datetime" {
t = *v.(*time.Time)
} else {
row[columnNames[idx]] = v
}
}
// remove duplicate _ attributes for logs.
// remove this function after a month
removeDuplicateUnderscoreAttributes(row)
rowList = append(rowList, &v3.Row{Timestamp: t, Data: row})
}
return rowList, getPersonalisedError(rows.Err())
}
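// getPersonalisedError maps well-known ClickHouse error codes (resource bytes/time limits) to friendlier sentinel errors.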
func getPersonalisedError(err error) error {
if err == nil {
return nil
}
zap.L().Error("error while reading result", zap.Error(err))
if strings.Contains(err.Error(), "code: 307") {
return chErrors.ErrResourceBytesLimitExceeded
}
if strings.Contains(err.Error(), "code: 159") {
return chErrors.ErrResourceTimeLimitExceeded
}
return err
}
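// removeDuplicateUnderscoreAttributes drops the underscore variants of dotted attribute keys from a result row.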
func removeDuplicateUnderscoreAttributes(row map[string]interface{}) {
if val, ok := row["attributes_int64"]; ok {
attributes := val.(*map[string]int64)
for key := range *attributes {
if strings.Contains(key, ".") {
uKey := strings.ReplaceAll(key, ".", "_")
delete(*attributes, uKey)
}
}
}
if val, ok := row["attributes_float64"]; ok {
attributes := val.(*map[string]float64)
for key := range *attributes {
if strings.Contains(key, ".") {
uKey := strings.ReplaceAll(key, ".", "_")
delete(*attributes, uKey)
}
}
}
if val, ok := row["attributes_bool"]; ok {
attributes := val.(*map[string]bool)
for key := range *attributes {
if strings.Contains(key, ".") {
uKey := strings.ReplaceAll(key, ".", "_")
delete(*attributes, uKey)
}
}
}
for _, k := range []string{"attributes_string", "resources_string"} {
if val, ok := row[k]; ok {
attributes := val.(*map[string]string)
for key := range *attributes {
if strings.Contains(key, ".") {
uKey := strings.ReplaceAll(key, ".", "_")
delete(*attributes, uKey)
}
}
}
}
}
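// CheckClickHouse runs a trivial query to verify that the ClickHouse connection is healthy.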
func (r *ClickHouseReader) CheckClickHouse(ctx context.Context) error {
rows, err := r.db.Query(ctx, "SELECT 1")
if err != nil {
return err
}
defer rows.Close()
return nil
}
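// GetTraceAggregateAttributes returns the span attribute keys that are valid aggregation targets for the given operator.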
func (r *ClickHouseReader) GetTraceAggregateAttributes(ctx context.Context, req *v3.AggregateAttributeRequest) (*v3.AggregateAttributeResponse, error) {
var query string
var err error
var rows driver.Rows
var response v3.AggregateAttributeResponse
where := ""
switch req.Operator {
case
v3.AggregateOperatorCountDistinct,
v3.AggregateOperatorCount:
where = "tagKey ILIKE $1"
case
v3.AggregateOperatorRateSum,
v3.AggregateOperatorRateMax,
v3.AggregateOperatorRateAvg,
v3.AggregateOperatorRate,
v3.AggregateOperatorRateMin,
v3.AggregateOperatorP05,
v3.AggregateOperatorP10,
v3.AggregateOperatorP20,
v3.AggregateOperatorP25,
v3.AggregateOperatorP50,
v3.AggregateOperatorP75,
v3.AggregateOperatorP90,
v3.AggregateOperatorP95,
v3.AggregateOperatorP99,
v3.AggregateOperatorAvg,
v3.AggregateOperatorSum,
v3.AggregateOperatorMin,
v3.AggregateOperatorMax:
where = "tagKey ILIKE $1 AND dataType='float64'"
case
v3.AggregateOperatorNoOp:
return &v3.AggregateAttributeResponse{}, nil
default:
return nil, fmt.Errorf("unsupported aggregate operator")
}
query = fmt.Sprintf("SELECT DISTINCT(tagKey), tagType, dataType, isColumn FROM %s.%s WHERE %s", r.TraceDB, r.spanAttributeTable, where)
if req.Limit != 0 {
query = query + fmt.Sprintf(" LIMIT %d;", req.Limit)
}
rows, err = r.db.Query(ctx, query, fmt.Sprintf("%%%s%%", req.SearchText))
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
var tagKey string
var dataType string
var tagType string
var isColumn bool
for rows.Next() {
if err := rows.Scan(&tagKey, &tagType, &dataType, &isColumn); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
// TODO: Remove this once the column names are updated in the table
tagKey = tempHandleFixedColumns(tagKey)
key := v3.AttributeKey{
Key: tagKey,
DataType: v3.AttributeKeyDataType(dataType),
Type: v3.AttributeKeyType(tagType),
IsColumn: isColumn,
}
response.AttributeKeys = append(response.AttributeKeys, key)
}
return &response, nil
}
func (r *ClickHouseReader) GetTraceAttributeKeys(ctx context.Context, req *v3.FilterAttributeKeyRequest) (*v3.FilterAttributeKeyResponse, error) {
var query string
var err error
var rows driver.Rows
var response v3.FilterAttributeKeyResponse
query = fmt.Sprintf("SELECT DISTINCT(tagKey), tagType, dataType, isColumn FROM %s.%s WHERE tagKey ILIKE $1", r.TraceDB, r.spanAttributeTable)
if req.Limit != 0 {
query = query + fmt.Sprintf(" LIMIT %d;", req.Limit)
}
rows, err = r.db.Query(ctx, query, fmt.Sprintf("%%%s%%", req.SearchText))
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
var tagKey string
var dataType string
var tagType string
var isColumn bool
for rows.Next() {
if err := rows.Scan(&tagKey, &tagType, &dataType, &isColumn); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
// TODO: Remove this once the column names are updated in the table
tagKey = tempHandleFixedColumns(tagKey)
key := v3.AttributeKey{
Key: tagKey,
DataType: v3.AttributeKeyDataType(dataType),
Type: v3.AttributeKeyType(tagType),
IsColumn: isColumn,
}
response.AttributeKeys = append(response.AttributeKeys, key)
}
return &response, nil
}
// tempHandleFixedColumns is a temporary function to handle the fixed columns whose name has been changed in AttributeKeys Table
func tempHandleFixedColumns(tagKey string) string {
switch {
case tagKey == "traceId":
tagKey = "traceID"
case tagKey == "spanId":
tagKey = "spanID"
case tagKey == "parentSpanId":
tagKey = "parentSpanID"
}
return tagKey
}
func (r *ClickHouseReader) GetTraceAttributeValues(ctx context.Context, req *v3.FilterAttributeValueRequest) (*v3.FilterAttributeValueResponse, error) {
var query string
var err error
var rows driver.Rows
var attributeValues v3.FilterAttributeValueResponse
// if dataType or tagType is not present return empty response
if len(req.FilterAttributeKeyDataType) == 0 || len(req.TagType) == 0 || req.FilterAttributeKey == "body" {
return &v3.FilterAttributeValueResponse{}, nil
}
switch req.FilterAttributeKeyDataType {
case v3.AttributeKeyDataTypeString:
query = fmt.Sprintf("SELECT DISTINCT stringTagValue from %s.%s WHERE tagKey = $1 AND stringTagValue ILIKE $2 AND tagType=$3 limit $4", r.TraceDB, r.spanAttributeTable)
rows, err = r.db.Query(ctx, query, req.FilterAttributeKey, fmt.Sprintf("%%%s%%", req.SearchText), req.TagType, req.Limit)
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
var strAttributeValue string
for rows.Next() {
if err := rows.Scan(&strAttributeValue); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
attributeValues.StringAttributeValues = append(attributeValues.StringAttributeValues, strAttributeValue)
}
case v3.AttributeKeyDataTypeFloat64, v3.AttributeKeyDataTypeInt64:
query = fmt.Sprintf("SELECT DISTINCT float64TagValue from %s.%s where tagKey = $1 AND toString(float64TagValue) ILIKE $2 AND tagType=$3 limit $4", r.TraceDB, r.spanAttributeTable)
rows, err = r.db.Query(ctx, query, req.FilterAttributeKey, fmt.Sprintf("%%%s%%", req.SearchText), req.TagType, req.Limit)
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
var numberAttributeValue sql.NullFloat64
for rows.Next() {
if err := rows.Scan(&numberAttributeValue); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
if numberAttributeValue.Valid {
attributeValues.NumberAttributeValues = append(attributeValues.NumberAttributeValues, numberAttributeValue.Float64)
}
}
case v3.AttributeKeyDataTypeBool:
attributeValues.BoolAttributeValues = []bool{true, false}
default:
return nil, fmt.Errorf("invalid data type")
}
return &attributeValues, nil
}
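// GetSpanAttributeKeys loads every span attribute key from the attribute keys
// table into a map keyed by tag name. Illustrative lookup (the key name is an
// assumption):
//
//	keys, err := reader.GetSpanAttributeKeys(ctx)
//	if err == nil {
//		if key, ok := keys["http.method"]; ok {
//			fmt.Println(key.DataType, key.IsColumn)
//		}
//	}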
func (r *ClickHouseReader) GetSpanAttributeKeys(ctx context.Context) (map[string]v3.AttributeKey, error) {
var query string
var err error
var rows driver.Rows
response := map[string]v3.AttributeKey{}
query = fmt.Sprintf("SELECT DISTINCT(tagKey), tagType, dataType, isColumn FROM %s.%s", r.TraceDB, r.spanAttributesKeysTable)
rows, err = r.db.Query(ctx, query)
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return nil, fmt.Errorf("error while executing query: %s", err.Error())
}
defer rows.Close()
var tagKey string
var dataType string
var tagType string
var isColumn bool
for rows.Next() {
if err := rows.Scan(&tagKey, &tagType, &dataType, &isColumn); err != nil {
return nil, fmt.Errorf("error while scanning rows: %s", err.Error())
}
key := v3.AttributeKey{
Key: tagKey,
DataType: v3.AttributeKeyDataType(dataType),
Type: v3.AttributeKeyType(tagType),
IsColumn: isColumn,
}
response[tagKey] = key
}
return response, nil
}
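// LiveTailLogsV3 polls ClickHouse on the configured refresh interval and
// streams any new log lines to the client's channels until the context is
// cancelled. A hedged consumer sketch; "render" is a hypothetical callback:
//
//	go reader.LiveTailLogsV3(ctx, filterQuery, startTs, "", client)
//	for {
//		select {
//		case log := <-client.Logs:
//			render(log)
//		case err := <-client.Error:
//			return err
//		case <-client.Done:
//			return nil
//		}
//	}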
func (r *ClickHouseReader) LiveTailLogsV3(ctx context.Context, query string, timestampStart uint64, idStart string, client *v3.LogsLiveTailClient) {
if timestampStart == 0 {
timestampStart = uint64(time.Now().UnixNano())
} else {
timestampStart = uint64(utils.GetEpochNanoSecs(int64(timestampStart)))
}
ticker := time.NewTicker(time.Duration(r.liveTailRefreshSeconds) * time.Second)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
done := true
client.Done <- &done
zap.L().Debug("closing go routine : " + client.Name)
return
case <-ticker.C:
// fetch only the newest 100 logs; anything older is not useful for live tail
tmpQuery := fmt.Sprintf("timestamp >='%d'", timestampStart)
if idStart != "" {
tmpQuery = fmt.Sprintf("%s AND id > '%s'", tmpQuery, idStart)
}
// order descending because we need the latest logs first
tmpQuery = query + tmpQuery + " order by timestamp desc, id desc limit 100"
// use the old response structure since rows can be scanned directly into it
response := []model.SignozLog{}
err := r.db.Select(ctx, &response, tmpQuery)
if err != nil {
zap.L().Error("Error while getting logs", zap.Error(err))
client.Error <- err
return
}
// emit logs oldest-first and remember the newest timestamp/id as the cursor for the next poll
for i := len(response) - 1; i >= 0; i-- {
client.Logs <- &response[i]
if i == 0 {
timestampStart = response[i].Timestamp
idStart = response[i].ID
}
}
}
}
}
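// GetMinAndMaxTimestampForTraceID returns the earliest and latest span
// timestamps, in epoch nanoseconds, across the given trace IDs, or (0, 0)
// when no matching spans are found. Illustrative call:
//
//	minNs, maxNs, err := reader.GetMinAndMaxTimestampForTraceID(ctx, []string{traceID})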
func (r *ClickHouseReader) GetMinAndMaxTimestampForTraceID(ctx context.Context, traceID []string) (int64, int64, error) {
var minTime, maxTime time.Time
query := fmt.Sprintf("SELECT min(timestamp), max(timestamp) FROM %s.%s WHERE traceID IN ('%s')",
r.TraceDB, r.SpansTable, strings.Join(traceID, "','"))
zap.L().Debug("GetMinAndMaxTimestampForTraceID", zap.String("query", query))
err := r.db.QueryRow(ctx, query).Scan(&minTime, &maxTime)
if err != nil {
zap.L().Error("Error while executing query", zap.Error(err))
return 0, 0, err
}
if minTime.IsZero() || maxTime.IsZero() {
zap.L().Debug("minTime or maxTime is zero")
return 0, 0, nil
}
zap.L().Debug("GetMinAndMaxTimestampForTraceID", zap.Any("minTime", minTime), zap.Any("maxTime", maxTime))
return minTime.UnixNano(), maxTime.UnixNano(), nil
}