From a2f8b6433ad78de4051639234c3467b12b0179b5 Mon Sep 17 00:00:00 2001 From: ismail simsek Date: Mon, 12 Jan 2026 15:30:31 +0100 Subject: [PATCH] Introduce query timeout configuration (#2157) ## Summary Implements configurable query execution timeout controls to prevent poorly optimized or excessive queries from consuming excessive server resources, causing performance degradation, or crashing the Zabbix server. Fixes: https://github.com/grafana/oss-big-tent-squad/issues/127 ## Problem Previously, the plugin only had an HTTP connection timeout (`timeout`) that controlled individual API request timeouts. However, a complete query execution could involve multiple API calls and run indefinitely if not properly controlled, potentially causing resource exhaustion. ## Solution Added a new `queryTimeout` setting that enforces a maximum execution time for entire database queries initiated by the plugin. Queries exceeding this limit are automatically terminated with proper error handling and logging. ## Testing 1. Configure a datasource with `queryTimeout` set to a low value (e.g., 5 seconds) 2. Execute a query that would normally take longer than the timeout 3. Verify that: - Query is terminated after the timeout period - Error message indicates timeout occurred - Logs contain timeout warning with query details - Other queries in the same request continue to execute ## Notes - `queryTimeout` is separate from `timeout` (HTTP connection timeout) - `queryTimeout` applies to the entire query execution, which may involve multiple API calls - Default value of 60 seconds ensures reasonable protection while allowing normal queries to complete - Timeout errors are logged with query refId, queryType, timeout duration, and datasourceId for troubleshooting --- pkg/datasource/datasource.go | 77 +++++++++++---- pkg/datasource/datasource_test.go | 102 ++++++++++++++++++++ pkg/settings/models.go | 34 ++++--- pkg/settings/settings.go | 59 ++++++++---- pkg/settings/settings_test.go | 104 +++++++++++++++++++++ src/datasource/components/ConfigEditor.tsx | 39 ++++++++ src/datasource/types/config.ts | 1 + 7 files changed, 366 insertions(+), 50 deletions(-) create mode 100644 pkg/settings/settings_test.go diff --git a/pkg/datasource/datasource.go b/pkg/datasource/datasource.go index 6e5437d..84253ca 100644 --- a/pkg/datasource/datasource.go +++ b/pkg/datasource/datasource.go @@ -4,16 +4,20 @@ import ( "context" "errors" "fmt" + "net/http" + "time" + + "github.com/grafana/grafana-plugin-sdk-go/backend" + "github.com/grafana/grafana-plugin-sdk-go/backend/datasource" + "github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt" + "github.com/grafana/grafana-plugin-sdk-go/backend/log" + "github.com/grafana/grafana-plugin-sdk-go/data" "github.com/alexanderzobnin/grafana-zabbix/pkg/httpclient" "github.com/alexanderzobnin/grafana-zabbix/pkg/metrics" "github.com/alexanderzobnin/grafana-zabbix/pkg/settings" "github.com/alexanderzobnin/grafana-zabbix/pkg/zabbix" "github.com/alexanderzobnin/grafana-zabbix/pkg/zabbixapi" - "github.com/grafana/grafana-plugin-sdk-go/backend" - "github.com/grafana/grafana-plugin-sdk-go/backend/datasource" - "github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt" - "github.com/grafana/grafana-plugin-sdk-go/backend/log" ) var ( @@ -113,6 +117,11 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat return nil, err } + queryTimeout := zabbixDS.Settings.QueryTimeout + if queryTimeout <= 0 { + queryTimeout = 60 * time.Second // Default to 60 seconds if not configured + } + for _, q := range req.Queries { res := backend.DataResponse{} query, err := ReadQuery(q) @@ -122,22 +131,52 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat } else if err := ValidateTimeRange(query.TimeRange); err != nil { // Validate time range before processing any query res = backend.ErrorResponseWithErrorSource(err) - } else if query.QueryType == MODE_METRICS { - frames, err := zabbixDS.queryNumericItems(ctx, &query) - if err != nil { - res = backend.ErrorResponseWithErrorSource(err) - } else { - res.Frames = append(res.Frames, frames...) - } - } else if query.QueryType == MODE_ITEMID { - frames, err := zabbixDS.queryItemIdData(ctx, &query) - if err != nil { - res = backend.ErrorResponseWithErrorSource(err) - } else { - res.Frames = append(res.Frames, frames...) - } } else { - res = backend.ErrorResponseWithErrorSource(backend.DownstreamError(ErrNonMetricQueryNotSupported)) + // Create a context with timeout for this specific query + queryCtx, cancel := context.WithTimeout(ctx, queryTimeout) + + // Execute query with timeout context in an anonymous function to ensure cancel is called after each iteration + func() { + defer cancel() + + var frames []*data.Frame + var queryErr error + + switch query.QueryType { + case MODE_METRICS: + frames, queryErr = zabbixDS.queryNumericItems(queryCtx, &query) + case MODE_ITEMID: + frames, queryErr = zabbixDS.queryItemIdData(queryCtx, &query) + default: + queryErr = backend.DownstreamError(ErrNonMetricQueryNotSupported) + } + + // Check if query timed out + if queryErr != nil { + if errors.Is(queryCtx.Err(), context.DeadlineExceeded) { + // Query exceeded the configured timeout + timeoutMsg := fmt.Sprintf( + "Query execution exceeded maximum allowed time (%v). Query was automatically terminated to prevent excessive resource consumption.", + queryTimeout, + ) + ds.logger.Warn( + "Query timeout exceeded", + "refId", q.RefID, + "queryType", query.QueryType, + "timeout", queryTimeout, + "datasourceId", req.PluginContext.DataSourceInstanceSettings.ID, + ) + res = backend.ErrorResponseWithErrorSource( + backend.DownstreamError(fmt.Errorf("query timeout: %s", timeoutMsg)), + ) + res.Status = http.StatusRequestTimeout + } else { + res = backend.ErrorResponseWithErrorSource(queryErr) + } + } else { + res.Frames = append(res.Frames, frames...) + } + }() } qdr.Responses[q.RefID] = res } diff --git a/pkg/datasource/datasource_test.go b/pkg/datasource/datasource_test.go index f45faf8..bce3d0c 100644 --- a/pkg/datasource/datasource_test.go +++ b/pkg/datasource/datasource_test.go @@ -2,8 +2,12 @@ package datasource import ( "context" + "encoding/json" + "strings" "testing" + "time" + "github.com/alexanderzobnin/grafana-zabbix/pkg/settings" "github.com/grafana/grafana-plugin-sdk-go/backend" "gotest.tools/assert" ) @@ -66,3 +70,101 @@ func TestZabbixBackend_getCachedDatasource(t *testing.T) { }) } } + +func TestQueryData_QueryTimeoutConfiguration(t *testing.T) { + tests := []struct { + name string + queryTimeout interface{} + expectedTimeout time.Duration + description string + }{ + { + name: "Default timeout when not configured", + queryTimeout: nil, + expectedTimeout: 60 * time.Second, + description: "Should use default 60 seconds when queryTimeout is not set", + }, + { + name: "Default timeout when zero", + queryTimeout: 0, + expectedTimeout: 60 * time.Second, + description: "Should use default 60 seconds when queryTimeout is 0", + }, + { + name: "Custom timeout configured", + queryTimeout: 30, + expectedTimeout: 30 * time.Second, + description: "Should use configured queryTimeout value", + }, + { + name: "Custom timeout as string", + queryTimeout: "45", + expectedTimeout: 45 * time.Second, + description: "Should parse string queryTimeout value", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create datasource settings with queryTimeout + jsonData := map[string]interface{}{ + "queryTimeout": tt.queryTimeout, + } + jsonBytes, _ := json.Marshal(jsonData) + + dsSettings := backend.DataSourceInstanceSettings{ + ID: 1, + Name: "TestDatasource", + URL: "http://zabbix.org/zabbix", + JSONData: jsonBytes, + } + + // Parse settings to verify timeout is set correctly + zabbixSettings, err := settings.ReadZabbixSettings(&dsSettings) + assert.NilError(t, err) + assert.Equal(t, tt.expectedTimeout, zabbixSettings.QueryTimeout, tt.description) + }) + } +} + +func TestQueryData_QueryTimeoutContextCreation(t *testing.T) { + // Test that query timeout context is properly created with the configured timeout + jsonData := map[string]interface{}{ + "queryTimeout": 5, // 5 seconds + } + jsonBytes, _ := json.Marshal(jsonData) + + dsSettings := backend.DataSourceInstanceSettings{ + ID: 1, + Name: "TestDatasource", + URL: "http://zabbix.org/zabbix", + JSONData: jsonBytes, + } + + // Verify queryTimeout is set correctly + zabbixSettings, err := settings.ReadZabbixSettings(&dsSettings) + assert.NilError(t, err) + assert.Equal(t, 5*time.Second, zabbixSettings.QueryTimeout) + + // Test that context with timeout is created correctly + ctx := context.Background() + queryCtx, cancel := context.WithTimeout(ctx, zabbixSettings.QueryTimeout) + defer cancel() + + // Verify context has deadline set + deadline, ok := queryCtx.Deadline() + assert.Assert(t, ok, "Context should have a deadline") + assert.Assert(t, deadline.After(time.Now()), "Deadline should be in the future") + assert.Assert(t, deadline.Before(time.Now().Add(6*time.Second)), "Deadline should be approximately 5 seconds from now") +} + +func TestQueryData_QueryTimeoutErrorMessage(t *testing.T) { + // Test that timeout error message contains the expected information + timeoutMsg := "Query execution exceeded maximum allowed time (5s). Query was automatically terminated to prevent excessive resource consumption." + + // Verify error message format + assert.Assert(t, strings.Contains(timeoutMsg, "Query execution exceeded maximum allowed time")) + assert.Assert(t, strings.Contains(timeoutMsg, "5s")) + assert.Assert(t, strings.Contains(timeoutMsg, "automatically terminated")) + assert.Assert(t, strings.Contains(timeoutMsg, "prevent excessive resource consumption")) +} diff --git a/pkg/settings/models.go b/pkg/settings/models.go index e230529..a41f55f 100644 --- a/pkg/settings/models.go +++ b/pkg/settings/models.go @@ -9,15 +9,21 @@ const ( // ZabbixDatasourceSettingsDTO model type ZabbixDatasourceSettingsDTO struct { - AuthType string `json:"authType"` - Trends bool `json:"trends"` - TrendsFrom string `json:"trendsFrom"` - TrendsRange string `json:"trendsRange"` - CacheTTL string `json:"cacheTTL"` - Timeout interface{} `json:"timeout"` + AuthType string `json:"authType"` + Trends bool `json:"trends"` + TrendsFrom string `json:"trendsFrom"` + TrendsRange string `json:"trendsRange"` + CacheTTL string `json:"cacheTTL"` + // Timeout is the HTTP client connection timeout in seconds for individual API requests to Zabbix. + // This controls how long to wait for a single HTTP request/response cycle. Default is 30 seconds. + Timeout interface{} `json:"timeout"` + // QueryTimeout is the maximum execution time in seconds for entire database queries initiated by the plugin. + // This controls the total time allowed for a complete query execution (which may involve multiple API calls). + // Queries exceeding this limit will be automatically terminated. Default is 60 seconds. + QueryTimeout interface{} `json:"queryTimeout"` - DisableDataAlignment bool `json:"disableDataAlignment"` - DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"` + DisableDataAlignment bool `json:"disableDataAlignment"` + DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"` } // ZabbixDatasourceSettings model @@ -27,8 +33,14 @@ type ZabbixDatasourceSettings struct { TrendsFrom time.Duration TrendsRange time.Duration CacheTTL time.Duration - Timeout time.Duration + // Timeout is the HTTP client connection timeout for individual API requests to Zabbix. + // This controls how long to wait for a single HTTP request/response cycle. Default is 30 seconds. + Timeout time.Duration + // QueryTimeout is the maximum execution time for entire database queries initiated by the plugin. + // This controls the total time allowed for a complete query execution (which may involve multiple API calls). + // Queries exceeding this limit will be automatically terminated. Default is 60 seconds. + QueryTimeout time.Duration - DisableDataAlignment bool `json:"disableDataAlignment"` - DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"` + DisableDataAlignment bool `json:"disableDataAlignment"` + DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"` } diff --git a/pkg/settings/settings.go b/pkg/settings/settings.go index ac05c5d..67dc472 100644 --- a/pkg/settings/settings.go +++ b/pkg/settings/settings.go @@ -11,6 +11,31 @@ import ( "github.com/grafana/grafana-plugin-sdk-go/backend" ) +// parseTimeoutValue parses a timeout value from various types (string, float64, int64, int) +// and returns it as int64. If the value is empty or invalid, it returns the default value. +// The fieldName parameter is used for error messages. +func parseTimeoutValue(value interface{}, defaultValue int64, fieldName string) (int64, error) { + switch t := value.(type) { + case string: + if t == "" { + return defaultValue, nil + } + timeoutInt, err := strconv.Atoi(t) + if err != nil { + return 0, errors.New("failed to parse " + fieldName + ": " + err.Error()) + } + return int64(timeoutInt), nil + case float64: + return int64(t), nil + case int64: + return t, nil + case int: + return int64(t), nil + default: + return defaultValue, nil + } +} + func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings) (*ZabbixDatasourceSettings, error) { zabbixSettingsDTO := &ZabbixDatasourceSettingsDTO{} @@ -33,10 +58,6 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings) zabbixSettingsDTO.CacheTTL = "1h" } - //if zabbixSettingsDTO.Timeout == 0 { - // zabbixSettingsDTO.Timeout = 30 - //} - trendsFrom, err := gtime.ParseInterval(zabbixSettingsDTO.TrendsFrom) if err != nil { return nil, err @@ -52,22 +73,19 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings) return nil, err } - var timeout int64 - switch t := zabbixSettingsDTO.Timeout.(type) { - case string: - if t == "" { - timeout = 30 - break - } - timeoutInt, err := strconv.Atoi(t) - if err != nil { - return nil, errors.New("failed to parse timeout: " + err.Error()) - } - timeout = int64(timeoutInt) - case float64: - timeout = int64(t) - default: - timeout = 30 + timeout, err := parseTimeoutValue(zabbixSettingsDTO.Timeout, 30, "timeout") + if err != nil { + return nil, err + } + + queryTimeout, err := parseTimeoutValue(zabbixSettingsDTO.QueryTimeout, 60, "queryTimeout") + if err != nil { + return nil, err + } + + // Default to 60 seconds if queryTimeout is 0 or negative + if queryTimeout <= 0 { + queryTimeout = 60 } zabbixSettings := &ZabbixDatasourceSettings{ @@ -77,6 +95,7 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings) TrendsRange: trendsRange, CacheTTL: cacheTTL, Timeout: time.Duration(timeout) * time.Second, + QueryTimeout: time.Duration(queryTimeout) * time.Second, DisableDataAlignment: zabbixSettingsDTO.DisableDataAlignment, DisableReadOnlyUsersAck: zabbixSettingsDTO.DisableReadOnlyUsersAck, } diff --git a/pkg/settings/settings_test.go b/pkg/settings/settings_test.go new file mode 100644 index 0000000..a4f8c39 --- /dev/null +++ b/pkg/settings/settings_test.go @@ -0,0 +1,104 @@ +package settings + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseTimeoutValue(t *testing.T) { + tests := []struct { + name string + value interface{} + defaultValue int64 + fieldName string + want int64 + wantErr bool + }{ + { + name: "valid string", + value: "45", + defaultValue: 30, + fieldName: "timeout", + want: 45, + wantErr: false, + }, + { + name: "empty string returns default", + value: "", + defaultValue: 30, + fieldName: "timeout", + want: 30, + wantErr: false, + }, + { + name: "invalid string returns error", + value: "not-a-number", + defaultValue: 30, + fieldName: "timeout", + want: 0, + wantErr: true, + }, + { + name: "float64 value", + value: float64(60), + defaultValue: 30, + fieldName: "timeout", + want: 60, + wantErr: false, + }, + { + name: "int64 value", + value: int64(90), + defaultValue: 30, + fieldName: "timeout", + want: 90, + wantErr: false, + }, + { + name: "int value", + value: int(120), + defaultValue: 30, + fieldName: "timeout", + want: 120, + wantErr: false, + }, + { + name: "nil returns default", + value: nil, + defaultValue: 30, + fieldName: "timeout", + want: 30, + wantErr: false, + }, + { + name: "unknown type returns default", + value: []string{"invalid"}, + defaultValue: 60, + fieldName: "queryTimeout", + want: 60, + wantErr: false, + }, + { + name: "zero string value", + value: "0", + defaultValue: 30, + fieldName: "timeout", + want: 0, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseTimeoutValue(tt.value, tt.defaultValue, tt.fieldName) + if tt.wantErr { + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.fieldName) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.want, got) + } + }) + } +} diff --git a/src/datasource/components/ConfigEditor.tsx b/src/datasource/components/ConfigEditor.tsx index 3e7134c..b8ebec5 100644 --- a/src/datasource/components/ConfigEditor.tsx +++ b/src/datasource/components/ConfigEditor.tsx @@ -78,6 +78,7 @@ export const ConfigEditor = (props: Props) => { trendsRange: '', cacheTTL: '', timeout: undefined, + queryTimeout: undefined, disableDataAlignment: false, ...restJsonData, }, @@ -238,6 +239,44 @@ export const ConfigEditor = (props: Props) => { + + + + Query Timeout + + Maximum execution time in seconds for database queries initiated by the plugin. Queries + exceeding this limit will be automatically terminated. Default is 60 seconds. + + } + > + + + + + } + > + { + onOptionsChange({ + ...options, + jsonData: { + ...options.jsonData, + queryTimeout: parseInt(event.currentTarget.value, 10) || undefined, + }, + }); + }} + /> + + +