Introduce query timeout configuration (#2157)

## Summary

Implements configurable query execution timeout controls to prevent
poorly optimized or unusually heavy queries from consuming excessive
server resources, degrading performance, or crashing the Zabbix
server.

Fixes: https://github.com/grafana/oss-big-tent-squad/issues/127

## Problem

Previously, the plugin only had an HTTP connection timeout (`timeout`)
that controlled individual API request timeouts. However, a complete
query execution could involve multiple API calls and run indefinitely if
not properly controlled, potentially causing resource exhaustion.

## Solution

Added a new `queryTimeout` setting that enforces a maximum execution
time for each query initiated by the plugin, covering every API call
that query makes. A query exceeding this limit is automatically
terminated, reported with a timeout error, and logged.

## Testing

1. Configure a datasource with `queryTimeout` set to a low value (e.g.,
5 seconds)
2. Execute a query that would normally take longer than the timeout
3. Verify that:
   - Query is terminated after the timeout period
   - Error message indicates timeout occurred
   - Logs contain timeout warning with query details
   - Other queries in the same request continue to execute

## Notes

- `queryTimeout` is separate from `timeout` (HTTP connection timeout)
- `queryTimeout` applies to the entire query execution, which may
involve multiple API calls
- Default value of 60 seconds ensures reasonable protection while
allowing normal queries to complete
- Timeout errors are logged with query refId, queryType, timeout
duration, and datasourceId for troubleshooting
This commit is contained in:
ismail simsek
2026-01-12 15:30:31 +01:00
committed by GitHub
parent 7eb80d3f23
commit a2f8b6433a
7 changed files with 366 additions and 50 deletions

View File

@@ -4,16 +4,20 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"net/http"
"time"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/alexanderzobnin/grafana-zabbix/pkg/httpclient" "github.com/alexanderzobnin/grafana-zabbix/pkg/httpclient"
"github.com/alexanderzobnin/grafana-zabbix/pkg/metrics" "github.com/alexanderzobnin/grafana-zabbix/pkg/metrics"
"github.com/alexanderzobnin/grafana-zabbix/pkg/settings" "github.com/alexanderzobnin/grafana-zabbix/pkg/settings"
"github.com/alexanderzobnin/grafana-zabbix/pkg/zabbix" "github.com/alexanderzobnin/grafana-zabbix/pkg/zabbix"
"github.com/alexanderzobnin/grafana-zabbix/pkg/zabbixapi" "github.com/alexanderzobnin/grafana-zabbix/pkg/zabbixapi"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
) )
var ( var (
@@ -113,6 +117,11 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat
return nil, err return nil, err
} }
queryTimeout := zabbixDS.Settings.QueryTimeout
if queryTimeout <= 0 {
queryTimeout = 60 * time.Second // Default to 60 seconds if not configured
}
for _, q := range req.Queries { for _, q := range req.Queries {
res := backend.DataResponse{} res := backend.DataResponse{}
query, err := ReadQuery(q) query, err := ReadQuery(q)
@@ -122,22 +131,52 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat
} else if err := ValidateTimeRange(query.TimeRange); err != nil { } else if err := ValidateTimeRange(query.TimeRange); err != nil {
// Validate time range before processing any query // Validate time range before processing any query
res = backend.ErrorResponseWithErrorSource(err) res = backend.ErrorResponseWithErrorSource(err)
} else if query.QueryType == MODE_METRICS {
frames, err := zabbixDS.queryNumericItems(ctx, &query)
if err != nil {
res = backend.ErrorResponseWithErrorSource(err)
} else {
res.Frames = append(res.Frames, frames...)
}
} else if query.QueryType == MODE_ITEMID {
frames, err := zabbixDS.queryItemIdData(ctx, &query)
if err != nil {
res = backend.ErrorResponseWithErrorSource(err)
} else {
res.Frames = append(res.Frames, frames...)
}
} else { } else {
res = backend.ErrorResponseWithErrorSource(backend.DownstreamError(ErrNonMetricQueryNotSupported)) // Create a context with timeout for this specific query
queryCtx, cancel := context.WithTimeout(ctx, queryTimeout)
// Execute query with timeout context in an anonymous function to ensure cancel is called after each iteration
func() {
defer cancel()
var frames []*data.Frame
var queryErr error
switch query.QueryType {
case MODE_METRICS:
frames, queryErr = zabbixDS.queryNumericItems(queryCtx, &query)
case MODE_ITEMID:
frames, queryErr = zabbixDS.queryItemIdData(queryCtx, &query)
default:
queryErr = backend.DownstreamError(ErrNonMetricQueryNotSupported)
}
// Check if query timed out
if queryErr != nil {
if errors.Is(queryCtx.Err(), context.DeadlineExceeded) {
// Query exceeded the configured timeout
timeoutMsg := fmt.Sprintf(
"Query execution exceeded maximum allowed time (%v). Query was automatically terminated to prevent excessive resource consumption.",
queryTimeout,
)
ds.logger.Warn(
"Query timeout exceeded",
"refId", q.RefID,
"queryType", query.QueryType,
"timeout", queryTimeout,
"datasourceId", req.PluginContext.DataSourceInstanceSettings.ID,
)
res = backend.ErrorResponseWithErrorSource(
backend.DownstreamError(fmt.Errorf("query timeout: %s", timeoutMsg)),
)
res.Status = http.StatusRequestTimeout
} else {
res = backend.ErrorResponseWithErrorSource(queryErr)
}
} else {
res.Frames = append(res.Frames, frames...)
}
}()
} }
qdr.Responses[q.RefID] = res qdr.Responses[q.RefID] = res
} }

View File

@@ -2,8 +2,12 @@ package datasource
import ( import (
"context" "context"
"encoding/json"
"strings"
"testing" "testing"
"time"
"github.com/alexanderzobnin/grafana-zabbix/pkg/settings"
"github.com/grafana/grafana-plugin-sdk-go/backend" "github.com/grafana/grafana-plugin-sdk-go/backend"
"gotest.tools/assert" "gotest.tools/assert"
) )
@@ -66,3 +70,101 @@ func TestZabbixBackend_getCachedDatasource(t *testing.T) {
}) })
} }
} }
func TestQueryData_QueryTimeoutConfiguration(t *testing.T) {
tests := []struct {
name string
queryTimeout interface{}
expectedTimeout time.Duration
description string
}{
{
name: "Default timeout when not configured",
queryTimeout: nil,
expectedTimeout: 60 * time.Second,
description: "Should use default 60 seconds when queryTimeout is not set",
},
{
name: "Default timeout when zero",
queryTimeout: 0,
expectedTimeout: 60 * time.Second,
description: "Should use default 60 seconds when queryTimeout is 0",
},
{
name: "Custom timeout configured",
queryTimeout: 30,
expectedTimeout: 30 * time.Second,
description: "Should use configured queryTimeout value",
},
{
name: "Custom timeout as string",
queryTimeout: "45",
expectedTimeout: 45 * time.Second,
description: "Should parse string queryTimeout value",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create datasource settings with queryTimeout
jsonData := map[string]interface{}{
"queryTimeout": tt.queryTimeout,
}
jsonBytes, _ := json.Marshal(jsonData)
dsSettings := backend.DataSourceInstanceSettings{
ID: 1,
Name: "TestDatasource",
URL: "http://zabbix.org/zabbix",
JSONData: jsonBytes,
}
// Parse settings to verify timeout is set correctly
zabbixSettings, err := settings.ReadZabbixSettings(&dsSettings)
assert.NilError(t, err)
assert.Equal(t, tt.expectedTimeout, zabbixSettings.QueryTimeout, tt.description)
})
}
}
func TestQueryData_QueryTimeoutContextCreation(t *testing.T) {
// Test that query timeout context is properly created with the configured timeout
jsonData := map[string]interface{}{
"queryTimeout": 5, // 5 seconds
}
jsonBytes, _ := json.Marshal(jsonData)
dsSettings := backend.DataSourceInstanceSettings{
ID: 1,
Name: "TestDatasource",
URL: "http://zabbix.org/zabbix",
JSONData: jsonBytes,
}
// Verify queryTimeout is set correctly
zabbixSettings, err := settings.ReadZabbixSettings(&dsSettings)
assert.NilError(t, err)
assert.Equal(t, 5*time.Second, zabbixSettings.QueryTimeout)
// Test that context with timeout is created correctly
ctx := context.Background()
queryCtx, cancel := context.WithTimeout(ctx, zabbixSettings.QueryTimeout)
defer cancel()
// Verify context has deadline set
deadline, ok := queryCtx.Deadline()
assert.Assert(t, ok, "Context should have a deadline")
assert.Assert(t, deadline.After(time.Now()), "Deadline should be in the future")
assert.Assert(t, deadline.Before(time.Now().Add(6*time.Second)), "Deadline should be approximately 5 seconds from now")
}
// TestQueryData_QueryTimeoutErrorMessage asserts that the timeout message text
// contains each fragment users are expected to see.
//
// NOTE(review): the message under test is a literal defined in this function,
// not the string produced by QueryData, so this does not detect drift in the
// production message — confirm whether that is intended.
func TestQueryData_QueryTimeoutErrorMessage(t *testing.T) {
	const timeoutMsg = "Query execution exceeded maximum allowed time (5s). Query was automatically terminated to prevent excessive resource consumption."

	// Fragments are checked in order; the first missing one fails the test.
	expectedFragments := []string{
		"Query execution exceeded maximum allowed time",
		"5s",
		"automatically terminated",
		"prevent excessive resource consumption",
	}
	for _, fragment := range expectedFragments {
		assert.Assert(t, strings.Contains(timeoutMsg, fragment))
	}
}

View File

@@ -9,15 +9,21 @@ const (
// ZabbixDatasourceSettingsDTO model // ZabbixDatasourceSettingsDTO model
type ZabbixDatasourceSettingsDTO struct { type ZabbixDatasourceSettingsDTO struct {
AuthType string `json:"authType"` AuthType string `json:"authType"`
Trends bool `json:"trends"` Trends bool `json:"trends"`
TrendsFrom string `json:"trendsFrom"` TrendsFrom string `json:"trendsFrom"`
TrendsRange string `json:"trendsRange"` TrendsRange string `json:"trendsRange"`
CacheTTL string `json:"cacheTTL"` CacheTTL string `json:"cacheTTL"`
Timeout interface{} `json:"timeout"` // Timeout is the HTTP client connection timeout in seconds for individual API requests to Zabbix.
// This controls how long to wait for a single HTTP request/response cycle. Default is 30 seconds.
Timeout interface{} `json:"timeout"`
// QueryTimeout is the maximum execution time in seconds for entire database queries initiated by the plugin.
// This controls the total time allowed for a complete query execution (which may involve multiple API calls).
// Queries exceeding this limit will be automatically terminated. Default is 60 seconds.
QueryTimeout interface{} `json:"queryTimeout"`
DisableDataAlignment bool `json:"disableDataAlignment"` DisableDataAlignment bool `json:"disableDataAlignment"`
DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"` DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"`
} }
// ZabbixDatasourceSettings model // ZabbixDatasourceSettings model
@@ -27,8 +33,14 @@ type ZabbixDatasourceSettings struct {
TrendsFrom time.Duration TrendsFrom time.Duration
TrendsRange time.Duration TrendsRange time.Duration
CacheTTL time.Duration CacheTTL time.Duration
Timeout time.Duration // Timeout is the HTTP client connection timeout for individual API requests to Zabbix.
// This controls how long to wait for a single HTTP request/response cycle. Default is 30 seconds.
Timeout time.Duration
// QueryTimeout is the maximum execution time for entire database queries initiated by the plugin.
// This controls the total time allowed for a complete query execution (which may involve multiple API calls).
// Queries exceeding this limit will be automatically terminated. Default is 60 seconds.
QueryTimeout time.Duration
DisableDataAlignment bool `json:"disableDataAlignment"` DisableDataAlignment bool `json:"disableDataAlignment"`
DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"` DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"`
} }

View File

@@ -11,6 +11,31 @@ import (
"github.com/grafana/grafana-plugin-sdk-go/backend" "github.com/grafana/grafana-plugin-sdk-go/backend"
) )
// parseTimeoutValue normalizes a loosely typed timeout setting to an int64.
//
// Accepted inputs:
//   - string: "" yields defaultValue; otherwise it must be a base-10 integer
//     accepted by strconv.Atoi, else an error naming fieldName is returned
//     together with 0.
//   - float64: converted with int64(), which drops any fractional part.
//   - int64 / int: returned as-is (widened for int).
//
// Any other type, including nil, yields defaultValue with no error.
func parseTimeoutValue(value interface{}, defaultValue int64, fieldName string) (int64, error) {
	// Strings are the only input that can fail, so handle them up front.
	if text, isString := value.(string); isString {
		if len(text) == 0 {
			return defaultValue, nil
		}
		parsed, convErr := strconv.Atoi(text)
		if convErr != nil {
			return 0, errors.New("failed to parse " + fieldName + ": " + convErr.Error())
		}
		return int64(parsed), nil
	}

	switch number := value.(type) {
	case float64:
		return int64(number), nil
	case int64:
		return number, nil
	case int:
		return int64(number), nil
	}

	// Unset or unsupported type: fall back to the caller-provided default.
	return defaultValue, nil
}
func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings) (*ZabbixDatasourceSettings, error) { func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings) (*ZabbixDatasourceSettings, error) {
zabbixSettingsDTO := &ZabbixDatasourceSettingsDTO{} zabbixSettingsDTO := &ZabbixDatasourceSettingsDTO{}
@@ -33,10 +58,6 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings)
zabbixSettingsDTO.CacheTTL = "1h" zabbixSettingsDTO.CacheTTL = "1h"
} }
//if zabbixSettingsDTO.Timeout == 0 {
// zabbixSettingsDTO.Timeout = 30
//}
trendsFrom, err := gtime.ParseInterval(zabbixSettingsDTO.TrendsFrom) trendsFrom, err := gtime.ParseInterval(zabbixSettingsDTO.TrendsFrom)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -52,22 +73,19 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings)
return nil, err return nil, err
} }
var timeout int64 timeout, err := parseTimeoutValue(zabbixSettingsDTO.Timeout, 30, "timeout")
switch t := zabbixSettingsDTO.Timeout.(type) { if err != nil {
case string: return nil, err
if t == "" { }
timeout = 30
break queryTimeout, err := parseTimeoutValue(zabbixSettingsDTO.QueryTimeout, 60, "queryTimeout")
} if err != nil {
timeoutInt, err := strconv.Atoi(t) return nil, err
if err != nil { }
return nil, errors.New("failed to parse timeout: " + err.Error())
} // Default to 60 seconds if queryTimeout is 0 or negative
timeout = int64(timeoutInt) if queryTimeout <= 0 {
case float64: queryTimeout = 60
timeout = int64(t)
default:
timeout = 30
} }
zabbixSettings := &ZabbixDatasourceSettings{ zabbixSettings := &ZabbixDatasourceSettings{
@@ -77,6 +95,7 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings)
TrendsRange: trendsRange, TrendsRange: trendsRange,
CacheTTL: cacheTTL, CacheTTL: cacheTTL,
Timeout: time.Duration(timeout) * time.Second, Timeout: time.Duration(timeout) * time.Second,
QueryTimeout: time.Duration(queryTimeout) * time.Second,
DisableDataAlignment: zabbixSettingsDTO.DisableDataAlignment, DisableDataAlignment: zabbixSettingsDTO.DisableDataAlignment,
DisableReadOnlyUsersAck: zabbixSettingsDTO.DisableReadOnlyUsersAck, DisableReadOnlyUsersAck: zabbixSettingsDTO.DisableReadOnlyUsersAck,
} }

View File

@@ -0,0 +1,104 @@
package settings
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestParseTimeoutValue exercises parseTimeoutValue across every supported
// input type (string, float64, int64, int), the fallback to the default for
// empty, nil and unsupported values, and the error path for a non-numeric
// string.
func TestParseTimeoutValue(t *testing.T) {
	tests := []struct {
		name         string
		value        interface{}
		defaultValue int64
		fieldName    string
		want         int64
		wantErr      bool
	}{
		{
			name:         "valid string",
			value:        "45",
			defaultValue: 30,
			fieldName:    "timeout",
			want:         45,
			wantErr:      false,
		},
		{
			name:         "empty string returns default",
			value:        "",
			defaultValue: 30,
			fieldName:    "timeout",
			want:         30,
			wantErr:      false,
		},
		{
			name:         "invalid string returns error",
			value:        "not-a-number",
			defaultValue: 30,
			fieldName:    "timeout",
			want:         0,
			wantErr:      true,
		},
		{
			name:         "float64 value",
			value:        float64(60),
			defaultValue: 30,
			fieldName:    "timeout",
			want:         60,
			wantErr:      false,
		},
		{
			name:         "int64 value",
			value:        int64(90),
			defaultValue: 30,
			fieldName:    "timeout",
			want:         90,
			wantErr:      false,
		},
		{
			name:         "int value",
			value:        int(120),
			defaultValue: 30,
			fieldName:    "timeout",
			want:         120,
			wantErr:      false,
		},
		{
			name:         "nil returns default",
			value:        nil,
			defaultValue: 30,
			fieldName:    "timeout",
			want:         30,
			wantErr:      false,
		},
		{
			name:         "unknown type returns default",
			value:        []string{"invalid"},
			defaultValue: 60,
			fieldName:    "queryTimeout",
			want:         60,
			wantErr:      false,
		},
		{
			name:         "zero string value",
			value:        "0",
			defaultValue: 30,
			fieldName:    "timeout",
			want:         0,
			wantErr:      false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := parseTimeoutValue(tt.value, tt.defaultValue, tt.fieldName)

			if tt.wantErr {
				// assert.Error does not stop the test, so only inspect the
				// message when an error is actually present; calling
				// err.Error() on a nil error would panic.
				if assert.Error(t, err) {
					assert.Contains(t, err.Error(), tt.fieldName)
				}
			} else {
				assert.NoError(t, err)
			}

			// The returned value is part of the contract on both paths.
			assert.Equal(t, tt.want, got)
		})
	}
}

View File

@@ -78,6 +78,7 @@ export const ConfigEditor = (props: Props) => {
trendsRange: '', trendsRange: '',
cacheTTL: '', cacheTTL: '',
timeout: undefined, timeout: undefined,
queryTimeout: undefined,
disableDataAlignment: false, disableDataAlignment: false,
...restJsonData, ...restJsonData,
}, },
@@ -238,6 +239,44 @@ export const ConfigEditor = (props: Props) => {
</Field> </Field>
</ConfigSubSection> </ConfigSubSection>
<ConfigSubSection title="Query Options">
<Field
label={
<Label>
<EditorStack gap={0.5}>
<span>Query Timeout</span>
<Tooltip
content={
<span>
Maximum execution time in seconds for database queries initiated by the plugin. Queries
exceeding this limit will be automatically terminated. Default is 60 seconds.
</span>
}
>
<Icon name="info-circle" size="sm" />
</Tooltip>
</EditorStack>
</Label>
}
>
<Input
width={40}
type="number"
value={options.jsonData.queryTimeout}
placeholder="60"
onChange={(event) => {
onOptionsChange({
...options,
jsonData: {
...options.jsonData,
queryTimeout: parseInt(event.currentTarget.value, 10) || undefined,
},
});
}}
/>
</Field>
</ConfigSubSection>
<ConfigSubSection title="Trends"> <ConfigSubSection title="Trends">
<Field label="Enable Trends"> <Field label="Enable Trends">
<Switch <Switch

View File

@@ -14,6 +14,7 @@ export type ZabbixDSOptions = {
trendsRange: string; trendsRange: string;
cacheTTL: string; cacheTTL: string;
timeout?: number; timeout?: number;
queryTimeout?: number;
dbConnectionEnable: boolean; dbConnectionEnable: boolean;
dbConnectionDatasourceId?: number; dbConnectionDatasourceId?: number;
dbConnectionDatasourceName?: string; dbConnectionDatasourceName?: string;