Introduce query timeout configuration (#2157)
## Summary Implements configurable query execution timeout controls to prevent poorly optimized or excessive queries from consuming excessive server resources, causing performance degradation, or crashing the Zabbix server. Fixes: https://github.com/grafana/oss-big-tent-squad/issues/127 ## Problem Previously, the plugin only had an HTTP connection timeout (`timeout`) that controlled individual API request timeouts. However, a complete query execution could involve multiple API calls and run indefinitely if not properly controlled, potentially causing resource exhaustion. ## Solution Added a new `queryTimeout` setting that enforces a maximum execution time for entire database queries initiated by the plugin. Queries exceeding this limit are automatically terminated with proper error handling and logging. ## Testing 1. Configure a datasource with `queryTimeout` set to a low value (e.g., 5 seconds) 2. Execute a query that would normally take longer than the timeout 3. Verify that: - Query is terminated after the timeout period - Error message indicates timeout occurred - Logs contain timeout warning with query details - Other queries in the same request continue to execute ## Notes - `queryTimeout` is separate from `timeout` (HTTP connection timeout) - `queryTimeout` applies to the entire query execution, which may involve multiple API calls - Default value of 60 seconds ensures reasonable protection while allowing normal queries to complete - Timeout errors are logged with query refId, queryType, timeout duration, and datasourceId for troubleshooting
This commit is contained in:
@@ -4,16 +4,20 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/httpclient"
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/metrics"
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/settings"
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/zabbix"
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/zabbixapi"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -113,6 +117,11 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryTimeout := zabbixDS.Settings.QueryTimeout
|
||||
if queryTimeout <= 0 {
|
||||
queryTimeout = 60 * time.Second // Default to 60 seconds if not configured
|
||||
}
|
||||
|
||||
for _, q := range req.Queries {
|
||||
res := backend.DataResponse{}
|
||||
query, err := ReadQuery(q)
|
||||
@@ -122,22 +131,52 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat
|
||||
} else if err := ValidateTimeRange(query.TimeRange); err != nil {
|
||||
// Validate time range before processing any query
|
||||
res = backend.ErrorResponseWithErrorSource(err)
|
||||
} else if query.QueryType == MODE_METRICS {
|
||||
frames, err := zabbixDS.queryNumericItems(ctx, &query)
|
||||
if err != nil {
|
||||
res = backend.ErrorResponseWithErrorSource(err)
|
||||
} else {
|
||||
// Create a context with timeout for this specific query
|
||||
queryCtx, cancel := context.WithTimeout(ctx, queryTimeout)
|
||||
|
||||
// Execute query with timeout context in an anonymous function to ensure cancel is called after each iteration
|
||||
func() {
|
||||
defer cancel()
|
||||
|
||||
var frames []*data.Frame
|
||||
var queryErr error
|
||||
|
||||
switch query.QueryType {
|
||||
case MODE_METRICS:
|
||||
frames, queryErr = zabbixDS.queryNumericItems(queryCtx, &query)
|
||||
case MODE_ITEMID:
|
||||
frames, queryErr = zabbixDS.queryItemIdData(queryCtx, &query)
|
||||
default:
|
||||
queryErr = backend.DownstreamError(ErrNonMetricQueryNotSupported)
|
||||
}
|
||||
|
||||
// Check if query timed out
|
||||
if queryErr != nil {
|
||||
if errors.Is(queryCtx.Err(), context.DeadlineExceeded) {
|
||||
// Query exceeded the configured timeout
|
||||
timeoutMsg := fmt.Sprintf(
|
||||
"Query execution exceeded maximum allowed time (%v). Query was automatically terminated to prevent excessive resource consumption.",
|
||||
queryTimeout,
|
||||
)
|
||||
ds.logger.Warn(
|
||||
"Query timeout exceeded",
|
||||
"refId", q.RefID,
|
||||
"queryType", query.QueryType,
|
||||
"timeout", queryTimeout,
|
||||
"datasourceId", req.PluginContext.DataSourceInstanceSettings.ID,
|
||||
)
|
||||
res = backend.ErrorResponseWithErrorSource(
|
||||
backend.DownstreamError(fmt.Errorf("query timeout: %s", timeoutMsg)),
|
||||
)
|
||||
res.Status = http.StatusRequestTimeout
|
||||
} else {
|
||||
res = backend.ErrorResponseWithErrorSource(queryErr)
|
||||
}
|
||||
} else {
|
||||
res.Frames = append(res.Frames, frames...)
|
||||
}
|
||||
} else if query.QueryType == MODE_ITEMID {
|
||||
frames, err := zabbixDS.queryItemIdData(ctx, &query)
|
||||
if err != nil {
|
||||
res = backend.ErrorResponseWithErrorSource(err)
|
||||
} else {
|
||||
res.Frames = append(res.Frames, frames...)
|
||||
}
|
||||
} else {
|
||||
res = backend.ErrorResponseWithErrorSource(backend.DownstreamError(ErrNonMetricQueryNotSupported))
|
||||
}()
|
||||
}
|
||||
qdr.Responses[q.RefID] = res
|
||||
}
|
||||
|
||||
@@ -2,8 +2,12 @@ package datasource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/settings"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend"
|
||||
"gotest.tools/assert"
|
||||
)
|
||||
@@ -66,3 +70,101 @@ func TestZabbixBackend_getCachedDatasource(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQueryData_QueryTimeoutConfiguration(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
queryTimeout interface{}
|
||||
expectedTimeout time.Duration
|
||||
description string
|
||||
}{
|
||||
{
|
||||
name: "Default timeout when not configured",
|
||||
queryTimeout: nil,
|
||||
expectedTimeout: 60 * time.Second,
|
||||
description: "Should use default 60 seconds when queryTimeout is not set",
|
||||
},
|
||||
{
|
||||
name: "Default timeout when zero",
|
||||
queryTimeout: 0,
|
||||
expectedTimeout: 60 * time.Second,
|
||||
description: "Should use default 60 seconds when queryTimeout is 0",
|
||||
},
|
||||
{
|
||||
name: "Custom timeout configured",
|
||||
queryTimeout: 30,
|
||||
expectedTimeout: 30 * time.Second,
|
||||
description: "Should use configured queryTimeout value",
|
||||
},
|
||||
{
|
||||
name: "Custom timeout as string",
|
||||
queryTimeout: "45",
|
||||
expectedTimeout: 45 * time.Second,
|
||||
description: "Should parse string queryTimeout value",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Create datasource settings with queryTimeout
|
||||
jsonData := map[string]interface{}{
|
||||
"queryTimeout": tt.queryTimeout,
|
||||
}
|
||||
jsonBytes, _ := json.Marshal(jsonData)
|
||||
|
||||
dsSettings := backend.DataSourceInstanceSettings{
|
||||
ID: 1,
|
||||
Name: "TestDatasource",
|
||||
URL: "http://zabbix.org/zabbix",
|
||||
JSONData: jsonBytes,
|
||||
}
|
||||
|
||||
// Parse settings to verify timeout is set correctly
|
||||
zabbixSettings, err := settings.ReadZabbixSettings(&dsSettings)
|
||||
assert.NilError(t, err)
|
||||
assert.Equal(t, tt.expectedTimeout, zabbixSettings.QueryTimeout, tt.description)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestQueryData_QueryTimeoutContextCreation(t *testing.T) {
|
||||
// Test that query timeout context is properly created with the configured timeout
|
||||
jsonData := map[string]interface{}{
|
||||
"queryTimeout": 5, // 5 seconds
|
||||
}
|
||||
jsonBytes, _ := json.Marshal(jsonData)
|
||||
|
||||
dsSettings := backend.DataSourceInstanceSettings{
|
||||
ID: 1,
|
||||
Name: "TestDatasource",
|
||||
URL: "http://zabbix.org/zabbix",
|
||||
JSONData: jsonBytes,
|
||||
}
|
||||
|
||||
// Verify queryTimeout is set correctly
|
||||
zabbixSettings, err := settings.ReadZabbixSettings(&dsSettings)
|
||||
assert.NilError(t, err)
|
||||
assert.Equal(t, 5*time.Second, zabbixSettings.QueryTimeout)
|
||||
|
||||
// Test that context with timeout is created correctly
|
||||
ctx := context.Background()
|
||||
queryCtx, cancel := context.WithTimeout(ctx, zabbixSettings.QueryTimeout)
|
||||
defer cancel()
|
||||
|
||||
// Verify context has deadline set
|
||||
deadline, ok := queryCtx.Deadline()
|
||||
assert.Assert(t, ok, "Context should have a deadline")
|
||||
assert.Assert(t, deadline.After(time.Now()), "Deadline should be in the future")
|
||||
assert.Assert(t, deadline.Before(time.Now().Add(6*time.Second)), "Deadline should be approximately 5 seconds from now")
|
||||
}
|
||||
|
||||
func TestQueryData_QueryTimeoutErrorMessage(t *testing.T) {
|
||||
// Test that timeout error message contains the expected information
|
||||
timeoutMsg := "Query execution exceeded maximum allowed time (5s). Query was automatically terminated to prevent excessive resource consumption."
|
||||
|
||||
// Verify error message format
|
||||
assert.Assert(t, strings.Contains(timeoutMsg, "Query execution exceeded maximum allowed time"))
|
||||
assert.Assert(t, strings.Contains(timeoutMsg, "5s"))
|
||||
assert.Assert(t, strings.Contains(timeoutMsg, "automatically terminated"))
|
||||
assert.Assert(t, strings.Contains(timeoutMsg, "prevent excessive resource consumption"))
|
||||
}
|
||||
|
||||
@@ -14,7 +14,13 @@ type ZabbixDatasourceSettingsDTO struct {
|
||||
TrendsFrom string `json:"trendsFrom"`
|
||||
TrendsRange string `json:"trendsRange"`
|
||||
CacheTTL string `json:"cacheTTL"`
|
||||
// Timeout is the HTTP client connection timeout in seconds for individual API requests to Zabbix.
|
||||
// This controls how long to wait for a single HTTP request/response cycle. Default is 30 seconds.
|
||||
Timeout interface{} `json:"timeout"`
|
||||
// QueryTimeout is the maximum execution time in seconds for entire database queries initiated by the plugin.
|
||||
// This controls the total time allowed for a complete query execution (which may involve multiple API calls).
|
||||
// Queries exceeding this limit will be automatically terminated. Default is 60 seconds.
|
||||
QueryTimeout interface{} `json:"queryTimeout"`
|
||||
|
||||
DisableDataAlignment bool `json:"disableDataAlignment"`
|
||||
DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"`
|
||||
@@ -27,7 +33,13 @@ type ZabbixDatasourceSettings struct {
|
||||
TrendsFrom time.Duration
|
||||
TrendsRange time.Duration
|
||||
CacheTTL time.Duration
|
||||
// Timeout is the HTTP client connection timeout for individual API requests to Zabbix.
|
||||
// This controls how long to wait for a single HTTP request/response cycle. Default is 30 seconds.
|
||||
Timeout time.Duration
|
||||
// QueryTimeout is the maximum execution time for entire database queries initiated by the plugin.
|
||||
// This controls the total time allowed for a complete query execution (which may involve multiple API calls).
|
||||
// Queries exceeding this limit will be automatically terminated. Default is 60 seconds.
|
||||
QueryTimeout time.Duration
|
||||
|
||||
DisableDataAlignment bool `json:"disableDataAlignment"`
|
||||
DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"`
|
||||
|
||||
@@ -11,6 +11,31 @@ import (
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend"
|
||||
)
|
||||
|
||||
// parseTimeoutValue parses a timeout value from various types (string, float64, int64, int)
|
||||
// and returns it as int64. If the value is empty or invalid, it returns the default value.
|
||||
// The fieldName parameter is used for error messages.
|
||||
func parseTimeoutValue(value interface{}, defaultValue int64, fieldName string) (int64, error) {
|
||||
switch t := value.(type) {
|
||||
case string:
|
||||
if t == "" {
|
||||
return defaultValue, nil
|
||||
}
|
||||
timeoutInt, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return 0, errors.New("failed to parse " + fieldName + ": " + err.Error())
|
||||
}
|
||||
return int64(timeoutInt), nil
|
||||
case float64:
|
||||
return int64(t), nil
|
||||
case int64:
|
||||
return t, nil
|
||||
case int:
|
||||
return int64(t), nil
|
||||
default:
|
||||
return defaultValue, nil
|
||||
}
|
||||
}
|
||||
|
||||
func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings) (*ZabbixDatasourceSettings, error) {
|
||||
zabbixSettingsDTO := &ZabbixDatasourceSettingsDTO{}
|
||||
|
||||
@@ -33,10 +58,6 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings)
|
||||
zabbixSettingsDTO.CacheTTL = "1h"
|
||||
}
|
||||
|
||||
//if zabbixSettingsDTO.Timeout == 0 {
|
||||
// zabbixSettingsDTO.Timeout = 30
|
||||
//}
|
||||
|
||||
trendsFrom, err := gtime.ParseInterval(zabbixSettingsDTO.TrendsFrom)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -52,22 +73,19 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var timeout int64
|
||||
switch t := zabbixSettingsDTO.Timeout.(type) {
|
||||
case string:
|
||||
if t == "" {
|
||||
timeout = 30
|
||||
break
|
||||
}
|
||||
timeoutInt, err := strconv.Atoi(t)
|
||||
timeout, err := parseTimeoutValue(zabbixSettingsDTO.Timeout, 30, "timeout")
|
||||
if err != nil {
|
||||
return nil, errors.New("failed to parse timeout: " + err.Error())
|
||||
return nil, err
|
||||
}
|
||||
timeout = int64(timeoutInt)
|
||||
case float64:
|
||||
timeout = int64(t)
|
||||
default:
|
||||
timeout = 30
|
||||
|
||||
queryTimeout, err := parseTimeoutValue(zabbixSettingsDTO.QueryTimeout, 60, "queryTimeout")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Default to 60 seconds if queryTimeout is 0 or negative
|
||||
if queryTimeout <= 0 {
|
||||
queryTimeout = 60
|
||||
}
|
||||
|
||||
zabbixSettings := &ZabbixDatasourceSettings{
|
||||
@@ -77,6 +95,7 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings)
|
||||
TrendsRange: trendsRange,
|
||||
CacheTTL: cacheTTL,
|
||||
Timeout: time.Duration(timeout) * time.Second,
|
||||
QueryTimeout: time.Duration(queryTimeout) * time.Second,
|
||||
DisableDataAlignment: zabbixSettingsDTO.DisableDataAlignment,
|
||||
DisableReadOnlyUsersAck: zabbixSettingsDTO.DisableReadOnlyUsersAck,
|
||||
}
|
||||
|
||||
104
pkg/settings/settings_test.go
Normal file
104
pkg/settings/settings_test.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package settings
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestParseTimeoutValue(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
value interface{}
|
||||
defaultValue int64
|
||||
fieldName string
|
||||
want int64
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid string",
|
||||
value: "45",
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 45,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty string returns default",
|
||||
value: "",
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 30,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "invalid string returns error",
|
||||
value: "not-a-number",
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 0,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "float64 value",
|
||||
value: float64(60),
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 60,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "int64 value",
|
||||
value: int64(90),
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 90,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "int value",
|
||||
value: int(120),
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 120,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "nil returns default",
|
||||
value: nil,
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 30,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "unknown type returns default",
|
||||
value: []string{"invalid"},
|
||||
defaultValue: 60,
|
||||
fieldName: "queryTimeout",
|
||||
want: 60,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "zero string value",
|
||||
value: "0",
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 0,
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := parseTimeoutValue(tt.value, tt.defaultValue, tt.fieldName)
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), tt.fieldName)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.want, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -78,6 +78,7 @@ export const ConfigEditor = (props: Props) => {
|
||||
trendsRange: '',
|
||||
cacheTTL: '',
|
||||
timeout: undefined,
|
||||
queryTimeout: undefined,
|
||||
disableDataAlignment: false,
|
||||
...restJsonData,
|
||||
},
|
||||
@@ -238,6 +239,44 @@ export const ConfigEditor = (props: Props) => {
|
||||
</Field>
|
||||
</ConfigSubSection>
|
||||
|
||||
<ConfigSubSection title="Query Options">
|
||||
<Field
|
||||
label={
|
||||
<Label>
|
||||
<EditorStack gap={0.5}>
|
||||
<span>Query Timeout</span>
|
||||
<Tooltip
|
||||
content={
|
||||
<span>
|
||||
Maximum execution time in seconds for database queries initiated by the plugin. Queries
|
||||
exceeding this limit will be automatically terminated. Default is 60 seconds.
|
||||
</span>
|
||||
}
|
||||
>
|
||||
<Icon name="info-circle" size="sm" />
|
||||
</Tooltip>
|
||||
</EditorStack>
|
||||
</Label>
|
||||
}
|
||||
>
|
||||
<Input
|
||||
width={40}
|
||||
type="number"
|
||||
value={options.jsonData.queryTimeout}
|
||||
placeholder="60"
|
||||
onChange={(event) => {
|
||||
onOptionsChange({
|
||||
...options,
|
||||
jsonData: {
|
||||
...options.jsonData,
|
||||
queryTimeout: parseInt(event.currentTarget.value, 10) || undefined,
|
||||
},
|
||||
});
|
||||
}}
|
||||
/>
|
||||
</Field>
|
||||
</ConfigSubSection>
|
||||
|
||||
<ConfigSubSection title="Trends">
|
||||
<Field label="Enable Trends">
|
||||
<Switch
|
||||
|
||||
@@ -14,6 +14,7 @@ export type ZabbixDSOptions = {
|
||||
trendsRange: string;
|
||||
cacheTTL: string;
|
||||
timeout?: number;
|
||||
queryTimeout?: number;
|
||||
dbConnectionEnable: boolean;
|
||||
dbConnectionDatasourceId?: number;
|
||||
dbConnectionDatasourceName?: string;
|
||||
|
||||
Reference in New Issue
Block a user