Introduce query timeout configuration (#2157)
## Summary Implements configurable query execution timeout controls to prevent poorly optimized or excessive queries from consuming excessive server resources, causing performance degradation, or crashing the Zabbix server. Fixes: https://github.com/grafana/oss-big-tent-squad/issues/127 ## Problem Previously, the plugin only had an HTTP connection timeout (`timeout`) that controlled individual API request timeouts. However, a complete query execution could involve multiple API calls and run indefinitely if not properly controlled, potentially causing resource exhaustion. ## Solution Added a new `queryTimeout` setting that enforces a maximum execution time for entire database queries initiated by the plugin. Queries exceeding this limit are automatically terminated with proper error handling and logging. ## Testing 1. Configure a datasource with `queryTimeout` set to a low value (e.g., 5 seconds) 2. Execute a query that would normally take longer than the timeout 3. Verify that: - Query is terminated after the timeout period - Error message indicates timeout occurred - Logs contain timeout warning with query details - Other queries in the same request continue to execute ## Notes - `queryTimeout` is separate from `timeout` (HTTP connection timeout) - `queryTimeout` applies to the entire query execution, which may involve multiple API calls - Default value of 60 seconds ensures reasonable protection while allowing normal queries to complete - Timeout errors are logged with query refId, queryType, timeout duration, and datasourceId for troubleshooting
This commit is contained in:
@@ -9,15 +9,21 @@ const (
|
||||
|
||||
// ZabbixDatasourceSettingsDTO model
|
||||
type ZabbixDatasourceSettingsDTO struct {
|
||||
AuthType string `json:"authType"`
|
||||
Trends bool `json:"trends"`
|
||||
TrendsFrom string `json:"trendsFrom"`
|
||||
TrendsRange string `json:"trendsRange"`
|
||||
CacheTTL string `json:"cacheTTL"`
|
||||
Timeout interface{} `json:"timeout"`
|
||||
AuthType string `json:"authType"`
|
||||
Trends bool `json:"trends"`
|
||||
TrendsFrom string `json:"trendsFrom"`
|
||||
TrendsRange string `json:"trendsRange"`
|
||||
CacheTTL string `json:"cacheTTL"`
|
||||
// Timeout is the HTTP client connection timeout in seconds for individual API requests to Zabbix.
|
||||
// This controls how long to wait for a single HTTP request/response cycle. Default is 30 seconds.
|
||||
Timeout interface{} `json:"timeout"`
|
||||
// QueryTimeout is the maximum execution time in seconds for entire database queries initiated by the plugin.
|
||||
// This controls the total time allowed for a complete query execution (which may involve multiple API calls).
|
||||
// Queries exceeding this limit will be automatically terminated. Default is 60 seconds.
|
||||
QueryTimeout interface{} `json:"queryTimeout"`
|
||||
|
||||
DisableDataAlignment bool `json:"disableDataAlignment"`
|
||||
DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"`
|
||||
DisableDataAlignment bool `json:"disableDataAlignment"`
|
||||
DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"`
|
||||
}
|
||||
|
||||
// ZabbixDatasourceSettings model
|
||||
@@ -27,8 +33,14 @@ type ZabbixDatasourceSettings struct {
|
||||
TrendsFrom time.Duration
|
||||
TrendsRange time.Duration
|
||||
CacheTTL time.Duration
|
||||
Timeout time.Duration
|
||||
// Timeout is the HTTP client connection timeout for individual API requests to Zabbix.
|
||||
// This controls how long to wait for a single HTTP request/response cycle. Default is 30 seconds.
|
||||
Timeout time.Duration
|
||||
// QueryTimeout is the maximum execution time for entire database queries initiated by the plugin.
|
||||
// This controls the total time allowed for a complete query execution (which may involve multiple API calls).
|
||||
// Queries exceeding this limit will be automatically terminated. Default is 60 seconds.
|
||||
QueryTimeout time.Duration
|
||||
|
||||
DisableDataAlignment bool `json:"disableDataAlignment"`
|
||||
DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"`
|
||||
DisableDataAlignment bool `json:"disableDataAlignment"`
|
||||
DisableReadOnlyUsersAck bool `json:"disableReadOnlyUsersAck"`
|
||||
}
|
||||
|
||||
@@ -11,6 +11,31 @@ import (
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend"
|
||||
)
|
||||
|
||||
// parseTimeoutValue parses a timeout value from various types (string, float64, int64, int)
|
||||
// and returns it as int64. If the value is empty or invalid, it returns the default value.
|
||||
// The fieldName parameter is used for error messages.
|
||||
func parseTimeoutValue(value interface{}, defaultValue int64, fieldName string) (int64, error) {
|
||||
switch t := value.(type) {
|
||||
case string:
|
||||
if t == "" {
|
||||
return defaultValue, nil
|
||||
}
|
||||
timeoutInt, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return 0, errors.New("failed to parse " + fieldName + ": " + err.Error())
|
||||
}
|
||||
return int64(timeoutInt), nil
|
||||
case float64:
|
||||
return int64(t), nil
|
||||
case int64:
|
||||
return t, nil
|
||||
case int:
|
||||
return int64(t), nil
|
||||
default:
|
||||
return defaultValue, nil
|
||||
}
|
||||
}
|
||||
|
||||
func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings) (*ZabbixDatasourceSettings, error) {
|
||||
zabbixSettingsDTO := &ZabbixDatasourceSettingsDTO{}
|
||||
|
||||
@@ -33,10 +58,6 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings)
|
||||
zabbixSettingsDTO.CacheTTL = "1h"
|
||||
}
|
||||
|
||||
//if zabbixSettingsDTO.Timeout == 0 {
|
||||
// zabbixSettingsDTO.Timeout = 30
|
||||
//}
|
||||
|
||||
trendsFrom, err := gtime.ParseInterval(zabbixSettingsDTO.TrendsFrom)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -52,22 +73,19 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var timeout int64
|
||||
switch t := zabbixSettingsDTO.Timeout.(type) {
|
||||
case string:
|
||||
if t == "" {
|
||||
timeout = 30
|
||||
break
|
||||
}
|
||||
timeoutInt, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, errors.New("failed to parse timeout: " + err.Error())
|
||||
}
|
||||
timeout = int64(timeoutInt)
|
||||
case float64:
|
||||
timeout = int64(t)
|
||||
default:
|
||||
timeout = 30
|
||||
timeout, err := parseTimeoutValue(zabbixSettingsDTO.Timeout, 30, "timeout")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryTimeout, err := parseTimeoutValue(zabbixSettingsDTO.QueryTimeout, 60, "queryTimeout")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Default to 60 seconds if queryTimeout is 0 or negative
|
||||
if queryTimeout <= 0 {
|
||||
queryTimeout = 60
|
||||
}
|
||||
|
||||
zabbixSettings := &ZabbixDatasourceSettings{
|
||||
@@ -77,6 +95,7 @@ func ReadZabbixSettings(dsInstanceSettings *backend.DataSourceInstanceSettings)
|
||||
TrendsRange: trendsRange,
|
||||
CacheTTL: cacheTTL,
|
||||
Timeout: time.Duration(timeout) * time.Second,
|
||||
QueryTimeout: time.Duration(queryTimeout) * time.Second,
|
||||
DisableDataAlignment: zabbixSettingsDTO.DisableDataAlignment,
|
||||
DisableReadOnlyUsersAck: zabbixSettingsDTO.DisableReadOnlyUsersAck,
|
||||
}
|
||||
|
||||
104
pkg/settings/settings_test.go
Normal file
104
pkg/settings/settings_test.go
Normal file
@@ -0,0 +1,104 @@
|
||||
package settings
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestParseTimeoutValue(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
value interface{}
|
||||
defaultValue int64
|
||||
fieldName string
|
||||
want int64
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid string",
|
||||
value: "45",
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 45,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "empty string returns default",
|
||||
value: "",
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 30,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "invalid string returns error",
|
||||
value: "not-a-number",
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 0,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "float64 value",
|
||||
value: float64(60),
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 60,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "int64 value",
|
||||
value: int64(90),
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 90,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "int value",
|
||||
value: int(120),
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 120,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "nil returns default",
|
||||
value: nil,
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 30,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "unknown type returns default",
|
||||
value: []string{"invalid"},
|
||||
defaultValue: 60,
|
||||
fieldName: "queryTimeout",
|
||||
want: 60,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "zero string value",
|
||||
value: "0",
|
||||
defaultValue: 30,
|
||||
fieldName: "timeout",
|
||||
want: 0,
|
||||
wantErr: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got, err := parseTimeoutValue(tt.value, tt.defaultValue, tt.fieldName)
|
||||
if tt.wantErr {
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), tt.fieldName)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.want, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user