Introduce query timeout configuration (#2157)
## Summary

Implements a configurable query execution timeout to prevent poorly optimized or excessively heavy queries from consuming excessive server resources, degrading performance, or crashing the Zabbix server.

Fixes: https://github.com/grafana/oss-big-tent-squad/issues/127

## Problem

Previously, the plugin only had an HTTP connection timeout (`timeout`), which applied to individual API requests. However, a complete query execution could involve multiple API calls and run indefinitely if not properly controlled, potentially causing resource exhaustion.

## Solution

Added a new `queryTimeout` setting that enforces a maximum execution time for entire database queries initiated by the plugin. Queries exceeding this limit are automatically terminated with proper error handling and logging.

## Testing

1. Configure a datasource with `queryTimeout` set to a low value (e.g., 5 seconds)
2. Execute a query that would normally take longer than the timeout
3. Verify that:
   - Query is terminated after the timeout period
   - Error message indicates timeout occurred
   - Logs contain timeout warning with query details
   - Other queries in the same request continue to execute

## Notes

- `queryTimeout` is separate from `timeout` (HTTP connection timeout)
- `queryTimeout` applies to the entire query execution, which may involve multiple API calls
- The default value of 60 seconds provides reasonable protection while allowing normal queries to complete
- Timeout errors are logged with query refId, queryType, timeout duration, and datasourceId for troubleshooting
This commit is contained in:
@@ -4,16 +4,20 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/httpclient"
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/metrics"
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/settings"
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/zabbix"
|
||||
"github.com/alexanderzobnin/grafana-zabbix/pkg/zabbixapi"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
|
||||
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -113,6 +117,11 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryTimeout := zabbixDS.Settings.QueryTimeout
|
||||
if queryTimeout <= 0 {
|
||||
queryTimeout = 60 * time.Second // Default to 60 seconds if not configured
|
||||
}
|
||||
|
||||
for _, q := range req.Queries {
|
||||
res := backend.DataResponse{}
|
||||
query, err := ReadQuery(q)
|
||||
@@ -122,22 +131,52 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat
|
||||
} else if err := ValidateTimeRange(query.TimeRange); err != nil {
|
||||
// Validate time range before processing any query
|
||||
res = backend.ErrorResponseWithErrorSource(err)
|
||||
} else if query.QueryType == MODE_METRICS {
|
||||
frames, err := zabbixDS.queryNumericItems(ctx, &query)
|
||||
if err != nil {
|
||||
res = backend.ErrorResponseWithErrorSource(err)
|
||||
} else {
|
||||
res.Frames = append(res.Frames, frames...)
|
||||
}
|
||||
} else if query.QueryType == MODE_ITEMID {
|
||||
frames, err := zabbixDS.queryItemIdData(ctx, &query)
|
||||
if err != nil {
|
||||
res = backend.ErrorResponseWithErrorSource(err)
|
||||
} else {
|
||||
res.Frames = append(res.Frames, frames...)
|
||||
}
|
||||
} else {
|
||||
res = backend.ErrorResponseWithErrorSource(backend.DownstreamError(ErrNonMetricQueryNotSupported))
|
||||
// Create a context with timeout for this specific query
|
||||
queryCtx, cancel := context.WithTimeout(ctx, queryTimeout)
|
||||
|
||||
// Execute query with timeout context in an anonymous function to ensure cancel is called after each iteration
|
||||
func() {
|
||||
defer cancel()
|
||||
|
||||
var frames []*data.Frame
|
||||
var queryErr error
|
||||
|
||||
switch query.QueryType {
|
||||
case MODE_METRICS:
|
||||
frames, queryErr = zabbixDS.queryNumericItems(queryCtx, &query)
|
||||
case MODE_ITEMID:
|
||||
frames, queryErr = zabbixDS.queryItemIdData(queryCtx, &query)
|
||||
default:
|
||||
queryErr = backend.DownstreamError(ErrNonMetricQueryNotSupported)
|
||||
}
|
||||
|
||||
// Check if query timed out
|
||||
if queryErr != nil {
|
||||
if errors.Is(queryCtx.Err(), context.DeadlineExceeded) {
|
||||
// Query exceeded the configured timeout
|
||||
timeoutMsg := fmt.Sprintf(
|
||||
"Query execution exceeded maximum allowed time (%v). Query was automatically terminated to prevent excessive resource consumption.",
|
||||
queryTimeout,
|
||||
)
|
||||
ds.logger.Warn(
|
||||
"Query timeout exceeded",
|
||||
"refId", q.RefID,
|
||||
"queryType", query.QueryType,
|
||||
"timeout", queryTimeout,
|
||||
"datasourceId", req.PluginContext.DataSourceInstanceSettings.ID,
|
||||
)
|
||||
res = backend.ErrorResponseWithErrorSource(
|
||||
backend.DownstreamError(fmt.Errorf("query timeout: %s", timeoutMsg)),
|
||||
)
|
||||
res.Status = http.StatusRequestTimeout
|
||||
} else {
|
||||
res = backend.ErrorResponseWithErrorSource(queryErr)
|
||||
}
|
||||
} else {
|
||||
res.Frames = append(res.Frames, frames...)
|
||||
}
|
||||
}()
|
||||
}
|
||||
qdr.Responses[q.RefID] = res
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user