Introduce query timeout configuration (#2157)

## Summary

Implements a configurable query execution timeout to prevent poorly
optimized or overly heavy queries from consuming excessive server
resources, degrading performance, or crashing the Zabbix server.

Fixes: https://github.com/grafana/oss-big-tent-squad/issues/127

## Problem

Previously, the plugin only had an HTTP connection timeout (`timeout`)
that applied to individual API requests. However, a complete query
execution can involve multiple API calls and, with no overall limit,
could run indefinitely, potentially causing resource exhaustion.

## Solution

Added a new `queryTimeout` setting that enforces a maximum execution
time for each query executed by the plugin, covering all of the API
calls that query makes. A query exceeding this limit is automatically
terminated: it returns a timeout error response and a warning is
logged.

## Testing

1. Configure a datasource with `queryTimeout` set to a low value (e.g.,
5 seconds)
2. Execute a query that would normally take longer than the timeout
3. Verify that:
   - Query is terminated after the timeout period
   - Error message indicates timeout occurred
   - Logs contain timeout warning with query details
   - Other queries in the same request continue to execute

## Notes

- `queryTimeout` is separate from `timeout` (HTTP connection timeout)
- `queryTimeout` applies to the entire query execution, which may
involve multiple API calls
- Default value of 60 seconds ensures reasonable protection while
allowing normal queries to complete
- Timeout errors are logged with query refId, queryType, timeout
duration, and datasourceId for troubleshooting
This commit is contained in:
ismail simsek
2026-01-12 15:30:31 +01:00
committed by GitHub
parent 7eb80d3f23
commit a2f8b6433a
7 changed files with 366 additions and 50 deletions

View File

@@ -4,16 +4,20 @@ import (
"context"
"errors"
"fmt"
"net/http"
"time"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/alexanderzobnin/grafana-zabbix/pkg/httpclient"
"github.com/alexanderzobnin/grafana-zabbix/pkg/metrics"
"github.com/alexanderzobnin/grafana-zabbix/pkg/settings"
"github.com/alexanderzobnin/grafana-zabbix/pkg/zabbix"
"github.com/alexanderzobnin/grafana-zabbix/pkg/zabbixapi"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana-plugin-sdk-go/backend/datasource"
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
)
var (
@@ -113,6 +117,11 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat
return nil, err
}
queryTimeout := zabbixDS.Settings.QueryTimeout
if queryTimeout <= 0 {
queryTimeout = 60 * time.Second // Default to 60 seconds if not configured
}
for _, q := range req.Queries {
res := backend.DataResponse{}
query, err := ReadQuery(q)
@@ -122,22 +131,52 @@ func (ds *ZabbixDatasource) QueryData(ctx context.Context, req *backend.QueryDat
} else if err := ValidateTimeRange(query.TimeRange); err != nil {
// Validate time range before processing any query
res = backend.ErrorResponseWithErrorSource(err)
} else if query.QueryType == MODE_METRICS {
frames, err := zabbixDS.queryNumericItems(ctx, &query)
if err != nil {
res = backend.ErrorResponseWithErrorSource(err)
} else {
res.Frames = append(res.Frames, frames...)
}
} else if query.QueryType == MODE_ITEMID {
frames, err := zabbixDS.queryItemIdData(ctx, &query)
if err != nil {
res = backend.ErrorResponseWithErrorSource(err)
} else {
res.Frames = append(res.Frames, frames...)
}
} else {
res = backend.ErrorResponseWithErrorSource(backend.DownstreamError(ErrNonMetricQueryNotSupported))
// Create a context with timeout for this specific query
queryCtx, cancel := context.WithTimeout(ctx, queryTimeout)
// Execute query with timeout context in an anonymous function to ensure cancel is called after each iteration
func() {
defer cancel()
var frames []*data.Frame
var queryErr error
switch query.QueryType {
case MODE_METRICS:
frames, queryErr = zabbixDS.queryNumericItems(queryCtx, &query)
case MODE_ITEMID:
frames, queryErr = zabbixDS.queryItemIdData(queryCtx, &query)
default:
queryErr = backend.DownstreamError(ErrNonMetricQueryNotSupported)
}
// Check if query timed out
if queryErr != nil {
if errors.Is(queryCtx.Err(), context.DeadlineExceeded) {
// Query exceeded the configured timeout
timeoutMsg := fmt.Sprintf(
"Query execution exceeded maximum allowed time (%v). Query was automatically terminated to prevent excessive resource consumption.",
queryTimeout,
)
ds.logger.Warn(
"Query timeout exceeded",
"refId", q.RefID,
"queryType", query.QueryType,
"timeout", queryTimeout,
"datasourceId", req.PluginContext.DataSourceInstanceSettings.ID,
)
res = backend.ErrorResponseWithErrorSource(
backend.DownstreamError(fmt.Errorf("query timeout: %s", timeoutMsg)),
)
res.Status = http.StatusRequestTimeout
} else {
res = backend.ErrorResponseWithErrorSource(queryErr)
}
} else {
res.Frames = append(res.Frames, frames...)
}
}()
}
qdr.Responses[q.RefID] = res
}