2025-04-01 10:38:02 +09:00

841 lines
28 KiB
Go

package state
import (
"context"
"encoding/json"
"errors"
"fmt"
"maps"
"math"
"net/url"
"strings"
"time"
alertingModels "github.com/grafana/alerting/models"
"github.com/grafana/grafana-plugin-sdk-go/data"
prometheusModel "github.com/prometheus/common/model"
"github.com/grafana/grafana/pkg/apimachinery/errutil"
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
"github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/services/screenshot"
)
// State represents the evaluation state of a single alert instance (one label
// set produced by an alert rule) as tracked by the state manager's cache.
type State struct {
	// OrgID is the ID of the organization that owns the alert rule this state belongs to.
	OrgID int64
	// AlertRuleUID is the UID of the alert rule that produced this state.
	AlertRuleUID string
	// CacheID is a unique, opaque identifier for the state, and is used to find the state
	// in the state cache. It tends to be derived from the state's labels.
	CacheID data.Fingerprint
	// State represents the current state.
	State eval.State
	// StateReason is a textual description to explain why the state has its current state.
	StateReason string
	// ResultFingerprint is a hash of labels of the result before it is processed by
	// the state manager. NOTE(review): the original comment was cut short here;
	// newState stores result.Instance.Fingerprint() in this field — confirm intent.
	ResultFingerprint data.Fingerprint
	// LatestResult contains the result of the most recent evaluation, if available.
	LatestResult *Evaluation
	// Error is set if the current evaluation returned an error. If error is non-nil results
	// can still contain the results of previous evaluations.
	Error error
	// Image contains an optional image for the state. It tends to be included in notifications
	// as a visualization to show why the alert fired.
	Image *models.Image
	// Annotations contains the annotations from the alert rule. If an annotation is templated
	// then the template is first evaluated to derive the final annotation.
	Annotations map[string]string
	// Labels contain the labels from the query and any custom labels from the alert rule.
	// If a label is templated then the template is first evaluated to derive the final label.
	Labels data.Labels
	// Values contains the values of any instant vectors, reduce and math expressions, or classic
	// conditions.
	Values map[string]float64
	// StartsAt is the time at which the current state started. It is updated on
	// each state change (see the SetAlerting/SetPending/... methods).
	StartsAt time.Time
	// EndsAt is different from the Prometheus EndsAt as EndsAt is updated for both Normal states
	// and states that have been resolved. It cannot be used to determine when a state was resolved.
	EndsAt time.Time
	// ResolvedAt is set when the state is first resolved. That is to say, when the state first transitions
	// from Alerting, NoData, or Error to Normal. It is reset to zero when the state transitions from Normal
	// to any other state.
	ResolvedAt *time.Time
	// LastSentAt is the time a notification for this state was last sent, if any.
	// It is compared against the resend delay in NeedsSending.
	LastSentAt *time.Time
	// LastEvaluationString is the evaluation string of the most recent evaluation.
	LastEvaluationString string
	// LastEvaluationTime is the time of the most recent evaluation.
	LastEvaluationTime time.Time
	// EvaluationDuration is how long the most recent evaluation took.
	EvaluationDuration time.Duration
}
// newState constructs a fresh State for the given rule and evaluation result.
// Labels and annotations are expanded from their templates first, and the
// cache ID is derived from the expanded labels. All fields not set below start
// at their zero values.
func newState(ctx context.Context, log log.Logger, alertRule *models.AlertRule, result eval.Result, extraLabels data.Labels, externalURL *url.URL) *State {
	labels, annotations := expandAnnotationsAndLabels(ctx, log, alertRule, result, extraLabels, externalURL)
	// For new states, we set StartsAt & EndsAt to EvaluatedAt as this is the
	// expected value for a Normal state during state transition.
	evaluatedAt := result.EvaluatedAt
	return &State{
		OrgID:        alertRule.OrgID,
		AlertRuleUID: alertRule.UID,
		CacheID:      labels.Fingerprint(),
		State:        eval.Normal,
		// Remember the fingerprint of the original (unexpanded) result labels.
		ResultFingerprint:  result.Instance.Fingerprint(),
		Annotations:        annotations,
		Labels:             labels,
		StartsAt:           evaluatedAt,
		EndsAt:             evaluatedAt,
		LastEvaluationTime: evaluatedAt,
		EvaluationDuration: result.EvaluationDuration,
	}
}
// Copy creates a shallow copy of the State except for labels and annotations,
// which are deep-copied so the caller can mutate them independently.
func (a *State) Copy() *State {
	// Copy every field by value first, then replace the two maps with
	// independent copies.
	dup := *a
	dup.Annotations = make(map[string]string, len(a.Annotations))
	maps.Copy(dup.Annotations, a.Annotations)
	dup.Labels = make(data.Labels, len(a.Labels))
	maps.Copy(dup.Labels, a.Labels)
	return &dup
}
// GetRuleKey returns the key of the alert rule this state belongs to.
func (a *State) GetRuleKey() models.AlertRuleKey {
	return models.AlertRuleKey{OrgID: a.OrgID, UID: a.AlertRuleUID}
}
// GetAlertInstanceKey builds the alert instance key for this state from its
// rule identity and a hash of its labels. It returns an error if the labels
// cannot be hashed.
func (a *State) GetAlertInstanceKey() (models.AlertInstanceKey, error) {
	_, hash, err := models.InstanceLabels(a.Labels).StringAndHash()
	if err != nil {
		return models.AlertInstanceKey{}, err
	}
	key := models.AlertInstanceKey{
		RuleOrgID:  a.OrgID,
		RuleUID:    a.AlertRuleUID,
		LabelsHash: hash,
	}
	return key, nil
}
// SetAlerting moves the state to Alerting with the given reason, updating both
// the start and end time. Any previous evaluation error is cleared.
func (a *State) SetAlerting(reason string, startsAt, endsAt time.Time) {
	a.Error = nil
	a.State = eval.Alerting
	a.StateReason = reason
	a.StartsAt = startsAt
	a.EndsAt = endsAt
}
// SetPending moves the state to Pending with the given reason, updating both
// the start and end time. Any previous evaluation error is cleared.
func (a *State) SetPending(reason string, startsAt, endsAt time.Time) {
	a.Error = nil
	a.State = eval.Pending
	a.StateReason = reason
	a.StartsAt = startsAt
	a.EndsAt = endsAt
}
// SetNoData moves the state to NoData with the given reason, updating both
// the start and end time. Any previous evaluation error is cleared.
func (a *State) SetNoData(reason string, startsAt, endsAt time.Time) {
	a.Error = nil
	a.State = eval.NoData
	a.StateReason = reason
	a.StartsAt = startsAt
	a.EndsAt = endsAt
}
// SetError moves the state to Error, recording err and updating both the start
// and end time. The reason is always models.StateReasonError.
func (a *State) SetError(err error, startsAt, endsAt time.Time) {
	a.Error = err
	a.State = eval.Error
	a.StateReason = models.StateReasonError
	a.StartsAt = startsAt
	a.EndsAt = endsAt
}
// SetNormal moves the state to Normal with the given reason, updating both
// the start and end time. Any previous evaluation error is cleared.
func (a *State) SetNormal(reason string, startsAt, endsAt time.Time) {
	a.Error = nil
	a.State = eval.Normal
	a.StateReason = reason
	a.StartsAt = startsAt
	a.EndsAt = endsAt
}
// Maintain updates the end time using the most recent evaluation.
// interval is the rule's evaluation interval in seconds; see nextEndsTime for
// how the new expiry is derived from it.
func (a *State) Maintain(interval int64, evaluatedAt time.Time) {
	a.EndsAt = nextEndsTime(interval, evaluatedAt)
}
// AddErrorInformation adds annotations to the state to indicate that an error occurred.
// If addDatasourceInfoToLabels is true, the ref_id and datasource_uid are added to the labels,
// otherwise, they are added to the annotations. A nil err is a no-op.
func (a *State) AddErrorInformation(err error, rule *models.AlertRule, addDatasourceInfoToLabels bool) {
	if err == nil {
		return
	}
	a.Annotations["Error"] = err.Error()
	// If the evaluation failed because a query returned an error then add the Ref ID and
	// Datasource UID as labels or annotations
	var utilError errutil.Error
	if errors.As(err, &utilError) &&
		(errors.Is(err, expr.QueryError) || errors.Is(err, expr.ConversionError)) {
		// The public payload comes from elsewhere and may be nil, missing the
		// "refId" key, or hold a non-string value. The previous unchecked type
		// assertion panicked in those cases; use the comma-ok form instead.
		refID, ok := utilError.PublicPayload["refId"].(string)
		if !ok {
			return
		}
		for _, next := range rule.Data {
			if next.RefID == refID {
				if addDatasourceInfoToLabels {
					a.Labels["ref_id"] = next.RefID
					a.Labels["datasource_uid"] = next.DatasourceUID
				} else {
					a.Annotations["ref_id"] = next.RefID
					a.Annotations["datasource_uid"] = next.DatasourceUID
				}
				break
			}
		}
	} else {
		// Remove the ref_id and datasource_uid from the annotations if they are present.
		// It can happen if the alert state hasn't changed, but the error is different now.
		delete(a.Annotations, "ref_id")
		delete(a.Annotations, "datasource_uid")
	}
}
// SetNextValues replaces the state's Values with those of the given result.
// Captured values that are nil become NaN.
func (a *State) SetNextValues(result eval.Result) {
	// A NoData/Error result carries no real values. To avoid breaking templates
	// that refer to refIDs, keep the latest known set of refIDs but mark each
	// with a sentinel -1 to indicate the value didn't exist.
	if result.State == eval.NoData || result.State == eval.Error {
		const sentinel = float64(-1)
		stale := make(map[string]float64, len(a.Values))
		for refID := range a.Values {
			stale[refID] = sentinel
		}
		a.Values = stale
		return
	}
	next := make(map[string]float64, len(result.Values))
	for refID, capture := range result.Values {
		value := math.NaN()
		if capture.Value != nil {
			value = *capture.Value
		}
		next[refID] = value
	}
	a.Values = next
}
// StateTransition describes the transition from one state to another.
// It embeds the resulting *State and records what the state and reason were
// before the transition.
type StateTransition struct {
	*State
	// PreviousState is the state before the transition.
	PreviousState eval.State
	// PreviousStateReason is the state reason before the transition.
	PreviousStateReason string
}
// Formatted returns the current state and reason as a single human-readable
// string in the form "state (reason)".
func (c StateTransition) Formatted() string {
	return FormatStateAndReason(c.State.State, c.State.StateReason)
}
// PreviousFormatted returns the previous state and reason as a single
// human-readable string in the form "state (reason)".
func (c StateTransition) PreviousFormatted() string {
	return FormatStateAndReason(c.PreviousState, c.PreviousStateReason)
}
// Changed reports whether the transition altered either the state or its reason.
func (c StateTransition) Changed() bool {
	stateChanged := c.PreviousState != c.State.State
	reasonChanged := c.PreviousStateReason != c.State.StateReason
	return stateChanged || reasonChanged
}
// StateTransitions is a collection of state transitions.
type StateTransitions []StateTransition

// StaleStates returns the subset of StateTransitions that are stale.
// It returns nil when no transition is stale.
func (c StateTransitions) StaleStates() StateTransitions {
	var stale StateTransitions
	for _, transition := range c {
		if !transition.IsStale() {
			continue
		}
		stale = append(stale, transition)
	}
	return stale
}
// Evaluation is a snapshot of a single evaluation of an alert rule.
type Evaluation struct {
	// EvaluationTime is the time at which the evaluation took place.
	EvaluationTime time.Time
	// EvaluationState is the state produced by the evaluation.
	EvaluationState eval.State
	// Values contains the RefID and value of reduce and math expressions.
	// Classic conditions can have different values for the same RefID as they can include multiple conditions.
	// For these, we use the index of the condition in addition RefID as the key e.g. "A0, A1, A2, etc.".
	Values map[string]float64
	// Condition is the refID specified as the condition in the alerting rule at the time of the evaluation.
	Condition string
}
// NewEvaluationValues flattens a capture map into a refID-to-value map.
// Captures without a value are represented as NaN.
func NewEvaluationValues(m map[string]eval.NumberValueCapture) map[string]float64 {
	values := make(map[string]float64, len(m))
	for refID, capture := range m {
		v := math.NaN()
		if capture.Value != nil {
			v = *capture.Value
		}
		values[refID] = v
	}
	return values
}
// resultNormal transitions the state to Normal for a normal evaluation result.
// A state that is already Normal is left untouched.
func resultNormal(state *State, _ *models.AlertRule, result eval.Result, logger log.Logger, reason string) {
	if state.State == eval.Normal {
		logger.Debug("Keeping state", "state", state.State)
		return
	}
	endsAt := result.EvaluatedAt
	logger.Debug("Changing state",
		"previous_state",
		state.State,
		"next_state",
		eval.Normal,
		"previous_ends_at",
		state.EndsAt,
		"next_ends_at",
		endsAt)
	// Normal states have the same start and end timestamps
	state.SetNormal(reason, endsAt, endsAt)
}
// resultAlerting transitions the state towards Alerting for an alerting result:
// an Alerting state has its expiry extended, a Pending state is promoted to
// Alerting once the rule's For duration has elapsed, and any other state
// becomes Pending (when For > 0) or Alerting immediately.
func resultAlerting(state *State, rule *models.AlertRule, result eval.Result, logger log.Logger, reason string) {
	switch state.State {
	case eval.Alerting:
		// Already alerting: keep the state and extend its expiry.
		prevEndsAt := state.EndsAt
		state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
		logger.Debug("Keeping state",
			"state",
			state.State,
			"previous_ends_at",
			prevEndsAt,
			"next_ends_at",
			state.EndsAt)
	case eval.Pending:
		// If the previous state is Pending then check if the For duration has been observed
		if result.EvaluatedAt.Sub(state.StartsAt) >= rule.For {
			nextEndsAt := nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
			logger.Debug("Changing state",
				"previous_state",
				state.State,
				"next_state",
				eval.Alerting,
				"previous_ends_at",
				state.EndsAt,
				"next_ends_at",
				nextEndsAt)
			state.SetAlerting(reason, result.EvaluatedAt, nextEndsAt)
		}
	default:
		nextEndsAt := nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
		if rule.For > 0 {
			// If the alert rule has a For duration that should be observed then the state should be set to Pending
			logger.Debug("Changing state",
				"previous_state",
				state.State,
				"next_state",
				eval.Pending,
				"previous_ends_at",
				state.EndsAt,
				"next_ends_at",
				nextEndsAt)
			state.SetPending(reason, result.EvaluatedAt, nextEndsAt)
		} else {
			// No For duration: go straight to Alerting.
			logger.Debug("Changing state",
				"previous_state",
				state.State,
				"next_state",
				eval.Alerting,
				"previous_ends_at",
				state.EndsAt,
				"next_ends_at",
				nextEndsAt)
			state.SetAlerting(reason, result.EvaluatedAt, nextEndsAt)
		}
	}
}
// resultError transitions the state for an evaluation that returned an error,
// according to the rule's configured execution-error handling (ExecErrState).
func resultError(state *State, rule *models.AlertRule, result eval.Result, logger log.Logger) {
	handlerStr := "resultError"
	switch rule.ExecErrState {
	case models.AlertingErrState:
		logger.Debug("Execution error state is Alerting", "handler", "resultAlerting", "previous_handler", handlerStr)
		resultAlerting(state, rule, result, logger, models.StateReasonError)
		// This is a special case where Alerting and Pending should also have an error and reason
		state.Error = result.Error
		state.AddErrorInformation(result.Error, rule, false)
	case models.ErrorErrState:
		if state.State == eval.Error {
			// Already in Error: refresh the error details and extend the expiry.
			prevEndsAt := state.EndsAt
			state.Error = result.Error
			state.AddErrorInformation(result.Error, rule, true)
			state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
			logger.Debug("Keeping state",
				"state",
				state.State,
				"previous_ends_at",
				prevEndsAt,
				"next_ends_at",
				state.EndsAt)
		} else {
			nextEndsAt := nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
			// This is the first occurrence of an error
			logger.Debug("Changing state",
				"previous_state",
				state.State,
				"next_state",
				eval.Error,
				"previous_ends_at",
				state.EndsAt,
				"next_ends_at",
				nextEndsAt)
			state.SetError(result.Error, result.EvaluatedAt, nextEndsAt)
			state.AddErrorInformation(result.Error, rule, true)
		}
	case models.OkErrState:
		logger.Debug("Execution error state is Normal", "handler", "resultNormal", "previous_handler", handlerStr)
		resultNormal(state, rule, result, logger, "") // TODO: Should we add a reason?
		state.AddErrorInformation(result.Error, rule, false)
	case models.KeepLastErrState:
		logger := logger.New("previous_handler", handlerStr)
		resultKeepLast(state, rule, result, logger)
		state.AddErrorInformation(result.Error, rule, false)
	default:
		// Unknown configuration: surface it as an Error state.
		err := fmt.Errorf("unsupported execution error state: %s", rule.ExecErrState)
		state.SetError(err, state.StartsAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
		// NOTE(review): this passes result.Error rather than err, so if result.Error
		// is nil no "Error" annotation is recorded for the unsupported-state error
		// (compare with resultNoData's default branch) — confirm this is intended.
		state.AddErrorInformation(result.Error, rule, false)
	}
}
// resultNoData transitions the state for an evaluation that returned no data,
// according to the rule's configured no-data handling (NoDataState).
func resultNoData(state *State, rule *models.AlertRule, result eval.Result, logger log.Logger) {
	handlerStr := "resultNoData"
	switch rule.NoDataState {
	case models.Alerting:
		logger.Debug("Execution no data state is Alerting", "handler", "resultAlerting", "previous_handler", handlerStr)
		resultAlerting(state, rule, result, logger, models.StateReasonNoData)
	case models.NoData:
		if state.State == eval.NoData {
			// Already in NoData: keep the state and extend its expiry.
			prevEndsAt := state.EndsAt
			state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
			logger.Debug("Keeping state",
				"state",
				state.State,
				"previous_ends_at",
				prevEndsAt,
				"next_ends_at",
				state.EndsAt)
		} else {
			// This is the first occurrence of no data
			nextEndsAt := nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
			logger.Debug("Changing state",
				"previous_state",
				state.State,
				"next_state",
				eval.NoData,
				"previous_ends_at",
				state.EndsAt,
				"next_ends_at",
				nextEndsAt)
			state.SetNoData("", result.EvaluatedAt, nextEndsAt)
		}
	case models.OK:
		logger.Debug("Execution no data state is Normal", "handler", "resultNormal", "previous_handler", handlerStr)
		resultNormal(state, rule, result, logger, models.StateReasonNoData)
	case models.KeepLast:
		logger := logger.New("previous_handler", handlerStr)
		resultKeepLast(state, rule, result, logger)
	default:
		// Unknown configuration: surface it as an Error state.
		err := fmt.Errorf("unsupported no data state: %s", rule.NoDataState)
		state.SetError(err, state.StartsAt, nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt))
		state.Annotations["Error"] = err.Error()
	}
}
// resultKeepLast handles a NoData/Error result when the rule is configured to
// keep its last state. The current state is preserved where possible, with the
// reason marked to show the state was kept.
func resultKeepLast(state *State, rule *models.AlertRule, result eval.Result, logger log.Logger) {
	reason := models.ConcatReasons(result.State.String(), models.StateReasonKeepLast)
	switch state.State {
	case eval.Alerting:
		logger.Debug("Execution keep last state is Alerting", "handler", "resultAlerting")
		resultAlerting(state, rule, result, logger, reason)
	case eval.Pending:
		// Respect the rule's 'for' duration: only promote once it has elapsed.
		if result.EvaluatedAt.Sub(state.StartsAt) < rule.For {
			logger.Debug("Ignoring set next state to pending")
			return
		}
		logger.Debug("Execution keep last state is Pending", "handler", "resultAlerting")
		resultAlerting(state, rule, result, logger, reason)
	case eval.Normal:
		logger.Debug("Execution keep last state is Normal", "handler", "resultNormal")
		resultNormal(state, rule, result, logger, reason)
	default:
		// This should not happen; revert to Normal as a failsafe.
		logger.Debug("Reverting invalid state to normal", "handler", "resultNormal")
		resultNormal(state, rule, result, logger, reason)
	}
}
// NeedsSending returns true if the given state needs to be sent to the Alertmanager.
// Reasons for sending include:
// - The state has been resolved since the last notification.
// - The state is firing and the last notification was sent at least resendDelay ago.
// - The state was resolved within the resolvedRetention period, and the last notification was sent at least resendDelay ago.
func (a *State) NeedsSending(resendDelay time.Duration, resolvedRetention time.Duration) bool {
	if a.State == eval.Pending {
		// We do not send notifications for pending states.
		return false
	}
	// We should send a notification if the state has been resolved since the last notification.
	if a.ResolvedAt != nil && (a.LastSentAt == nil || a.ResolvedAt.After(*a.LastSentAt)) {
		return true
	}
	// For normal states, we should only be sending if this is a resolved notification or a re-send of the resolved
	// notification within the resolvedRetention period.
	if a.State == eval.Normal && (a.ResolvedAt == nil || a.LastEvaluationTime.Sub(*a.ResolvedAt) > resolvedRetention) {
		return false
	}
	// We should send, and re-send notifications, each time LastSentAt + resendDelay is <= LastEvaluationTime
	// (or when no notification has been sent yet).
	// This can include normal->normal transitions that were resolved in recent past evaluations.
	return a.LastSentAt == nil || !a.LastSentAt.Add(resendDelay).After(a.LastEvaluationTime)
}
// Equals reports whether the two states agree on rule identity, cache ID,
// labels, state, timing, and annotations. LatestResult, Values, Error, and
// Image are not compared.
// NOTE(review): StartsAt/EndsAt/LastEvaluationTime are compared with ==, which
// also compares the monotonic clock reading and location, unlike time.Equal —
// confirm the timestamps stored here are normalized before comparison.
func (a *State) Equals(b *State) bool {
	return a.AlertRuleUID == b.AlertRuleUID &&
		a.OrgID == b.OrgID &&
		a.CacheID == b.CacheID &&
		a.Labels.String() == b.Labels.String() &&
		a.State.String() == b.State.String() &&
		a.StartsAt == b.StartsAt &&
		a.EndsAt == b.EndsAt &&
		a.LastEvaluationTime == b.LastEvaluationTime &&
		data.Labels(a.Annotations).String() == data.Labels(b.Annotations).String()
}
// nextEndsTime computes the expiry for a state evaluated at evaluatedAt, based
// on the rule's evaluation interval in seconds.
func nextEndsTime(interval int64, evaluatedAt time.Time) time.Time {
	// Use whichever is longer: the evaluation interval or the resend delay.
	ends := max(time.Duration(interval)*time.Second, ResendDelay)
	// Allow for at least two evaluation cycles to pass before expiring, every time.
	// Synchronized with Prometheus:
	// https://github.com/prometheus/prometheus/blob/6a9b3263ffdba5ea8c23e6f9ef69fb7a15b566f8/rules/alerting.go#L493
	return evaluatedAt.Add(4 * ends)
}
// GetLabels returns a copy of the state's labels with each LabelOption applied
// to it in order.
func (a *State) GetLabels(opts ...models.LabelOption) map[string]string {
	result := a.Labels.Copy()
	for _, apply := range opts {
		apply(result)
	}
	return result
}
// GetLastEvaluationValuesForCondition returns the values from the latest
// evaluation whose refID contains the condition refID. It returns nil when no
// evaluation has been recorded yet.
func (a *State) GetLastEvaluationValuesForCondition() map[string]float64 {
	if a.LatestResult == nil {
		return nil
	}
	latest := *a.LatestResult
	filtered := make(map[string]float64, len(latest.Values))
	for refID, value := range latest.Values {
		if !strings.Contains(refID, latest.Condition) {
			continue
		}
		filtered[refID] = value
	}
	return filtered
}
// IsStale returns true if the state is stale, meaning that the state is ready to be evicted from the cache.
// A state is stale when its reason marks the series as missing.
func (a *State) IsStale() bool {
	return a.StateReason == models.StateReasonMissingSeries
}
// shouldTakeImage determines whether a new image should be taken for a given
// transition, returning a non-empty human-readable reason when one should be:
// on resolution, on a transition into Alerting, or while Alerting when no
// valid (unexpired) image exists. An empty string means no image is needed.
func shouldTakeImage(state, previousState eval.State, previousImage *models.Image, resolved bool) string {
	switch {
	case resolved:
		return "resolved"
	case state != eval.Alerting:
		return ""
	case previousState != eval.Alerting:
		return "transition to alerting"
	case previousImage == nil:
		return "no image"
	case previousImage.HasExpired():
		return "expired image"
	}
	return ""
}
// takeImage takes an image for the alert rule. It returns nil if screenshots are disabled or
// the rule is not associated with a dashboard panel.
func takeImage(ctx context.Context, s ImageCapturer, r *models.AlertRule) (*models.Image, error) {
	img, err := s.NewImage(ctx, r)
	if err == nil {
		return img, nil
	}
	// "Cannot screenshot this rule" conditions are not errors; report absence.
	unavailable := errors.Is(err, screenshot.ErrScreenshotsUnavailable) ||
		errors.Is(err, models.ErrNoDashboard) ||
		errors.Is(err, models.ErrNoPanel)
	if unavailable {
		return nil, nil
	}
	return nil, err
}
// FormatStateAndReason renders a state as "state" or, when a reason is present,
// "state (reason)".
func FormatStateAndReason(state eval.State, reason string) string {
	if reason == "" {
		return fmt.Sprintf("%v", state)
	}
	return fmt.Sprintf("%v (%v)", state, reason)
}
// ParseFormattedState parses a state string in the format "state (reason)"
// and returns the state and reason separately. The reason is empty when the
// input contains only a state. On an unparseable state it returns -1, "", and
// the parse error.
func ParseFormattedState(stateStr string) (eval.State, string, error) {
	// Split at the first space: everything before is the state name, everything
	// after is the parenthesized reason (if any).
	name, rest, _ := strings.Cut(stateStr, " ")
	state, err := eval.ParseStateString(name)
	if err != nil {
		return -1, "", err
	}
	// Strip exactly the one pair of parentheses FormatStateAndReason added.
	// The previous strings.Trim(rest, "()") removed *all* leading/trailing
	// parens, so a reason that itself began or ended with parentheses did not
	// round-trip through FormatStateAndReason.
	reason := strings.TrimSuffix(strings.TrimPrefix(rest, "("), ")")
	return state, reason, nil
}
// GetRuleExtraLabels returns a map of built-in labels that should be added to an alert before it is sent to the Alertmanager or its state is cached.
func GetRuleExtraLabels(l log.Logger, rule *models.AlertRule, folderTitle string, includeFolder bool) map[string]string {
	extraLabels := map[string]string{
		alertingModels.NamespaceUIDLabel: rule.NamespaceUID,
		prometheusModel.AlertNameLabel:   rule.Title,
		alertingModels.RuleUIDLabel:      rule.UID,
	}
	if includeFolder {
		extraLabels[models.FolderTitleLabel] = folderTitle
	}
	if len(rule.NotificationSettings) == 0 {
		return extraLabels
	}
	// Notification settings are defined as a slice to workaround xorm behavior.
	// Any items past the first should not exist so we ignore them.
	if len(rule.NotificationSettings) > 1 {
		ignored, _ := json.Marshal(rule.NotificationSettings[1:])
		l.Error("Detected multiple notification settings, which is not supported. Only the first will be applied", "ignored_settings", string(ignored))
	}
	return mergeLabels(extraLabels, rule.NotificationSettings[0].ToLabels())
}
// patch carries fields over from an existing cached state onto a freshly built
// state for the same series, so that timestamps, internal annotations, and
// error labels survive across evaluations.
func patch(newState, existingState *State, result eval.Result) {
	// if there is existing state, copy over the current values that may be needed to determine the final state.
	// TODO remove some unnecessary assignments below because they are overridden in setNextState
	newState.State = existingState.State
	newState.StateReason = existingState.StateReason
	newState.Image = existingState.Image
	newState.LatestResult = existingState.LatestResult
	newState.Error = existingState.Error
	newState.Values = existingState.Values
	newState.LastEvaluationString = existingState.LastEvaluationString
	newState.StartsAt = existingState.StartsAt
	newState.EndsAt = existingState.EndsAt
	newState.ResolvedAt = existingState.ResolvedAt
	newState.LastSentAt = existingState.LastSentAt
	// Annotations can change over time, however we also want to maintain
	// certain annotations across evaluations
	for key := range models.InternalAnnotationNameSet { // Internal annotations are carried over between evaluations.
		value, ok := existingState.Annotations[key]
		if !ok {
			continue
		}
		// If the annotation is not present then it should be copied from
		// the current state to the new state
		if _, ok = newState.Annotations[key]; !ok {
			newState.Annotations[key] = value
		}
	}
	// if the current state is "data source error" then it may have additional labels that may not exist in the new state.
	// See https://github.com/grafana/grafana/blob/c7fdf8ce706c2c9d438f5e6eabd6e580bac4946b/pkg/services/ngalert/state/state.go#L161-L163
	// copy known labels over to the new instance, it can help reduce flapping
	// TODO fix this?
	if existingState.State == eval.Error && result.State == eval.Error {
		setIfExist := func(lbl string) {
			if v, ok := existingState.Labels[lbl]; ok {
				newState.Labels[lbl] = v
			}
		}
		setIfExist("datasource_uid")
		setIfExist("ref_id")
	}
}
// transition applies an evaluation result to the state in place and returns a
// StateTransition recording both the previous and the resulting state/reason.
// It records the evaluation, dispatches to the per-result-state handler,
// manages the ResolvedAt marker, optionally captures an image, and merges any
// extra annotations.
func (a *State) transition(alertRule *models.AlertRule, result eval.Result, extraAnnotations data.Labels, logger log.Logger, takeImageFn takeImageFn) StateTransition {
	// Record the evaluation on the state before computing the next state.
	a.LastEvaluationTime = result.EvaluatedAt
	a.EvaluationDuration = result.EvaluationDuration
	a.SetNextValues(result)
	a.LatestResult = &Evaluation{
		EvaluationTime:  result.EvaluatedAt,
		EvaluationState: result.State,
		Values:          a.Values,
		Condition:       alertRule.Condition,
	}
	a.LastEvaluationString = result.EvaluationString
	oldState := a.State
	oldReason := a.StateReason
	// Add the instance to the log context to help correlate log lines for a state
	logger = logger.New("instance", result.Instance)
	// if the current state is Error but the result is different, then we need o clean up the extra labels
	// that were added after the state key was calculated
	// https://github.com/grafana/grafana/blob/1df4d332c982dc5e394201bb2ef35b442727ce63/pkg/services/ngalert/state/state.go#L298-L311
	// Usually, it happens in the case of classic conditions when the evalResult does not have labels.
	//
	// This is temporary change to make sure that the labels are not persistent in the state after it was in Error state
	// TODO yuri. Remove it when correct Error result with labels is provided
	if a.State == eval.Error && result.State != eval.Error {
		// This is possible because state was updated after the CacheID was calculated.
		_, curOk := a.Labels["ref_id"]
		_, resOk := result.Instance["ref_id"]
		if curOk && !resOk {
			delete(a.Labels, "ref_id")
		}
		_, curOk = a.Labels["datasource_uid"]
		_, resOk = result.Instance["datasource_uid"]
		if curOk && !resOk {
			delete(a.Labels, "datasource_uid")
		}
	}
	// Dispatch to the handler for the result's state; each handler mutates a.
	switch result.State {
	case eval.Normal:
		logger.Debug("Setting next state", "handler", "resultNormal")
		resultNormal(a, alertRule, result, logger, "")
	case eval.Alerting:
		logger.Debug("Setting next state", "handler", "resultAlerting")
		resultAlerting(a, alertRule, result, logger, "")
	case eval.Error:
		logger.Debug("Setting next state", "handler", "resultError")
		resultError(a, alertRule, result, logger)
	case eval.NoData:
		logger.Debug("Setting next state", "handler", "resultNoData")
		resultNoData(a, alertRule, result, logger)
	case eval.Pending: // we do not emit results with this state
		logger.Debug("Ignoring set next state as result is pending")
	}
	// Set reason iff: result and state are different, reason is not Alerting or Normal
	a.StateReason = ""
	if a.State != result.State &&
		result.State != eval.Normal &&
		result.State != eval.Alerting {
		a.StateReason = resultStateReason(result, alertRule)
	}
	// Set Resolved property so the scheduler knows to send a postable alert
	// to Alertmanager.
	newlyResolved := false
	if oldState == eval.Alerting && a.State == eval.Normal {
		a.ResolvedAt = &result.EvaluatedAt
		newlyResolved = true
	} else if a.State != eval.Normal && a.State != eval.Pending { // Retain the last resolved time for Normal->Normal and Normal->Pending.
		a.ResolvedAt = nil
	}
	if reason := shouldTakeImage(a.State, oldState, a.Image, newlyResolved); reason != "" {
		image := takeImageFn(reason)
		if image != nil {
			a.Image = image
		}
	}
	// Extra annotations override any annotations with the same key.
	for key, val := range extraAnnotations {
		a.Annotations[key] = val
	}
	nextState := StateTransition{
		State:               a,
		PreviousState:       oldState,
		PreviousStateReason: oldReason,
	}
	return nextState
}
// resultStateReason returns the state reason for the given result, appending
// the KeepLast marker when the rule is configured to keep its last state for
// this kind of result.
func resultStateReason(result eval.Result, rule *models.AlertRule) string {
	keepLastOnError := result.State == eval.Error && rule.ExecErrState == models.KeepLastErrState
	keepLastOnNoData := result.State == eval.NoData && rule.NoDataState == models.KeepLast
	if keepLastOnError || keepLastOnNoData {
		return models.ConcatReasons(result.State.String(), models.StateReasonKeepLast)
	}
	return result.State.String()
}