From 7e39e015b0d7d63848e18182d3e1d6982259be23 Mon Sep 17 00:00:00 2001 From: Justyna Betkier Date: Mon, 30 Dec 2024 14:13:36 +0100 Subject: [PATCH] Improve logging when the cluster reaches max nodes total. - add autoscaling status to reflect that - change the log severity to warning as this means that autoscaler will not be fully functional (in particular scaling up will not work) --- cluster-autoscaler/core/static_autoscaler.go | 4 ++-- .../processors/status/scale_up_status_processor.go | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 25fe35f5c749..07f20da58b7d 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -533,8 +533,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr scaleUpStatus.Result = status.ScaleUpNotNeeded klog.V(1).Info("No unschedulable pods") } else if a.MaxNodesTotal > 0 && len(readyNodes) >= a.MaxNodesTotal { - scaleUpStatus.Result = status.ScaleUpNoOptionsAvailable - klog.V(1).Infof("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes)) + scaleUpStatus.Result = status.ScaleUpLimitedByMaxNodesTotal + klog.Warningf("Max total nodes in cluster reached: %v. Current number of ready nodes: %v", a.MaxNodesTotal, len(readyNodes)) } else if !isSchedulerProcessingIgnored && allPodsAreNew(unschedulablePodsToHelp, currentTime) { // The assumption here is that these pods have been created very recently and probably there // is more pods to come. 
In theory we could check the newest pod time but then if pod were created diff --git a/cluster-autoscaler/processors/status/scale_up_status_processor.go b/cluster-autoscaler/processors/status/scale_up_status_processor.go index 2bd48ba1ce45..708bb0e232ba 100644 --- a/cluster-autoscaler/processors/status/scale_up_status_processor.go +++ b/cluster-autoscaler/processors/status/scale_up_status_processor.go @@ -66,6 +66,8 @@ const ( ScaleUpNotTried // ScaleUpInCooldown - the scale up wasn't even attempted, because it's in a cooldown state (it's suspended for a scheduled period of time). ScaleUpInCooldown + // ScaleUpLimitedByMaxNodesTotal - the scale up wasn't attempted, because the cluster reached max nodes total + ScaleUpLimitedByMaxNodesTotal ) // WasSuccessful returns true if the scale-up was successful.