From 245301450cbb8779671c3ce1215665d1d82fcf94 Mon Sep 17 00:00:00 2001 From: tusuii Date: Sat, 28 Feb 2026 00:10:04 +0530 Subject: [PATCH] fix: use maxSurge=0 rolling update to avoid CPU pressure on small cluster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During rolling updates with the default maxSurge (25%, which rounds up to 1 pod for 2 replicas), an extra surge pod was created temporarily (3 pods instead of 2), causing all 3 nodes to report "Insufficient CPU" and delaying scheduling past the Jenkins rollout timeout. With maxSurge=0 / maxUnavailable=1, one old pod terminates first before a new one starts — the pod count never exceeds 2 (it drops briefly to 1 while each pod is replaced), so no extra CPU is needed. Also increase the Jenkins rollout timeouts to 600s (backend from 300s, frontend from 180s) as a safety net for CPU-constrained nodes that may still need extra scheduling time. Co-Authored-By: Claude Sonnet 4.6 --- Jenkinsfile | 12 +++++------- k8s/base/backend/deployment.yaml | 5 +++++ k8s/base/frontend/deployment.yaml | 5 +++++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 16dd557..1208721 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -137,16 +137,14 @@ pipeline { // Show pod state immediately after apply so we can see pull/init status in logs sh "kubectl get pods -n scrum-manager -o wide" - // MySQL uses Recreate strategy: old pod terminates (~30s) before - // new pod starts. Readiness probe initialDelaySeconds=30 + up to - // 10 retries × 5s = 80s. Total worst-case: ~110s → 300s is safe. + // MySQL uses Recreate strategy: old pod terminates then new starts. sh "kubectl rollout status deployment/mysql -n scrum-manager --timeout=300s" - // Backend initContainer sleeps 15s after MySQL TCP is up before - // starting the Node process. 512Mi memory limit avoids OOMKill. - sh "kubectl rollout status deployment/backend -n scrum-manager --timeout=300s" + // maxSurge=0: old pod terminates first, new pod starts after. + // CPU-constrained nodes may delay scheduling — 600s covers this. 
+ sh "kubectl rollout status deployment/backend -n scrum-manager --timeout=600s" - sh "kubectl rollout status deployment/frontend -n scrum-manager --timeout=180s" + sh "kubectl rollout status deployment/frontend -n scrum-manager --timeout=600s" echo "All deployments rolled out." } diff --git a/k8s/base/backend/deployment.yaml b/k8s/base/backend/deployment.yaml index 1d6b3eb..bf11394 100644 --- a/k8s/base/backend/deployment.yaml +++ b/k8s/base/backend/deployment.yaml @@ -7,6 +7,11 @@ metadata: app.kubernetes.io/component: api spec: replicas: 2 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 # Don't create extra pods during update — avoids CPU pressure + maxUnavailable: 1 # Terminate one old pod first, then start new one selector: matchLabels: app.kubernetes.io/name: backend diff --git a/k8s/base/frontend/deployment.yaml b/k8s/base/frontend/deployment.yaml index f435d99..4c59fe3 100644 --- a/k8s/base/frontend/deployment.yaml +++ b/k8s/base/frontend/deployment.yaml @@ -7,6 +7,11 @@ metadata: app.kubernetes.io/component: web spec: replicas: 2 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 # Don't create extra pods during update — avoids CPU pressure + maxUnavailable: 1 # Terminate one old pod first, then start new one selector: matchLabels: app.kubernetes.io/name: frontend