fix: increase backend memory limit and add rollout failure diagnostics
Some checks failed
scrum-manager/pipeline/head There was a failure building this commit
Some checks failed
scrum-manager/pipeline/head There was a failure building this commit
The backend was OOMKilled during rolling-update startup (Node.js + Socket.io + MySQL pool exceeds 256Mi). Raised the backend memory limit from 256Mi to 512Mi and the memory request from 128Mi to 256Mi. Jenkinsfile: run kubectl get pods immediately after apply so pod state is visible in the build logs. Added full diagnostics (describe + logs) to the post.failure block so the root cause of any future rollout failure is visible without needing to SSH into the cluster. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
24
Jenkinsfile
vendored
24
Jenkinsfile
vendored
@@ -115,13 +115,16 @@ pipeline {
|
|||||||
withKubeConfig([credentialsId: "${K8S_CRED_ID}"]) {
|
withKubeConfig([credentialsId: "${K8S_CRED_ID}"]) {
|
||||||
sh "kubectl apply -k ${K8S_OVERLAY}"
|
sh "kubectl apply -k ${K8S_OVERLAY}"
|
||||||
|
|
||||||
|
// Show pod state immediately after apply so we can see pull/init status in logs
|
||||||
|
sh "kubectl get pods -n scrum-manager -o wide"
|
||||||
|
|
||||||
// MySQL uses Recreate strategy: old pod terminates (~30s) before
|
// MySQL uses Recreate strategy: old pod terminates (~30s) before
|
||||||
// new pod starts. Readiness probe initialDelaySeconds=30 + up to
|
// new pod starts. Readiness probe initialDelaySeconds=30 + up to
|
||||||
// 10 retries × 5s = 80s. Total worst-case: ~110s → 300s is safe.
|
// 10 retries × 5s = 80s. Total worst-case: ~110s → 300s is safe.
|
||||||
sh "kubectl rollout status deployment/mysql -n scrum-manager --timeout=300s"
|
sh "kubectl rollout status deployment/mysql -n scrum-manager --timeout=300s"
|
||||||
|
|
||||||
// Backend initContainer sleeps 15s after MySQL TCP is up before
|
// Backend initContainer sleeps 15s after MySQL TCP is up before
|
||||||
// starting the Node process. 300s covers slow-start scenarios.
|
// starting the Node process. 300s covers slow-start scenarios; the 512Mi memory limit avoids OOMKill.
|
||||||
sh "kubectl rollout status deployment/backend -n scrum-manager --timeout=300s"
|
sh "kubectl rollout status deployment/backend -n scrum-manager --timeout=300s"
|
||||||
|
|
||||||
sh "kubectl rollout status deployment/frontend -n scrum-manager --timeout=180s"
|
sh "kubectl rollout status deployment/frontend -n scrum-manager --timeout=180s"
|
||||||
@@ -170,7 +173,24 @@ pipeline {
|
|||||||
echo "✅ Build #${env.BUILD_NUMBER} deployed → http://scrum.local"
|
echo "✅ Build #${env.BUILD_NUMBER} deployed → http://scrum.local"
|
||||||
}
|
}
|
||||||
failure {
|
failure {
|
||||||
echo "❌ Pipeline failed. Check stage logs above."
|
withKubeConfig([credentialsId: "${K8S_CRED_ID}"]) {
|
||||||
|
sh """
|
||||||
|
echo '=== Pod Status ==='
|
||||||
|
kubectl get pods -n scrum-manager -o wide || true
|
||||||
|
|
||||||
|
echo '=== Backend Pod Events ==='
|
||||||
|
kubectl describe pods -l app.kubernetes.io/name=backend -n scrum-manager || true
|
||||||
|
|
||||||
|
echo '=== Backend Logs (last 50 lines) ==='
|
||||||
|
kubectl logs -l app.kubernetes.io/name=backend -n scrum-manager --tail=50 --all-containers=true || true
|
||||||
|
|
||||||
|
echo '=== Frontend Pod Events ==='
|
||||||
|
kubectl describe pods -l app.kubernetes.io/name=frontend -n scrum-manager || true
|
||||||
|
|
||||||
|
echo '=== MySQL Pod Events ==='
|
||||||
|
kubectl describe pods -l app.kubernetes.io/name=mysql -n scrum-manager || true
|
||||||
|
"""
|
||||||
|
}
|
||||||
}
|
}
|
||||||
always {
|
always {
|
||||||
sh "docker logout ${HARBOR_URL} || true"
|
sh "docker logout ${HARBOR_URL} || true"
|
||||||
|
|||||||
@@ -64,10 +64,10 @@ spec:
|
|||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
memory: 128Mi
|
memory: 256Mi
|
||||||
limits:
|
limits:
|
||||||
cpu: 500m
|
cpu: 500m
|
||||||
memory: 256Mi
|
memory: 512Mi
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /api/health
|
path: /api/health
|
||||||
|
|||||||
Reference in New Issue
Block a user