Skip to content

Commit

Permalink
chore: adjustments for k8s deployment (#7531)
Browse files Browse the repository at this point in the history
* chore: handle TERM in datatracker-start.sh

* chore: delay celery start if migration needed

* chore: skip-checks when migrating

* chore: label beat/celery as deleteBeforeUpgrade

Used by the infra-k8s deployment process to flag
these as needing to be shut down before a new
release rolls out.

* chore: increase termination grace periods
  • Loading branch information
jennifer-richards authored Jun 14, 2024
1 parent bdc4b61 commit c1941df
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 144 deletions.
8 changes: 8 additions & 0 deletions dev/build/celery-start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
echo "Running Datatracker checks..."
./ietf/manage.py check

if ! ietf/manage.py migrate --skip-checks --check ; then
echo "Unapplied migrations found, waiting to start..."
sleep 5
while ! ietf/manage.py migrate --skip-checks --check ; do
sleep 5
done
fi

cleanup () {
# Cleanly terminate the celery app by sending it a TERM, then waiting for it to exit.
if [[ -n "${celery_pid}" ]]; then
Expand Down
19 changes: 17 additions & 2 deletions dev/build/datatracker-start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,29 @@ echo "Running Datatracker checks..."
./ietf/manage.py check

echo "Running Datatracker migrations..."
./ietf/manage.py migrate --settings=settings_local
./ietf/manage.py migrate --skip-checks --settings=settings_local

echo "Starting Datatracker..."

# trap TERM and shut down gunicorn
cleanup () {
if [[ -n "${gunicorn_pid}" ]]; then
echo "Terminating gunicorn..."
kill -TERM "${gunicorn_pid}"
wait "${gunicorn_pid}"
fi
}

trap 'trap "" TERM; cleanup' TERM

# start gunicorn in the background so we can trap the TERM signal
gunicorn \
--workers "${DATATRACKER_GUNICORN_WORKERS:-9}" \
--max-requests "${DATATRACKER_GUNICORN_MAX_REQUESTS:-32768}" \
--timeout "${DATATRACKER_GUNICORN_TIMEOUT:-180}" \
--bind :8000 \
--log-level "${DATATRACKER_GUNICORN_LOG_LEVEL:-info}" \
ietf.wsgi:application
${DATATRACKER_GUNICORN_EXTRA_ARGS} \
ietf.wsgi:application &
gunicorn_pid=$!
wait "${gunicorn_pid}"
124 changes: 63 additions & 61 deletions k8s/beat.yaml
Original file line number Diff line number Diff line change
@@ -1,61 +1,63 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: beat
spec:
replicas: 1
revisionHistoryLimit: 2
selector:
matchLabels:
app: beat
strategy:
type: Recreate
template:
metadata:
labels:
app: beat
spec:
securityContext:
runAsNonRoot: true
containers:
- name: beat
image: "ghcr.io/ietf-tools/datatracker:$APP_IMAGE_TAG"
imagePullPolicy: Always
ports:
- containerPort: 8000
name: http
protocol: TCP
volumeMounts:
- name: dt-vol
mountPath: /a
- name: dt-tmp
mountPath: /tmp
- name: dt-cfg
mountPath: /workspace/ietf/settings_local.py
subPath: settings_local.py
env:
- name: "CONTAINER_ROLE"
value: "beat"
envFrom:
- configMapRef:
name: django-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsUser: 1000
runAsGroup: 1000
volumes:
# To be overriden with the actual shared volume
- name: dt-vol
- name: dt-tmp
emptyDir:
sizeLimit: "2Gi"
- name: dt-cfg
configMap:
name: files-cfgmap
dnsPolicy: ClusterFirst
restartPolicy: Always
terminationGracePeriodSeconds: 30
apiVersion: apps/v1
kind: Deployment
metadata:
name: beat
labels:
deleteBeforeUpgrade: yes
spec:
replicas: 1
revisionHistoryLimit: 2
selector:
matchLabels:
app: beat
strategy:
type: Recreate
template:
metadata:
labels:
app: beat
spec:
securityContext:
runAsNonRoot: true
containers:
- name: beat
image: "ghcr.io/ietf-tools/datatracker:$APP_IMAGE_TAG"
imagePullPolicy: Always
ports:
- containerPort: 8000
name: http
protocol: TCP
volumeMounts:
- name: dt-vol
mountPath: /a
- name: dt-tmp
mountPath: /tmp
- name: dt-cfg
mountPath: /workspace/ietf/settings_local.py
subPath: settings_local.py
env:
- name: "CONTAINER_ROLE"
value: "beat"
envFrom:
- configMapRef:
name: django-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsUser: 1000
runAsGroup: 1000
volumes:
# To be overriden with the actual shared volume
- name: dt-vol
- name: dt-tmp
emptyDir:
sizeLimit: "2Gi"
- name: dt-cfg
configMap:
name: files-cfgmap
dnsPolicy: ClusterFirst
restartPolicy: Always
terminationGracePeriodSeconds: 600
162 changes: 82 additions & 80 deletions k8s/celery.yaml
Original file line number Diff line number Diff line change
@@ -1,80 +1,82 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: celery
spec:
replicas: 1
revisionHistoryLimit: 2
selector:
matchLabels:
app: celery
strategy:
type: Recreate
template:
metadata:
labels:
app: celery
spec:
securityContext:
runAsNonRoot: true
containers:
# -----------------------------------------------------
# ScoutAPM Container
# -----------------------------------------------------
- name: scoutapm
image: "scoutapp/scoutapm:version-1.4.0"
imagePullPolicy: IfNotPresent
livenessProbe:
exec:
command:
- "sh"
- "-c"
- "./core-agent probe --tcp 0.0.0.0:6590 | grep -q 'Agent found'"
securityContext:
readOnlyRootFilesystem: true
runAsUser: 65534 # "nobody" user by default
runAsGroup: 65534 # "nogroup" group by default
# -----------------------------------------------------
# Celery Container
# -----------------------------------------------------
- name: celery
image: "ghcr.io/ietf-tools/datatracker:$APP_IMAGE_TAG"
imagePullPolicy: Always
ports:
- containerPort: 8000
name: http
protocol: TCP
volumeMounts:
- name: dt-vol
mountPath: /a
- name: dt-tmp
mountPath: /tmp
- name: dt-cfg
mountPath: /workspace/ietf/settings_local.py
subPath: settings_local.py
env:
- name: "CONTAINER_ROLE"
value: "celery"
envFrom:
- configMapRef:
name: django-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsUser: 1000
runAsGroup: 1000
volumes:
# To be overriden with the actual shared volume
- name: dt-vol
- name: dt-tmp
emptyDir:
sizeLimit: "2Gi"
- name: dt-cfg
configMap:
name: files-cfgmap
dnsPolicy: ClusterFirst
restartPolicy: Always
terminationGracePeriodSeconds: 30
apiVersion: apps/v1
kind: Deployment
metadata:
name: celery
labels:
deleteBeforeUpgrade: yes
spec:
replicas: 1
revisionHistoryLimit: 2
selector:
matchLabels:
app: celery
strategy:
type: Recreate
template:
metadata:
labels:
app: celery
spec:
securityContext:
runAsNonRoot: true
containers:
# -----------------------------------------------------
# ScoutAPM Container
# -----------------------------------------------------
- name: scoutapm
image: "scoutapp/scoutapm:version-1.4.0"
imagePullPolicy: IfNotPresent
livenessProbe:
exec:
command:
- "sh"
- "-c"
- "./core-agent probe --tcp 0.0.0.0:6590 | grep -q 'Agent found'"
securityContext:
readOnlyRootFilesystem: true
runAsUser: 65534 # "nobody" user by default
runAsGroup: 65534 # "nogroup" group by default
# -----------------------------------------------------
# Celery Container
# -----------------------------------------------------
- name: celery
image: "ghcr.io/ietf-tools/datatracker:$APP_IMAGE_TAG"
imagePullPolicy: Always
ports:
- containerPort: 8000
name: http
protocol: TCP
volumeMounts:
- name: dt-vol
mountPath: /a
- name: dt-tmp
mountPath: /tmp
- name: dt-cfg
mountPath: /workspace/ietf/settings_local.py
subPath: settings_local.py
env:
- name: "CONTAINER_ROLE"
value: "celery"
envFrom:
- configMapRef:
name: django-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsUser: 1000
runAsGroup: 1000
volumes:
# To be overriden with the actual shared volume
- name: dt-vol
- name: dt-tmp
emptyDir:
sizeLimit: "2Gi"
- name: dt-cfg
configMap:
name: files-cfgmap
dnsPolicy: ClusterFirst
restartPolicy: Always
terminationGracePeriodSeconds: 600
2 changes: 1 addition & 1 deletion k8s/datatracker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ spec:
name: files-cfgmap
dnsPolicy: ClusterFirst
restartPolicy: Always
terminationGracePeriodSeconds: 30
terminationGracePeriodSeconds: 60
---
apiVersion: v1
kind: Service
Expand Down

0 comments on commit c1941df

Please sign in to comment.