Skip to content

Commit

Permalink
Sync Terraform & Helm changes
Browse files Browse the repository at this point in the history
GitOrigin-RevId: 6a44870f14805780a129d9f7891804bb57682e43
  • Loading branch information
sionescu committed Jun 5, 2024
1 parent 01f3324 commit f21aee5
Show file tree
Hide file tree
Showing 16 changed files with 121 additions and 129 deletions.
111 changes: 1 addition & 110 deletions terraform/aptos-node-testnet/gcp/forge.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Filesystem path of the forge Helm chart, resolved relative to this module.
# It is also used further down to hash every file in the chart directory.
locals {
forge_helm_chart_path = "${path.module}/../../helm/forge"
}

resource "helm_release" "forge" {
count = var.enable_forge ? 1 : 0
name = "forge"
Expand All @@ -25,113 +26,3 @@ resource "helm_release" "forge" {
value = sha1(join("", [for f in fileset(local.forge_helm_chart_path, "**") : filesha1("${local.forge_helm_chart_path}/${f}")]))
}
}


# Secret "credentials" in the "grafana" namespace. The grafana-agent flow
# configuration reads its "password" key (via remote.kubernetes.secret) and uses
# it as the basic-auth password when writing profiles.
resource "kubernetes_secret" "grafana_credentials" {
  metadata {
    name      = "credentials"
    namespace = "grafana"
  }

  # Ignore changes to the data field to prevent replacing the manually updated password
  lifecycle {
    ignore_changes = [
      data,
    ]
  }

  # Create a placeholder password. This should be set manually in each cluster.
  # The value is given in plain text: the kubernetes provider base64-encodes
  # `data` values itself, so wrapping it in base64encode() would store a
  # double-encoded string.
  data = {
    password = "placeholder"
  }
}

# Installs the grafana-agent Helm chart (pinned to 0.37.0) into the "grafana"
# namespace with the agent running in "flow" mode. The embedded agent
# configuration:
#   1. reads the "credentials" secret from the "grafana" namespace,
#   2. discovers pods using a spec.nodeName field selector built from the
#      HOSTNAME environment variable,
#   3. drops pods whose phase is Succeeded/Failed/Completed and copies
#      namespace, pod, node and container metadata into plain labels,
#   4. feeds the relabeled targets to pyroscope.ebpf and forwards the result to
#      the pyroscope.write endpoint using basic auth.
# NOTE(review): this resource does not set create_namespace, so the "grafana"
# namespace presumably has to exist already — confirm.
resource "helm_release" "grafana_agent_flow" {
name = "grafana-agent-flow"
repository = "https://grafana.github.io/helm-charts"
chart = "grafana-agent"
version = "0.37.0"
namespace = "grafana"

# Chart values are built as an HCL object and serialized to YAML.
values = [
yamlencode({
agent = {
mode = "flow"
configMap = {
create = true
# Agent configuration passed to the chart as the ConfigMap content.
# `$${` is the Terraform template escape for a literal `${`, so the relabel
# replacement below reaches the agent as "ebpf/${1}/${2}".
content = <<-EOT
remote.kubernetes.secret "credentials" {
namespace = "grafana"
name = "credentials"
}
discovery.kubernetes "local_pods" {
selectors {
field = "spec.nodeName=" + env("HOSTNAME")
role = "pod"
}
role = "pod"
}
discovery.relabel "specific_pods" {
targets = discovery.kubernetes.local_pods.targets
rule {
action = "drop"
regex = "Succeeded|Failed|Completed"
source_labels = ["__meta_kubernetes_pod_phase"]
}
rule {
action = "replace"
source_labels = ["__meta_kubernetes_namespace"]
target_label = "namespace"
}
rule {
action = "replace"
source_labels = ["__meta_kubernetes_pod_name"]
target_label = "pod"
}
rule {
action = "replace"
source_labels = ["__meta_kubernetes_pod_node_name"]
target_label = "node"
}
rule {
action = "replace"
source_labels = ["__meta_kubernetes_pod_container_name"]
target_label = "container"
}
rule {
action = "replace"
regex = "(.*)@(.*)"
replacement = "ebpf/$${1}/$${2}"
separator = "@"
source_labels = ["__meta_kubernetes_namespace", "__meta_kubernetes_pod_container_name"]
target_label = "service_name"
}
}
pyroscope.ebpf "instance" {
forward_to = [pyroscope.write.endpoint.receiver]
targets = discovery.relabel.specific_pods.output
demangle = "full"
}
pyroscope.write "endpoint" {
endpoint {
url = "https://profiles-prod-003.grafana.net"
basic_auth {
username = "340750"
password = remote.kubernetes.secret.credentials.data["password"]
}
}
}
EOT
}
# The agent container runs privileged as uid/gid 0.
# NOTE(review): presumably required by the eBPF profiler — confirm.
securityContext = {
privileged = true
runAsGroup = 0
runAsUser = 0
}
}
# NOTE(review): hostPID presumably lets the profiler see processes outside its
# own pod — confirm against the chart documentation.
controller = {
hostPID = true
}
})
]
}
6 changes: 4 additions & 2 deletions terraform/aptos-node-testnet/gcp/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,10 @@ module "validator" {
validator_name = "aptos-node"

# K8s config
k8s_api_sources = var.k8s_api_sources
cluster_ipv4_cidr_block = var.cluster_ipv4_cidr_block
k8s_api_sources = var.k8s_api_sources
cluster_ipv4_cidr_block = var.cluster_ipv4_cidr_block
router_nat_ip_allocate_option = var.router_nat_ip_allocate_option
enable_endpoint_independent_mapping = var.enable_endpoint_independent_mapping

# autoscaling
gke_enable_node_autoprovisioning = var.gke_enable_node_autoprovisioning
Expand Down
12 changes: 12 additions & 0 deletions terraform/aptos-node-testnet/gcp/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,18 @@ variable "cluster_ipv4_cidr_block" {
default = ""
}

# NAT IP allocation mode. This module only forwards the value to the
# "validator" module under the same name.
variable "router_nat_ip_allocate_option" {
  description = "The method of NAT IP allocation for the cluster. See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_router_nat#nat_ip_allocate_option"
  type        = string
  default     = "MANUAL_ONLY"

  # Reject typos at plan time instead of failing later in the provider.
  validation {
    condition     = contains(["MANUAL_ONLY", "AUTO_ONLY"], var.router_nat_ip_allocate_option)
    error_message = "The router_nat_ip_allocate_option value must be either \"MANUAL_ONLY\" or \"AUTO_ONLY\"."
  }
}

# Toggle for endpoint independent mapping on the NAT router (defaults to true).
# This module only forwards the value to the "validator" module under the same
# name.
variable "enable_endpoint_independent_mapping" {
description = "Enable endpoint independent mapping for the NAT router"
type = bool
default = true
}

variable "enable_clouddns" {
description = "Enable CloudDNS (Google-managed cluster DNS)"
type = bool
Expand Down
14 changes: 9 additions & 5 deletions terraform/aptos-node/gcp/network.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,13 @@ resource "google_compute_address" "nat" {
}

# Cloud NAT configuration attached to google_compute_router.nat.
# NOTE(review): this listing looks like a rendered diff with both sides shown,
# so the first five attributes (with the literal "MANUAL_ONLY") appear to be the
# pre-change lines and the ones after them their replacements — confirm against
# the actual file.
resource "google_compute_router_nat" "nat" {
name = "aptos-${local.workspace_name}-nat"
router = google_compute_router.nat.name
nat_ip_allocate_option = "MANUAL_ONLY"
nat_ips = [google_compute_address.nat.self_link]
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_PRIMARY_IP_RANGES"
name = "aptos-${local.workspace_name}-nat"
router = google_compute_router.nat.name
nat_ip_allocate_option = var.router_nat_ip_allocate_option
# The reserved address is only attached in MANUAL_ONLY mode; otherwise nat_ips is left unset.
nat_ips = var.router_nat_ip_allocate_option == "MANUAL_ONLY" ? [google_compute_address.nat.self_link] : null
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_PRIMARY_IP_RANGES"
# Left unset in MANUAL_ONLY mode; any other allocation mode uses a minimum of 32 ports per VM.
min_ports_per_vm = var.router_nat_ip_allocate_option == "MANUAL_ONLY" ? null : 32
enable_endpoint_independent_mapping = var.enable_endpoint_independent_mapping
# EndpointIndependentMapping and DynamicPortAllocation are mutually exclusive.
enable_dynamic_port_allocation = !var.enable_endpoint_independent_mapping
}
12 changes: 12 additions & 0 deletions terraform/aptos-node/gcp/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,18 @@ variable "cluster_ipv4_cidr_block" {
default = ""
}

# NAT IP allocation mode. It is assigned to nat_ip_allocate_option on
# google_compute_router_nat.nat; that resource attaches the reserved NAT address
# only when the value is "MANUAL_ONLY".
variable "router_nat_ip_allocate_option" {
  description = "The method of NAT IP allocation for the cluster. See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_router_nat#nat_ip_allocate_option"
  type        = string
  default     = "MANUAL_ONLY"

  # Reject typos at plan time instead of failing later in the provider.
  validation {
    condition     = contains(["MANUAL_ONLY", "AUTO_ONLY"], var.router_nat_ip_allocate_option)
    error_message = "The router_nat_ip_allocate_option value must be either \"MANUAL_ONLY\" or \"AUTO_ONLY\"."
  }
}

# Toggle for endpoint independent mapping on the NAT router (defaults to true).
# google_compute_router_nat.nat sets enable_dynamic_port_allocation to the
# negation of this value, because the two features are mutually exclusive.
variable "enable_endpoint_independent_mapping" {
description = "Enable endpoint independent mapping for the NAT router"
type = bool
default = true
}

variable "enable_clouddns" {
description = "Enable CloudDNS (Google-managed cluster DNS)"
type = bool
Expand Down
14 changes: 9 additions & 5 deletions terraform/fullnode/gcp/network.tf
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,13 @@ resource "google_compute_address" "nat" {
}

# Cloud NAT configuration attached to google_compute_router.nat.
# NOTE(review): this listing looks like a rendered diff with both sides shown,
# so the first five attributes (with the literal "MANUAL_ONLY") appear to be the
# pre-change lines and the ones after them their replacements — confirm against
# the actual file.
resource "google_compute_router_nat" "nat" {
name = "aptos-${terraform.workspace}-nat"
router = google_compute_router.nat.name
nat_ip_allocate_option = "MANUAL_ONLY"
nat_ips = [google_compute_address.nat.self_link]
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_PRIMARY_IP_RANGES"
name = "aptos-${terraform.workspace}-nat"
router = google_compute_router.nat.name
nat_ip_allocate_option = var.router_nat_ip_allocate_option
# The reserved address is only attached in MANUAL_ONLY mode; otherwise nat_ips is left unset.
nat_ips = var.router_nat_ip_allocate_option == "MANUAL_ONLY" ? [google_compute_address.nat.self_link] : null
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_PRIMARY_IP_RANGES"
# Left unset in MANUAL_ONLY mode; any other allocation mode uses a minimum of 32 ports per VM.
min_ports_per_vm = var.router_nat_ip_allocate_option == "MANUAL_ONLY" ? null : 32
enable_endpoint_independent_mapping = var.enable_endpoint_independent_mapping
# EndpointIndependentMapping and DynamicPortAllocation are mutually exclusive.
enable_dynamic_port_allocation = !var.enable_endpoint_independent_mapping
}
12 changes: 12 additions & 0 deletions terraform/fullnode/gcp/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,18 @@ variable "workspace_name_override" {

### GKE cluster config

# NAT IP allocation mode. It is assigned to nat_ip_allocate_option on
# google_compute_router_nat.nat; that resource attaches the reserved NAT address
# only when the value is "MANUAL_ONLY".
variable "router_nat_ip_allocate_option" {
  description = "The method of NAT IP allocation for the cluster. See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_router_nat#nat_ip_allocate_option"
  type        = string
  default     = "MANUAL_ONLY"

  # Reject typos at plan time instead of failing later in the provider.
  validation {
    condition     = contains(["MANUAL_ONLY", "AUTO_ONLY"], var.router_nat_ip_allocate_option)
    error_message = "The router_nat_ip_allocate_option value must be either \"MANUAL_ONLY\" or \"AUTO_ONLY\"."
  }
}

# Toggle for endpoint independent mapping on the NAT router. It defaults to
# false here. google_compute_router_nat.nat sets enable_dynamic_port_allocation
# to the negation of this value (the two features are mutually exclusive), so
# the default leaves dynamic port allocation enabled.
variable "enable_endpoint_independent_mapping" {
description = "Enable endpoint independent mapping for the NAT router"
type = bool
default = false
}

variable "enable_clouddns" {
description = "Enable CloudDNS (Google-managed cluster DNS)"
type = bool
Expand Down
6 changes: 6 additions & 0 deletions terraform/helm/aptos-node/templates/fullnode.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,17 @@ spec:
{{- include "aptos-validator.selectorLabels" $ | nindent 8 }}
app.kubernetes.io/name: fullnode
app.kubernetes.io/instance: fullnode-{{$i}}
{{- if $.Values.chain.name }}
chain_name: {{ $.Values.chain.name }}
{{- end}}
group: {{ .name }}
annotations:
checksum/fullnode.yaml: {{ tpl ($.Files.Get "files/configs/fullnode.yaml") $ | sha256sum }}
prometheus.io/scrape: "true"
prometheus.io/port: "9101"
{{- if $.Values.metrics.destination }}
aptos.dev/metrics-destination: {{ $.Values.metrics.destination }}
{{- end}}
spec:
terminationGracePeriodSeconds: 0
securityContext:
Expand Down
6 changes: 6 additions & 0 deletions terraform/helm/aptos-node/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,16 @@ spec:
{{- include "aptos-validator.selectorLabels" $ | nindent 8 }}
app.kubernetes.io/name: validator
app.kubernetes.io/instance: validator-{{$i}}
{{- if $.Values.chain.name }}
chain_name: {{ $.Values.chain.name }}
{{- end}}
annotations:
checksum/validator.yaml: {{ tpl ($.Files.Get "files/configs/validator.yaml") $ | sha256sum }}
prometheus.io/scrape: "true"
prometheus.io/port: "9101"
{{- if $.Values.metrics.destination }}
aptos.dev/metrics-destination: {{ $.Values.metrics.destination }}
{{- end}}
spec:
terminationGracePeriodSeconds: 0
securityContext:
Expand Down
4 changes: 4 additions & 0 deletions terraform/helm/aptos-node/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ serviceAccount:
# -- The name of the service account to use. If not set and create is true, a name is generated using the fullname template
name:

# Metrics routing. When `destination` is non-empty, the validator and fullnode
# pod templates add it as the `aptos.dev/metrics-destination` annotation; when
# it is empty the annotation is omitted.
metrics:
# -- The upstream sink for metrics. Supported values are "dev" and "prod"
destination: dev

# -- Load test-data for starting a test network
loadTestGenesis: false

Expand Down
27 changes: 21 additions & 6 deletions terraform/helm/fullnode/files/backup/gcs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,27 @@ commands:
FILE_HANDLE="$BACKUP_HANDLE/$FILE_NAME"
echo "$FILE_HANDLE"
exec 1>&- # close stdout
gzip -c | gsutil -q cp - "gs://$BUCKET/$SUB_DIR/$FILE_HANDLE" > /dev/null
open_for_read: 'gsutil -q cp "gs://$BUCKET/$SUB_DIR/$FILE_HANDLE" - | gzip -cd'
save_metadata_line: |
gzip -c | gcloud storage cp - "gs://$BUCKET/$SUB_DIR/$FILE_HANDLE" > /dev/null
# open_for_read: writes the gunzipped contents of gs://$BUCKET/$SUB_DIR/$FILE_HANDLE
# to stdout.
# - The object is first downloaded into a temp file (removed by the EXIT trap),
#   so nothing is written to stdout until a download attempt has succeeded.
# - Up to 5 attempts are made; before retry N the script sleeps 10*N seconds.
# - gcloud output and retry messages go to stderr, keeping stdout clean.
# - After a successful download the script exits right after the gzip pipeline;
#   if every attempt fails it exits with status 1.
# NOTE(review): `{0..4}` is a brace expansion, which plain POSIX sh does not
# perform — confirm these commands are run under bash.
open_for_read: |
TEMP=$(mktemp)
trap "rm -f $TEMP" EXIT
for try in {0..4}
do
if [ $try -gt 0 ]; then
SLEEP=$((10 * $try))
echo "sleeping for $SLEEP seconds before retry #$try" >&2
sleep $SLEEP
fi
gcloud storage cp "gs://$BUCKET/$SUB_DIR/$FILE_HANDLE" $TEMP 1>&2 || continue
cat $TEMP | gzip -cd
exit
done
echo "Failed after 5 tries" >&2
exit 1
save_metadata_line: |
FILE_HANDLE="metadata/$FILE_NAME"
echo "$FILE_HANDLE"
exec 1>&-
gzip -c | gsutil -q cp - "gs://$BUCKET/$SUB_DIR/$FILE_HANDLE" > /dev/null
list_metadata_files: '(gsutil -q ls gs://$BUCKET/$SUB_DIR/metadata/ ||:) | sed -ne "s#gs://.*/metadata/#metadata/#p"'
backup_metadata_file: 'gsutil mv gs://$BUCKET/$SUB_DIR/metadata/$FILE_NAME gs://$BUCKET/$SUB_DIR/metadata_backup/$FILE_NAME'
gzip -c | gcloud storage cp - "gs://$BUCKET/$SUB_DIR/$FILE_HANDLE" > /dev/null
list_metadata_files: '(gcloud storage ls gs://$BUCKET/$SUB_DIR/metadata/ ||:) | sed -ne "s#gs://.*/metadata/#metadata/#p"'
backup_metadata_file: "gcloud storage mv gs://$BUCKET/$SUB_DIR/metadata/$FILE_NAME gs://$BUCKET/$SUB_DIR/metadata_backup/$FILE_NAME"
4 changes: 4 additions & 0 deletions terraform/helm/fullnode/templates/backup-compaction.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ spec:
labels:
{{- include "backup.selectorLabels" . | nindent 12 }}
app.kubernetes.io/name: backup-compaction
annotations:
{{- if $.Values.metrics.destination }}
aptos.dev/metrics-destination: {{ $.Values.metrics.destination }}
{{- end}}
spec:
restartPolicy: Never
terminationGracePeriodSeconds: 0
Expand Down
4 changes: 4 additions & 0 deletions terraform/helm/fullnode/templates/backup-verify.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ spec:
labels:
{{- include "backup.selectorLabels" . | nindent 12 }}
app.kubernetes.io/name: backup-verify
annotations:
{{- if $.Values.metrics.destination }}
aptos.dev/metrics-destination: {{ $.Values.metrics.destination }}
{{- end}}
spec:
restartPolicy: Never
terminationGracePeriodSeconds: 0
Expand Down
8 changes: 7 additions & 1 deletion terraform/helm/fullnode/templates/backup.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{- if $.Values.backup.enable }}
apiVersion: v1
kind: ConfigMap
metadata:
Expand All @@ -18,7 +19,7 @@ metadata:
app.kubernetes.io/name: backup
spec:
serviceName: {{ include "backup.fullname" . }}-backup
replicas: {{ int .Values.backup.enable }}
replicas: 1
podManagementPolicy: Parallel
selector:
matchLabels:
Expand All @@ -29,6 +30,10 @@ spec:
labels:
{{- include "backup.selectorLabels" . | nindent 8 }}
app.kubernetes.io/name: backup
annotations:
{{- if $.Values.metrics.destination }}
aptos.dev/metrics-destination: {{ $.Values.metrics.destination }}
{{- end}}
spec:
terminationGracePeriodSeconds: 0
containers:
Expand Down Expand Up @@ -113,3 +118,4 @@ spec:
imagePullSecrets:
- name: {{.Values.imagePullSecret}}
{{- end }}
{{- end }}
6 changes: 6 additions & 0 deletions terraform/helm/fullnode/templates/fullnode.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@ spec:
labels:
{{- include "aptos-fullnode.selectorLabels" . | nindent 8 }}
app.kubernetes.io/name: fullnode
{{- if $.Values.chain.name }}
chain_name: {{ $.Values.chain.name }}
{{- end}}
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "9101"
{{- if $.Values.metrics.destination }}
aptos.dev/metrics-destination: {{ $.Values.metrics.destination }}
{{- end}}
spec:
terminationGracePeriodSeconds: 0
initContainers:
Expand Down
4 changes: 4 additions & 0 deletions terraform/helm/fullnode/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ logging:
# -- Address for remote logging
address:

# Metrics routing. When `destination` is non-empty, the fullnode, backup,
# backup-verify and backup-compaction pod templates add it as the
# `aptos.dev/metrics-destination` annotation; when it is empty the annotation is
# omitted.
metrics:
# -- The upstream sink for metrics. Supported values are "dev" and "prod"
destination: dev

backup:
image:
# -- Image repo to use for backup images
Expand Down

0 comments on commit f21aee5

Please sign in to comment.