Optimize and align host monitoring example with opentelemetry-collector-releases #636

Merged 3 commits on Jul 24, 2024
9 changes: 9 additions & 0 deletions other-examples/collector/host-monitoring/README.md
@@ -60,4 +60,13 @@ See [get started with querying](https://docs.newrelic.com/docs/query-your-data/e

This example deploys the collector as a kubernetes DaemonSet, running a collector instance on each node in the kubernetes cluster. In this type of configuration, it's common to route application telemetry from each pod to the collector instance running on the same node, and to enrich that telemetry with additional metadata via the [kubernetes attributes processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/k8sattributesprocessor). This example omits that configuration for brevity; a minimal sketch is shown below. See [important components for kubernetes](https://opentelemetry.io/docs/kubernetes/collector/components/#filelog-receiver) for common configuration when running the collector in kubernetes.
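For reference, a minimal sketch of that processor is shown below. The metadata fields are illustrative, not part of this example's configuration:

```yaml
processors:
  k8sattributes:
    # Associate incoming telemetry with a pod via the connection's source IP.
    pod_association:
      - sources:
          - from: connection
    # Attach pod metadata to telemetry as resource attributes.
    extract:
      metadata:
        - k8s.namespace.name
        - k8s.pod.name
        - k8s.deployment.name
```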

This example makes several optimizations to reduce the exported data volume:

* Configures the [transform processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor) to clear each metric's description and unit.
* Configures the [cumulativetodelta processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/cumulativetodeltaprocessor) to transform cumulative host metrics to the delta metrics [preferred by the New Relic OTLP endpoint](https://docs.newrelic.com/docs/opentelemetry/best-practices/opentelemetry-otlp/#metric-aggregation-temporality).
* Disables process metrics by default. These tend to be noisy, and many users prefer to tune them down to the set they find most valuable.
* Configures the [filter processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/filterprocessor), [metricstransform processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/metricstransformprocessor), and [attributes processor](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/attributesprocessor) to drop and merge metric series that are not relevant to the New Relic host UI.

These optimizations can be adjusted as needed, at the expense of higher data ingest. For example, process metrics can be re-enabled by restoring the `process` scraper, as sketched below.
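A minimal sketch of re-enabling per-process CPU metrics, mirroring the commented-out block in [collector.yaml](./k8s/collector.yaml):

```yaml
receivers:
  hostmetrics:
    scrapers:
      # Per-process metrics; noisy, so enable only the series you need.
      process:
        metrics:
          process.cpu.utilization:
            enabled: true
          process.cpu.time:
            enabled: false
```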

In order to demonstrate correlation between OpenTelemetry APM entities and host entities, this example deploys an instance of the opentelemetry demo [AdService](https://opentelemetry.io/docs/demo/services/ad/), defined in [adservice.yaml](./k8s/adservice.yaml). The AdService application is configured to export OTLP data to the collector DaemonSet pod running on the same host. The collector enriches the AdService telemetry with `host.id` (and other attributes) which New Relic uses to create a relationship with the host entity.
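The node-local routing is typically wired up with the kubernetes downward API. A minimal sketch (the endpoint value and port are assumptions; see [adservice.yaml](./k8s/adservice.yaml) for the actual settings):

```yaml
env:
  # Resolve the IP of the node this pod is scheduled on.
  - name: HOST_IP
    valueFrom:
      fieldRef:
        fieldPath: status.hostIP
  # Point the OpenTelemetry SDK at the collector DaemonSet pod on the same node.
  - name: OTEL_EXPORTER_OTLP_ENDPOINT
    value: "http://$(HOST_IP):4318"
```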
156 changes: 138 additions & 18 deletions other-examples/collector/host-monitoring/k8s/collector.yaml
@@ -10,20 +10,29 @@ metadata:
data:
collector-config: |
receivers:
# Keep configuration in sync with: https://github.com/newrelic/opentelemetry-collector-releases/blob/main/configs/nr-otel-collector-agent-linux.yaml
hostmetrics:
root_path: /hostfs
        # Default collection interval is 60s. Lower if you need finer granularity.
        collection_interval: 60s
scrapers:
cpu:
metrics:
system.cpu.time:
enabled: false
system.cpu.utilization:
enabled: true
load:
memory:
metrics:
system.memory.utilization:
enabled: true
          paging:
metrics:
system.paging.utilization:
enabled: false
system.paging.faults:
enabled: false
filesystem:
metrics:
system.filesystem.utilization:
@@ -33,23 +42,31 @@
exclude_mount_points:
mount_points: ["/containers/services"]
match_type: strict
          disk:
            metrics:
              system.disk.merged:
                enabled: false
              system.disk.pending_operations:
                enabled: false
              system.disk.weighted_io_time:
                enabled: false
          network:
            metrics:
              system.network.connections:
                enabled: false
# Uncomment to enable process metrics, which can be noisy but valuable.
# processes:
# process:
# metrics:
# process.cpu.utilization:
# enabled: true
# process.cpu.time:
# enabled: false
          # Mute various errors reading process metrics when running locally in docker.
# Delete for production deployments.
# mute_process_exe_error: true
# mute_process_user_error: true
# mute_process_io_error: true

filelog:
include:
@@ -70,6 +87,91 @@
processors:
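      # Batch telemetry before export to reduce the number of outbound requests.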
batch:

      # Aggregate series to reduce cardinality: average system.cpu.utilization
      # across CPU cores (keeping only the state label), and sum
      # system.paging.operations across types (keeping only the direction label).
      metricstransform:
transforms:
- include: system.cpu.utilization
action: update
operations:
- action: aggregate_labels
label_set: [ state ]
aggregation_type: mean
- include: system.paging.operations
action: update
operations:
- action: aggregate_labels
label_set: [ direction ]
aggregation_type: sum

      # Drop data points for states, types, and devices that the New Relic host UI does not use.
      filter/exclude_cpu_utilization:
metrics:
datapoint:
- 'metric.name == "system.cpu.utilization" and attributes["state"] == "interrupt"'
- 'metric.name == "system.cpu.utilization" and attributes["state"] == "nice"'
- 'metric.name == "system.cpu.utilization" and attributes["state"] == "softirq"'
filter/exclude_memory_utilization:
metrics:
datapoint:
- 'metric.name == "system.memory.utilization" and attributes["state"] == "slab_unreclaimable"'
- 'metric.name == "system.memory.utilization" and attributes["state"] == "inactive"'
- 'metric.name == "system.memory.utilization" and attributes["state"] == "cached"'
- 'metric.name == "system.memory.utilization" and attributes["state"] == "buffered"'
- 'metric.name == "system.memory.utilization" and attributes["state"] == "slab_reclaimable"'
filter/exclude_memory_usage:
metrics:
datapoint:
- 'metric.name == "system.memory.usage" and attributes["state"] == "slab_unreclaimable"'
- 'metric.name == "system.memory.usage" and attributes["state"] == "inactive"'
filter/exclude_filesystem_utilization:
metrics:
datapoint:
- 'metric.name == "system.filesystem.utilization" and attributes["type"] == "squashfs"'
filter/exclude_filesystem_usage:
metrics:
datapoint:
- 'metric.name == "system.filesystem.usage" and attributes["type"] == "squashfs"'
- 'metric.name == "system.filesystem.usage" and attributes["state"] == "reserved"'
filter/exclude_filesystem_inodes_usage:
metrics:
datapoint:
- 'metric.name == "system.filesystem.inodes.usage" and attributes["type"] == "squashfs"'
- 'metric.name == "system.filesystem.inodes.usage" and attributes["state"] == "reserved"'
filter/exclude_system_disk:
metrics:
datapoint:
- 'metric.name == "system.disk.operations" and IsMatch(attributes["device"], "^loop.*") == true'
- 'metric.name == "system.disk.merged" and IsMatch(attributes["device"], "^loop.*") == true'
- 'metric.name == "system.disk.io" and IsMatch(attributes["device"], "^loop.*") == true'
- 'metric.name == "system.disk.io_time" and IsMatch(attributes["device"], "^loop.*") == true'
- 'metric.name == "system.disk.operation_time" and IsMatch(attributes["device"], "^loop.*") == true'
filter/exclude_system_paging:
metrics:
datapoint:
- 'metric.name == "system.paging.usage" and attributes["state"] == "cached"'
- 'metric.name == "system.paging.operations" and attributes["type"] == "cached"'
filter/exclude_network:
metrics:
datapoint:
- 'IsMatch(metric.name, "^system.network.*") == true and attributes["device"] == "lo"'
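      # Remove the type attribute from system.paging.operations data points.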
attributes/exclude_system_paging:
include:
match_type: strict
metric_names:
- system.paging.operations
actions:
- key: type
action: delete

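      # Convert cumulative sums and histograms to the delta temporality preferred
      # by the New Relic OTLP endpoint.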
cumulativetodelta:

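      # Clear each metric's description and unit to reduce the size of every
      # export request.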
transform:
metric_statements:
- context: metric
statements:
- set(description, "")
- set(unit, "")

resourcedetection:
detectors: ["env", "system"]
system:
@@ -81,7 +183,10 @@
resourcedetection/cloud:
detectors: ["gcp", "ec2", "azure"]
timeout: 2s
override: false
ec2:
resource_attributes:
host.name:
enabled: false

exporters:
logging:
@@ -95,7 +200,22 @@
pipelines:
metrics/host:
receivers: [hostmetrics]
          processors:
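            # Processors run in the order listed here.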
- metricstransform
- filter/exclude_cpu_utilization
- filter/exclude_memory_utilization
- filter/exclude_memory_usage
- filter/exclude_filesystem_utilization
- filter/exclude_filesystem_usage
- filter/exclude_filesystem_inodes_usage
- filter/exclude_system_disk
- filter/exclude_network
- attributes/exclude_system_paging
- transform
- resourcedetection
- resourcedetection/cloud
- cumulativetodelta
- batch
exporters: [otlphttp]
logs/host:
receivers: [filelog]