IBM
diff --git a/‎metrics-collector/README.md
Lines changed: 79 additions & 2 deletions b/‎metrics-collector/README.md
Lines changed: 79 additions & 2 deletions
diff --git a/‎metrics-collector/images/icl-dashboard-import-confirm.png
61.4 KB b/‎metrics-collector/images/icl-dashboard-import-confirm.png
61.4 KB
diff --git a/‎metrics-collector/images/icl-dashboard-import.png
62.2 KB b/‎metrics-collector/images/icl-dashboard-import.png
62.2 KB
diff --git a/‎metrics-collector/images/icl-dashboard-new.png
24.2 KB b/‎metrics-collector/images/icl-dashboard-new.png
24.2 KB
diff --git a/‎metrics-collector/images/icl-dashboard-overview.png
416 KB b/‎metrics-collector/images/icl-dashboard-overview.png
416 KB
diff --git a/‎metrics-collector/images/icl-logs-view-columns.png
158 KB b/‎metrics-collector/images/icl-logs-view-columns.png
158 KB
diff --git a/‎metrics-collector/images/icl-logs-view-filters.png
28.7 KB b/‎metrics-collector/images/icl-logs-view-filters.png
28.7 KB
diff --git a/‎metrics-collector/images/icl-logs-view-graph.png
45.4 KB b/‎metrics-collector/images/icl-logs-view-graph.png
45.4 KB
diff --git a/‎metrics-collector/images/icl-logs-view-overview.png
864 KB b/‎metrics-collector/images/icl-logs-view-overview.png
864 KB
diff --git a/‎metrics-collector/images/icl-logs-view-query.png
47 KB b/‎metrics-collector/images/icl-logs-view-query.png
47 KB
diff --git a/‎metrics-collector/images/icl-logs-view-save.png
54.5 KB b/‎metrics-collector/images/icl-logs-view-save.png
54.5 KB
diff --git a/‎metrics-collector/main.go
Lines changed: 13 additions & 6 deletions b/‎metrics-collector/main.go
Lines changed: 13 additions & 6 deletions
@@ -2,6 +2,8 @@
 
 Code Engine job that demonstrates how to collect resource metrics (CPU, memory and disk usage) of running Code Engine apps, jobs, and builds
 
+![Dashboard overview](./images/icl-dashboard-overview.png)
+
 ## Installation
 
 ### Capture metrics every n seconds
@@ -17,11 +19,11 @@ $ ibmcloud ce job create \
     --wait
 ```
 
-* Submit a daemon job that collects metrics in an endless loop. The daemon job queries the Metrics API every 10 seconds
+* Submit a daemon job that collects metrics in an endless loop. The daemon job queries the Metrics API every 30 seconds
 ```
 $ ibmcloud ce jobrun submit \
     --job metrics-collector \
-    --env INTERVAL=10 
+    --env INTERVAL=30 
 ```
 
 
@@ -57,6 +59,81 @@ One can use the environment variable `COLLECT_DISKUSAGE=true` to also collect th
 
 Once your IBM Cloud Code Engine project has detected a corresponding IBM Cloud Logs instance, which is configured to receive platform logs, you can consume the resource metrics in IBM Cloud Logs. Use the filter `metric:instance-resources` to filter for log lines that print resource metrics for each detected IBM Cloud Code Engine instance that is running in a project.
 
+### Custom dashboard
+
+Follow the steps below to create a custom dashboard in your IBM Cloud Logs instance, to gain insights into resource consumption metrics.
+
+![Dashboard overview](./images/icl-dashboard-overview.png)
+
+**Setup instructions:**
+
+* Navigate to the "Custom dashboards" view, hover over the "New" button, and click "Import dashboard"
+
+![New dashboard](./images/icl-dashboard-new.png)
+
+* In the "Import" modal, select the file [./setup/dashboard-code_engine_resource_consumption_metrics.json](./setup/dashboard-code_engine_resource_consumption_metrics.json) located in this repository, and click "Import"
+
+![Import modal](./images/icl-dashboard-import.png)
+
+* Confirm the import by clicking "Import" again
+
+![Import confirmation](./images/icl-dashboard-import-confirm.png)
+
+
+### Logs view
+
+Follow the steps below to create a Logs view in your IBM Cloud Logs instance, that allows you to drill into individual instance-resources log lines.
+
+![Logs overview](./images/icl-logs-view-overview.png)
+
+**Setup instructions:**
+
+* Show only the log lines that contain the collected resource metrics, by filtering for the following query
+```
+app:"codeengine" AND message.metric:"instance-resources"
+```
+
+![Query](./images/icl-logs-view-query.png)
+
+* In the left bar, click "Add Filter" and add the following filters
+    * `Application`
+    * `App`
+    * `Label.Project`
+    * `Message.Component_name`
+
+![Filters](./images/icl-logs-view-filters.png)
+
+* In the top-right corner, click on "Columns" and configure the following columns:
+    * `Timestamp`
+    * `label.Project`
+    * `message.component_type`
+    * `message.component_name`
+    * `message.message`
+    * `Text`
+
+![Columns](./images/icl-logs-view-columns.png)
+
+* Once applied, adjust the column widths appropriately
+
+* In the top-right corner, select `1-line` as view mode
+
+![View](./images/icl-logs-view-mode.png)
+
+* In the graph title it says "**Count** all grouped by **Severity**". Click on `Severity` and select `message.component_name` instead. Furthermore, select `Max` as aggregation metric and choose `message.memory.usage` as aggregation field
+
+![Graph](./images/icl-logs-view-graph.png)
+
+* Save the view
+
+![Save](./images/icl-logs-view-save.png)
+
+* Use the custom logs view to drill into individual instance-resources log lines
+
+![Logs overview](./images/icl-logs-view-overview.png)
+
+
+## IBM Log Analysis setup (deprecated)
+
 ### Log lines
 
 Along with a human-readable message, like `Captured metrics of app instance 'load-generator-00001-deployment-677d5b7754-ktcf6': 3m vCPU, 109 MB memory, 50 MB ephemeral storage`, each log line passes specific resource utilization details in a structured way, allowing you to apply advanced filters on them.
 
@@ -34,9 +34,12 @@ func main() {
 	}
 
 	// If the 'INTERVAL' env var is set then sleep for that many seconds
-	sleepDuration := 10
+	sleepDuration := 30
 	if t := os.Getenv("INTERVAL"); t != "" {
 		sleepDuration, _ = strconv.Atoi(t)
+		if sleepDuration < 30 {
+			sleepDuration = 30
+		}
 	}
 
 	// In daemon mode, collect resource metrics in an endless loop
@@ -111,10 +114,10 @@ func collectInstanceMetrics() {
 
 	// fetches all pods
 	pods := getAllPods(coreClientset, namespace, config)
-	
+
 	// fetch all pod metrics
 	podMetrics := getAllPodMetrics(namespace, config)
-	
+
 	var wg sync.WaitGroup
 
 	for _, metric := range *podMetrics {
@@ -258,7 +261,7 @@ func getAllPods(coreClientset *kubernetes.Clientset, namespace string, config *r
 
 // Helper function to obtain the disk usage of a container in a pod (returns 0 unless COLLECT_DISKUSAGE=true)
 func obtainDiskUsage(coreClientset *kubernetes.Clientset, namespace string, pod string, container string, config *rest.Config) float64 {
-	
+
 	// per default, we do not collect disk space statistics
 	if os.Getenv("COLLECT_DISKUSAGE") != "true" {
 		return 0
@@ -304,12 +307,16 @@ func obtainDiskUsage(coreClientset *kubernetes.Clientset, namespace string, pod
 
 		// Render captured system error messages, in case the stdout stream did not receive any valid content
 		if err != nil {
-			fmt.Println("obtainDiskUsage of pod:" + pod + "/container:" + container + " failed with a stream err - " + err.Error() + " - stderr: '" + errBuf.String() + "'")
+			if err.Error() == "Internal error occurred: failed calling webhook \"validating.webhook.pod-exec-auth-check.codeengine.cloud.ibm.com\": failed to call webhook: Post \"https://validating-webhook-serving.ibm-cfn-system.svc:443/validate/pod-exec?timeout=5s\": EOF" {
+				// Do nothing and silently ignore this issue as it is most likely related to pod terminations
+			} else {
+				fmt.Println("obtainDiskUsage of pod:" + pod + "/container:" + container + " failed with a stream err - " + err.Error() + " - stderr: '" + errBuf.String() + "'")
+			}
 		}
 
 		return float64(0)
 	}
-	
+
 	// Parse the output "4000   /" by splitting the words
 	diskUsageOutput := strings.Fields(strings.TrimSuffix(diskUsageOutputStr, "\n"))
 	if len(diskUsageOutput) > 2 {