From 44af0b6eb3247ef6cdbaa206ad9765c5a2b4a218 Mon Sep 17 00:00:00 2001
From: Richard Hillmann
Date: Thu, 16 Mar 2017 23:45:24 +0100
Subject: [PATCH] save update

---
 Dockerfile         |   2 +-
 README.md          | 100 ++++++++++++++++++++++++++++++++++++++++++++-
 docker-compose.yml |  47 ++++++++++++++++++++-
 exporter.go        |  16 ++------
 prometheus.yml     |  25 ++++++++++++
 5 files changed, 174 insertions(+), 16 deletions(-)
 create mode 100644 prometheus.yml

diff --git a/Dockerfile b/Dockerfile
index dcf1154..0c96203 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-# use prometheus busybox with glibc for golang compatibility
+# use prometheus busybox with glibc for better golang and DNS resolution support
 FROM prom/busybox:glibc
 
 MAINTAINER John Doe
diff --git a/README.md b/README.md
index 500c53b..90a5926 100644
--- a/README.md
+++ b/README.md
@@ -38,4 +38,102 @@ or with docker-compose
 docker-compose up
 ```
 
-Metrics should now be availabe on `http://localhost:8081/metrics`
+Metrics should now be available on `http://localhost:8081/metrics`
+
+Bonus
+=====
+
+1. What are good ways of deploying hundreds of instances of our simulated
+   service? How would you deploy your exporter? And how would you configure
+   Prometheus to monitor them all?
+
+   Pretty easy with *Kubernetes*.
+   Just run the exporter alongside the app in a pod, managed by a ReplicationController:
+   ```yaml
+   apiVersion: v1
+   kind: ReplicationController
+   metadata:
+     name: replicated-app
+   spec:
+     replicas: 100
+     selector:
+       app: exportedApp
+     template:
+       metadata:
+         name: pod-app
+         annotations:
+           prometheus.io/scrape: "true"
+           prometheus.io/port: "8081"
+         labels:
+           app: exportedApp
+       spec:
+         containers:
+         - name: challenge
+           image: beorn7/syseng-challenge
+           ports:
+           - containerPort: 8080
+         - name: exporter
+           image: exporter
+           ports:
+           - containerPort: 8081
+   ```
+
+   Then just use the Kubernetes service discovery in Prometheus:
+   ```yaml
+   - job_name: kube-app
+     kubernetes_sd_configs:
+     - role: pod
+     relabel_configs:
+     - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+       action: keep
+       regex: true
+     - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+       action: replace
+       regex: ([^:]+)(?::\d+)?;(\d+)
+       replacement: $1:$2
+       target_label: __address__
+     - source_labels: [__meta_kubernetes_pod_name]
+       action: replace
+       target_label: podApp
+   ```
+
+   DNS-based discovery could be an alternative, for example with CoreDNS.
+
+2. What graphs about the service would you plot in a dashboard builder like Grafana?
+
+   As a rule, only graph things that require attention.
+   It does not make sense to plot metrics that nobody will ever act on. Less is more.
+
+   Assuming we run a whole fleet of this service and monitor all instances, it makes sense to graph them aggregated into groups.
+
+   *Graph* Request rates per code (QPS):
+   ```
+   sum(app_request_rates) by (code)
+   ```
+
+   *Graph* Highest latencies:
+   ```
+   max(app_duration_avg)
+   ```
+
+   *Singlestat* Running instances:
+   ```
+   count_scalar(app_up == 1)
+   ```
+
+3. What would you alert on? What would be the urgency of the various alerts?
+
+   High: too few app instances are up to handle all requests.
+   Medium/High: request latencies are too high (urgency depends on how far they exceed the target).
+   Medium: too many failed requests (5xx) compared to successful ones (2xx).
+   Example rules are sketched below.
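+   For illustration, a rough sketch of such rules in the Prometheus 1.x
+   alert-rule format. The alert names and thresholds are made up here and
+   would need tuning; the metric names are the ones used in the queries above:
+   ```
+   # Page when fewer than 80 of the expected 100 instances are up.
+   ALERT TooFewAppInstances
+     IF count(app_up == 1) < 80
+     FOR 5m
+     LABELS { severity = "high" }
+     ANNOTATIONS { summary = "Too few app instances are up" }
+
+   # Warn when latency gets too high (threshold and unit are placeholders).
+   ALERT HighRequestLatency
+     IF max(app_duration_avg) > 0.5
+     FOR 5m
+     LABELS { severity = "medium" }
+     ANNOTATIONS { summary = "Request latency above target" }
+
+   # Warn when more than 5% of requests fail with a 5xx code.
+   ALERT TooManyFailedRequests
+     IF sum(app_request_rates{code=~"5.."}) / sum(app_request_rates) > 0.05
+     FOR 10m
+     LABELS { severity = "medium" }
+     ANNOTATIONS { summary = "Too many 5xx responses" }
+   ```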
+
+4. If you were in control of the microservice, which exported metrics would you
+   add or modify next?
+
+   This depends a bit on the service, but the following would probably be useful:
+   - CPU/RAM utilization, and probably network throughput.
+   - Average request duration per status code and method (sketched below).
+   - Request rates per status code and method.
+
+   In general, export more metrics than you strictly need at the moment.
+   Better too many than too few: an issue can often be debugged with a metric
+   that is not actively monitored.
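+
+   As an illustration only, assuming the service is written in Go and uses
+   `prometheus/client_golang`, a minimal sketch of exporting such a duration
+   metric (metric and handler names are made up for this example):
+   ```go
+   package main
+
+   import (
+       "log"
+       "net/http"
+       "time"
+
+       "github.com/prometheus/client_golang/prometheus"
+       "github.com/prometheus/client_golang/prometheus/promhttp"
+   )
+
+   // requestDuration tracks request latency, partitioned by status code and method.
+   var requestDuration = prometheus.NewHistogramVec(
+       prometheus.HistogramOpts{
+           Name: "app_request_duration_seconds",
+           Help: "Request duration in seconds by status code and method.",
+       },
+       []string{"code", "method"},
+   )
+
+   func init() {
+       prometheus.MustRegister(requestDuration)
+   }
+
+   // handler does the actual work and records how long it took.
+   func handler(w http.ResponseWriter, r *http.Request) {
+       start := time.Now()
+       // ... real request handling would happen here ...
+       w.WriteHeader(http.StatusOK)
+       requestDuration.WithLabelValues("200", r.Method).Observe(time.Since(start).Seconds())
+   }
+
+   func main() {
+       http.HandleFunc("/", handler)
+       http.Handle("/metrics", promhttp.Handler())
+       log.Fatal(http.ListenAndServe(":8080", nil))
+   }
+   ```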
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index e76de7e..defb683 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,3 +1,7 @@
+app:
+  image: beorn7/syseng-challenge
+  ports:
+    - 8080:8080
 exporter:
   build: ./
   command: -endpoint=http://app:8080/stats -listen :8081 --timeout 200
@@ -5,7 +9,46 @@ exporter:
     - app
   ports:
     - 8081:8081
-app:
+# Just for full-stack testing
+app_2:
   image: beorn7/syseng-challenge
+exporter_2:
+  build: ./
+  command: -endpoint=http://app:8080/stats -listen :8081 --timeout 200 --environment=production
+  links:
+    - app_2:app
+app_3:
+  image: beorn7/syseng-challenge
+exporter_3:
+  build: ./
+  command: -endpoint=http://app:8080/stats -listen :8081 --timeout 200 --environment=production
+  links:
+    - app_3:app
+app_4:
+  image: beorn7/syseng-challenge
+exporter_4:
+  build: ./
+  command: -endpoint=http://app:8080/stats -listen :8081 --timeout 200 --environment=development
+  links:
+    - app_4:app
+prometheus:
+  # There is an issue with docker links and resolving entries in /etc/hosts, open PR: https://github.com/prometheus/prometheus/pull/2502
+  # image: prom/prometheus:latest
+  # use our own locally built image instead:
+  image: prom
+  links:
+    - exporter
+    - exporter_2
+    - exporter_3
+    - exporter_4
+  volumes:
+    - ./prometheus.yml:/etc/prometheus/prometheus.yml
+  ports:
+    - 8082:9090
+grafana:
+  image: grafana/grafana:latest
+  links:
+    - prometheus
   ports:
-    - 8080:8080
\ No newline at end of file
+    - 8083:3000
\ No newline at end of file
diff --git a/exporter.go b/exporter.go
index 5a57e38..a2445bb 100644
--- a/exporter.go
+++ b/exporter.go
@@ -9,7 +9,6 @@ import (
 	"net/http"
 	"net/url"
 	"strings"
-	"syscall"
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
@@ -27,7 +26,7 @@ var (
 	listenAddress = flag.String("listen", ":8080", "The address to listen on for HTTP requests.")
 	endpointApp   = flag.String("endpoint", "http://localhost:8050/stats", "HTTP API address of the application")
 	timeoutApp    = flag.Int("timeout", 500, "Connection timeout in ms")
-	hostname      = flag.String("hostname", "", "Optional hostname which will be added to the exported metrics (defaults to $HOSTNAME)")
+	environment   = flag.String("environment", "", "Optional environment which will be added to the exported metrics")
 
 	prometheusConstLabel = parseConstLabel()
 )
@@ -36,16 +35,9 @@ func parseConstLabel() prometheus.Labels {
 	// parse flags in an early state, so we can retrieve the instance id
 	flag.Parse()
 
-	if *hostname != "" {
-		// Try to set hostname from env var, so we can see if an pod does not work as expected
-		if value, found := syscall.Getenv("HOSTNAME"); found {
-			hostname = &value
-		}
-	}
-
-	// generate constant label if hostname is present
-	if *hostname != "" {
-		return prometheus.Labels{"hostname": *hostname}
+	// generate constant label if environment is present
+	if *environment != "" {
+		return prometheus.Labels{"environment": *environment}
 	}
 	return prometheus.Labels{}
 }
diff --git a/prometheus.yml b/prometheus.yml
new file mode 100644
index 0000000..e117129
--- /dev/null
+++ b/prometheus.yml
@@ -0,0 +1,25 @@
+global:
+  scrape_interval: 15s # By default, scrape targets every 15 seconds.
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'codelab-monitor'
+
+# A scrape configuration containing exactly one endpoint to scrape:
+# Here it's Prometheus itself.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'prometheus'
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+    static_configs:
+      - targets: ['127.0.0.1:9090']
+
+  # Add our new exporters (assume the docker links from docker-compose.yml)
+  - job_name: 'app'
+    scrape_interval: 1s
+    static_configs:
+      - targets: ['exporter:8081', 'exporter_2:8081', 'exporter_3:8081', 'exporter_4:8081']
+        labels:
+          group: 'lab'
\ No newline at end of file
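
With the compose file and the scrape config above in place, a quick sanity check
(assuming all containers came up and Prometheus is reachable on the mapped port
8082) is that this query returns 4, one per exporter:
```
count(up{group="lab"} == 1)
```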