From 44af0b6eb3247ef6cdbaa206ad9765c5a2b4a218 Mon Sep 17 00:00:00 2001
From: Richard Hillmann
Date: Thu, 16 Mar 2017 23:45:24 +0100
Subject: [PATCH] save update

---
 Dockerfile         |   2 +-
 README.md          | 100 ++++++++++++++++++++++++++++++++++++++++++++-
 docker-compose.yml |  47 ++++++++++++++++++++-
 exporter.go        |  16 ++------
 prometheus.yml     |  25 ++++++++++++
 5 files changed, 174 insertions(+), 16 deletions(-)
 create mode 100644 prometheus.yml

diff --git a/Dockerfile b/Dockerfile
index dcf1154..0c96203 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-# use prometheus busybox with glibc for golang compatibility
+# use prometheus busybox with glibc for better golang and DNS resolution support
 FROM prom/busybox:glibc
 
 MAINTAINER John Doe
diff --git a/README.md b/README.md
index 500c53b..90a5926 100644
--- a/README.md
+++ b/README.md
@@ -38,4 +38,102 @@ or with docker-compose
 docker-compose up
 ```
 
-Metrics should now be availabe on `http://localhost:8081/metrics`
+Metrics should now be available on `http://localhost:8081/metrics`
+
+Bonus
+=====
+
+1. What are good ways of deploying hundreds of instances of our simulated
+   service? How would you deploy your exporter? And how would you configure
+   Prometheus to monitor them all?
+
+   Pretty easy with *Kubernetes*.
+   Just run the exporter alongside the app in a pod, managed by a ReplicationController:
+   ```yaml
+   apiVersion: v1
+   kind: ReplicationController
+   metadata:
+     name: replicated-app
+   spec:
+     replicas: 100
+     selector:
+       app: exportedApp
+     template:
+       metadata:
+         name: pod-app
+         annotations:
+           prometheus.io/scrape: "true"
+           prometheus.io/port: "8081"
+         labels:
+           app: exportedApp
+       spec:
+         containers:
+         - name: challenge
+           image: beorn7/syseng-challenge
+           ports:
+           - containerPort: 8080
+         - name: exporter
+           image: exporter
+           ports:
+           - containerPort: 8081
+   ```
+
+   Then just use the Kubernetes service discovery in Prometheus:
+   ```yaml
+   - job_name: kube-app
+     kubernetes_sd_configs:
+     - role: pod
+     relabel_configs:
+     - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+       action: keep
+       regex: true
+     - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+       action: replace
+       regex: ([^:]+)(?::\d+)?;(\d+)
+       replacement: $1:$2
+       target_label: __address__
+     - source_labels: [__meta_kubernetes_pod_name]
+       action: replace
+       target_label: podApp
+   ```
+
+   DNS-based discovery could be an alternative, for example with CoreDNS.
+
+2. What graphs about the service would you plot in a dashboard builder like Grafana?
+
+   As a rule, only graph things that require attention.
+   It does not make sense to plot metrics that nobody will ever act on. Less is more.
+
+   Assuming we run a whole fleet of this service and monitor all instances, it makes sense to graph them aggregated into groups.
+
+   *Graph* Request rates per code (QPS):
+   ```
+   sum(app_request_rates) by (code)
+   ```
+
+   *Graph* Highest latencies:
+   ```
+   max(app_duration_avg)
+   ```
+
+   *Singlestat* Running instances:
+   ```
+   count_scalar(app_up == 1)
+   ```
+
+3. What would you alert on? What would be the urgency of the various alerts?
+
+   High: too few app instances are up to handle all requests.
+   Medium/High: request latencies are too high (urgency depends on how far they exceed the target).
+   Medium: too many failed requests (5xx) compared to successful ones (2xx).
+   Example rules are sketched below.
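+   For illustration, a rough sketch of such rules in the Prometheus 1.x
+   alert-rule format. The alert names and thresholds are made up here and
+   would need tuning; the metric names are the ones used in the queries above:
+   ```
+   # Page when fewer than 80 of the expected 100 instances are up.
+   ALERT TooFewAppInstances
+     IF count(app_up == 1) < 80
+     FOR 5m
+     LABELS { severity = "high" }
+     ANNOTATIONS { summary = "Too few app instances are up" }
+
+   # Warn when latency gets too high (threshold and unit are placeholders).
+   ALERT HighRequestLatency
+     IF max(app_duration_avg) > 0.5
+     FOR 5m
+     LABELS { severity = "medium" }
+     ANNOTATIONS { summary = "Request latency above target" }
+
+   # Warn when more than 5% of requests fail with a 5xx code.
+   ALERT TooManyFailedRequests
+     IF sum(app_request_rates{code=~"5.."}) / sum(app_request_rates) > 0.05
+     FOR 10m
+     LABELS { severity = "medium" }
+     ANNOTATIONS { summary = "Too many 5xx responses" }
+   ```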
+
+4. If you were in control of the microservice, which exported metrics would you
+   add or modify next?
+
+   This depends a bit on the service, but the following would probably be useful:
+   - CPU/RAM utilization, and probably network throughput.
+   - Average request duration per status code and method (sketched below).
+   - Request rates per status code and method.
+
+   In general, export more metrics than you strictly need at the moment.
+   Better too many than too few: an issue can often be debugged with a metric
+   that is not actively monitored.
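+
+   As an illustration only, assuming the service is written in Go and uses
+   `prometheus/client_golang`, a minimal sketch of exporting such a duration
+   metric (metric and handler names are made up for this example):
+   ```go
+   package main
+
+   import (
+       "log"
+       "net/http"
+       "time"
+
+       "github.com/prometheus/client_golang/prometheus"
+       "github.com/prometheus/client_golang/prometheus/promhttp"
+   )
+
+   // requestDuration tracks request latency, partitioned by status code and method.
+   var requestDuration = prometheus.NewHistogramVec(
+       prometheus.HistogramOpts{
+           Name: "app_request_duration_seconds",
+           Help: "Request duration in seconds by status code and method.",
+       },
+       []string{"code", "method"},
+   )
+
+   func init() {
+       prometheus.MustRegister(requestDuration)
+   }
+
+   // handler does the actual work and records how long it took.
+   func handler(w http.ResponseWriter, r *http.Request) {
+       start := time.Now()
+       // ... real request handling would happen here ...
+       w.WriteHeader(http.StatusOK)
+       requestDuration.WithLabelValues("200", r.Method).Observe(time.Since(start).Seconds())
+   }
+
+   func main() {
+       http.HandleFunc("/", handler)
+       http.Handle("/metrics", promhttp.Handler())
+       log.Fatal(http.ListenAndServe(":8080", nil))
+   }
+   ```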
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index e76de7e..defb683 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,3 +1,7 @@
+app:
+  image: beorn7/syseng-challenge
+  ports:
+    - 8080:8080
 exporter:
   build: ./
   command: -endpoint=http://app:8080/stats -listen :8081 --timeout 200
@@ -5,7 +9,46 @@ exporter:
     - app
   ports:
     - 8081:8081
-app:
+# Just for full-stack testing
+app_2:
   image: beorn7/syseng-challenge
+exporter_2:
+  build: ./
+  command: -endpoint=http://app:8080/stats -listen :8081 --timeout 200 --environment=production
+  links:
+    - app_2:app
+app_3:
+  image: beorn7/syseng-challenge
+exporter_3:
+  build: ./
+  command: -endpoint=http://app:8080/stats -listen :8081 --timeout 200 --environment=production
+  links:
+    - app_3:app
+app_4:
+  image: beorn7/syseng-challenge
+exporter_4:
+  build: ./
+  command: -endpoint=http://app:8080/stats -listen :8081 --timeout 200 --environment=development
+  links:
+    - app_4:app
+prometheus:
+  # There is an issue with docker links and resolving entries in /etc/hosts, open PR: https://github.com/prometheus/prometheus/pull/2502
+  # image: prom/prometheus:latest
+  # use our own locally built image instead:
+  image: prom
+  links:
+    - exporter
+    - exporter_2
+    - exporter_3
+    - exporter_4
+  volumes:
+    - ./prometheus.yml:/etc/prometheus/prometheus.yml
+  ports:
+    - 8082:9090
+grafana:
+  image: grafana/grafana:latest
+  links:
+    - prometheus
   ports:
-    - 8080:8080
\ No newline at end of file
+    - 8083:3000
\ No newline at end of file
diff --git a/exporter.go b/exporter.go
index 5a57e38..a2445bb 100644
--- a/exporter.go
+++ b/exporter.go
@@ -9,7 +9,6 @@ import (
 	"net/http"
 	"net/url"
 	"strings"
-	"syscall"
 	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
@@ -27,7 +26,7 @@ var (
 	listenAddress = flag.String("listen", ":8080", "The address to listen on for HTTP requests.")
 	endpointApp   = flag.String("endpoint", "http://localhost:8050/stats", "HTTP API address of the application")
 	timeoutApp    = flag.Int("timeout", 500, "Connection timeout in ms")
-	hostname      = flag.String("hostname", "", "Optional hostname which will be added to the exported metrics (defaults to $HOSTNAME)")
+	environment   = flag.String("environment", "", "Optional environment which will be added to the exported metrics")
 
 	prometheusConstLabel = parseConstLabel()
 )
@@ -36,16 +35,9 @@ func parseConstLabel() prometheus.Labels {
 	// parse flags in an early state, so we can retrieve the instance id
 	flag.Parse()
 
-	if *hostname != "" {
-		// Try to set hostname from env var, so we can see if an pod does not work as expected
-		if value, found := syscall.Getenv("HOSTNAME"); found {
-			hostname = &value
-		}
-	}
-
-	// generate constant label if hostname is present
-	if *hostname != "" {
-		return prometheus.Labels{"hostname": *hostname}
+	// generate constant label if environment is present
+	if *environment != "" {
+		return prometheus.Labels{"environment": *environment}
 	}
 	return prometheus.Labels{}
 }
diff --git a/prometheus.yml b/prometheus.yml
new file mode 100644
index 0000000..e117129
--- /dev/null
+++ b/prometheus.yml
@@ -0,0 +1,25 @@
+global:
+  scrape_interval: 15s # By default, scrape targets every 15 seconds.
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'codelab-monitor'
+
+# A scrape configuration containing exactly one endpoint to scrape:
+# Here it's Prometheus itself.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'prometheus'
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+    static_configs:
+      - targets: ['127.0.0.1:9090']
+
+  # Add our new exporters (assume the docker links from docker-compose.yml)
+  - job_name: 'app'
+    scrape_interval: 1s
+    static_configs:
+      - targets: ['exporter:8081', 'exporter_2:8081', 'exporter_3:8081', 'exporter_4:8081']
+        labels:
+          group: 'lab'
\ No newline at end of file
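
With the compose file and the scrape config above in place, a quick sanity check
(assuming all containers came up and Prometheus is reachable on the mapped port
8082) is that this query returns 4, one per exporter:
```
count(up{group="lab"} == 1)
```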