From 0bedff7a1edf6d3a506fc2aaaf1b0a719a2d5925 Mon Sep 17 00:00:00 2001 From: tysker Date: Tue, 6 Jan 2026 10:18:36 +0100 Subject: [PATCH 1/8] docs: add project roadmap checklist --- docs/project-checklist.md | 190 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 docs/project-checklist.md diff --git a/docs/project-checklist.md b/docs/project-checklist.md new file mode 100644 index 0000000..481eb8f --- /dev/null +++ b/docs/project-checklist.md @@ -0,0 +1,190 @@ +# Cloud DevOps Lab — Project Roadmap + +This document is the **authoritative roadmap and checklist** for the Cloud DevOps Lab. +It reflects the real implementation state of the project and defines what has been +completed, what is in progress, and what belongs to future expansion. + +--- + +## 1. Servers & Networking (Terraform / Linode) + +- [x] Create three servers (jump, app, monitoring) +- [x] Shared private network between all servers +- [x] Jump server as the only public SSH entry point +- [x] App server has public IP for HTTP access (temporary) +- [x] Monitoring server private-only +- [x] Firewall rules enforced via Terraform +- [ ] Reserved IPv4 address for app server (stable DNS target) +- [ ] Remote Terraform state backend (S3-compatible / Terraform Cloud) +- [ ] Terraform CI checks (`fmt`, `validate`, `tflint`) +- [ ] Document Terraform module contracts (inputs/outputs) + +--- + +## 2. DNS & Domain (Cloudflare + Registrar) + +- [x] Domain registered (simply.com) +- [x] Nameservers delegated to Cloudflare +- [x] DNS records created for app server +- [x] Cloudflare in DNS-only mode (proxy disabled) +- [ ] Terraform-managed DNS records (Cloudflare provider) +- [ ] Stable DNS target via reserved IP +- [ ] Decide exposure model for monitoring (private vs public) + +--- + +## 3. 
Access Control & SSH Security (Ansible) + +- [x] Non-root `devops` user created on all servers +- [x] Password authentication disabled +- [x] Challenge-response authentication disabled +- [x] Root SSH login disabled +- [x] SSH access restricted via `AllowUsers` +- [x] Bastion (jump host) enforced +- [x] SSH agent forwarding configured and documented +- [x] Ansible runs as `devops` with `become` +- [ ] Restrict SSH on jump server to trusted IP ranges +- [ ] Explicit SSH hardening parameters (`MaxAuthTries`, `LoginGraceTime`) +- [ ] Fail2ban on jump server +- [ ] Break-glass access procedure documented + +--- + +## 4. Firewalls & Host Hardening + +- [x] Linode firewalls applied to all servers +- [x] SSH allowed to app/monitoring only from jump private IP +- [x] Inbound policy DROP, outbound ACCEPT +- [x] App firewall allows HTTP (80) +- [ ] Firewall rules reviewed and minimized +- [ ] Automatic security updates (unattended-upgrades) +- [ ] Disable unused services and packages +- [ ] Basic system auditing and log retention + +--- + +## 5. Secrets Management + +- [x] Terraform secrets via environment variables +- [x] GitHub Actions secrets for CI +- [ ] Ansible Vault for runtime secrets +- [ ] Encrypted `.env` files generated by Ansible +- [ ] Secret rotation strategy documented +- [ ] Optional: HashiCorp Vault (Roadmap Part 2) + +--- + +## 6. Container Runtime (Docker) + +- [x] Docker installed via Ansible (app + monitoring) +- [x] Docker not installed on jump server +- [x] `devops` user added to docker group +- [x] Application container deployed +- [x] Restart policy (`unless-stopped`) +- [x] Healthcheck implemented +- [ ] Container runs as non-root user +- [ ] Resource limits (CPU/memory) +- [ ] Log rotation for Docker containers +- [ ] Migrate app deployment to Docker Compose + +--- + +## 7. 
CI/CD (GitHub Actions + GHCR) + +- [x] Docker image built in CI +- [x] Images pushed to GHCR +- [x] Immutable SHA tags used for deployment +- [x] Deployment via Ansible using pinned image +- [ ] Linting and unit tests in CI +- [ ] Container vulnerability scanning (Trivy/Grype) +- [ ] SBOM generation +- [ ] Promotion workflow (staging → production) +- [ ] Semantic versioning strategy + +--- + +## 8. Application Deployment + +- [x] App deployed via Ansible +- [x] Health endpoint validated automatically +- [x] HTTP exposed on port 80 +- [ ] Bind app container to localhost only (via reverse proxy) +- [ ] Blue/green or rolling deployment strategy +- [ ] Rollback procedure documented + +--- + +## 9. Monitoring & Observability + +### Node Exporter + +- [x] Node Exporter deployed on app server +- [x] Node Exporter deployed on monitoring server +- [x] Metrics verified on port 9100 + +### Prometheus + +- [ ] Prometheus deployed on monitoring server +- [ ] Scrape node exporters +- [ ] Scrape application metrics +- [ ] Retention and storage configured +- [ ] Alert rules defined + +### Grafana + +- [ ] Grafana deployed on monitoring server +- [ ] Prometheus datasource configured +- [ ] Node exporter dashboards imported +- [ ] App dashboards created +- [ ] Access control (auth / private access) + +--- + +## 10. TLS, Reverse Proxy & Edge Security + +- [ ] Reverse proxy (Nginx / Caddy / Traefik) +- [ ] HTTPS via Let’s Encrypt or Cloudflare origin certs +- [ ] App container bound to localhost +- [ ] Cloudflare proxy enabled +- [ ] Origin access restricted to Cloudflare IPs +- [ ] Security headers enforced (HSTS, etc.) 
+ +--- + +# Roadmap — Part 2 (After Core Project) + +### Reliability & Scaling + +- [ ] Reserved IP + NodeBalancer +- [ ] Blue/green or canary deployments +- [ ] Automated rollbacks +- [ ] Load testing + +### Security Maturity + +- [ ] Centralized logging (Loki / ELK) +- [ ] SSO for Grafana +- [ ] WAF rules and rate limiting +- [ ] Image signing (Cosign) + +### Infrastructure Maturity + +- [ ] Remote Terraform state + workspaces +- [ ] Terraform → Ansible dynamic inventory +- [ ] Pre-commit hooks +- [ ] Policy as Code (OPA / Conftest) + +### Orchestration (Choose One) + +- [ ] Docker Compose (production-grade) +- [ ] Docker Swarm +- [ ] Kubernetes (k3s, ingress, cert-manager, GitOps) + +--- + +## Project Rules + +- Every stage starts with a new git branch +- No manual changes on servers +- Pinned versions only +- Documentation updated at the end of each stage From ba60c02575adff9809f93ad3338d2d46c2e2ac61 Mon Sep 17 00:00:00 2001 From: tysker Date: Fri, 9 Jan 2026 11:04:31 +0100 Subject: [PATCH 2/8] (ansible) add roles for prometheus and port 80 access on app server --- infrastructure/terraform/main.tf | 50 +++++--------------------------- 1 file changed, 8 insertions(+), 42 deletions(-) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 082e5d3..3d0d0b1 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -1,37 +1,3 @@ -# resource "linode_instance" "jump" { -# label = "${var.project_name}-${var.environment}-jump" -# region = var.region -# type = var.instance_type -# image = var.image -# -# authorized_keys = [chomp(file(var.ssh_public_key_path))] -# -# private_ip = true -# } -# -# resource "linode_instance" "app" { -# label = "${var.project_name}-${var.environment}-app" -# region = var.region -# type = var.instance_type -# image = var.image -# -# authorized_keys = [chomp(file(var.ssh_public_key_path))] -# -# -# private_ip = true -# } -# -# resource "linode_instance" "monitoring" { -# label = 
"${var.project_name}-${var.environment}-monitoring" -# region = var.region -# type = var.instance_type -# image = var.image -# -# authorized_keys = [chomp(file(var.ssh_public_key_path))] -# -# private_ip = true -# } - module "jump" { source = "./modules/compute" @@ -98,6 +64,14 @@ resource "linode_firewall" "app_fw" { ipv4 = ["192.168.0.0/16"] } + inbound { + label = "allow-http" + action = "ACCEPT" + protocol = "TCP" + ports = "80" + ipv4 = ["0.0.0.0/0"] + } + inbound_policy = "DROP" outbound_policy = "ACCEPT" @@ -123,14 +97,6 @@ resource "linode_firewall" "monitoring_fw" { ipv4 = ["192.168.0.0/16"] } - inbound { - label = "allow-http" - action = "ACCEPT" - protocol = "TCP" - ports = "80" - ipv4 = ["0.0.0.0/0"] - } - inbound_policy = "DROP" outbound_policy = "ACCEPT" From b828911fce61bd59c559afda1f832a7c659656ce Mon Sep 17 00:00:00 2001 From: tysker Date: Fri, 9 Jan 2026 11:07:19 +0100 Subject: [PATCH 3/8] (ansible) add roles for prometheus and port 80 access on app server --- ansible/group_vars/all.yml | 6 --- ansible/group_vars/app.yml | 4 ++ ansible/group_vars/monitoring.yml | 6 +++ ansible/playbooks/monitoring_prometheus.yml | 6 +++ ansible/roles/prometheus/tasks/main.yml | 37 +++++++++++++++++++ .../prometheus/templates/prometheus.yml.j2 | 11 ++++++ 6 files changed, 64 insertions(+), 6 deletions(-) create mode 100644 ansible/group_vars/app.yml create mode 100644 ansible/group_vars/monitoring.yml create mode 100644 ansible/playbooks/monitoring_prometheus.yml create mode 100644 ansible/roles/prometheus/tasks/main.yml create mode 100644 ansible/roles/prometheus/templates/prometheus.yml.j2 diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml index e746599..baac036 100644 --- a/ansible/group_vars/all.yml +++ b/ansible/group_vars/all.yml @@ -4,12 +4,6 @@ ansible_python_interpreter: /usr/bin/python3 devops_user: devops devops_public_key: "{{ lookup('file', lookup('env', 'HOME') + '/.ssh/linode.pub') }}" -# app image -app_image: 
"ghcr.io/tysker/cloud_devops_app:77ecd38" -app_container_name: "cloud-devops-app" -app_container_port: 5000 -app_public_port: 80 - # github account ghcr_username: "tysker" ghcr_token: "{{ lookup('env', 'GHCR_TOKEN') }}" diff --git a/ansible/group_vars/app.yml b/ansible/group_vars/app.yml new file mode 100644 index 0000000..b6f3cab --- /dev/null +++ b/ansible/group_vars/app.yml @@ -0,0 +1,4 @@ +app_image: "ghcr.io/tysker/cloud_devops_app:77ecd38" +app_container_name: "cloud-devops-app" +app_container_port: 5000 +app_public_port: 80 diff --git a/ansible/group_vars/monitoring.yml b/ansible/group_vars/monitoring.yml new file mode 100644 index 0000000..b056f0e --- /dev/null +++ b/ansible/group_vars/monitoring.yml @@ -0,0 +1,6 @@ +prometheus_image: "prom/prometheus:v2.52.0" +prometheus_port: 9090 +prometheus_config_dir: "/opt/prometheus" +prometheus_data_dir: "/opt/prometheus/data" +node_exporter_image: "prom/node-exporter:v1.8.1" +node_exporter_port: 9100 diff --git a/ansible/playbooks/monitoring_prometheus.yml b/ansible/playbooks/monitoring_prometheus.yml new file mode 100644 index 0000000..ab90297 --- /dev/null +++ b/ansible/playbooks/monitoring_prometheus.yml @@ -0,0 +1,6 @@ +- name: Deploy Prometheus on monitoring server + hosts: monitoring + gather_facts: true + become: true + roles: + - prometheus diff --git a/ansible/roles/prometheus/tasks/main.yml b/ansible/roles/prometheus/tasks/main.yml new file mode 100644 index 0000000..18edeb3 --- /dev/null +++ b/ansible/roles/prometheus/tasks/main.yml @@ -0,0 +1,37 @@ +- name: Ensure Prometheus directories exist + become: true + ansible.builtin.file: + path: "{{ item }}" + state: directory + owner: "65534" + group: "65534" + mode: "0755" + loop: + - "{{ prometheus_config_dir }}" + - "{{ prometheus_data_dir }}" + +- name: Render Prometheus configuration + ansible.builtin.template: + src: prometheus.yml.j2 + dest: "{{ prometheus_config_dir }}/prometheus.yml" + owner: root + group: root + mode: "0644" + +- name: Ensure 
Prometheus container is running + community.docker.docker_container: + name: prometheus + image: "{{ prometheus_image }}" + state: started + restart_policy: unless-stopped + network_mode: host + command: + - "--config.file={{ prometheus_config_dir }}/prometheus.yml" + - "--storage.tsdb.path={{ prometheus_data_dir }}" + - "--web.listen-address=0.0.0.0:{{ prometheus_port }}" + volumes: + - "{{ prometheus_config_dir }}/prometheus.yml:{{ prometheus_config_dir }}/prometheus.yml:ro" + - "{{ prometheus_data_dir }}:{{ prometheus_data_dir }}" + read_only: true + tmpfs: + - /tmp diff --git a/ansible/roles/prometheus/templates/prometheus.yml.j2 b/ansible/roles/prometheus/templates/prometheus.yml.j2 new file mode 100644 index 0000000..0989a9f --- /dev/null +++ b/ansible/roles/prometheus/templates/prometheus.yml.j2 @@ -0,0 +1,11 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: "node_exporter" + static_configs: + - targets: + - "{{ hostvars['monitoring-1'].ansible_host }}:9100" + - "{{ hostvars['app-1'].ansible_host }}:9100" + From c1449e96e86add131b9e52c47fd2dc4727ef73f7 Mon Sep 17 00:00:00 2001 From: tysker Date: Fri, 9 Jan 2026 11:12:05 +0100 Subject: [PATCH 4/8] fix(terraform) change app and monitoring to use different server setup. 
1gb to 2gb --- infrastructure/terraform/main.tf | 22 +++++++++++----------- infrastructure/terraform/variables.tf | 10 ++++++++-- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 3d0d0b1..1f851cb 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -3,7 +3,7 @@ module "jump" { label = "${var.project_name}-${var.environment}-jump" region = var.region - instance_type = var.instance_type + instance_type = var.instance_type_1gb image = var.image authorized_keys = [chomp(file(var.ssh_public_key_path))] } @@ -13,7 +13,7 @@ module "app" { label = "${var.project_name}-${var.environment}-app" region = var.region - instance_type = var.instance_type + instance_type = var.instance_type_2gb image = var.image authorized_keys = [chomp(file(var.ssh_public_key_path))] } @@ -23,7 +23,7 @@ module "monitoring" { label = "${var.project_name}-${var.environment}-monitoring" region = var.region - instance_type = var.instance_type + instance_type = var.instance_type_2gb image = var.image authorized_keys = [chomp(file(var.ssh_public_key_path))] } @@ -64,14 +64,6 @@ resource "linode_firewall" "app_fw" { ipv4 = ["192.168.0.0/16"] } - inbound { - label = "allow-http" - action = "ACCEPT" - protocol = "TCP" - ports = "80" - ipv4 = ["0.0.0.0/0"] - } - inbound_policy = "DROP" outbound_policy = "ACCEPT" @@ -97,6 +89,14 @@ resource "linode_firewall" "monitoring_fw" { ipv4 = ["192.168.0.0/16"] } + inbound { + label = "allow-http" + action = "ACCEPT" + protocol = "TCP" + ports = "80" + ipv4 = ["0.0.0.0/0"] + } + inbound_policy = "DROP" outbound_policy = "ACCEPT" diff --git a/infrastructure/terraform/variables.tf b/infrastructure/terraform/variables.tf index 34d508c..00a4fe3 100644 --- a/infrastructure/terraform/variables.tf +++ b/infrastructure/terraform/variables.tf @@ -10,12 +10,18 @@ variable "region" { default = "eu-central" } -variable "instance_type" { - description = 
"Linode instance type" +variable "instance_type_1gb" { + description = "Linode instance type 1GB ram" type = string default = "g6-nanode-1" } +variable "instance_type_2gb" { + description = "Linode instance type 2GB ram" + type = string + default = "g6-standard-1" +} + variable "ssh_public_key_path" { description = "Path to the SSH public key used to access servers" type = string From cc129304c52e60990244796a780960ef947d19cb Mon Sep 17 00:00:00 2001 From: tysker Date: Sun, 11 Jan 2026 12:15:47 +0100 Subject: [PATCH 5/8] feat(monitoring): deploy node exporter on app and monitoring --- ansible/group_vars/all.yml | 4 ++++ ansible/playbooks/monitoring_node_exporter.yml | 6 ++++++ ansible/roles/node_exporter/tasks/main.yml | 12 ++++++++++++ 3 files changed, 22 insertions(+) create mode 100644 ansible/playbooks/monitoring_node_exporter.yml create mode 100644 ansible/roles/node_exporter/tasks/main.yml diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml index baac036..424a62b 100644 --- a/ansible/group_vars/all.yml +++ b/ansible/group_vars/all.yml @@ -7,3 +7,7 @@ devops_public_key: "{{ lookup('file', lookup('env', 'HOME') + '/.ssh/linode.pub' # github account ghcr_username: "tysker" ghcr_token: "{{ lookup('env', 'GHCR_TOKEN') }}" + +# node-exporter +node_exporter_image: "prom/node-exporter:v1.8.1" +node_exporter_port: 9100 diff --git a/ansible/playbooks/monitoring_node_exporter.yml b/ansible/playbooks/monitoring_node_exporter.yml new file mode 100644 index 0000000..fc8a044 --- /dev/null +++ b/ansible/playbooks/monitoring_node_exporter.yml @@ -0,0 +1,6 @@ +- name: Deploy Node Exporter on app and monitoring servers + hosts: app:monitoring + gather_facts: true + become: true + roles: + - node_exporter diff --git a/ansible/roles/node_exporter/tasks/main.yml b/ansible/roles/node_exporter/tasks/main.yml new file mode 100644 index 0000000..ba1a5a6 --- /dev/null +++ b/ansible/roles/node_exporter/tasks/main.yml @@ -0,0 +1,12 @@ +- name: Ensure Node Exporter 
container is running + community.docker.docker_container: + name: node-exporter + image: "{{ node_exporter_image }}" + state: started + restart_policy: unless-stopped + network_mode: host + pid_mode: host + read_only: true + command: ["--path.rootfs=/host"] + volumes: + - "/:/host:ro,rslave" From 3d812af4fb413d701e2d5622174d41c29925dac7 Mon Sep 17 00:00:00 2001 From: tysker Date: Sun, 11 Jan 2026 12:18:30 +0100 Subject: [PATCH 6/8] fix(ansible): fix problem with bootstrap server sequence. First root and then devops setup --- ansible/ansible.cfg | 6 +++--- ansible/group_vars/monitoring.yml | 2 -- ansible/hosts.ini | 9 +++++---- ansible/playbooks/bootstrap_1.yml | 6 ++++++ ansible/playbooks/{bootstrap.yml => bootstrap_2.yml} | 6 ++---- .../{bootstrap_users => bootstrap_user}/tasks/main.yml | 0 ansible/roles/common/handlers/main.yml | 0 ansible/roles/prometheus/templates/prometheus.yml.j2 | 6 +++--- 8 files changed, 19 insertions(+), 16 deletions(-) create mode 100644 ansible/playbooks/bootstrap_1.yml rename ansible/playbooks/{bootstrap.yml => bootstrap_2.yml} (68%) rename ansible/roles/{bootstrap_users => bootstrap_user}/tasks/main.yml (100%) delete mode 100644 ansible/roles/common/handlers/main.yml diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg index b3f307b..0c6d0fd 100644 --- a/ansible/ansible.cfg +++ b/ansible/ansible.cfg @@ -1,7 +1,6 @@ [defaults] inventory = hosts.ini roles_path = roles -remote_user = devops host_key_checking = False retry_files_enabled = False timeout = 30 @@ -14,10 +13,11 @@ fact_caching_timeout = 86400 # logging & output log_path = ./ansible.log -stdout_callback = yaml +stdout_callback = ansible.builtin.default +result_format = yaml # pyhton interpreter path -interpreter_python = "{{ansible_python_interpreter}}" +interpreter_python = /usr/bin/python3 [ssh_connection] ssh_args = -o ForwardAgent=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null diff --git a/ansible/group_vars/monitoring.yml b/ansible/group_vars/monitoring.yml 
index b056f0e..3af93d7 100644 --- a/ansible/group_vars/monitoring.yml +++ b/ansible/group_vars/monitoring.yml @@ -2,5 +2,3 @@ prometheus_image: "prom/prometheus:v2.52.0" prometheus_port: 9090 prometheus_config_dir: "/opt/prometheus" prometheus_data_dir: "/opt/prometheus/data" -node_exporter_image: "prom/node-exporter:v1.8.1" -node_exporter_port: 9100 diff --git a/ansible/hosts.ini b/ansible/hosts.ini index d0d558d..c7d5e29 100644 --- a/ansible/hosts.ini +++ b/ansible/hosts.ini @@ -1,14 +1,15 @@ +# All configurations is set inside ~/.ssh/config + [bastion] -jump-1 ansible_host=172.105.80.74 +jump-1 [app] -app-1 ansible_host=192.168.133.230 ansible_ssh_common_args='-o ProxyJump=devops@172.105.80.74' +app-1 [monitoring] -monitoring-1 ansible_host=192.168.133.104 ansible_ssh_common_args='-o ProxyJump=devops@172.105.80.74' +monitoring-1 [all:children] bastion app monitoring - diff --git a/ansible/playbooks/bootstrap_1.yml b/ansible/playbooks/bootstrap_1.yml new file mode 100644 index 0000000..c1ffdec --- /dev/null +++ b/ansible/playbooks/bootstrap_1.yml @@ -0,0 +1,6 @@ +- name: Bootstrap all server (initial) + hosts: all + remote_user: root + roles: + - common + - bootstrap_user diff --git a/ansible/playbooks/bootstrap.yml b/ansible/playbooks/bootstrap_2.yml similarity index 68% rename from ansible/playbooks/bootstrap.yml rename to ansible/playbooks/bootstrap_2.yml index 766d8e4..a89f071 100644 --- a/ansible/playbooks/bootstrap.yml +++ b/ansible/playbooks/bootstrap_2.yml @@ -1,10 +1,8 @@ -- name: Bootstrap all servers +- name: Harden SSH (after devops exists) hosts: all - gather_facts: false + remote_user: devops become: true roles: - - common - - bootstrap_users - ssh_hardening - name: Install Docker on app and monitoring servers diff --git a/ansible/roles/bootstrap_users/tasks/main.yml b/ansible/roles/bootstrap_user/tasks/main.yml similarity index 100% rename from ansible/roles/bootstrap_users/tasks/main.yml rename to ansible/roles/bootstrap_user/tasks/main.yml diff 
--git a/ansible/roles/common/handlers/main.yml b/ansible/roles/common/handlers/main.yml deleted file mode 100644 index e69de29..0000000 diff --git a/ansible/roles/prometheus/templates/prometheus.yml.j2 b/ansible/roles/prometheus/templates/prometheus.yml.j2 index 0989a9f..819a3d3 100644 --- a/ansible/roles/prometheus/templates/prometheus.yml.j2 +++ b/ansible/roles/prometheus/templates/prometheus.yml.j2 @@ -6,6 +6,6 @@ scrape_configs: - job_name: "node_exporter" static_configs: - targets: - - "{{ hostvars['monitoring-1'].ansible_host }}:9100" - - "{{ hostvars['app-1'].ansible_host }}:9100" - +{% for h in groups['all'] %} + - "{{ h }}:9100" +{% endfor %} From b47b6e1d0e311391cd1be721d19e32ef8a42bc03 Mon Sep 17 00:00:00 2001 From: tysker Date: Sun, 11 Jan 2026 17:37:17 +0100 Subject: [PATCH 7/8] fix(monitoring): run grafana with writable data volume --- ansible/group_vars/monitoring.yml | 6 ++++++ ansible/hosts.ini | 6 +++--- ansible/playbooks/monitoring_grafana.yml | 6 ++++++ ansible/playbooks/monitoring_prometheus.yml | 1 + ansible/roles/grafana/tasks/main.yml | 21 +++++++++++++++++++ ansible/roles/prometheus/tasks/main.yml | 1 + .../prometheus/templates/prometheus.yml.j2 | 8 +++---- 7 files changed, 42 insertions(+), 7 deletions(-) create mode 100644 ansible/playbooks/monitoring_grafana.yml create mode 100644 ansible/roles/grafana/tasks/main.yml diff --git a/ansible/group_vars/monitoring.yml b/ansible/group_vars/monitoring.yml index 3af93d7..a21633e 100644 --- a/ansible/group_vars/monitoring.yml +++ b/ansible/group_vars/monitoring.yml @@ -1,4 +1,10 @@ +# prometheus prometheus_image: "prom/prometheus:v2.52.0" prometheus_port: 9090 prometheus_config_dir: "/opt/prometheus" prometheus_data_dir: "/opt/prometheus/data" + +# grafana +grafana_image: "grafana/grafana:10.4.3" +grafana_port: 3000 +grafana_data_dir: "/opt/grafana/data" diff --git a/ansible/hosts.ini b/ansible/hosts.ini index c7d5e29..6098b3a 100644 --- a/ansible/hosts.ini +++ b/ansible/hosts.ini @@ -1,13 +1,13 
@@ # All configurations is set inside ~/.ssh/config [bastion] -jump-1 +jump-1 ansible_host=172.105.79.7 [app] -app-1 +app-1 ansible_host=192.168.143.86 ansible_ssh_common_args='-o ProxyJump=devops@172.105.79.7' [monitoring] -monitoring-1 +monitoring-1 ansible_host=192.168.129.112 ansible_ssh_common_args='-o ProxyJump=devops@172.105.79.7' [all:children] bastion diff --git a/ansible/playbooks/monitoring_grafana.yml b/ansible/playbooks/monitoring_grafana.yml new file mode 100644 index 0000000..84f273c --- /dev/null +++ b/ansible/playbooks/monitoring_grafana.yml @@ -0,0 +1,6 @@ +- name: Deploy Grafana on monitoring server + hosts: monitoring + gather_facts: true + become: true + roles: + - grafana diff --git a/ansible/playbooks/monitoring_prometheus.yml b/ansible/playbooks/monitoring_prometheus.yml index ab90297..81a8674 100644 --- a/ansible/playbooks/monitoring_prometheus.yml +++ b/ansible/playbooks/monitoring_prometheus.yml @@ -1,6 +1,7 @@ - name: Deploy Prometheus on monitoring server hosts: monitoring gather_facts: true + remote_user: devops become: true roles: - prometheus diff --git a/ansible/roles/grafana/tasks/main.yml b/ansible/roles/grafana/tasks/main.yml new file mode 100644 index 0000000..3869f7d --- /dev/null +++ b/ansible/roles/grafana/tasks/main.yml @@ -0,0 +1,21 @@ +- name: Ensure Grafana data directory exists + ansible.builtin.file: + path: "{{ grafana_data_dir }}" + state: directory + owner: "472" + group: "472" + mode: "0755" + +- name: Ensure Grafana container is running + community.docker.docker_container: + name: grafana + image: "{{ grafana_image }}" + state: started + restart_policy: unless-stopped + network_mode: host + volumes: + - "{{ grafana_data_dir }}:/var/lib/grafana" + env: + GF_SECURITY_ADMIN_USER: admin + GF_SECURITY_ADMIN_PASSWORD: "{{ lookup('env', 'GRAFANA_ADMIN_PASSWORD', default=undef()) }}" # read from the control node's environment instead of a hard-coded default; the play fails if it is unset (needs ansible-core >= 2.13) + GF_USERS_ALLOW_SIGN_UP: "false" diff --git a/ansible/roles/prometheus/tasks/main.yml b/ansible/roles/prometheus/tasks/main.yml index 18edeb3..bb595c6 100644 --- 
a/ansible/roles/prometheus/tasks/main.yml +++ b/ansible/roles/prometheus/tasks/main.yml @@ -23,6 +23,7 @@ name: prometheus image: "{{ prometheus_image }}" state: started + recreate: true restart_policy: unless-stopped network_mode: host command: diff --git a/ansible/roles/prometheus/templates/prometheus.yml.j2 b/ansible/roles/prometheus/templates/prometheus.yml.j2 index 819a3d3..8340de1 100644 --- a/ansible/roles/prometheus/templates/prometheus.yml.j2 +++ b/ansible/roles/prometheus/templates/prometheus.yml.j2 @@ -3,9 +3,9 @@ global: evaluation_interval: 15s scrape_configs: - - job_name: "node_exporter" + - job_name: "node" static_configs: - targets: -{% for h in groups['all'] %} - - "{{ h }}:9100" -{% endfor %} + - "{{ hostvars['monitoring-1'].ansible_host }}:9100" + - "{{ hostvars['app-1'].ansible_host }}:9100" + From f895cb47c9dec510a60b7a9816e2e881d101a745 Mon Sep 17 00:00:00 2001 From: tysker Date: Mon, 12 Jan 2026 08:29:41 +0100 Subject: [PATCH 8/8] fix(playbooks,terraform) open port 80 for app and close for monitoring. 
make sure remote_user is set explicitly inside playbooks --- ansible/hosts.ini | 8 +++----- ansible/playbooks/bootstrap_2.yml | 1 + ansible/playbooks/deploy_app.yml | 1 + ansible/playbooks/monitoring_grafana.yml | 1 + ansible/playbooks/monitoring_node_exporter.yml | 1 + infrastructure/terraform/main.tf | 16 ++++++++-------- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/ansible/hosts.ini b/ansible/hosts.ini index 6098b3a..8ecc281 100644 --- a/ansible/hosts.ini +++ b/ansible/hosts.ini @@ -1,13 +1,11 @@ -# All configurations is set inside ~/.ssh/config - [bastion] -jump-1 ansible_host=172.105.79.7 +jump-1 ansible_host=172.104.228.45 [app] -app-1 ansible_host=192.168.143.86 ansible_ssh_common_args='-o ProxyJump=devops@172.105.79.7' +app-1 ansible_host=192.168.137.27 ansible_ssh_common_args='-o ProxyJump=devops@172.104.228.45' [monitoring] -monitoring-1 ansible_host=192.168.129.112 ansible_ssh_common_args='-o ProxyJump=devops@172.105.79.7' +monitoring-1 ansible_host=192.168.137.82 ansible_ssh_common_args='-o ProxyJump=devops@172.104.228.45' [all:children] bastion diff --git a/ansible/playbooks/bootstrap_2.yml b/ansible/playbooks/bootstrap_2.yml index a89f071..a604758 100644 --- a/ansible/playbooks/bootstrap_2.yml +++ b/ansible/playbooks/bootstrap_2.yml @@ -7,6 +7,7 @@ - name: Install Docker on app and monitoring servers hosts: app:monitoring + remote_user: devops gather_facts: true become: true roles: diff --git a/ansible/playbooks/deploy_app.yml b/ansible/playbooks/deploy_app.yml index def8e08..0e4fe3b 100644 --- a/ansible/playbooks/deploy_app.yml +++ b/ansible/playbooks/deploy_app.yml @@ -1,5 +1,6 @@ - name: Deploy Flask app container hosts: app + remote_user: devops gather_facts: true become: true roles: diff --git a/ansible/playbooks/monitoring_grafana.yml b/ansible/playbooks/monitoring_grafana.yml index 84f273c..ed6e604 100644 --- a/ansible/playbooks/monitoring_grafana.yml +++ b/ansible/playbooks/monitoring_grafana.yml @@ -1,5 +1,6 @@ - name: Deploy 
Grafana on monitoring server hosts: monitoring + remote_user: devops gather_facts: true become: true roles: diff --git a/ansible/playbooks/monitoring_node_exporter.yml b/ansible/playbooks/monitoring_node_exporter.yml index fc8a044..5039c37 100644 --- a/ansible/playbooks/monitoring_node_exporter.yml +++ b/ansible/playbooks/monitoring_node_exporter.yml @@ -1,5 +1,6 @@ - name: Deploy Node Exporter on app and monitoring servers hosts: app:monitoring + remote_user: devops gather_facts: true become: true roles: diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 1f851cb..6f14484 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -64,6 +64,14 @@ resource "linode_firewall" "app_fw" { ipv4 = ["192.168.0.0/16"] } + inbound { + label = "allow-http" + action = "ACCEPT" + protocol = "TCP" + ports = "80" + ipv4 = ["0.0.0.0/0"] + } + inbound_policy = "DROP" outbound_policy = "ACCEPT" @@ -89,14 +97,6 @@ resource "linode_firewall" "monitoring_fw" { ipv4 = ["192.168.0.0/16"] } - inbound { - label = "allow-http" - action = "ACCEPT" - protocol = "TCP" - ports = "80" - ipv4 = ["0.0.0.0/0"] - } - inbound_policy = "DROP" outbound_policy = "ACCEPT"