Auto-update OS on Ubuntu + log process for monitoring of process with Zabbix

Copy script to /opt/autoupdate.sh

Script content:

#!/bin/bash

# Destination file that collects every message of an update run
LOGFILE="/var/log/ubuntu_update.log"

# echo_log MESSAGE
# Prefix MESSAGE with the current local timestamp, print the line to stdout
# and append the same line to $LOGFILE.
echo_log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '%s - %s\n' "$stamp" "$1" | tee -a "$LOGFILE"
}

# Remove any reboot that a previous run left scheduled
if [ -f /etc/cron.d/scheduled_reboot ]; then
    rm -f /etc/cron.d/scheduled_reboot
    echo_log "Removed existing scheduled reboot."
fi

# Start logging
echo_log "Starting server update..."

# Update the system.
# Each apt step is checked on its own: a single "$?" test after both commands
# would only see the exit status of the upgrade and miss a failed "apt update".
# Both error messages keep the words "Error" and "update" because the sample
# Zabbix template collects only log lines matching "update" and its failure
# trigger searches for "Error".
echo_log "Running apt update and upgrade..."
if ! /usr/bin/apt update >> "$LOGFILE" 2>&1; then
    echo_log "Error: apt update failed. Check the log for details."
    exit 1
fi

# Non-interactive upgrade; on config-file conflicts take the package default
# where one is defined, otherwise keep the currently installed file.
if ! DEBIAN_FRONTEND=noninteractive /usr/bin/apt -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold" upgrade >> "$LOGFILE" 2>&1; then
    echo_log "Error: apt upgrade failed during system update. Check the log for details."
    exit 1
fi

echo_log "System update completed."

# Check if a reboot is required
if [ -f /var/run/reboot-required ]; then
    echo_log "Reboot required. Scheduling reboot."
    # Schedule reboot for 7:00 AM today: a one-off cron.d entry pinned to
    # today's day and month (it is removed again at the start of the next run)
    echo "0 7 $(date +"%d %m *") root /sbin/reboot" > /etc/cron.d/scheduled_reboot
    echo_log "Reboot scheduled for 7:00 AM today."
else
    echo_log "No reboot required. No reboot scheduled."
fi

echo_log "Update process completed."
exit 0

Assign execution rights to script.

chmod +x /opt/autoupdate.sh

schedule update

crontab -e
0 6 * * 2 /bin/bash /opt/autoupdate.sh

Update logs location: /var/log/ubuntu_update.log

To enable zabbix monitoring assign zabbix template in zabbix and modify agent config to include correct values for “ServerActive” & “Hostname” values

sample Zabbix template for monitoring updates:

# Zabbix 6.0 template export: one active-agent log item that reads the
# auto-update log, plus two triggers built on it ("completed" and "failed").
zabbix_export:
  version: '6.0'

  groups:
    - name: Templates/Custom

  templates:
    - template: Linux_autoupdate_monitor_ubuntu   # keep this stable (technical name)
      name: 'Ubuntu auto-update monitor (log-based)'
      groups:
        - name: Templates/Custom

      items:
        # Log item on /var/log/ubuntu_update.log; the second key parameter
        # ("update") is the regexp a line must match to be collected.
        # NOTE(review): the match is presumably case-sensitive, so lines such
        # as "Update process completed." would not be collected - confirm.
        - name: 'Check Ubuntu Update Log'
          type: ZABBIX_ACTIVE
          key: 'log[/var/log/ubuntu_update.log,update,,,skip]'
          trends: '0'
          value_type: LOG

          triggers:
            # Informational trigger: fires on a collected line containing
            # "completed"; recovers once the item has had no data for
            # 28800 s (8 h). Depends on the failure trigger below.
            - name: 'Auto-update completed for {HOST.NAME}'
              expression: 'find(/Linux_autoupdate_monitor_ubuntu/log[/var/log/ubuntu_update.log,update,,,skip],1,,"completed")=1'
              recovery_mode: RECOVERY_EXPRESSION
              recovery_expression: 'nodata(/Linux_autoupdate_monitor_ubuntu/log[/var/log/ubuntu_update.log,update,,,skip],28800)=1'
              priority: INFO
              description: 'General notification about completed auto-update attempt.'
              manual_close: 'YES'
              dependencies:
                - name: 'Auto-update failed, check update logs on {HOST.NAME}'
                  expression: 'find(/Linux_autoupdate_monitor_ubuntu/log[/var/log/ubuntu_update.log,update,,,skip],1,,"Error")=1'
                  recovery_expression: 'nodata(/Linux_autoupdate_monitor_ubuntu/log[/var/log/ubuntu_update.log,update,,,skip],86400)=1'

            # Failure trigger: fires on a collected line containing "Error";
            # recovers after 86400 s (24 h) without data, or by manual close.
            - name: 'Auto-update failed, check update logs on {HOST.NAME}'
              expression: 'find(/Linux_autoupdate_monitor_ubuntu/log[/var/log/ubuntu_update.log,update,,,skip],1,,"Error")=1'
              recovery_mode: RECOVERY_EXPRESSION
              recovery_expression: 'nodata(/Linux_autoupdate_monitor_ubuntu/log[/var/log/ubuntu_update.log,update,,,skip],86400)=1'
              priority: AVERAGE
              description: |
                Something failed during auto-update. Check logs on server.
                Alarm will gone in 24hrs after popping-up, or you can manually close it if you check and fix issue on server.
              manual_close: 'YES'

      # Template-level tag
      tags:
        - tag: UpdateMonitoring
          value: Logs

Making snapshot on Nutanix from guest VM using Nutanix API

If you prefer full control inside the VM (or you want something more tailored per workload), the alternative approach is to run a pre-snapshot script (quiesce), trigger the snapshot via the API, then run a post-snapshot script (resume).

  1. Pre-snapshot (example)
#!/bin/bash
# Pre-snapshot (quiesce) step: stop MySQL, flush dirty pages, then freeze the
# filesystem mounted at /data so nothing changes while the snapshot is taken.
# Abort on the first failing command (the stop below is exempt).
set -e
# Best effort: a failure to stop mysqld (e.g. service not present) is ignored
systemctl stop mysqld || true
# Flush pending writes to disk before freezing
sync
# Freeze /data; it stays frozen until "fsfreeze -u /data" is run
fsfreeze -f /data

2. Snapshot trigger (API example )

#!/bin/bash
# Snapshot trigger: ask Prism to create a crash-consistent snapshot of this VM.
# Exits non-zero when the request fails or times out, so the caller can react
# (and must still run the post-snapshot script to thaw the filesystem).
set -euo pipefail

VM_UUID="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"

# --fail                 : HTTP 4xx/5xx answers make curl exit non-zero
# --silent --show-error  : no progress meter, but still print errors
# --connect-timeout/--max-time : never hang forever while /data is frozen
# NOTE(review): -k disables TLS certificate verification and -u puts the
# credentials on the command line; acceptable for an example only.
curl -k --fail --silent --show-error \
  --connect-timeout 10 --max-time 120 \
  -u 'apiuser:apipass' \
  -H 'Content-Type: application/json' \
  -X POST "https://<PRISM-IP>:9440/api/nutanix/v3/snapshots" \
  -d '{
    "spec": {
      "resources": {
        "entity_uuid": "'"$VM_UUID"'",
        "snapshot_type": "CRASH_CONSISTENT"
      },
      "name": "snap-from-inside-'"$(date +%F-%H%M)"'"
    }
  }'

3. Post-snapshot (example)

#!/bin/bash
# Post-snapshot (resume) step: thaw /data, then start MySQL again.
# Abort on the first failing command (the start below is exempt).
# NOTE(review): because of "set -e", a failing unfreeze ends the script before
# mysqld is started - confirm this is the intended behaviour.
set -e
# Unfreeze the filesystem frozen by the pre-snapshot script
fsfreeze -u /data
# Best effort: a failure to start mysqld is ignored
systemctl start mysqld || true

This route can work well when you want custom quiescing logic, but it does add some moving parts (API auth/endpoint, error handling, timeouts, etc.).

Fault-tolerant cluster with haproxy and keepalived. Also backend sync with glusterfs

Case: build a fault-tolerant solution using Linux servers and sync data between cluster members with GlusterFS.

solution: keepalived + HAProxy for VRRP and glusterfs to sync data.

Install keepalived + haproxy

apt-get update
apt-get upgrade
apt install haproxy keepalived

keepalived.conf configuration example

# Health check referenced by the track_script sections of the VRRP instances.
vrrp_script chk_haproxy {
    script "/etc/keepalived/check_haproxy.sh"
    interval 2      # run the script every 2 seconds
    timeout 3       # seconds the script may run before it is treated as failed
    fall 2          # consecutive failures before the check is considered down
    rise 2          # consecutive successes before the check is considered up
}

# First VRRP instance: virtual IPs on interface ens192, tied to the HAProxy
# health check above.
vrrp_instance VI_1 {
    interface ens192
    state MASTER
    virtual_router_id 51          # A unique number [1-255] for this VRRP instance
    priority 100                  # 100 for master, 50 for backup
    advert_int 1
    # NOTE(review): keepalived reportedly uses only the first 8 characters of
    # auth_pass - confirm against the keepalived.conf man page.
    authentication {
        auth_type PASS
        auth_pass mysecretpass   # A password for authentication, should be the same on all servers
    }
    virtual_ipaddress {
    10.10.10.10                 # The virtual IP address shared between master and backup
    10.10.10.11
    10.10.10.12
    }
    # Run the chk_haproxy check for this instance
    track_script {
        chk_haproxy
    }
}

# Second VRRP instance: virtual IPs on interface ens224, tied to the same
# HAProxy health check as the first instance.
vrrp_instance VI_2 {
    interface ens224
    state MASTER
    virtual_router_id 52          # A unique number [1-255] for this VRRP instance
    priority 100                  # 100 for master, 50 for backup
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass mysecretpass   # A password for authentication, should be the same on all servers
    }
    virtual_ipaddress {
    10.10.20.10                 # The virtual IP address shared between master and backup
    10.10.20.11
    10.10.20.12
    }
    track_script {
        chk_haproxy
    }
}

Script to check if haproxy is alive – /etc/keepalived/check_haproxy.sh:

#!/bin/bash
# Check if HAProxy is running.
# Health check for keepalived's vrrp_script: exits 0 while the haproxy systemd
# unit is active and 1 otherwise.

if systemctl is-active --quiet haproxy; then
    exit 0
else
    exit 1
fi

Sample of haproxy.cfg:

# Process-wide settings: logging, chroot, admin socket, run-as user and the
# default TLS parameters used by "bind ... ssl" lines.
global
    # Send logs to the local syslog socket (facility local0; local1 at notice level)
    log /dev/log local0
    log /dev/log local1 notice
    chroot /var/lib/haproxy
    # Admin-level runtime socket
    stats socket /run/haproxy/admin.sock mode 660 level admin
    stats timeout 30s
    user haproxy
    group haproxy
    daemon

    # Default SSL material locations
    ca-base /etc/ssl/certs
    crt-base /etc/ssl/private

    # See: https://ssl-config.mozilla.org/#server=haproxy&server-version=2.0.3&config=intermediate
    ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
    ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256
    ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets

# Defaults inherited by every proxy section below unless a section overrides
# them (both listen sections in this sample override the mode and log format).
defaults
    log global
    mode http
    option httplog
    option dontlognull
    # Retry a failed connection up to 3 times; redispatch allows another server
    retries 3
    option redispatch
    timeout connect 5000ms
    timeout client  50000ms
    timeout server  50000ms
    # Custom response files returned for the listed HTTP status codes
    errorfile 400 /etc/haproxy/errors/400.http
    errorfile 403 /etc/haproxy/errors/403.http
    errorfile 408 /etc/haproxy/errors/408.http
    errorfile 500 /etc/haproxy/errors/500.http
    errorfile 502 /etc/haproxy/errors/502.http
    errorfile 503 /etc/haproxy/errors/503.http
    errorfile 504 /etc/haproxy/errors/504.http

# Stats server for HAProxy
# The built-in statistics page is served over HTTP, so this proxy has to run
# in "mode http"; in "mode tcp" the stats directives below are not applied.
listen stats_and_disclaimer
    bind 0.0.0.0:80
    mode http
    option httplog
    stats enable
    stats uri /haproxy?stats
    stats realm "Strictly Private"
    # One "stats auth" line per allowed user:password pair
    stats auth A_Username:YourPassword
    stats auth Another_User:passwd
    # Requests that are not for the stats URI go to these servers;
    # lamp2 is only used when lamp1 is down
    server lamp1 172.24.1.106:80 check
    server lamp2 172.24.1.107:80 check backup

# Plain TCP proxy on port 123: lamp1 is the active server, lamp2 is marked
# "backup". Overrides the HTTP mode and log format set in "defaults".
listen some_server
    bind 0.0.0.0:123
    mode tcp
    option tcplog
    server lamp1 10.11.11.11:123 check
    server lamp2 10.11.11.12:123 check backup

Done.

Now let's configure GlusterFS to sync data between the servers.

install Glusterfs

apt install glusterfs-server -y
systemctl enable glusterd --now
systemctl start glusterd

Prepare bricks on both servers. Ensure the directories are empty, as GlusterFS bricks must be initialized with empty directories

mkdir -p /data/brick1/etc_haproxy
chown -R gluster:gluster /data/brick1

On server1, add server2 as a peer and vice-versa

gluster peer probe server2.your_domain.ca

Verify the peer connection

gluster peer status

Create a replicated volume for /etc/haproxy

gluster volume create haproxy_vol replica 2 server1.your_domain.ca:/data/brick1/etc_haproxy server2.your_domain.ca:/data/brick1/etc_haproxy

Start the volume

gluster volume start haproxy_vol

Mount the /etc/haproxy volume (same on both servers) & add to fstab for persistence

mount -t glusterfs server1.your_domain.ca:/haproxy_vol /etc/haproxy
echo "server1.your_domain.ca,server2.your_domain.ca:/haproxy_vol /etc/haproxy glusterfs defaults,_netdev 0 0" | sudo tee -a /etc/fstab

Verify sync

Create a test file in /etc/haproxy (the mounted volume) on server1

echo "Hello from server1" > /etc/haproxy/testfile.txt

Check if file appears on server2

Add firewall rules

firewall-cmd --add-service=glusterfs --permanent
firewall-cmd --reload

Allow specific IPs or subnets when creating the volume

gluster volume set haproxy_vol auth.allow 11.11.11.0/24
# Restart the same volume the option was set on
gluster volume stop haproxy_vol
gluster volume start haproxy_vol

Check volume current state.

gluster volume info haproxy_vol

enjoy.

XLS-to-CSV converter in custom-build Docker container

Dockerfile – to create custom docker container
docker-compose.yml – to build/start container
converter.py – Python script to convert Excel files
entrypoint.sh – to start cron service in container
crontab.txt – to schedule cron task to run every 5 min

Dockerfile content:

# Image for the Excel-to-CSV converter: Python 3.11 plus cron, which runs
# converter.py on the schedule defined in crontab.txt.
FROM python:3.11-slim

# Install cron
# (the apt package lists are removed in the same layer to keep the image small)
RUN apt-get update && apt-get install -y cron && rm -rf /var/lib/apt/lists/*

# Install Python deps
RUN pip install --no-cache-dir pandas xlrd openpyxl

# Create directories
# (docker-compose.yml bind-mounts host folders over both of them)
RUN mkdir -p /app/source /app/destination

WORKDIR /app

# Copy files
COPY converter.py entrypoint.sh crontab.txt /app/

# Make entrypoint executable
RUN chmod +x /app/entrypoint.sh

# Setup cron
# (installs crontab.txt as the crontab of the build user)
RUN crontab /app/crontab.txt

CMD ["/app/entrypoint.sh"]

docker-compose.yml content:

# Compose file for the converter: builds the image from the Dockerfile in this
# directory and bind-mounts the host's input/output folders into the container.
services:
  excel-converter:
    build:
      context: .
#      proxy for Docker if needed
#      args:
#        http_proxy: http://proxy:3128
#        https_proxy: http://proxy:3128
#        no_proxy: localhost,127.0.0.1
    container_name: excel-converter
    volumes:
      # host path : container path (the paths converter.py reads and writes)
      - /opt/data/source:/app/source
      - /opt/data/destination:/app/destination
    restart: unless-stopped

content of converter.py

import pandas as pd
from pathlib import Path

# Default locations (bind-mounted into the container by docker-compose.yml)
SOURCE_DIR = Path("/app/source")
DEST_DIR = Path("/app/destination")


def convert_files(source_dir=SOURCE_DIR, dest_dir=DEST_DIR):
    """Convert every Excel workbook in ``source_dir`` to a CSV in ``dest_dir``.

    For each ``*.xls*`` file the first sheet is read with ``pandas.read_excel``
    and written as ``<stem>.csv`` (UTF-8, no index column). The workbook is
    deleted only after its CSV is completely written. A failure on one file is
    printed and does not stop the remaining files.

    Args:
        source_dir: Directory scanned for workbooks (defaults to SOURCE_DIR).
        dest_dir: Directory receiving the CSV files (defaults to DEST_DIR);
            created if it does not exist.
    """
    source_dir = Path(source_dir)
    dest_dir = Path(dest_dir)
    dest_dir.mkdir(parents=True, exist_ok=True)

    # sorted() gives a deterministic processing order
    for file in sorted(source_dir.glob("*.xls*")):  # matches .xls and .xlsx
        # Skip directories and the "~$<name>" lock files Excel creates while a
        # workbook is open; they are not readable workbooks.
        if not file.is_file() or file.name.startswith("~$"):
            continue

        out_file = dest_dir / (file.stem + ".csv")
        # Write to a temporary name first so that consumers of dest_dir never
        # see a half-written CSV; the rename below is atomic on one filesystem.
        tmp_file = out_file.with_name(out_file.name + ".tmp")
        try:
            print(f"Processing {file}")
            df = pd.read_excel(file)
            df.to_csv(tmp_file, index=False, encoding="utf-8")
            tmp_file.replace(out_file)
            file.unlink()  # delete original
            print(f"Converted {file} → {out_file}")
        except Exception as e:
            # Best effort per file: clean up the partial output and keep going
            tmp_file.unlink(missing_ok=True)
            print(f"Failed to convert {file}: {e}")


if __name__ == "__main__":
    convert_files()

content of entrypoint.sh

#!/bin/bash
# Container entrypoint: start cron in the foreground.
# "exec" replaces this shell with cron, so cron itself becomes the container's
# main process and receives the stop signal sent by "docker stop" directly.
exec cron -f

content of crontab.txt

# Search path used by the jobs below
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Every 5 minutes: run the converter, appending stdout and stderr to /var/log/cron.log
*/5 * * * * python3 /app/converter.py >> /var/log/cron.log 2>&1

build and start converter

docker compose up -d --force-recreate

enjoy results.

Apache NiFi in Docker

Shell script to deploy NiFi

#!/bin/bash
# Deploys Apache NiFi, PostgreSQL and pgAdmin as Docker containers attached to
# the user-defined network "local_network".
# The script can be re-run: an existing network is reused and an existing
# container is started instead of being created a second time.
set -euo pipefail

# Passwords...
USER_PASSWORD="user_pass"
PGSQL_PASSWORD="PG_pass"
PGADMIN_PASSWORD="pgadmin_pass"

#######################################
# Create and start a container, or just start it when it already exists.
# Arguments: $1 - container name
#            $2.. - remaining "docker run" options, ending with the image
# Returns:   status of "docker run" / "docker start"
#######################################
run_container() {
  local name=$1
  shift
  if docker container inspect "$name" &>/dev/null; then
    echo "Container '$name' already exists, starting it if stopped..."
    docker start "$name" >/dev/null
  else
    docker run -d --name "$name" --network local_network \
      --restart unless-stopped "$@"
  fi
}

# Create network if it doesn't exist
if ! docker network inspect local_network &>/dev/null; then
  echo "Creating docker network 'local_network'..."
  docker network create --subnet=10.100.10.0/24 local_network
else
  echo "Docker network 'local_network' already exists."
fi

# Run apache-nifi container (for testing use "-e NIFI_WEB_PROXY_HOST=your_domain.ca:8443 \")
run_container apache-nifi -p 8443:8443 \
  -e NIFI_WEB_PROXY_HOST=your_domain:8443 \
  -e SINGLE_USER_CREDENTIALS_USERNAME=admin \
  -e SINGLE_USER_CREDENTIALS_PASSWORD="$USER_PASSWORD" \
  -v /opt/nifi/data/dwh:/opt/nifi/nifi-current/dwh \
  apache/nifi:latest

# Run postgres-db container
run_container postgres-db -p 5432:5432 \
  -e POSTGRES_PASSWORD="$PGSQL_PASSWORD" \
  -v /opt/nifi/data/postgres_data:/var/lib/postgresql/data \
  postgres:latest

# Run pgadmin container
run_container pgadmin -p 5050:80 \
  -e PGADMIN_DEFAULT_EMAIL=pgadmin@your_domain.ca \
  -e PGADMIN_DEFAULT_PASSWORD="$PGADMIN_PASSWORD" \
  -v /opt/nifi/data/pgadmin:/var/lib/pgadmin \
  dpage/pgadmin4:latest

echo "All containers started."

# Now make sure the directory exists
docker exec apache-nifi mkdir -p /opt/nifi/nifi-current/some_dir_for_data_input

run it.

enjoy your Apache NiFi.

CIFS connection to Windows share

Install cifs-utils

apt install cifs-utils -y

Create directory to mount

mkdir /mnt/some_dir

Mount a CIFS windows share

mount -t cifs //Windows_server/windows_share /mnt/some_dir -o username=username

To make it permanent add to /etc/fstab something like this:

//windows_server/windows_share /mnt/some_dir cifs vers=3.0,credentials=/home/user/.credentials,iocharset=utf8,file_mode=0777,dir_mode=0777,uid=local_user,gid=local_group,nofail,noserverino,echo_interval=30 0 0

.credentials file example:

username=username
password=password
domain=domain

We have LLM at home!

This will install a local LLM on your server. It is a basic installation; you will need to do a lot more before you get real benefits from having an LLM at home.

Prepare server:

Ensure Docker is installed on your server. Use the following commands to install Docker and docker-compose if not already installed:

apt update
apt install -y docker.io
systemctl start docker
systemctl enable docker
apt install -y docker-compose

Ensure sufficient CPU and memory are allocated for running the Ollama server and Open WebUI.

Create docker-compose.yml file and add following configuration to it:

# Ollama API server plus Open WebUI front end on a private bridge network with
# static addresses, so the UI can reach the API at a fixed IP.
services:
  ollama-server:
    image: ollama/ollama:latest
    container_name: ollama-server
    ports:
      - "11434:11434" # Ollama API port
    environment:
      - USE_CPU=1 # Force CPU usage
    volumes:
      - ollama:/root/.ollama # Keep downloaded models when the container is re-created
    restart: unless-stopped
    networks:
      inside:
        ipv4_address: 10.100.10.10 # Static IP for Ollama server

  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: open-webui
    ports:
      - "80:8080" # Open WebUI port
    environment:
      - OLLAMA_BASE_URL=http://10.100.10.10:11434 # Reference Ollama server via static IP
    volumes:
      - ./data:/app/data # Mount folder for manuals
    restart: unless-stopped
    depends_on:
      - ollama-server # Ensure Ollama server starts before Open WebUI
    networks:
      inside:
        ipv4_address: 10.100.11.10 # Static IP for Open WebUI

networks:
  inside:
    driver: bridge
    ipam:
      config:
        - subnet: 10.100.10.0/23 # Combined subnet to include both servers

# Named volume holding the Ollama model store
volumes:
  ollama:

Start it

docker-compose up -d

Script to feed your LLM (upload all text files from ~/feed_data to LLM)

        # json_escape STRING
        # Print STRING escaped for use inside a JSON string literal
        # (backslash, double quote, newline, carriage return and tab).
        json_escape() {
            local s=$1
            s=${s//\\/\\\\}
            s=${s//\"/\\\"}
            s=${s//$'\n'/\\n}
            s=${s//$'\r'/\\r}
            s=${s//$'\t'/\\t}
            printf '%s' "$s"
        }

        # Upload every text file from ~/feed_data, one request per file.
        # NOTE(review): verify the endpoint path and payload keys against the
        # API documentation of the Ollama version in use.
        for file in ~/feed_data/*.txt; do
            [[ -e "$file" ]] || continue   # the glob matched nothing
            content=$(<"$file")
            # The body is passed on stdin so large files do not hit the
            # command-line length limit; the URL is quoted so that "<" and ">"
            # are not parsed as shell redirections.
            printf '{"text": "%s"}' "$(json_escape "$content")" \
                | curl -X POST "http://<your_server_ip>:11434/embed" \
                     -H 'Content-Type: application/json' \
                     --data-binary @-
        done

Ansible: snapshotting VMs on VMware and cleaning up old snapshots.

Solution 1. PowerShell script to take snapshots of the listed servers. The password is stored in a separate file (/root/vmware.cred in this case).

script itself

#!/usr/bin/pwsh
# Creates a quiesced snapshot of every VM listed in /opt/snaps/servers.inv.
# Everything is logged to /var/log/vm_snapshot.log; the script exits with 1
# when the vCenter connection cannot be established.

# Define log file path
$logFile = "/var/log/vm_snapshot.log"

# Start transcript (captures everything displayed on screen)
Start-Transcript -Path $logFile -Append

# Function to write logs: one timestamped line to the log file and the console
function Write-Log {
    param (
        [string]$message
    )
    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
    $logEntry = "[$timestamp] $message"

    # Write to log file and console
    Add-Content -Path $logFile -Value $logEntry
    Write-Host $logEntry
}

# Read the password from the file
$pass = Get-Content -Path "/root/vmware.cred" -ErrorAction Stop

# Start logging
Write-Log "Starting VM snapshot script execution."

# Connect to vCenter server.
# -ErrorAction Stop turns a failed connection into a terminating error;
# without it the catch block is never reached and success is logged anyway.
try {
    $connectionOutput = Connect-VIServer -Server "your_server_here" -User "user_with_snapsoting_rights" -Password $pass -Force -ErrorAction Stop
    Write-Log "Successfully connected to vCenter."
    Write-Log $connectionOutput
} catch {
    Write-Log "Failed to connect to vCenter: $_"
    Stop-Transcript
    exit 1
}

# Read the list of VM names from the servers.inv file
$vmNames = Get-Content -Path "/opt/snaps/servers.inv"  # Adjust path

# Get the current date and time in the desired format
$currentDateTime = (Get-Date).ToString("yyyy-MM-dd_HH-mm-ss")

# Loop through each VM and create a snapshot
foreach ($rawName in $vmNames) {
    # Ignore blank lines and surrounding whitespace in the inventory file
    $vmName = "$rawName".Trim()
    if ($vmName -eq "") { continue }

    # Check if VM exists
    $vm = Get-VM -Name $vmName -ErrorAction SilentlyContinue
    if ($vm) {
        $snapshotName = "autopatch_$currentDateTime" + "_$vmName"
        try {
            # -ErrorAction Stop so that a failed snapshot reaches the catch block
            $snapshotOutput = New-Snapshot -VM $vm -Name $snapshotName -Quiesce -ErrorAction Stop
            Write-Log "Snapshot created for VM: $vmName with name: $snapshotName"
            Write-Log $snapshotOutput
        } catch {
            Write-Log "Failed to create snapshot for VM: $vmName. Error: $_"
        }
    } else {
        Write-Log "VM not found: $vmName"
    }
}

# Close the vCenter session opened above
Disconnect-VIServer -Confirm:$false

Write-Log "VM snapshot script execution completed."

# Stop transcript
Stop-Transcript

servers.inv content – just list of servers (how they present at VMWare). like this:

server1
server2
server3
server4

Solution 2. Clean up old snapshots. Just schedule it to run periodically and it will delete the snapshots of all listed servers. The best time to run it is after hours, as deleting a snapshot is an impactful operation on VMware.

script:

#!/usr/bin/pwsh
# Removes old snapshots of every VM named in the inventory files below, keeping
# at most $snapshotLimit of the newest snapshots per VM.
# Progress and errors are written to $logFile.

# Define Variables
$serverList = @("/opt/snaps/servers.inv", "/opt/snaps/servers2.inv", "/opt/snaps/servers3.inv", "/opt/snaps/servers4.inv")
# Change snapshotLimit to above zero value if you want to keep some snapshoots (not recommended, as VMWare well-known for having bad snapshooting process).
$snapshotLimit = 0
$logFile = "/var/log/vm_snapshot_cleaning.log"
$vcServer = "your_server"
$vcUser = "user_with_right_to_delete_snapsoots"
$vcPassword = Get-Content -Path "/root/vmware2.cred" -ErrorAction Stop

# Function to Log Messages: appends one timestamped line to $logFile
function Write-Log {
    param (
        [string]$message
    )
    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
    "$timestamp - $message" | Out-File -Append -FilePath $logFile
}

# Connect to vCenter
try {
    $connection = Connect-VIServer -Server $vcServer -User $vcUser -Password $vcPassword -Force -ErrorAction Stop
    Write-Log "Connected to vCenter: $vcServer"
} catch {
    Write-Log "Failed to connect to vCenter: $_"
    exit 1
}

# Process Each VM
foreach ($file in $serverList) {
    if (Test-Path $file) {
        Get-Content $file | ForEach-Object {
            # Inside ForEach-Object, "return" skips to the next input line
            $vmName = $_.Trim()
            if ($vmName -eq "") { return }

            # Check if VM exists
            $vm = Get-VM -Name $vmName -ErrorAction SilentlyContinue
            if (-not $vm) {
                Write-Log "VM not found: $vmName. Skipping."
                return
            }

            # Get Snapshots, oldest first; @() keeps the result an array even
            # for zero or one snapshot
            $snapshots = @(Get-Snapshot -VM $vm -ErrorAction SilentlyContinue | Sort-Object Created)
            $snapshotCount = $snapshots.Count
            Write-Log "VM: $vmName has $snapshotCount snapshots."

            # Check Snapshot Limit: delete every snapshot above the limit,
            # not just one per run
            if ($snapshotCount -gt $snapshotLimit) {
                $surplus = $snapshots | Select-Object -First ($snapshotCount - $snapshotLimit)
                foreach ($snapshot in $surplus) {
                    try {
                        # Synchronous removal: the snapshots of one VM are
                        # deleted one after another, and "Deleted" is logged
                        # only after the removal really succeeded.
                        Remove-Snapshot -Snapshot $snapshot -Confirm:$false -ErrorAction Stop | Out-Null
                        Write-Log "Deleted oldest snapshot '$($snapshot.Name)' for VM: $vmName"
                    } catch {
                        Write-Log "Failed to delete snapshot for VM: $vmName. Error: $_"
                    }
                }
            }
        }
    } else {
        Write-Log "File not found: $file. Skipping."
    }
}

# Disconnect from vCenter
Disconnect-VIServer -Confirm:$false
Write-Log "Disconnected from vCenter."

Ansible: update OS on remote server

Combined use of shell script and ansible to update remote servers, divided to 3 groups.

shell script:

#!/bin/bash
# Interactive wrapper around ansible-playbook: for each inventory group, ask
# whether it should be updated, render the playbook from the template for that
# group and run it.

TEMPLATE="update_ubuntu_template.yml"
PLAYBOOK="update_ubuntu.yml"
VAULT_OPTION="--ask-vault-pass"

#######################################
# Ask whether GROUP should be updated and, on "Y"/"y", run the playbook for it.
# Globals:   TEMPLATE, PLAYBOOK, VAULT_OPTION (read)
# Arguments: $1 - inventory group name
#            $2 - answer assumed when the user just presses Enter
#            $3.. - optional extra arguments passed on to ansible-playbook
# Returns:   0 when skipped; otherwise the status of sed / ansible-playbook
#######################################
ask_group() {
  local group=$1
  local default_response=$2
  local extra_args=("${@:3}")
  local response

  read -r -p "Do you want to update/upgrade $group? ([Y] or Enter to accept, any other - to skip): " response
  response=${response:-$default_response}

  if [[ "$response" =~ ^[Yy]$ ]]; then
    # Do not run ansible with an empty or stale playbook if rendering failed
    sed "s/{{ target_group }}/$group/" "$TEMPLATE" > "$PLAYBOOK" || return
    ansible-playbook "$PLAYBOOK" "$VAULT_OPTION" "${extra_args[@]}"
  fi
}


ask_group "ubuntu_group1" "Y"
ask_group "ubuntu_group2" "Y"

# Special handling for ubuntu_group3 as we want to schedule reboot of this servers for midnight and not reboot it right now
ask_group "ubuntu_group3" "Y" --extra-vars "schedule_reboot=true"

content of update_ubuntu_template.yml

# Playbook template: the wrapper shell script replaces the placeholder on the
# "hosts" line with a real inventory group and writes update_ubuntu.yml.
- hosts: {{ target_group }}
  become: true
  become_user: root
  tasks:
    # Refresh the package index unless the cache is younger than 3600 s
    - name: Update apt repo and cache on all Debian/Ubuntu boxes
      apt: update_cache=yes force_apt_get=yes cache_valid_time=3600

    - name: Upgrade all packages on servers
      apt: upgrade=dist force_apt_get=yes

    # The result is registered and drives the two reboot tasks below
    - name: Check if a reboot is needed on all servers
      register: reboot_required_file
      stat: path=/var/run/reboot-required

    # Immediate reboot: only when a reboot is required and schedule_reboot is
    # not set (it defaults to false)
    - name: Reboot the box if kernel updated
      reboot:
        msg: "Reboot initiated by Ansible for kernel updates"
        connect_timeout: 5
        reboot_timeout: 300
        pre_reboot_delay: 0
        post_reboot_delay: 30
        test_command: uptime
      when: reboot_required_file.stat.exists and not (schedule_reboot | default(false))

    # Deferred reboot: root cron entry for 23:59 on the current day and month
    # as reported by the gathered facts.
    # NOTE(review): nothing in this playbook removes the cron entry again, so
    # it would presumably fire on the same date next year - confirm.
    - name: Schedule reboot at 23:59 if kernel updated and scheduling is enabled
      cron:
        name: "Scheduled reboot for kernel update"
        user: root
        job: "shutdown -r now"
        hour: 23
        minute: 59
        day: "{{ ansible_date_time.day }}"
        month: "{{ ansible_date_time.month }}"
        weekday: "*"
        state: present
      when: reboot_required_file.stat.exists and (schedule_reboot | default(false))

content of update_ubuntu.yml

# Rendered playbook (here for ubuntu_group3). The wrapper shell script
# overwrites this file on every run, so edit the template rather than this file.
- hosts: ubuntu_group3
  become: true
  become_user: root
  tasks:
    # Refresh the package index unless the cache is younger than 3600 s
    - name: Update apt repo and cache on all Debian/Ubuntu boxes
      apt: update_cache=yes force_apt_get=yes cache_valid_time=3600

    - name: Upgrade all packages on servers
      apt: upgrade=dist force_apt_get=yes

    # The result is registered and drives the two reboot tasks below
    - name: Check if a reboot is needed on all servers
      register: reboot_required_file
      stat: path=/var/run/reboot-required

    # Immediate reboot: only when schedule_reboot is not set
    - name: Reboot the box if kernel updated
      reboot:
        msg: "Reboot initiated by Ansible for kernel updates"
        connect_timeout: 5
        reboot_timeout: 300
        pre_reboot_delay: 0
        post_reboot_delay: 30
        test_command: uptime
      when: reboot_required_file.stat.exists and not (schedule_reboot | default(false))

    # Deferred reboot: root cron entry for 23:59 on the current day and month
    - name: Schedule reboot at 23:59 if kernel updated and scheduling is enabled
      cron:
        name: "Scheduled reboot for kernel update"
        user: root
        job: "shutdown -r now"
        hour: 23
        minute: 59
        day: "{{ ansible_date_time.day }}"
        month: "{{ ansible_date_time.month }}"
        weekday: "*"
        state: present
      when: reboot_required_file.stat.exists and (schedule_reboot | default(false))

content of ansible.cfg

[defaults]
remote_user=user
inventory = /opt/ansible/ansible_servers.inv
interpreter_python = auto_silent

content of ansible_servers.inv

[ubuntu_group1]
server1
server2
server3

[ubuntu_group2]
server4
server5

[ubuntu_group3]
server6
server7
server8

group_vars/all.yml should contain your user password in encrypted form (Ansible Vault). Something like this:

# Default variables for all playbooks
# Every line of the encrypted payload must be indented under the "!vault |"
# block scalar, otherwise the file is not valid YAML.
ansible_become_password: !vault |
      $ANSIBLE_VAULT;1.1;AES256
      encoded password here

ansible_password: !vault |
      $ANSIBLE_VAULT;1.1;AES256
      encoded password here

Deploy and/or upgrade splunk forwarder on Ubuntu

Case: the customer wants a simple way to update the Splunk forwarder on Ubuntu servers.

Solution: script to simplify update process.

Solution

Create shell script and allow it to run

touch /opt/splunk_uf_upgrade.sh
chmod +x /opt/splunk_uf_upgrade.sh

script

#!/bin/bash
# Re-installs the Splunk Universal Forwarder from the newest .deb linked on the
# Splunk download page, then configures the forward server and deployment
# server. The new package is downloaded BEFORE the existing installation is
# touched, so a failed download leaves the running forwarder untouched.
set -euo pipefail

# Local admin account seeded into the fresh install (also used for the login)
ADMIN_USER="admin"
ADMIN_PASSWORD="local_admin_password_here"

# Private working directory for the installer; removed on every exit path
workdir=$(mktemp -d)
trap 'rm -rf -- "$workdir"' EXIT
deb_file="$workdir/splunkforwarder-latest.deb"

# Download the latest .deb package
download_page=$(curl -fsS https://www.splunk.com/en_us/download/universal-forwarder.html)
# "|| true": no match (or head closing the pipe early) must not abort here;
# the empty-URL case is reported explicitly below.
deb_url=$(grep -Eo 'https://download.splunk.com/products/universalforwarder/releases/[0-9]+\.[0-9]+\.[0-9]+/linux/splunkforwarder-[0-9]+\.[0-9]+\.[0-9]+-[a-z0-9]+-linux-amd64\.deb' <<<"$download_page" | head -n 1) || true
if [[ -z "$deb_url" ]]; then
  echo "Error: no universal forwarder .deb URL found on the download page." >&2
  exit 1
fi
wget -O "$deb_file" "$deb_url"

# Remove the current installation (each step is best effort, it may not exist)
/opt/splunkforwarder/bin/splunk stop || true
/opt/splunkforwarder/bin/splunk disable boot-start || true
cp -a /opt/splunkforwarder "/opt/splunkforwarder_backup_$(date +%F)" || true
dpkg -r splunkforwarder || true
rm -rf /opt/splunkforwarder
rm -rf /etc/systemd/system/SplunkForwarder.service
systemctl daemon-reexec
systemctl daemon-reload

# Install the .deb package
dpkg -i "$deb_file"

# Seed admin user
tee /opt/splunkforwarder/etc/system/local/user-seed.conf > /dev/null <<EOF
[user_info]
USERNAME = $ADMIN_USER
PASSWORD = $ADMIN_PASSWORD
EOF

# Ensure splunk group exists
getent group splunk > /dev/null || groupadd --system -g 119 splunk

# Ensure splunk user exists
id splunk &>/dev/null || useradd --system -u 119 -g 119 -d /opt/splunkforwarder -s /usr/sbin/nologin -c "splunk@$( hostname -s )" splunk

# Set permissions and config
chown -R splunk:splunk /opt/splunkforwarder
echo 'SPLUNK_BINDIP=127.0.0.1' | tee -a /opt/splunkforwarder/etc/splunk-launch.conf > /dev/null

# Start and enable service
# First start as the splunk user accepts the license and applies user-seed.conf
sudo -u splunk /opt/splunkforwarder/bin/splunk start --accept-license --answer-yes --no-prompt
# Wait until Splunkd started (gives up, and aborts the script, after 60 s)
timeout 60 bash -c 'until sudo -u splunk /opt/splunkforwarder/bin/splunk status 2>/dev/null | grep -q "splunkd is running (PID"; do sleep 2; done'
echo "Confirmed splunkd is running — waiting 10 more seconds to ensure it's stable..."
sleep 10
# Stop again so that the service can be handed over to systemd
/opt/splunkforwarder/bin/splunk stop
/opt/splunkforwarder/bin/splunk enable boot-start -user splunk
systemctl daemon-reload
systemctl enable SplunkForwarder
systemctl start SplunkForwarder

# Wait until Splunkd started
timeout 60 bash -c 'until sudo -u splunk /opt/splunkforwarder/bin/splunk status 2>/dev/null | grep -q "splunkd is running (PID"; do sleep 2; done'
# Confirm it's been running steadily for at least 10 seconds
echo "Confirmed splunkd is running — waiting 10 more seconds to ensure it's stable..."
sleep 10

# Authenticate first (must match what's in user-seed.conf)
sudo -u splunk /opt/splunkforwarder/bin/splunk login -auth "$ADMIN_USER:$ADMIN_PASSWORD"

# Configure forward-server and deployment-poll
sudo -u splunk /opt/splunkforwarder/bin/splunk add forward-server splunkidx.your_domain.ca:9997
sudo -u splunk /opt/splunkforwarder/bin/splunk set deploy-poll splunkdeploy.your_domain.ca:8089

# Verify UF version
echo
echo "== UF version =="
sudo -u splunk /opt/splunkforwarder/bin/splunk version 2>&1 | grep -v '^Warning'

# Verify configured deployment server
echo
echo "== Deployment Server Configuration =="
sudo -u splunk /opt/splunkforwarder/bin/splunk show deploy-poll 2>&1 | grep -v '^Warning'

# Verify configured forward servers
echo
echo "== Forward Servers =="
sudo -u splunk /opt/splunkforwarder/bin/splunk list forward-server 2>&1 | grep -v '^Warning'

echo
echo "== Installation completed =="

# Cleanup installer: handled by the EXIT trap, which removes $workdir

run script and enjoy easy update

/opt/splunk_uf_upgrade.sh

done