Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions scripts/data/automation/config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Main configuration for automation scripts

# Device name used for naming log folders.
# runner.sh uses it as the prefix of each collection folder
# ("<DEVICE_NAME>_logs_<timestamp>") and substitutes "device" when it is unset.
DEVICE_NAME="llewellyn-device"

# Add other configuration variables here if needed in the future
5 changes: 3 additions & 2 deletions scripts/data/automation/configure.sh
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,9 @@ sel=$(menu_select "Select server version" 15 74 5 \

if [[ "$SERVER_VERSION" == "v2" ]]; then
PYTHON_SCRIPT=$(menu_select "Select logs flavor (v2)" 12 74 5 \
oc4d "OC4D logs (logv2.py)" \
cape_coast_d "Cape Coast Castle logs (castle.py)" \
oc4d "OC4D logs (logv2.py)" \
cape_coast_d "Cape Coast Castle logs (castle.py)" \
dhub "D-Hub logs (dhub.py)" \
)
else
PYTHON_SCRIPT="oc4d"
Expand Down
83 changes: 34 additions & 49 deletions scripts/data/automation/main.sh
Original file line number Diff line number Diff line change
@@ -1,64 +1,49 @@
#!/bin/bash

# Clear the screen
clear
echo -e "\n\n"
# --- Colors ---
YELLOW='\033[0;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
NC='\033[0m'

# Display the name of the tool using figlet and lolcat
figlet -t -f 3d "AUTOMATION" | lolcat
echo ""
#######################################
# Print the menu banner: a rule, the title line, and a second rule.
# Each line is piped through lolcat when that command is available;
# otherwise each line is wrapped in the GREEN / NC colour escapes.
# Globals:   GREEN, NC (read; treated as empty when unset)
# Arguments: none
# Outputs:   three lines on stdout
#######################################
display_header() {
  # Locals, so the banner text does not leak into the caller's namespace.
  local -r rule="========================================================"
  local -r title="Install, Check Status, or Configure the Log Processor"
  local line
  local use_lolcat=0

  # Probe once instead of once per line.
  if command -v lolcat &> /dev/null; then
    use_lolcat=1
  fi

  for line in "$rule" "$title" "$rule"; do
    if (( use_lolcat )); then
      # One lolcat invocation per line, as before.
      printf '%s\n' "$line" | lolcat
    else
      # %b expands the backslash escapes stored in the colour variables;
      # %s prints the text itself verbatim.
      printf '%b%s%b\n' "${GREEN:-}" "$line" "${NC:-}"
    fi
  done
}

# Centered border with description
echo "=============================================================="
echo " Install, Check Status, or Configure the Log Processor"
echo "=============================================================="
# --- Main Menu ---
clear
display_header
echo ""

# Color variables
RED='\033[0;31m'
NC='\033[0m' # No Color
DARK_GRAY='\033[1;30m'
GREEN='\033[0;32m'

# Display menu options with colors
echo -e "1. Install Automation ${DARK_GRAY}-| Set up the systemd service and timer${NC}"
echo -e "2. Check Status ${DARK_GRAY}-| Check the status of the automation service${NC}"
echo -e "3. Configure ${DARK_GRAY}-| Configure automation parameters${NC}"
echo -e "${GREEN}4. Go Back ${DARK_GRAY}-| Go back to the data menu${NC}"
echo -e "${RED}5. Exit ${DARK_GRAY}-| Exit the program${NC}"
echo "1. Install Automation | Set up the systemd service and timer"
echo "2. Check Status | Check the status of the automation service"
echo "3. Configure | Configure automation parameters"
echo -e "${YELLOW}4. Go Back | Go back to the data menu${NC}"
echo -e "${RED}5. Exit | Exit the program${NC}"
echo ""
read -p "Choose an option (1-5): " choice

# Prompt the user for input
read -r -p "Choose an option (1-5): " choice

# Execute corresponding action based on user choice
case $choice in
# Logic to handle user's choice
case "$choice" in
1)
# Run the installer through sudo: it needs root privileges.
sudo ./scripts/data/automation/install.sh
# Pause to allow user to read the installer output before returning to menu
read -r -p "Installation script finished. Press Enter to return to the menu..."
exec ./scripts/data/automation/main.sh
;;
2)
./scripts/data/automation/status.sh
;;
3)
# Run the configure script through sudo: it needs root privileges.
sudo ./scripts/data/automation/configure.sh
# Pause to allow user to read the configure output before returning to menu
read -r -p "Configuration script finished. Press Enter to return to the menu..."
exec ./scripts/data/automation/main.sh
;;
4)
exec ./scripts/data/main.sh
;;
5)
./exit.sh
;;
# Add cases for other options if they exist
*)
echo -e "${RED}Invalid choice. Please choose a number between 1 and 5.${NC}"
sleep 1.5
exec ./scripts/data/automation/main.sh
echo "Returning..."
;;
esac
esac
196 changes: 61 additions & 135 deletions scripts/data/automation/runner.sh
Original file line number Diff line number Diff line change
@@ -1,150 +1,76 @@
#!/bin/bash
# Runner with per-bucket region autodetect (no global AWS config needed)
set -euo pipefail
# ts: print the current local time as "YYYY-MM-DD HH:MM:SS".
ts() {
  # %F and %T are shorthand for %Y-%m-%d and %H:%M:%S.
  date '+%F %T'
}

# log: print all arguments as one stdout line prefixed with "[<timestamp>] ".
log() {
  printf '[%s] %s\n' "$(ts)" "$*"
}

SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
cd "$PROJECT_ROOT"
# This is the main automation engine. It handles collection, processing, and filtering.

CONFIG_FILE="$PROJECT_ROOT/config/automation.conf"
# --- Configuration & Setup ---
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
PROJECT_ROOT=$(cd "$SCRIPT_DIR/../../.." && pwd)
# Load shared settings (e.g. DEVICE_NAME). Sourcing fails if config.sh is missing.
. "$PROJECT_ROOT/scripts/data/automation/config.sh" # Load configuration

#######################################
# Source the automation config file into the current shell.
# Reads the file directly when it is readable; otherwise, if sudo is
# available, copies it through sudo into a private temp file and sources that.
# Globals:   CONFIG_FILE (read); any variables the config file assigns
# Arguments: none
# Outputs:   one status line via log
# Returns:   0 once the config is loaded; exits the script with status 1
#            when the file cannot be read by either route
#######################################
load_config() {
  local src="$CONFIG_FILE"
  local tmp=""

  if [[ -r "$src" ]]; then
    source "$src"
    log "⚙️ Config loaded (direct): $src"
    return 0
  fi

  if command -v sudo >/dev/null 2>&1; then
    # mktemp gives an unpredictable name and creates the file with mode 0600,
    # so the copy can be neither pre-planted (symlink) nor read by other users
    # before it is sourced.
    if tmp="$(mktemp "${TMPDIR:-/tmp}/cdn_auto_conf.XXXXXX")"; then
      # Try without a password prompt first, then allow an interactive prompt.
      if sudo -n cat "$src" > "$tmp" 2>/dev/null || sudo cat "$src" > "$tmp" 2>/dev/null; then
        source "$tmp"
        rm -f -- "$tmp"
        log "⚙️ Config loaded (sudo): $src"
        return 0
      fi
      # Do not leave a stale (possibly partial) copy behind on failure.
      rm -f -- "$tmp"
    fi
  fi

  log "❌ Cannot read config: $src"
  exit 1
}
load_config

# Unset empty AWS env so CLI uses its defaults; we will supply region per call.
[[ -n "${AWS_PROFILE:-}" ]] && export AWS_PROFILE || unset AWS_PROFILE
[[ -n "${AWS_REGION:-}" ]] && export AWS_DEFAULT_REGION="$AWS_REGION" || unset AWS_DEFAULT_REGION

SERVER_VERSION="${SERVER_VERSION:-v2}"
DEVICE_LOCATION="${DEVICE_LOCATION:-device}"
PYTHON_SCRIPT="${PYTHON_SCRIPT:-oc4d}"
S3_BUCKET="${S3_BUCKET:-s3://example-bucket}"
S3_SUBFOLDER="${S3_SUBFOLDER:-}"

DATA_DIR="$PROJECT_ROOT/00_DATA"
PROCESSED_ROOT="$DATA_DIR/00_PROCESSED"
QUEUE_DIR="$DATA_DIR/00_UPLOAD_QUEUE"
mkdir -p "$DATA_DIR" "$PROCESSED_ROOT" "$QUEUE_DIR"

TODAY_YMD="$(date '+%Y_%m_%d')"
NEW_FOLDER="${DEVICE_LOCATION}_logs_${TODAY_YMD}"
COLLECT_DIR="$DATA_DIR/$NEW_FOLDER"

# join_path LEFT RIGHT: print LEFT and RIGHT joined by a single "/",
# after dropping one trailing "/" from LEFT and one leading "/" from RIGHT.
join_path() {
  local left="${1%/}"
  local right="${2#/}"
  printf '%s/%s\n' "$left" "$right"
}

# has_internet: return 0 when s3.amazonaws.com resolves via getent and,
# if curl is installed, a HEAD request to https://s3.amazonaws.com completes
# within 5 seconds; return 1 otherwise.
has_internet() {
  if ! getent hosts s3.amazonaws.com >/dev/null 2>&1; then
    return 1
  fi

  # The HTTP probe is skipped when curl is not installed.
  if command -v curl >/dev/null 2>&1; then
    if ! timeout 5s curl -Is https://s3.amazonaws.com >/dev/null 2>&1; then
      return 1
    fi
  fi

  return 0
}

# bucket_name: print S3_BUCKET without a leading "s3://" and without
# anything from the first "/" onward.
bucket_name() {
  local without_scheme="${S3_BUCKET#s3://}"
  local name_only="${without_scheme%%/*}"
  echo "$name_only"
}

#######################################
# Print the AWS region of the configured bucket.
# Resolution order:
#   1. `aws s3api get-bucket-location` ("None" -> us-east-1, legacy "EU" -> eu-west-1)
#   2. the x-amz-bucket-region header of a HEAD request, when step 1
#      produced nothing and curl is installed
#   3. us-east-1 as the last resort
# Globals:   none directly (the bucket comes from bucket_name)
# Arguments: none
# Outputs:   the region name on stdout
#######################################
bucket_region() {
  local b
  local reg=""
  b="$(bucket_name)"

  # `|| true`: a failing CLI call must not abort the caller; it just leaves
  # reg empty so the fallback below can run.
  reg="$(aws --region us-east-1 s3api get-bucket-location --bucket "$b" --query 'LocationConstraint' --output text 2>/dev/null || true)"
  case "$reg" in
    None) reg="us-east-1" ;;
    EU) reg="eu-west-1" ;;
  esac

  # Curl fallback. It has to run BEFORE the default is applied, otherwise
  # reg is never empty here and the fallback is dead code.
  if [[ -z "$reg" ]] && command -v curl >/dev/null 2>&1; then
    # tolower() keeps the header match case-insensitive on any awk
    # (IGNORECASE is a gawk-only extension).
    reg="$(curl -sI --max-time 10 "https://${b}.s3.amazonaws.com/" \
      | tr -d '\r' \
      | awk -F': *' 'tolower($1) == "x-amz-bucket-region" { print $2; exit }' \
      || true)"
  fi

  echo "${reg:-us-east-1}"
}
TIMESTAMP=$(date +"%Y_%m_%d_%H%M%S")
# Use a generic folder name if DEVICE_NAME isn't set in config.sh
NEW_FOLDER="${DEVICE_NAME:-device}_logs_${TIMESTAMP}"
COLLECT_DIR="$PROJECT_ROOT/00_DATA/$NEW_FOLDER"

# aws_cp_region FILE DEST: run `aws s3 cp FILE DEST` with --region set to
# whatever bucket_region prints. Returns the exit status of the aws command.
aws_cp_region() {
  local src_file="$1"
  local target="$2"
  local region
  region="$(bucket_region)"
  aws --region "$region" s3 cp "$src_file" "$target"
}

#######################################
# Upload one file to "<S3_BUCKET>[/<S3_SUBFOLDER>]/RACHEL/<basename>".
# Globals:   S3_BUCKET, S3_SUBFOLDER (read)
# Arguments: $1 - path of the local file to upload
# Outputs:   progress and result lines via log; on failure the captured
#            output of the copy command is included in the log line
# Returns:   0 when the copy succeeded, 1 otherwise
#######################################
upload_one() {
  local file_path="$1"
  # Declared separately from the command substitutions below so that
  # `local` cannot mask their exit status.
  local name remote_base remote_path out
  local rc=0

  name="$(basename "$file_path")"
  remote_base="${S3_BUCKET%/}"
  if [[ -n "$S3_SUBFOLDER" ]]; then
    remote_base="$(join_path "$remote_base" "$S3_SUBFOLDER")"
  fi
  remote_path="$(join_path "$remote_base" "RACHEL/$name")"

  log "⬆️ Uploading $name → $remote_path"

  # `|| rc=$?` records the failure without tripping `set -e`, so the error
  # is still logged when this function is called outside an `if`.
  out="$(aws_cp_region "$file_path" "$remote_path" 2>&1)" || rc=$?

  if (( rc == 0 )); then
    log "✅ Uploaded: $name"
    return 0
  fi
  log "❌ Upload failed for $name: $out"
  return 1
}

log "📁 Collect → $COLLECT_DIR (server=$SERVER_VERSION, device=$DEVICE_LOCATION)"
mkdir -p "$COLLECT_DIR"
case "$SERVER_VERSION" in
v1|server\ v4|v4)
LOG_DIR="/var/log/apache2"
find "$LOG_DIR" -type f -name 'access.log*' -exec cp -n {} "$COLLECT_DIR"/ \;
;;
v2|server\ v5|v5)
LOG_DIR="/var/log/oc4d"
find "$LOG_DIR" -type f \( \
\( -name 'oc4d-*.log' ! -name 'oc4d-exceptions-*.log' \) -o \
\( -name 'capecoastcastle-*.log' ! -name 'capecoastcastle-exceptions-*.log' \) -o \
-name '*.gz' \) -exec cp -n {} "$COLLECT_DIR"/ \;
;;
*) log "❌ Unknown SERVER_VERSION '$SERVER_VERSION'"; exit 1;;
esac
shopt -s nullglob
for gz in "$COLLECT_DIR"/*.gz; do gzip -df "$gz" || true; done
shopt -u nullglob
echo "[INFO] Created collection folder: $COLLECT_DIR"

# --- STAGE 1: COLLECT LOGS ---
echo "[INFO] Starting log collection for SERVER_VERSION=${SERVER_VERSION}..."

PROCESSOR=""
case "$SERVER_VERSION" in
v1|v4) PROCESSOR="scripts/data/process/processors/log.py" ;;
v2|v5|server\ v5)
case "$PYTHON_SCRIPT" in
oc4d) PROCESSOR="scripts/data/process/processors/logv2.py" ;;
cape_coast_d) PROCESSOR="scripts/data/process/processors/castle.py" ;;
*) PROCESSOR="scripts/data/process/processors/logv2.py" ;;
esac
;;
v2|server\ v5|v5)
# If a specific target is set by the collection menu, use only that.
if [[ "$COLLECT_TARGET" == "oc4d" ]]; then
LOG_DIRS=("/var/log/oc4d")
echo "[INFO] Collection target set to OC4D."
elif [[ "$COLLECT_TARGET" == "castle" ]]; then
LOG_DIRS=("/var/log/castle") # Assuming this is the path
echo "[INFO] Collection target set to Cape Coast Castle."
elif [[ "$COLLECT_TARGET" == "dhub" ]]; then
LOG_DIRS=("/var/log/dhub")
echo "[INFO] Collection target set to D-Hub."
else
# Default behavior if COLLECT_TARGET isn't set
LOG_DIRS=("/var/log/oc4d" "/var/log/dhub")
echo "[INFO] No specific target. Collecting from all V2 sources."
fi

for log_dir in "${LOG_DIRS[@]}"; do
if [ -d "$log_dir" ]; then
echo "[INFO] Searching for logs in $log_dir..."
# Find and copy all relevant log files into the collection directory
find "$log_dir" -type f \( -name '*.log' -o -name '*.gz' \) -exec cp -v {} "$COLLECT_DIR"/ \;
else
echo "[WARN] Log directory not found: $log_dir"
fi
done
;;
*)
echo "[ERROR] Unknown SERVER_VERSION: $SERVER_VERSION. Aborting."
exit 1
;;
esac
log "🐍 Process → $PROCESSOR (folder=$NEW_FOLDER)"
python3 "$PROCESSOR" "$NEW_FOLDER"

PROCESSED_DIR="$PROCESSED_ROOT/$NEW_FOLDER"
SUMMARY="$PROCESSED_DIR/summary.csv"
if [[ ! -s "$SUMMARY" ]]; then log "❌ Missing or empty summary at $SUMMARY"; exit 1; fi
# --- STAGE 2: PROCESS LOGS ---

MONTH="$(echo "$NEW_FOLDER" | awk -F'_' '{print $(NF-1)}' || true)"
if ! [[ "$MONTH" =~ ^[0-9]{2}$ ]]; then MONTH="$(date +%m)"; fi
log "🧮 Filter month=$MONTH → final CSV"
python3 scripts/data/upload/process_csv.py "$PROCESSED_DIR" "$DEVICE_LOCATION" "$MONTH" "summary.csv"
# If PYTHON_SCRIPT isn't set, infer it from COLLECT_TARGET.
if [ -z "$PYTHON_SCRIPT" ]; then
if [ -n "$COLLECT_TARGET" ]; then
echo "[INFO] PYTHON_SCRIPT not set. Inferring from COLLECT_TARGET: $COLLECT_TARGET"
PYTHON_SCRIPT="$COLLECT_TARGET"
fi
fi

shopt -s nullglob
FINAL_CAND=( "$PROCESSED_DIR/${DEVICE_LOCATION}_${MONTH}_"*"_access_logs.csv" )
shopt -u nullglob
if [[ ${#FINAL_CAND[@]} -eq 0 ]]; then log "❌ Could not locate final CSV after filtering."; exit 1; fi
FINAL_CSV="${FINAL_CAND[0]}"
log "📦 Final CSV: $(basename "$FINAL_CSV")"
echo "[INFO] Starting log processing with PYTHON_SCRIPT=${PYTHON_SCRIPT}..."
PROCESSOR_PATH="$PROJECT_ROOT/scripts/data/process/processors/${PYTHON_SCRIPT}.py"

if has_internet; then
log "🌐 Internet OK. Flushing queue…"
shopt -s nullglob
for q in "$QUEUE_DIR"/*.csv; do
if upload_one "$q"; then rm -f "$q"; else log "Leaving queued: $(basename "$q")"; fi
done
shopt -u nullglob
if upload_one "$FINAL_CSV"; then
log "✅ Run finished — upload complete."
else
log "⚠️ Upload failed; queueing new file."; cp -f "$FINAL_CSV" "$QUEUE_DIR/"
fi
if [ -f "$PROCESSOR_PATH" ]; then
python3 "$PROCESSOR_PATH" "$NEW_FOLDER"
else
log "📵 No internet. Queueing new file."; cp -f "$FINAL_CSV" "$QUEUE_DIR/"
echo "[ERROR] Processor script not found at $PROCESSOR_PATH. Aborting."
exit 1
fi

echo "[SUCCESS] Automation cycle completed."
Loading