diff --git a/.forgejo/workflows/deploy.yaml b/.forgejo/workflows/deploy.yaml
index f7857d5..aeea56d 100644
--- a/.forgejo/workflows/deploy.yaml
+++ b/.forgejo/workflows/deploy.yaml
@@ -9,6 +9,8 @@ jobs:
   deploy:
     runs-on: self-hosted # Используем self-hosted runner на шлюзе
     env:
+      DEBUG: 1
+      DEBUG_LOG: /tmp/generate-configs.debug.log
       # ==== Конфигурация проекта ====
       INPUT_FILE: domains.txt
 
@@ -40,7 +42,7 @@ jobs:
           fi
           # Проверка синтаксиса доменов
           grep -v '^#' domains.txt | grep -v '^$' | while read domain; do
-            if ! echo "$domain" | grep -qE '^[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'; then
+            if ! echo "$domain" | grep -qE '^[a-zA-Z0-9.-]+\.[a-zA-Z0-9-]{2,}$'; then
               echo "Error: Invalid domain format: $domain"
               exit 1
             fi
@@ -63,3 +65,4 @@ jobs:
           path: |
             ${{ env.IPSET_CONF }}
             ${{ env.RESOLVE_CONF }}
+            ${{ env.DEBUG_LOG }}
diff --git a/domains.txt b/domains.txt
index 3654351..2f810ef 100644
--- a/domains.txt
+++ b/domains.txt
@@ -146,7 +146,6 @@ tvrain.ru
 twitter.com
 video.google.com
 www1.thepiratebay3.to
-www2.telegram.org
 www.autodesk.com
 www.autodesk.ru
 www.bbc.com
diff --git a/scripts/generate-configs.sh b/scripts/generate-configs.sh
index 65d15eb..0a0e688 100644
--- a/scripts/generate-configs.sh
+++ b/scripts/generate-configs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -euo pipefail
 
 # ==============================
@@ -12,87 +12,219 @@ RESOLVE_CONF="${RESOLVE_CONF:-/tmp/92-resolve-bbrkn.conf}"
 API_URL="${CHROME_SERVER:-http://127.0.0.1:3000}/domains?domain="
 DNS_SERVER="${DNS_SERVER:-8.8.8.8}"
 
+# Debug knobs
+DEBUG="${DEBUG:-0}" # set to 1 to enable verbose debug
+DEBUG_LOG="${DEBUG_LOG:-/tmp/generate-configs.debug.log}"
+
 DRY_RUN=false
 if [[ "${1:-}" == "--dry-run" ]]; then
   DRY_RUN=true
 fi
 
-# Очистка файлов только если не dry-run
+# Helpers
+log() { printf '%s\n' "$*"; }
+dbg() { if [ "$DEBUG" != "0" ]; then printf '[DEBUG] %s\n' "$*" | tee -a "$DEBUG_LOG"; fi }
+err() { printf '[ERROR] %s\n' "$*" | tee -a "$DEBUG_LOG" >&2; }
+
+# sanity
+if ! command -v curl >/dev/null 2>&1; then
+  err "curl is required but not found in PATH"
+  exit 2
+fi
+if ! command -v jq >/dev/null 2>&1; then
+  err "jq is required but not found in PATH"
+  exit 2
+fi
+
+# reset debug log if debugging enabled
+if [ "$DEBUG" != "0" ]; then
+  : > "$DEBUG_LOG"
+  dbg "Debugging enabled. Log: $DEBUG_LOG"
+fi
+
+log "Starting generate-configs.sh"
+dbg "ENV: INPUT_FILE=$INPUT_FILE IPSET_CONF=$IPSET_CONF RESOLVE_CONF=$RESOLVE_CONF API_URL=$API_URL DNS_SERVER=$DNS_SERVER DRY_RUN=$DRY_RUN"
+
+# clear outputs (only if not dry-run)
 if ! $DRY_RUN; then
+  dbg "Clearing output files: $IPSET_CONF , $RESOLVE_CONF"
   : > "$IPSET_CONF"
   : > "$RESOLVE_CONF"
 fi
 
-declare -A DOM_ROLE
-declare -A EXPANDED
-declare -A SOURCES
+declare -A DOM_ROLE # 'site' or 'service' for base domains
+declare -A EXPANDED # unique set of resulting domains
+declare -A SOURCES # source: base or related
+declare -A ERRORS # API error messages per base domain (if any)
 
+# Counters
+total_lines=0
+normalized_ok=0
+normalized_skip=0
+api_success=0
+api_error=0
+related_total=0
+
+# Normalizer with explicit reasons on failure
 normalize_domain() {
   local raw="$1"
-  raw="$(echo "$raw" | sed -E 's/#.*$//' | awk '{$1=$1};1')" || return 1
-  [ -z "$raw" ] && return 1
-  raw="$(echo "$raw" | tr '[:upper:]' '[:lower:]')"
-  raw="$(echo "$raw" | sed -E 's/^\*\.\s*//; s/^\.+//; s/\.+$//; s/\.+/./g')"
-  if echo "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$' && \
-     echo "$raw" | grep -Eq '\.[a-z]{2,}$'; then
-    printf '%s' "$raw"
-  else
+  # trim + remove inline comments
+  raw="$(printf '%s' "$raw" | sed -E 's/#.*$//' | awk '{$1=$1};1')"
+  [ -z "$raw" ] && { printf ''; return 1; }
+  # lowercase
+  raw="$(printf '%s' "$raw" | tr '[:upper:]' '[:lower:]')"
+  # remove leading "*." and leading/trailing dots, compress multiple dots
+  raw="$(printf '%s' "$raw" | sed -E 's/^\*\.\s*//; s/^\.+//; s/\.+$//; s/\.+/./g')"
+
+  # syntactic checks
+  if ! printf '%s' "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$'; then
+    # bad structure (labels)
     return 1
   fi
+  if ! printf '%s' "$raw" | grep -Eq '\.[a-z]{2,}$'; then
+    # TLD check (only letters, len >= 2)
+    # note: this rejects numeric-only TLDs and punycode with prefixes; intentional but visible in logs
+    return 2
+  fi
+
+  printf '%s' "$raw"
+  return 0
 }
 
-# Обработка доменов
-while IFS= read -r line || [ -n "$line" ]; do
-  dom="$(normalize_domain "$line" || true)"
-  [ -z "$dom" ] && continue
-
-  echo "Querying service for $dom..."
-  resp="$(curl -s --max-time 15 "${API_URL}${dom}" || true)"
-
-  if echo "$resp" | grep -q '"domains"'; then
-    DOM_ROLE["$dom"]="site"
-    EXPANDED["$dom"]=1
-    SOURCES["$dom"]="base"
-    mapfile -t subs < <(echo "$resp" | jq -r '.domains[]?')
-    for s in "${subs[@]}"; do
-      nd="$(normalize_domain "$s" || true)"
-      [ -n "$nd" ] || continue
-      EXPANDED["$nd"]=1
-      [[ -z "${SOURCES[$nd]:-}" ]] && SOURCES["$nd"]="related"
-    done
-  else
-    DOM_ROLE["$dom"]="service"
-    EXPANDED["$dom"]=1
-    SOURCES["$dom"]="base"
-  fi
-done < "$INPUT_FILE"
-
-# Сортировка
-mapfile -t ALL_DOMAINS < <(printf "%s\n" "${!EXPANDED[@]}" | sort -u)
-
-# Генерация конфигов
-if ! $DRY_RUN; then
-  for d in "${ALL_DOMAINS[@]}"; do
-    echo "ipset=/$d/bbrkn" >> "$IPSET_CONF"
-    echo "server=/$d/$DNS_SERVER" >> "$RESOLVE_CONF"
-  done
+# read input
+if [ ! -f "$INPUT_FILE" ]; then
+  err "Input file not found: $INPUT_FILE"
+  exit 3
 fi
 
-# Подсчёты
-count_in=$(wc -l < "$INPUT_FILE")
-count_total=${#ALL_DOMAINS[@]}
-count_related=$(( count_total - count_in ))
+raw_total_lines=$(wc -l < "$INPUT_FILE" | tr -d ' ')
+dbg "Raw input lines: $raw_total_lines"
 
-# Отладочный отчёт
+lineno=0
+while IFS= read -r line || [ -n "$line" ]; do
+  lineno=$((lineno+1))
+  total_lines=$((total_lines+1))
+  dbg "Processing line #$lineno: '$(printf "%s" "$line")'"
+
+  dom_norm="$(normalize_domain "$line" || true)"
+  if [ -z "$dom_norm" ]; then
+    normalized_skip=$((normalized_skip+1))
+    dbg " -> SKIP (normalization failed) - original: '$line'"
+    continue
+  fi
+  normalized_ok=$((normalized_ok+1))
+  dbg " -> NORMALIZED: $dom_norm"
+
+  # query Chromium-service
+  dbg " -> Querying API: ${API_URL}${dom_norm}"
+  resp="$(curl -s --max-time 15 "${API_URL}${dom_norm}" 2>&1 || true)"
+  # preview for logs (first 400 chars)
+  preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)"
+  dbg " -> API response preview: ${preview}"
+
+  # check JSON validity
+  if ! jq -e . >/dev/null 2>&1 <<<"$resp"; then
+    api_error=$((api_error+1))
+    ERRORS["$dom_norm"]="non-json-response"
+    DOM_ROLE["$dom_norm"]="service"
+    SOURCES["$dom_norm"]="base"
+    EXPANDED["$dom_norm"]=1
+    dbg " -> API returned non-JSON. Marked as 'service' and included base only."
+    continue
+  fi
+
+  # valid json: check for .domains array
+  has_domains=$(jq -r 'if has("domains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0")
+  if [ "$has_domains" = "1" ]; then
+    # got related domain list
+    api_success=$((api_success+1))
+    DOM_ROLE["$dom_norm"]="site"
+    SOURCES["$dom_norm"]="base"
+    EXPANDED["$dom_norm"]=1
+
+    # extract domains
+    mapfile -t subs < <(jq -r '.domains[]? // empty' <<<"$resp")
+    dbg " -> API returned $(printf '%s\n' "${#subs[@]}" | tr -d '\n') related domains"
+
+    for s in "${subs[@]}"; do
+      nd="$(normalize_domain "$s" || true)"
+      if [ -z "$nd" ]; then
+        dbg " - RELATED SKIP (normalize failed): '$s'"
+        continue
+      fi
+      # store
+      EXPANDED["$nd"]=1
+      # mark source 'related' only if not already base
+      [ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related"
+      related_total=$((related_total+1))
+      dbg " - RELATED ADD: $nd"
+    done
+  else
+    # no 'domains' key: read .error defensively; '?' and '|| true' stop set -e/pipefail aborting on non-object JSON ([] or "text")
+    api_error=$((api_error+1))
+    err_msg="$(jq -r '(.error? // empty) | tostring | gsub("[\r\n]+"; " ")' <<<"$resp" 2>/dev/null || true)"
+    ERRORS["$dom_norm"]="${err_msg:-unknown}"
+    DOM_ROLE["$dom_norm"]="service"
+    SOURCES["$dom_norm"]="base"
+    EXPANDED["$dom_norm"]=1
+    dbg " -> API returned no 'domains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'"
+  fi
+
+done < "$INPUT_FILE"
+
+# sort & unique final list
+mapfile -t ALL_DOMAINS < <(printf "%s\n" "${!EXPANDED[@]}" | sort -u)
+total_unique=${#ALL_DOMAINS[@]}
+
+# write files (if not dry-run)
+if ! $DRY_RUN; then
+  dbg "Writing outputs to: $IPSET_CONF and $RESOLVE_CONF"
+  for d in "${ALL_DOMAINS[@]}"; do
+    printf 'ipset=/%s/bbrkn\n' "$d" >> "$IPSET_CONF"
+    printf 'server=/%s/%s\n' "$d" "$DNS_SERVER" >> "$RESOLVE_CONF"
+  done
+  dbg "Wrote $(wc -l < "$IPSET_CONF" 2>/dev/null || echo 0) ipset lines and $(wc -l < "$RESOLVE_CONF" 2>/dev/null || echo 0) server lines"
+else
+  dbg "Dry-run: not writing output files"
+fi
+
+# Final debug report (concise + actionable)
 echo
 echo "===== DEBUG REPORT ====="
-echo "Original domains file: $count_in entries"
-echo "Final unique domains: $count_total"
-echo " - Base domains: $count_in"
-echo " - Related domains: $count_related"
+echo "Input file: $INPUT_FILE"
+echo "Raw input lines: $raw_total_lines"
+echo "Processed lines: $total_lines"
+echo "Normalized OK: $normalized_ok"
+echo "Normalized skipped: $normalized_skip"
 echo
-while IFS= read -r line || [ -n "$line" ]; do
-  dom="$(normalize_domain "$line" || true)"
-  [ -n "$dom" ] && echo "$dom - ${DOM_ROLE[$dom]:-unknown}"
-done < "$INPUT_FILE"
-echo "========================"
+echo "API calls succeeded (returned domains): $api_success"
+echo "API calls failed / returned error: $api_error"
+echo "Related domains collected (raw count): $related_total"
+echo "Final unique domains (base + related): $total_unique"
+echo
+
+echo "---- base domains (from input) ----"
+for base in "${!DOM_ROLE[@]}"; do
+  if [ "${DOM_ROLE[$base]}" = "site" ] || [ "${DOM_ROLE[$base]}" = "service" ]; then
+    echo "$base - ${DOM_ROLE[$base]} - ${ERRORS[$base]:-ok}"
+  fi
+done | sort
+
+echo
+echo "---- sample of final unique domains (first 200) ----"
+printf '%s\n' "${ALL_DOMAINS[@]:0:200}" # array slice instead of '| head -n 200': head closing the pipe early would SIGPIPE printf, and pipefail + set -e would abort the report
+
+echo
+echo "---- API errors (if any) ----"
+for k in "${!ERRORS[@]}"; do
+  if [ "${ERRORS[$k]}" != "ok" ]; then
+    echo "$k -> ${ERRORS[$k]}"
+  fi
+done | sort
+
+echo "===== END DEBUG REPORT ====="
+
+if [ "$DEBUG" != "0" ]; then
+  echo
+  echo "Detailed debug log appended to: $DEBUG_LOG"
+fi