diff --git a/scripts/generate-configs.sh b/scripts/generate-configs.sh
index 494455a..61aaf14 100644
--- a/scripts/generate-configs.sh
+++ b/scripts/generate-configs.sh
@@ -13,7 +13,7 @@ API_URL="${CHROME_SERVER:-http://127.0.0.1:3000}/domains?domain="
 DNS_SERVER="${DNS_SERVER:-8.8.8.8}"
 
 # Debug knobs
-DEBUG="${DEBUG:-0}"
+DEBUG="${DEBUG:-0}" # set to 1 to enable verbose debug
 DEBUG_LOG="${DEBUG_LOG:-/tmp/generate-configs.debug.log}"
 
 DRY_RUN=false
@@ -26,25 +26,38 @@ log() { printf '%s\n' "$*"; }
 dbg() { if [ "$DEBUG" != "0" ]; then printf '[DEBUG] %s\n' "$*" | tee -a "$DEBUG_LOG"; fi }
 err() { printf '[ERROR] %s\n' "$*" | tee -a "$DEBUG_LOG" >&2; }
 
-if ! command -v curl >/dev/null 2>&1; then err "curl is required"; exit 2; fi
-if ! command -v jq >/dev/null 2>&1; then err "jq is required"; exit 2; fi
+# sanity
+if ! command -v curl >/dev/null 2>&1; then
+  err "curl is required but not found in PATH"
+  exit 2
+fi
+if ! command -v jq >/dev/null 2>&1; then
+  err "jq is required but not found in PATH"
+  exit 2
+fi
 
-if [ "$DEBUG" != "0" ]; then : > "$DEBUG_LOG"; dbg "Debugging enabled"; fi
+# reset debug log if debugging enabled
+if [ "$DEBUG" != "0" ]; then
+  : > "$DEBUG_LOG"
+  dbg "Debugging enabled. Log: $DEBUG_LOG"
+fi
 
 log "Starting generate-configs.sh"
 dbg "ENV: INPUT_FILE=$INPUT_FILE IPSET_CONF=$IPSET_CONF RESOLVE_CONF=$RESOLVE_CONF API_URL=$API_URL DNS_SERVER=$DNS_SERVER DRY_RUN=$DRY_RUN"
 
+# clear outputs (only if not dry-run)
 if ! $DRY_RUN; then
+  dbg "Clearing output files: $IPSET_CONF , $RESOLVE_CONF"
   : > "$IPSET_CONF"
   : > "$RESOLVE_CONF"
 fi
 
-declare -A DOM_ROLE
-declare -A EXPANDED
-declare -A SOURCES
-declare -A ERRORS
-declare -A VALID_SITES # только живые site-домены
+declare -A DOM_ROLE # 'site' or 'service' for base domains
+declare -A EXPANDED # unique set of resulting domains
+declare -A SOURCES # source: base or related
+declare -A ERRORS # API error messages per base domain (if any)
 
+# Counters
 total_lines=0
 normalized_ok=0
 normalized_skip=0
@@ -52,18 +65,31 @@ api_success=0
 api_error=0
 related_total=0
 
+# Normalizer with explicit reasons on failure
 normalize_domain() {
   local raw="$1"
+  # trim + remove inline comments
   raw="$(printf '%s' "$raw" | sed -E 's/#.*$//' | awk '{$1=$1};1')"
-  [ -z "$raw" ] && return 1
+  [ -z "$raw" ] && { printf ''; return 1; }
+  # lowercase
   raw="$(printf '%s' "$raw" | tr '[:upper:]' '[:lower:]')"
+  # remove leading "*." and leading/trailing dots, compress multiple dots
   raw="$(printf '%s' "$raw" | sed -E 's/^\*\.\s*//; s/^\.+//; s/\.+$//; s/\.+/./g')"
-  if ! printf '%s' "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$'; then return 1; fi
-  if ! printf '%s' "$raw" | grep -Eq '\.[a-z0-9-]{2,}$'; then return 2; fi
+
+  # syntactic checks: буквы, цифры, дефис, минимум одна точка
+  if ! printf '%s' "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$'; then
+    return 1
+  fi
+  # TLD check: >=2 символов, буквы/цифры/дефис
+  if ! printf '%s' "$raw" | grep -Eq '\.[a-z0-9-]{2,}$'; then
+    return 2
+  fi
+
   printf '%s' "$raw"
   return 0
 }
 
+# read input
 if [ ! -f "$INPUT_FILE" ]; then
   err "Input file not found: $INPUT_FILE"
   exit 3
@@ -76,88 +102,86 @@ lineno=0
 while IFS= read -r line || [ -n "$line" ]; do
   lineno=$((lineno+1))
   total_lines=$((total_lines+1))
-  dbg "Processing line #$lineno: '$line'"
+  dbg "Processing line #$lineno: '$(printf "%s" "$line")'"
 
   dom_norm="$(normalize_domain "$line" || true)"
   if [ -z "$dom_norm" ]; then
     normalized_skip=$((normalized_skip+1))
-    dbg " -> SKIP (normalization failed)"
+    dbg " -> SKIP (normalization failed) - original: '$line'"
     continue
   fi
 
   normalized_ok=$((normalized_ok+1))
   dbg " -> NORMALIZED: $dom_norm"
 
+  # query Chromium-service
   dbg " -> Querying API: ${API_URL}${dom_norm}"
-  resp="$(curl -s --max-time 15 "${API_URL}${dom_norm}" || true)"
+  resp="$(curl -s --max-time 15 "${API_URL}${dom_norm}" 2>&1 || true)"
   preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)"
   dbg " -> API response preview: ${preview}"
 
+  # check JSON validity
   if ! jq -e . >/dev/null 2>&1 <<<"$resp"; then
     api_error=$((api_error+1))
     ERRORS["$dom_norm"]="non-json-response"
-    dbg " -> non-JSON, skipping"
-    continue
-  fi
-
-  # error handling
-  if jq -e 'has("error")' <<<"$resp" >/dev/null; then
-    err_msg="$(jq -r '.error' <<<"$resp")"
-    dbg " -> API error: $err_msg"
-
-    if grep -Eq "ERR_NAME_NOT_RESOLVED|Timeout" <<<"$err_msg"; then
-      dbg " -> Dead domain, completely skipped"
-      continue
-    fi
-
-    if grep -Eq "ERR_CERT_COMMON_NAME_INVALID|ERR_CONNECTION_REFUSED" <<<"$err_msg"; then
-      dbg " -> Service domain, keep only base"
-      DOM_ROLE["$dom_norm"]="service"
-      SOURCES["$dom_norm"]="base"
-      EXPANDED["$dom_norm"]=1
-      ERRORS["$dom_norm"]="$err_msg"
-      continue
-    fi
-
-    # default case: treat as service
     DOM_ROLE["$dom_norm"]="service"
     SOURCES["$dom_norm"]="base"
     EXPANDED["$dom_norm"]=1
-    ERRORS["$dom_norm"]="$err_msg"
+    dbg " -> API returned non-JSON. Marked as 'service' and included base only."
     continue
   fi
 
-  # valid JSON without error -> must be site
-  api_success=$((api_success+1))
-  DOM_ROLE["$dom_norm"]="site"
-  SOURCES["$dom_norm"]="base"
-  EXPANDED["$dom_norm"]=1
-  VALID_SITES["$dom_norm"]=1
+  # valid json: check for .relatedDomains array
+  has_related=$(jq -r 'if has("relatedDomains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0")
+  if [ "$has_related" = "1" ]; then
+    api_success=$((api_success+1))
+    DOM_ROLE["$dom_norm"]="site"
+    SOURCES["$dom_norm"]="base"
+    EXPANDED["$dom_norm"]=1
 
-  mapfile -t subs < <(jq -r '.relatedDomains[]? // empty' <<<"$resp")
-  dbg " -> API returned ${#subs[@]} related domains"
-  for s in "${subs[@]}"; do
-    nd="$(normalize_domain "$s" || true)"
-    if [ -z "$nd" ]; then
-      dbg " - RELATED SKIP: '$s'"
-      continue
-    fi
-    EXPANDED["$nd"]=1
-    [ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related"
-    related_total=$((related_total+1))
-    dbg " - RELATED ADD: $nd"
-  done
+    # extract related domains
+    mapfile -t subs < <(jq -r '.relatedDomains[]? // empty' <<<"$resp")
+    dbg " -> API returned ${#subs[@]} related domains"
+
+    for s in "${subs[@]}"; do
+      nd="$(normalize_domain "$s" || true)"
+      if [ -z "$nd" ]; then
+        dbg " - RELATED SKIP (normalize failed): '$s'"
+        continue
+      fi
+      EXPANDED["$nd"]=1
+      [ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related"
+      related_total=$((related_total+1))
+      dbg " - RELATED ADD: $nd"
+    done
+  else
+    api_error=$((api_error+1))
+    err_msg="$(jq -r '.error // empty' <<<"$resp" | sed 's/[\r\n]/ /g')"
+    ERRORS["$dom_norm"]="${err_msg:-unknown}"
+    DOM_ROLE["$dom_norm"]="service"
+    SOURCES["$dom_norm"]="base"
+    EXPANDED["$dom_norm"]=1
+    dbg " -> API returned no 'relatedDomains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'"
+  fi
 done < "$INPUT_FILE"
 
+# sort & unique final list
 mapfile -t ALL_DOMAINS < <(printf "%s\n" "${!EXPANDED[@]}" | sort -u)
+total_unique=${#ALL_DOMAINS[@]}
 
+# write files (if not dry-run)
 if ! $DRY_RUN; then
+  dbg "Writing outputs to: $IPSET_CONF and $RESOLVE_CONF"
   for d in "${ALL_DOMAINS[@]}"; do
     printf 'ipset=/%s/bbrkn\n' "$d" >> "$IPSET_CONF"
     printf 'server=/%s/%s\n' "$d" "$DNS_SERVER" >> "$RESOLVE_CONF"
   done
+  dbg "Wrote $(wc -l < "$IPSET_CONF" 2>/dev/null || echo 0) ipset lines and $(wc -l < "$RESOLVE_CONF" 2>/dev/null || echo 0) server lines"
+else
+  dbg "Dry-run: not writing output files"
 fi
 
+# Final debug report
 echo
 echo "===== DEBUG REPORT ====="
 echo "Input file: $INPUT_FILE"
@@ -166,15 +190,34 @@ echo "Processed lines: $total_lines"
 echo "Normalized OK: $normalized_ok"
 echo "Normalized skipped: $normalized_skip"
 echo
-echo "API success (sites): $api_success"
-echo "API error/ignored: $api_error"
-echo "Related domains added: $related_total"
-echo "Final unique domains: ${#ALL_DOMAINS[@]}"
+echo "API calls succeeded (returned relatedDomains): $api_success"
+echo "API calls failed / returned error: $api_error"
+echo "Related domains collected (raw count): $related_total"
+echo "Final unique domains (base + related): $total_unique"
 echo
-echo "---- VALID BASE SITES ----"
-printf '%s\n' "${!VALID_SITES[@]}" | sort
+
+echo "---- base domains (from input) ----"
+for base in "${!DOM_ROLE[@]}"; do
+  if [ "${DOM_ROLE[$base]}" = "site" ] || [ "${DOM_ROLE[$base]}" = "service" ]; then
+    echo "$base - ${DOM_ROLE[$base]} - ${ERRORS[$base]:-ok}"
+  fi
+done | sort
+
+echo
+echo "---- sample of final unique domains (first 200) ----"
+printf '%s\n' "${ALL_DOMAINS[@]}" | head -n 200
+
+echo
+echo "---- API errors (if any) ----"
+for k in "${!ERRORS[@]}"; do
+  if [ "${ERRORS[$k]}" != "ok" ]; then
+    echo "$k -> ${ERRORS[$k]}"
+  fi
+done | sort
+
 echo "===== END DEBUG REPORT ====="
 
 if [ "$DEBUG" != "0" ]; then
-  echo "Detailed debug log: $DEBUG_LOG"
+  echo
+  echo "Detailed debug log appended to: $DEBUG_LOG"
 fi