From 71c393d91d85a502e834dbf96649860ee3baa02d Mon Sep 17 00:00:00 2001 From: Kirill Kodanev Date: Mon, 15 Sep 2025 20:20:57 +0300 Subject: [PATCH] ADD query API function --- scripts/generate-configs.sh | 121 ++++++++++++++++++------------------ 1 file changed, 60 insertions(+), 61 deletions(-) diff --git a/scripts/generate-configs.sh b/scripts/generate-configs.sh index 3da2709..cbad223 100644 --- a/scripts/generate-configs.sh +++ b/scripts/generate-configs.sh @@ -14,14 +14,13 @@ DNS_SERVER="${DNS_SERVER:-8.8.8.8}" # Debug knobs DEBUG="${DEBUG:-0}" -DEBUG_LOG="${DEBUG_LOG:-/tmp/generate-configs.debug.log}" +DEBUG_LOG="${DEBUG_LOG:-/tmp/generate-configs.$$.debug.log}" DRY_RUN=false if [[ "${1:-}" == "--dry-run" ]]; then DRY_RUN=true fi -# Helpers log() { printf '%s\n' "$*"; } dbg() { if [ "$DEBUG" != "0" ]; then printf '[DEBUG] %s\n' "$*" | tee -a "$DEBUG_LOG"; fi } err() { printf '[ERROR] %s\n' "$*" | tee -a "$DEBUG_LOG" >&2; } @@ -43,7 +42,7 @@ declare -A DOM_ROLE declare -A EXPANDED declare -A SOURCES declare -A ERRORS -declare -A VALID_SITES # только живые site-домены +declare -A VALID_SITES total_lines=0 normalized_ok=0 @@ -64,6 +63,56 @@ normalize_domain() { return 0 } +query_api() { + local domain="$1" + local max_attempts=5 + local attempt=0 + local resp="" + local http_code="" + local content_type="" + local max_sleep=8 + + while :; do + attempt=$((attempt+1)) + dbg " -> API attempt #$attempt for $domain" + + resp="$(timeout 20 curl -sS --compressed \ + -m 10 --connect-timeout 5 \ + -H 'Accept: application/json' \ + -w '\n%{http_code}\n%{content_type}' \ + "${API_URL}${domain}" 2>>"$DEBUG_LOG" || true)" + + http_code="$(printf '%s' "$resp" | tail -n1)" + content_type="$(printf '%s' "$resp" | tail -n2 | head -n1)" + resp="$(printf '%s' "$resp" | head -n -2)" + + preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)" + dbg " -> HTTP=${http_code}, Content-Type=${content_type}, preview=${preview}" + + if [ "$http_code" = "200" ] && printf '%s' "$content_type" | grep -qEi '^application/json|^text/json'; then + echo "$resp" + return 0 + fi + + if jq -e . >/dev/null 2>&1 <<<"$resp"; then + dbg " -> Body is valid JSON despite HTTP=${http_code}, accepting." + echo "$resp" + return 0 + fi + + if [ "$attempt" -ge "$max_attempts" ]; then + ERRORS["$domain"]="http_${http_code}_or_nonjson" + dbg " -> Failed after $attempt attempts: HTTP=${http_code}, preview=${preview}" + return 1 + fi + + sleep_time=$((2 ** (attempt-1))) + [ "$sleep_time" -gt "$max_sleep" ] && sleep_time=$max_sleep + dbg " -> Retry after ${sleep_time}s..." + sleep "$sleep_time" + done +} + if [ ! -f "$INPUT_FILE" ]; then err "Input file not found: $INPUT_FILE" exit 3 @@ -73,7 +122,7 @@ raw_total_lines=$(wc -l < "$INPUT_FILE" | tr -d ' ') dbg "Raw input lines: $raw_total_lines" lineno=0 -while IFS= read -r line || [ -n "$line" ]; do +while IFS= read -r line; do lineno=$((lineno+1)) total_lines=$((total_lines+1)) dbg "Processing line #$lineno: '$line'" @@ -87,56 +136,9 @@ while IFS= read -r line || [ -n "$line" ]; do normalized_ok=$((normalized_ok+1)) dbg " -> NORMALIZED: $dom_norm" - dbg " -> Querying API: ${API_URL}${dom_norm}" - - # параметры retry - max_attempts=5 - attempt=0 - resp="" - http_code=0 - content_type="" - - while :; do - attempt=$((attempt+1)) - raw="$(curl -sS --compressed \ - -m 10 --connect-timeout 5 \ - --retry 3 --retry-delay 1 \ - -H 'Accept: application/json' \ - -w '\n%{http_code}\n%{content_type}' \ - "${API_URL}${dom_norm}" 2>>"$DEBUG_LOG" || true)" - - http_code="$(printf '%s' "$raw" | tail -n1)" - content_type="$(printf '%s' "$raw" | tail -n2 | head -n1)" - resp="$(printf '%s' "$raw" | sed '$d' | sed '$d')" - - preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)" - dbg " -> Attempt #${attempt}: HTTP=${http_code}, Content-Type=${content_type}, preview=${preview}" - - if [ "$http_code" = "200" ] && printf '%s' "$content_type" | grep -qEi 'application/(json|javascript)|^text/json'; then - break - fi - - if jq -e . >/dev/null 2>&1 <<<"$resp"; then - dbg " -> Body is valid JSON despite HTTP=${http_code}, accepting." - break - fi - - if [ "$attempt" -ge "$max_attempts" ]; then - api_error=$((api_error+1)) - ERRORS["$dom_norm"]="http_${http_code}_or_nonjson" - dbg " -> Failed after ${attempt} attempts: HTTP=${http_code}, preview=${preview}" - resp="" - break - fi - - # backoff: 1s, 2s, 4s, 8s ... - sleep_time=$((2 ** (attempt-1))) - dbg " -> Retry after ${sleep_time}s..." - sleep "$sleep_time" - done - - if [ -z "$resp" ] || ! jq -e . >/dev/null 2>&1 <<<"$resp"; then - dbg " -> non-JSON or empty response, skipping domain: $dom_norm" + resp="$(query_api "$dom_norm" || true)" + if [ -z "$resp" ]; then + dbg " -> No valid response for $dom_norm, skipping." continue fi @@ -145,12 +147,12 @@ while IFS= read -r line || [ -n "$line" ]; do dbg " -> API error: $err_msg" if grep -Eq "ERR_NAME_NOT_RESOLVED|Timeout" <<<"$err_msg"; then - dbg " -> Dead domain, completely skipped" + DOM_ROLE["$dom_norm"]="dead" + ERRORS["$dom_norm"]="$err_msg" continue fi if grep -Eq "ERR_CERT_COMMON_NAME_INVALID|ERR_CONNECTION_REFUSED" <<<"$err_msg"; then - dbg " -> Service domain, keep only base" DOM_ROLE["$dom_norm"]="service" SOURCES["$dom_norm"]="base" EXPANDED["$dom_norm"]=1 @@ -158,7 +160,7 @@ while IFS= read -r line || [ -n "$line" ]; do continue fi - DOM_ROLE["$dom_norm"]="service" + DOM_ROLE["$dom_norm"]="unknown" SOURCES["$dom_norm"]="base" EXPANDED["$dom_norm"]=1 ERRORS["$dom_norm"]="$err_msg" @@ -175,10 +177,7 @@ while IFS= read -r line || [ -n "$line" ]; do dbg " -> API returned ${#subs[@]} related domains" for s in "${subs[@]}"; do nd="$(normalize_domain "$s" || true)" - if [ -z "$nd" ]; then - dbg " - RELATED SKIP: '$s'" - continue - fi + [ -z "$nd" ] && continue EXPANDED["$nd"]=1 [ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related" related_total=$((related_total+1))