Rewrite API logic

This commit is contained in:
Kirill Kodanev 2025-09-12 17:37:58 +03:00
parent af6651fdfa
commit ecb6fa8df6
2 changed files with 13 additions and 20 deletions

View file

@ -42,7 +42,7 @@ jobs:
fi fi
# Проверка синтаксиса доменов # Проверка синтаксиса доменов
grep -v '^#' domains.txt | grep -v '^$' | while read domain; do grep -v '^#' domains.txt | grep -v '^$' | while read domain; do
if ! echo "$domain" | grep -qE '^[a-zA-Z0-9.-]+\.[a-zA-Z0-9-]{2,}$'; then if ! echo "$domain" | grep -qE '^[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+$'; then
echo "Error: Invalid domain format: $domain" echo "Error: Invalid domain format: $domain"
exit 1 exit 1
fi fi

View file

@ -76,14 +76,12 @@ normalize_domain() {
# remove leading "*." and leading/trailing dots, compress multiple dots # remove leading "*." and leading/trailing dots, compress multiple dots
raw="$(printf '%s' "$raw" | sed -E 's/^\*\.\s*//; s/^\.+//; s/\.+$//; s/\.+/./g')" raw="$(printf '%s' "$raw" | sed -E 's/^\*\.\s*//; s/^\.+//; s/\.+$//; s/\.+/./g')"
# syntactic checks # syntactic checks: буквы, цифры, дефис, минимум одна точка
if ! printf '%s' "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$'; then if ! printf '%s' "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$'; then
# bad structure (labels)
return 1 return 1
fi fi
if ! printf '%s' "$raw" | grep -Eq '\.[a-z]{2,}$'; then # TLD check: >=2 символов, буквы/цифры/дефис
# TLD check (only letters, len >= 2) if ! printf '%s' "$raw" | grep -Eq '\.[a-z0-9-]{2,}$'; then
# note: this rejects numeric-only TLDs and punycode with prefixes; intentional but visible in logs
return 2 return 2
fi fi
@ -118,7 +116,6 @@ while IFS= read -r line || [ -n "$line" ]; do
# query Chromium-service # query Chromium-service
dbg " -> Querying API: ${API_URL}${dom_norm}" dbg " -> Querying API: ${API_URL}${dom_norm}"
resp="$(curl -s --max-time 15 "${API_URL}${dom_norm}" 2>&1 || true)" resp="$(curl -s --max-time 15 "${API_URL}${dom_norm}" 2>&1 || true)"
# preview for logs (first 400 chars)
preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)" preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)"
dbg " -> API response preview: ${preview}" dbg " -> API response preview: ${preview}"
@ -133,18 +130,17 @@ while IFS= read -r line || [ -n "$line" ]; do
continue continue
fi fi
# valid json: check for .domains array # valid json: check for .relatedDomains array
has_domains=$(jq -r 'if has("domains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0") has_related=$(jq -r 'if has("relatedDomains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0")
if [ "$has_domains" = "1" ]; then if [ "$has_related" = "1" ]; then
# got related domain list
api_success=$((api_success+1)) api_success=$((api_success+1))
DOM_ROLE["$dom_norm"]="site" DOM_ROLE["$dom_norm"]="site"
SOURCES["$dom_norm"]="base" SOURCES["$dom_norm"]="base"
EXPANDED["$dom_norm"]=1 EXPANDED["$dom_norm"]=1
# extract domains # extract related domains
mapfile -t subs < <(jq -r '.domains[]? // empty' <<<"$resp") mapfile -t subs < <(jq -r '.relatedDomains[]? // empty' <<<"$resp")
dbg " -> API returned $(printf '%s\n' "${#subs[@]}" | tr -d '\n') related domains" dbg " -> API returned ${#subs[@]} related domains"
for s in "${subs[@]}"; do for s in "${subs[@]}"; do
nd="$(normalize_domain "$s" || true)" nd="$(normalize_domain "$s" || true)"
@ -152,22 +148,19 @@ while IFS= read -r line || [ -n "$line" ]; do
dbg " - RELATED SKIP (normalize failed): '$s'" dbg " - RELATED SKIP (normalize failed): '$s'"
continue continue
fi fi
# store
EXPANDED["$nd"]=1 EXPANDED["$nd"]=1
# mark source 'related' only if not already base
[ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related" [ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related"
related_total=$((related_total+1)) related_total=$((related_total+1))
dbg " - RELATED ADD: $nd" dbg " - RELATED ADD: $nd"
done done
else else
# no domains array: maybe error field
api_error=$((api_error+1)) api_error=$((api_error+1))
err_msg="$(jq -r '.error // empty' <<<"$resp" | sed 's/[\r\n]/ /g')" err_msg="$(jq -r '.error // empty' <<<"$resp" | sed 's/[\r\n]/ /g')"
ERRORS["$dom_norm"]="${err_msg:-unknown}" ERRORS["$dom_norm"]="${err_msg:-unknown}"
DOM_ROLE["$dom_norm"]="service" DOM_ROLE["$dom_norm"]="service"
SOURCES["$dom_norm"]="base" SOURCES["$dom_norm"]="base"
EXPANDED["$dom_norm"]=1 EXPANDED["$dom_norm"]=1
dbg " -> API returned no 'domains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'" dbg " -> API returned no 'relatedDomains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'"
fi fi
done < "$INPUT_FILE" done < "$INPUT_FILE"
@ -188,7 +181,7 @@ else
dbg "Dry-run: not writing output files" dbg "Dry-run: not writing output files"
fi fi
# Final debug report (concise + actionable) # Final debug report
echo echo
echo "===== DEBUG REPORT =====" echo "===== DEBUG REPORT ====="
echo "Input file: $INPUT_FILE" echo "Input file: $INPUT_FILE"
@ -197,7 +190,7 @@ echo "Processed lines: $total_lines"
echo "Normalized OK: $normalized_ok" echo "Normalized OK: $normalized_ok"
echo "Normalized skipped: $normalized_skip" echo "Normalized skipped: $normalized_skip"
echo echo
echo "API calls succeeded (returned domains): $api_success" echo "API calls succeeded (returned relatedDomains): $api_success"
echo "API calls failed / returned error: $api_error" echo "API calls failed / returned error: $api_error"
echo "Related domains collected (raw count): $related_total" echo "Related domains collected (raw count): $related_total"
echo "Final unique domains (base + related): $total_unique" echo "Final unique domains (base + related): $total_unique"