Rewrite API logic

This commit is contained in:
Kirill Kodanev 2025-09-12 17:37:58 +03:00
parent af6651fdfa
commit ecb6fa8df6
2 changed files with 13 additions and 20 deletions

View file

@ -42,7 +42,7 @@ jobs:
fi
# Validate domain syntax
grep -v '^#' domains.txt | grep -v '^$' | while read domain; do
if ! echo "$domain" | grep -qE '^[a-zA-Z0-9.-]+\.[a-zA-Z0-9-]{2,}$'; then
if ! echo "$domain" | grep -qE '^[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+$'; then
echo "Error: Invalid domain format: $domain"
exit 1
fi

View file

@ -76,14 +76,12 @@ normalize_domain() {
# remove leading "*." and leading/trailing dots, compress multiple dots
raw="$(printf '%s' "$raw" | sed -E 's/^\*\.\s*//; s/^\.+//; s/\.+$//; s/\.+/./g')"
# syntactic checks
# syntactic checks: letters, digits, hyphens, and at least one dot
if ! printf '%s' "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$'; then
# bad structure (labels)
return 1
fi
if ! printf '%s' "$raw" | grep -Eq '\.[a-z]{2,}$'; then
# TLD check (only letters, len >= 2)
# note: this rejects numeric-only TLDs and punycode with prefixes; intentional but visible in logs
# TLD check: at least 2 characters; letters, digits, or hyphens
if ! printf '%s' "$raw" | grep -Eq '\.[a-z0-9-]{2,}$'; then
return 2
fi
@ -118,7 +116,6 @@ while IFS= read -r line || [ -n "$line" ]; do
# query Chromium-service
dbg " -> Querying API: ${API_URL}${dom_norm}"
resp="$(curl -s --max-time 15 "${API_URL}${dom_norm}" 2>&1 || true)"
# preview for logs (first 400 chars)
preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)"
dbg " -> API response preview: ${preview}"
@ -133,18 +130,17 @@ while IFS= read -r line || [ -n "$line" ]; do
continue
fi
# valid json: check for .domains array
has_domains=$(jq -r 'if has("domains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0")
if [ "$has_domains" = "1" ]; then
# got related domain list
# valid json: check for .relatedDomains array
has_related=$(jq -r 'if has("relatedDomains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0")
if [ "$has_related" = "1" ]; then
api_success=$((api_success+1))
DOM_ROLE["$dom_norm"]="site"
SOURCES["$dom_norm"]="base"
EXPANDED["$dom_norm"]=1
# extract domains
mapfile -t subs < <(jq -r '.domains[]? // empty' <<<"$resp")
dbg " -> API returned $(printf '%s\n' "${#subs[@]}" | tr -d '\n') related domains"
# extract related domains
mapfile -t subs < <(jq -r '.relatedDomains[]? // empty' <<<"$resp")
dbg " -> API returned ${#subs[@]} related domains"
for s in "${subs[@]}"; do
nd="$(normalize_domain "$s" || true)"
@ -152,22 +148,19 @@ while IFS= read -r line || [ -n "$line" ]; do
dbg " - RELATED SKIP (normalize failed): '$s'"
continue
fi
# store
EXPANDED["$nd"]=1
# mark source 'related' only if not already base
[ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related"
related_total=$((related_total+1))
dbg " - RELATED ADD: $nd"
done
else
# no domains array: maybe error field
api_error=$((api_error+1))
err_msg="$(jq -r '.error // empty' <<<"$resp" | sed 's/[\r\n]/ /g')"
ERRORS["$dom_norm"]="${err_msg:-unknown}"
DOM_ROLE["$dom_norm"]="service"
SOURCES["$dom_norm"]="base"
EXPANDED["$dom_norm"]=1
dbg " -> API returned no 'domains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'"
dbg " -> API returned no 'relatedDomains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'"
fi
done < "$INPUT_FILE"
@ -188,7 +181,7 @@ else
dbg "Dry-run: not writing output files"
fi
# Final debug report (concise + actionable)
# Final debug report
echo
echo "===== DEBUG REPORT ====="
echo "Input file: $INPUT_FILE"
@ -197,7 +190,7 @@ echo "Processed lines: $total_lines"
echo "Normalized OK: $normalized_ok"
echo "Normalized skipped: $normalized_skip"
echo
echo "API calls succeeded (returned domains): $api_success"
echo "API calls succeeded (returned relatedDomains): $api_success"
echo "API calls failed / returned error: $api_error"
echo "Related domains collected (raw count): $related_total"
echo "Final unique domains (base + related): $total_unique"