Rewrite API logic
This commit is contained in:
parent
af6651fdfa
commit
ecb6fa8df6
2 changed files with 13 additions and 20 deletions
|
|
@ -42,7 +42,7 @@ jobs:
|
||||||
fi
|
fi
|
||||||
# Проверка синтаксиса доменов
|
# Проверка синтаксиса доменов
|
||||||
grep -v '^#' domains.txt | grep -v '^$' | while read domain; do
|
grep -v '^#' domains.txt | grep -v '^$' | while read domain; do
|
||||||
if ! echo "$domain" | grep -qE '^[a-zA-Z0-9.-]+\.[a-zA-Z0-9-]{2,}$'; then
|
if ! echo "$domain" | grep -qE '^[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)+$'; then
|
||||||
echo "Error: Invalid domain format: $domain"
|
echo "Error: Invalid domain format: $domain"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -76,14 +76,12 @@ normalize_domain() {
|
||||||
# remove leading "*." and leading/trailing dots, compress multiple dots
|
# remove leading "*." and leading/trailing dots, compress multiple dots
|
||||||
raw="$(printf '%s' "$raw" | sed -E 's/^\*\.\s*//; s/^\.+//; s/\.+$//; s/\.+/./g')"
|
raw="$(printf '%s' "$raw" | sed -E 's/^\*\.\s*//; s/^\.+//; s/\.+$//; s/\.+/./g')"
|
||||||
|
|
||||||
# syntactic checks
|
# syntactic checks: буквы, цифры, дефис, минимум одна точка
|
||||||
if ! printf '%s' "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$'; then
|
if ! printf '%s' "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$'; then
|
||||||
# bad structure (labels)
|
|
||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
if ! printf '%s' "$raw" | grep -Eq '\.[a-z]{2,}$'; then
|
# TLD check: >=2 символов, буквы/цифры/дефис
|
||||||
# TLD check (only letters, len >= 2)
|
if ! printf '%s' "$raw" | grep -Eq '\.[a-z0-9-]{2,}$'; then
|
||||||
# note: this rejects numeric-only TLDs and punycode with prefixes; intentional but visible in logs
|
|
||||||
return 2
|
return 2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -118,7 +116,6 @@ while IFS= read -r line || [ -n "$line" ]; do
|
||||||
# query Chromium-service
|
# query Chromium-service
|
||||||
dbg " -> Querying API: ${API_URL}${dom_norm}"
|
dbg " -> Querying API: ${API_URL}${dom_norm}"
|
||||||
resp="$(curl -s --max-time 15 "${API_URL}${dom_norm}" 2>&1 || true)"
|
resp="$(curl -s --max-time 15 "${API_URL}${dom_norm}" 2>&1 || true)"
|
||||||
# preview for logs (first 400 chars)
|
|
||||||
preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)"
|
preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)"
|
||||||
dbg " -> API response preview: ${preview}"
|
dbg " -> API response preview: ${preview}"
|
||||||
|
|
||||||
|
|
@ -133,18 +130,17 @@ while IFS= read -r line || [ -n "$line" ]; do
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# valid json: check for .domains array
|
# valid json: check for .relatedDomains array
|
||||||
has_domains=$(jq -r 'if has("domains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0")
|
has_related=$(jq -r 'if has("relatedDomains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0")
|
||||||
if [ "$has_domains" = "1" ]; then
|
if [ "$has_related" = "1" ]; then
|
||||||
# got related domain list
|
|
||||||
api_success=$((api_success+1))
|
api_success=$((api_success+1))
|
||||||
DOM_ROLE["$dom_norm"]="site"
|
DOM_ROLE["$dom_norm"]="site"
|
||||||
SOURCES["$dom_norm"]="base"
|
SOURCES["$dom_norm"]="base"
|
||||||
EXPANDED["$dom_norm"]=1
|
EXPANDED["$dom_norm"]=1
|
||||||
|
|
||||||
# extract domains
|
# extract related domains
|
||||||
mapfile -t subs < <(jq -r '.domains[]? // empty' <<<"$resp")
|
mapfile -t subs < <(jq -r '.relatedDomains[]? // empty' <<<"$resp")
|
||||||
dbg " -> API returned $(printf '%s\n' "${#subs[@]}" | tr -d '\n') related domains"
|
dbg " -> API returned ${#subs[@]} related domains"
|
||||||
|
|
||||||
for s in "${subs[@]}"; do
|
for s in "${subs[@]}"; do
|
||||||
nd="$(normalize_domain "$s" || true)"
|
nd="$(normalize_domain "$s" || true)"
|
||||||
|
|
@ -152,22 +148,19 @@ while IFS= read -r line || [ -n "$line" ]; do
|
||||||
dbg " - RELATED SKIP (normalize failed): '$s'"
|
dbg " - RELATED SKIP (normalize failed): '$s'"
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
# store
|
|
||||||
EXPANDED["$nd"]=1
|
EXPANDED["$nd"]=1
|
||||||
# mark source 'related' only if not already base
|
|
||||||
[ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related"
|
[ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related"
|
||||||
related_total=$((related_total+1))
|
related_total=$((related_total+1))
|
||||||
dbg " - RELATED ADD: $nd"
|
dbg " - RELATED ADD: $nd"
|
||||||
done
|
done
|
||||||
else
|
else
|
||||||
# no domains array: maybe error field
|
|
||||||
api_error=$((api_error+1))
|
api_error=$((api_error+1))
|
||||||
err_msg="$(jq -r '.error // empty' <<<"$resp" | sed 's/[\r\n]/ /g')"
|
err_msg="$(jq -r '.error // empty' <<<"$resp" | sed 's/[\r\n]/ /g')"
|
||||||
ERRORS["$dom_norm"]="${err_msg:-unknown}"
|
ERRORS["$dom_norm"]="${err_msg:-unknown}"
|
||||||
DOM_ROLE["$dom_norm"]="service"
|
DOM_ROLE["$dom_norm"]="service"
|
||||||
SOURCES["$dom_norm"]="base"
|
SOURCES["$dom_norm"]="base"
|
||||||
EXPANDED["$dom_norm"]=1
|
EXPANDED["$dom_norm"]=1
|
||||||
dbg " -> API returned no 'domains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'"
|
dbg " -> API returned no 'relatedDomains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
done < "$INPUT_FILE"
|
done < "$INPUT_FILE"
|
||||||
|
|
@ -188,7 +181,7 @@ else
|
||||||
dbg "Dry-run: not writing output files"
|
dbg "Dry-run: not writing output files"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Final debug report (concise + actionable)
|
# Final debug report
|
||||||
echo
|
echo
|
||||||
echo "===== DEBUG REPORT ====="
|
echo "===== DEBUG REPORT ====="
|
||||||
echo "Input file: $INPUT_FILE"
|
echo "Input file: $INPUT_FILE"
|
||||||
|
|
@ -197,7 +190,7 @@ echo "Processed lines: $total_lines"
|
||||||
echo "Normalized OK: $normalized_ok"
|
echo "Normalized OK: $normalized_ok"
|
||||||
echo "Normalized skipped: $normalized_skip"
|
echo "Normalized skipped: $normalized_skip"
|
||||||
echo
|
echo
|
||||||
echo "API calls succeeded (returned domains): $api_success"
|
echo "API calls succeeded (returned relatedDomains): $api_success"
|
||||||
echo "API calls failed / returned error: $api_error"
|
echo "API calls failed / returned error: $api_error"
|
||||||
echo "Related domains collected (raw count): $related_total"
|
echo "Related domains collected (raw count): $related_total"
|
||||||
echo "Final unique domains (base + related): $total_unique"
|
echo "Final unique domains (base + related): $total_unique"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue