Rewrite API logic
This commit is contained in:
parent
af6651fdfa
commit
ecb6fa8df6
2 changed files with 13 additions and 20 deletions
|
|
@ -76,14 +76,12 @@ normalize_domain() {
|
|||
# remove leading "*." and leading/trailing dots, compress multiple dots
|
||||
raw="$(printf '%s' "$raw" | sed -E 's/^\*\.\s*//; s/^\.+//; s/\.+$//; s/\.+/./g')"
|
||||
|
||||
# syntactic checks
|
||||
# syntactic checks: letters, digits, hyphen, at least one dot
|
||||
if ! printf '%s' "$raw" | grep -Eq '^[a-z0-9-]+(\.[a-z0-9-]+)+$'; then
|
||||
# bad structure (labels)
|
||||
return 1
|
||||
fi
|
||||
if ! printf '%s' "$raw" | grep -Eq '\.[a-z]{2,}$'; then
|
||||
# TLD check (only letters, len >= 2)
|
||||
# note: this rejects numeric-only TLDs and punycode with prefixes; intentional but visible in logs
|
||||
# TLD check: >=2 characters, letters/digits/hyphen
|
||||
if ! printf '%s' "$raw" | grep -Eq '\.[a-z0-9-]{2,}$'; then
|
||||
return 2
|
||||
fi
|
||||
|
||||
|
|
@ -118,7 +116,6 @@ while IFS= read -r line || [ -n "$line" ]; do
|
|||
# query Chromium-service
|
||||
dbg " -> Querying API: ${API_URL}${dom_norm}"
|
||||
resp="$(curl -s --max-time 15 "${API_URL}${dom_norm}" 2>&1 || true)"
|
||||
# preview for logs (first 400 chars)
|
||||
preview="$(printf '%s' "$resp" | tr '\n' ' ' | cut -c1-400)"
|
||||
dbg " -> API response preview: ${preview}"
|
||||
|
||||
|
|
@ -133,18 +130,17 @@ while IFS= read -r line || [ -n "$line" ]; do
|
|||
continue
|
||||
fi
|
||||
|
||||
# valid json: check for .domains array
|
||||
has_domains=$(jq -r 'if has("domains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0")
|
||||
if [ "$has_domains" = "1" ]; then
|
||||
# got related domain list
|
||||
# valid json: check for .relatedDomains array
|
||||
has_related=$(jq -r 'if has("relatedDomains") then "1" else "0" end' <<<"$resp" 2>/dev/null || echo "0")
|
||||
if [ "$has_related" = "1" ]; then
|
||||
api_success=$((api_success+1))
|
||||
DOM_ROLE["$dom_norm"]="site"
|
||||
SOURCES["$dom_norm"]="base"
|
||||
EXPANDED["$dom_norm"]=1
|
||||
|
||||
# extract domains
|
||||
mapfile -t subs < <(jq -r '.domains[]? // empty' <<<"$resp")
|
||||
dbg " -> API returned $(printf '%s\n' "${#subs[@]}" | tr -d '\n') related domains"
|
||||
# extract related domains
|
||||
mapfile -t subs < <(jq -r '.relatedDomains[]? // empty' <<<"$resp")
|
||||
dbg " -> API returned ${#subs[@]} related domains"
|
||||
|
||||
for s in "${subs[@]}"; do
|
||||
nd="$(normalize_domain "$s" || true)"
|
||||
|
|
@ -152,22 +148,19 @@ while IFS= read -r line || [ -n "$line" ]; do
|
|||
dbg " - RELATED SKIP (normalize failed): '$s'"
|
||||
continue
|
||||
fi
|
||||
# store
|
||||
EXPANDED["$nd"]=1
|
||||
# mark source 'related' only if not already base
|
||||
[ -z "${SOURCES[$nd]:-}" ] && SOURCES["$nd"]="related"
|
||||
related_total=$((related_total+1))
|
||||
dbg " - RELATED ADD: $nd"
|
||||
done
|
||||
else
|
||||
# no relatedDomains array: maybe error field
|
||||
api_error=$((api_error+1))
|
||||
err_msg="$(jq -r '.error // empty' <<<"$resp" | sed 's/[\r\n]/ /g')"
|
||||
ERRORS["$dom_norm"]="${err_msg:-unknown}"
|
||||
DOM_ROLE["$dom_norm"]="service"
|
||||
SOURCES["$dom_norm"]="base"
|
||||
EXPANDED["$dom_norm"]=1
|
||||
dbg " -> API returned no 'domains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'"
|
||||
dbg " -> API returned no 'relatedDomains' array; error='${ERRORS[$dom_norm]}' ; marked as 'service'"
|
||||
fi
|
||||
|
||||
done < "$INPUT_FILE"
|
||||
|
|
@ -188,7 +181,7 @@ else
|
|||
dbg "Dry-run: not writing output files"
|
||||
fi
|
||||
|
||||
# Final debug report (concise + actionable)
|
||||
# Final debug report
|
||||
echo
|
||||
echo "===== DEBUG REPORT ====="
|
||||
echo "Input file: $INPUT_FILE"
|
||||
|
|
@ -197,7 +190,7 @@ echo "Processed lines: $total_lines"
|
|||
echo "Normalized OK: $normalized_ok"
|
||||
echo "Normalized skipped: $normalized_skip"
|
||||
echo
|
||||
echo "API calls succeeded (returned domains): $api_success"
|
||||
echo "API calls succeeded (returned relatedDomains): $api_success"
|
||||
echo "API calls failed / returned error: $api_error"
|
||||
echo "Related domains collected (raw count): $related_total"
|
||||
echo "Final unique domains (base + related): $total_unique"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue