Add tini as INIT in container. Rewrite server.js
Some checks are pending
Build and Push Docker Image / build-and-push (push) Waiting to run
Some checks are pending
Build and Push Docker Image / build-and-push (push) Waiting to run
This commit is contained in:
parent
a5a0ed828d
commit
72752b1a0b
2 changed files with 282 additions and 147 deletions
57
Dockerfile
57
Dockerfile
|
|
@ -1,48 +1,65 @@
|
||||||
# Base minimal Debian
|
# -------- Builder stage --------
|
||||||
FROM debian:bookworm-slim
|
FROM debian:bookworm-slim AS builder
|
||||||
|
|
||||||
# Prevent tzdata prompts
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
# Install Node.js, Chromium and minimal runtime libs
|
# Node + build tools for native modules (better-sqlite3)
|
||||||
# Note: chromium package on Debian provides /usr/bin/chromium
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
ca-certificates curl gnupg \
|
ca-certificates curl gnupg \
|
||||||
nodejs npm \
|
nodejs npm \
|
||||||
chromium \
|
python3 make g++ pkg-config libsqlite3-dev \
|
||||||
# Minimal GUI/Chromium runtime libs often needed by Playwright Chromium
|
|
||||||
libx11-6 libxcomposite1 libxdamage1 libxrandr2 libxkbcommon0 \
|
|
||||||
libgtk-3-0 libnss3 libdrm2 libgbm1 libasound2 fonts-liberation \
|
|
||||||
# Useful for font rendering
|
|
||||||
fonts-dejavu-core \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# App directory
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install only production deps
|
# Copy only manifests first to leverage Docker cache
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
|
|
||||||
|
# Install production deps (build native modules here)
|
||||||
ENV CI=true
|
ENV CI=true
|
||||||
RUN npm ci --omit=dev
|
RUN npm ci --omit=dev
|
||||||
|
|
||||||
# Copy source
|
# Copy source
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
# Security: run as non-root
|
# -------- Runtime stage --------
|
||||||
|
FROM debian:bookworm-slim
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
# Install tini for proper PID 1 and signal handling
|
||||||
|
# Install Node.js runtime, Chromium and minimal libs
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
ca-certificates curl gnupg \
|
||||||
|
tini \
|
||||||
|
nodejs npm \
|
||||||
|
chromium \
|
||||||
|
libx11-6 libxcomposite1 libxdamage1 libxrandr2 libxkbcommon0 \
|
||||||
|
libgtk-3-0 libnss3 libdrm2 libgbm1 libasound2 fonts-liberation \
|
||||||
|
fonts-dejavu-core \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy node_modules and app from builder
|
||||||
|
COPY --from=builder /app/node_modules /app/node_modules
|
||||||
|
COPY --from=builder /app/package*.json /app/
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Security: drop root
|
||||||
RUN useradd -ms /bin/bash nodeuser && chown -R nodeuser:nodeuser /app
|
RUN useradd -ms /bin/bash nodeuser && chown -R nodeuser:nodeuser /app
|
||||||
USER nodeuser
|
USER nodeuser
|
||||||
|
|
||||||
# Environment for service
|
# Environment
|
||||||
ENV PORT=3000 \
|
ENV PORT=3000 \
|
||||||
# Ensure Playwright uses system Chromium and does not download browsers
|
|
||||||
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 \
|
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 \
|
||||||
PLAYWRIGHT_BROWSERS_PATH=0 \
|
PLAYWRIGHT_BROWSERS_PATH=0 \
|
||||||
# Explicit executable if needed in code; here server uses default, so optional
|
CHROMIUM_PATH=/usr/bin/chromium \
|
||||||
CHROMIUM_PATH=/usr/bin/chromium
|
CACHE_TTL_SECONDS=21600
|
||||||
|
|
||||||
# Expose service port
|
|
||||||
EXPOSE 3000
|
EXPOSE 3000
|
||||||
|
|
||||||
|
# Use tini as PID 1 so we don't need `--init`
|
||||||
|
ENTRYPOINT ["/usr/bin/tini", "--"]
|
||||||
|
|
||||||
# Start the service
|
# Start the service
|
||||||
CMD ["node", "server.js"]
|
CMD ["node", "server.js"]
|
||||||
|
|
||||||
|
|
|
||||||
372
server.js
372
server.js
|
|
@ -1,36 +1,111 @@
|
||||||
// server.js
|
// server.js (hardened)
|
||||||
const express = require('express');
|
const express = require('express');
|
||||||
|
const rateLimit = require('express-rate-limit');
|
||||||
const { chromium } = require('playwright');
|
const { chromium } = require('playwright');
|
||||||
const Database = require('better-sqlite3');
|
const Database = require('better-sqlite3');
|
||||||
|
const punycode = require('punycode/');
|
||||||
|
|
||||||
const app = express();
|
// ---------- Config ----------
|
||||||
const port = process.env.PORT || 3000;
|
const PORT = Number(process.env.PORT || 3000);
|
||||||
|
const CHROMIUM_PATH = process.env.CHROMIUM_PATH || undefined;
|
||||||
|
const CACHE_TTL_SECONDS = parseInt(process.env.CACHE_TTL_SECONDS || '21600', 10);
|
||||||
|
const MAX_REDIRECT_STEPS = parseInt(process.env.MAX_REDIRECT_STEPS || '20', 10);
|
||||||
|
const CONCURRENCY = parseInt(process.env.CONCURRENCY || '3', 10);
|
||||||
|
const SQLITE_PATH = process.env.SQLITE_PATH || './cache.db';
|
||||||
|
const MAX_DOMAINS = parseInt(process.env.MAX_DOMAINS || '5000', 10);
|
||||||
|
const MAX_REDIRECT_LOG = parseInt(process.env.MAX_REDIRECT_LOG || '50', 10);
|
||||||
|
const NAV_TIMEOUT_MS = parseInt(process.env.NAV_TIMEOUT_MS || '30000', 10);
|
||||||
|
const QUIET_WINDOW_MS = parseInt(process.env.QUIET_WINDOW_MS || '600', 10); // «маленькая тишина»
|
||||||
|
|
||||||
const executablePath = process.env.CHROMIUM_PATH || undefined;
|
const CHROMIUM_ARGS = [
|
||||||
const chromiumArgs = [
|
|
||||||
'--no-sandbox',
|
'--no-sandbox',
|
||||||
'--disable-setuid-sandbox',
|
'--disable-setuid-sandbox',
|
||||||
'--disable-dev-shm-usage',
|
'--disable-dev-shm-usage', // рекомендуется заменить на --ipc=host при запуске контейнера
|
||||||
'--disable-gpu',
|
'--disable-gpu',
|
||||||
'--no-zygote',
|
'--no-zygote',
|
||||||
];
|
];
|
||||||
|
|
||||||
const CACHE_TTL_SECONDS = parseInt(process.env.CACHE_TTL_SECONDS || '21600', 10);
|
// ---------- Helpers ----------
|
||||||
const MAX_REDIRECT_STEPS = parseInt(process.env.MAX_REDIRECT_STEPS || '20', 10); // анти-цикл по глубине
|
const app = express();
|
||||||
|
app.use(express.json());
|
||||||
|
|
||||||
const db = new Database(process.env.SQLITE_PATH || './cache.db');
|
// Basic rate limit (per-IP)
|
||||||
|
// Per-IP rate limit, applied to the scan endpoint only. Mounting it globally
// would also throttle /health, so a frequent liveness probe could receive 429
// responses and share its budget with scan traffic from the same address.
const limiter = rateLimit({
  windowMs: 60_000, // length of one counting window, in milliseconds
  max: 30, // requests allowed per client within one window
  standardHeaders: true,
  legacyHeaders: false,
});
app.use('/domains', limiter);
|
||||||
|
|
||||||
|
// Normalize/validate domain
|
||||||
|
/**
 * Normalize user input into a lower-case ASCII hostname.
 *
 * Accepts a bare host ("Example.com"), a host followed by a port/path, or a
 * full http(s) URL; only the hostname part is kept.
 *
 * @param {unknown} input - Raw value supplied by the caller.
 * @returns {string|null} ASCII hostname (at most 253 characters, without a
 *   trailing root dot), or null when the input is not a string, uses a
 *   non-http(s) scheme, or does not contain a host that the URL parser accepts.
 */
function normalizeDomain(input) {
  if (typeof input !== 'string') return null;
  const raw = input.trim().toLowerCase();
  if (raw === '') return null;

  // An explicit scheme other than http/https is rejected instead of being
  // prefixed with "https://" (which would turn "ftp://x" into host "ftp").
  const scheme = /^([a-z][a-z0-9+.-]*):\/\//.exec(raw);
  if (scheme && scheme[1] !== 'http' && scheme[1] !== 'https') return null;

  let host;
  try {
    host = new URL(scheme ? raw : `https://${raw}`).hostname;
  } catch {
    // The URL parser refused the host (spaces, forbidden characters, bad
    // port, ...). Such input is invalid; never hand it back unvalidated.
    return null;
  }

  let ascii;
  try {
    // IDNA -> ASCII. Leaves an already-ASCII hostname unchanged.
    ascii = punycode.toASCII(host);
  } catch {
    return null;
  }

  // "example.com." and "example.com" name the same host; keep one spelling
  // so both map to the same key downstream.
  if (ascii.endsWith('.')) ascii = ascii.slice(0, -1);

  if (!ascii || ascii.length > 253) return null;
  return ascii;
}
|
||||||
|
|
||||||
|
/**
 * Return the lower-cased hostname of an absolute URL.
 *
 * @param {string} url - Absolute URL to parse.
 * @returns {string|null} The hostname, or null when `url` cannot be parsed.
 */
function extractDomain(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return null;
  }
  return parsed.hostname.toLowerCase();
}
|
||||||
|
|
||||||
|
// ---------- Simple semaphore ----------
|
||||||
|
/**
 * Minimal counting semaphore for async code.
 *
 * `acquire()` resolves with a release callback once a permit is available;
 * waiters are served in FIFO order.
 */
class Semaphore {
  /**
   * @param {number} limit - Maximum number of permits held at once; must be
   *   a positive integer.
   * @throws {RangeError} When `limit` is not a positive integer. With NaN, 0
   *   or a negative value no permit could ever be granted and every
   *   `acquire()` would wait forever.
   */
  constructor(limit) {
    if (!Number.isInteger(limit) || limit < 1) {
      throw new RangeError(`Semaphore limit must be a positive integer, got ${limit}`);
    }
    this.limit = limit;
    this.active = 0;
    this.queue = [];
  }

  /**
   * Wait for a permit.
   *
   * @returns {Promise<() => void>} Resolves with a release function. Calling
   *   it more than once has no further effect.
   */
  acquire() {
    return new Promise((resolve) => {
      const tryAcquire = () => {
        if (this.active >= this.limit) {
          // No permit free: park this attempt until a holder releases.
          this.queue.push(tryAcquire);
          return;
        }
        this.active++;
        let released = false;
        resolve(() => {
          // Guard against double release, which would otherwise push
          // `active` below the real holder count and over-admit callers.
          if (released) return;
          released = true;
          this.active--;
          const next = this.queue.shift();
          if (next) next();
        });
      };
      tryAcquire();
    });
  }
}
|
||||||
|
const sem = new Semaphore(CONCURRENCY);
|
||||||
|
|
||||||
|
// ---------- DB ----------
|
||||||
|
const db = new Database(SQLITE_PATH);
|
||||||
db.pragma('journal_mode = WAL');
|
db.pragma('journal_mode = WAL');
|
||||||
db.exec(`
|
db.exec(`
|
||||||
CREATE TABLE IF NOT EXISTS domain_cache (
|
CREATE TABLE IF NOT EXISTS domain_cache (
|
||||||
domain TEXT PRIMARY KEY,
|
domain TEXT PRIMARY KEY,
|
||||||
result_json TEXT NOT NULL, -- JSON массива связанных доменов
|
result_json TEXT NOT NULL,
|
||||||
final_url TEXT,
|
final_url TEXT,
|
||||||
redirect_chain_json TEXT, -- JSON журнала редиректов
|
redirect_chain_json TEXT,
|
||||||
updated_at INTEGER NOT NULL,
|
updated_at INTEGER NOT NULL,
|
||||||
ttl_at INTEGER NOT NULL
|
ttl_at INTEGER NOT NULL
|
||||||
);
|
);
|
||||||
`);
|
`);
|
||||||
|
|
||||||
const stmtSelect = db.prepare(`
|
const stmtSelect = db.prepare(`
|
||||||
SELECT result_json, final_url, redirect_chain_json, updated_at, ttl_at
|
SELECT result_json, final_url, redirect_chain_json, updated_at, ttl_at
|
||||||
FROM domain_cache WHERE domain = ?
|
FROM domain_cache WHERE domain = ?
|
||||||
|
|
@ -46,110 +121,6 @@ ON CONFLICT(domain) DO UPDATE SET
|
||||||
ttl_at = excluded.ttl_at
|
ttl_at = excluded.ttl_at
|
||||||
`);
|
`);
|
||||||
|
|
||||||
app.use(express.json());
|
|
||||||
|
|
||||||
function extractDomain(url) {
|
|
||||||
try { return new URL(url).hostname; } catch { return null; }
|
|
||||||
}
|
|
||||||
|
|
||||||
let browser;
|
|
||||||
async function getBrowser() {
|
|
||||||
if (browser && browser.isConnected()) return browser;
|
|
||||||
browser = await chromium.launch({
|
|
||||||
executablePath,
|
|
||||||
headless: true,
|
|
||||||
args: chromiumArgs,
|
|
||||||
});
|
|
||||||
return browser;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Вспомогательная функция для сборки полного журнала редиректов через цепочку redirectedFrom()
|
|
||||||
function buildRedirectChainForResponse(resp) {
|
|
||||||
const chain = [];
|
|
||||||
const currentReq = resp.request();
|
|
||||||
let prev = currentReq.redirectedFrom();
|
|
||||||
let toUrl = currentReq.url();
|
|
||||||
const status = resp.status();
|
|
||||||
while (prev) {
|
|
||||||
chain.push({ from: prev.url(), to: toUrl, status });
|
|
||||||
toUrl = prev.url();
|
|
||||||
prev = prev.redirectedFrom();
|
|
||||||
}
|
|
||||||
return chain.reverse();
|
|
||||||
}
|
|
||||||
|
|
||||||
async function scanDomainOnce(originDomain) {
|
|
||||||
const startUrl = `https://${originDomain}`;
|
|
||||||
|
|
||||||
const b = await getBrowser();
|
|
||||||
const context = await b.newContext();
|
|
||||||
const page = await context.newPage();
|
|
||||||
|
|
||||||
const seenDomains = new Set();
|
|
||||||
const redirectLog = [];
|
|
||||||
const visitedUrls = new Set(); // для детекции циклов
|
|
||||||
let redirectSteps = 0;
|
|
||||||
|
|
||||||
// Фиксируем все запросы
|
|
||||||
page.on('request', req => {
|
|
||||||
const d = extractDomain(req.url());
|
|
||||||
if (d) seenDomains.add(d);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Фиксируем ответы и редиректные цепочки
|
|
||||||
page.on('response', resp => {
|
|
||||||
const url = resp.url();
|
|
||||||
const d = extractDomain(url);
|
|
||||||
if (d) seenDomains.add(d);
|
|
||||||
|
|
||||||
// Добавим элементы цепочки, если ответ был редиректом (3xx)
|
|
||||||
const status = resp.status();
|
|
||||||
if (status >= 300 && status < 400) {
|
|
||||||
const piece = buildRedirectChainForResponse(resp);
|
|
||||||
redirectLog.push(...piece);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
try {
|
|
||||||
let currentUrl = startUrl;
|
|
||||||
// Анти-цикл: свой контроль над goto в несколько шагов — через ожидание события navigation и проверку URL
|
|
||||||
// Однако Playwright следует редиректам сам; для анти-цикла контролируем уникальность URL после перехода
|
|
||||||
const resp = await page.goto(currentUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
|
|
||||||
// После авто-редиректов Playwright мы проверим фактическую цепочку через обработчики и page.url()
|
|
||||||
|
|
||||||
// Защита от «вечных» редиректов: проверим историю URL в performance entries
|
|
||||||
// Простой и надёжный способ: считать шаги смены URL в waitForNavigation с url predicate — но нам достаточно лимита по постфакту.
|
|
||||||
// Проверим финальный URL и убедимся, что не было явного зацикливания по уже виденным URL.
|
|
||||||
const finalUrl = page.url();
|
|
||||||
if (visitedUrls.has(finalUrl)) {
|
|
||||||
throw new Error('Redirect loop detected');
|
|
||||||
}
|
|
||||||
visitedUrls.add(finalUrl);
|
|
||||||
|
|
||||||
// Как дополнительная защита — лимит по шагам 3xx из собранного redirectLog
|
|
||||||
// Если цепочка слишком длинная, считаем её небезопасной.
|
|
||||||
redirectSteps = redirectLog.length;
|
|
||||||
if (redirectSteps > MAX_REDIRECT_STEPS) {
|
|
||||||
throw new Error(`Too many redirects (${redirectSteps})`);
|
|
||||||
}
|
|
||||||
|
|
||||||
await context.close();
|
|
||||||
|
|
||||||
const relatedDomains = Array.from(seenDomains)
|
|
||||||
.filter(d => !d.includes('doubleclick') && !d.includes('google'))
|
|
||||||
.sort();
|
|
||||||
|
|
||||||
return {
|
|
||||||
finalUrl,
|
|
||||||
relatedDomains,
|
|
||||||
redirectChain: redirectLog,
|
|
||||||
};
|
|
||||||
} catch (e) {
|
|
||||||
try { await context.close(); } catch {}
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function getFromCache(domain) {
|
function getFromCache(domain) {
|
||||||
const row = stmtSelect.get(domain);
|
const row = stmtSelect.get(domain);
|
||||||
if (!row) return null;
|
if (!row) return null;
|
||||||
|
|
@ -166,7 +137,6 @@ function getFromCache(domain) {
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
function putToCache(domain, result) {
|
function putToCache(domain, result) {
|
||||||
const now = Math.floor(Date.now() / 1000);
|
const now = Math.floor(Date.now() / 1000);
|
||||||
const ttlAt = now + CACHE_TTL_SECONDS;
|
const ttlAt = now + CACHE_TTL_SECONDS;
|
||||||
|
|
@ -180,17 +150,160 @@ function putToCache(domain, result) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------- Browser lifecycle ----------
|
||||||
|
// Shared browser instance; replaced when it is missing or disconnected.
let browser;
// Launch currently in progress, shared by concurrent callers; null otherwise.
let browserLaunching = null;

/**
 * Return a connected browser, launching (or relaunching) it on demand.
 *
 * Concurrent callers that arrive while a launch is in progress all receive
 * that same launch, so only one browser process is started.
 *
 * @returns {Promise<object>} The connected browser instance.
 * @throws Whatever `chromium.launch()` rejects with; the next call retries.
 */
async function ensureBrowser() {
  try {
    if (browser && browser.isConnected()) return browser;
  } catch {
    // isConnected() itself failed: treat the instance as dead and relaunch.
  }

  if (browserLaunching) return browserLaunching;

  const launching = (async () => {
    const stale = browser;
    if (stale) {
      // Best-effort cleanup of the dead instance; a failure here must not
      // prevent the relaunch.
      try { await stale.close(); } catch {}
    }
    browser = await chromium.launch({
      executablePath: CHROMIUM_PATH,
      headless: true,
      args: CHROMIUM_ARGS,
    });
    return browser;
  })();

  browserLaunching = launching;
  // Forget the launch once it settles (success or failure) so that a failed
  // attempt is not cached and a later disconnect triggers a fresh launch.
  const clear = () => {
    if (browserLaunching === launching) browserLaunching = null;
  };
  launching.then(clear, clear);

  return launching;
}
|
||||||
|
|
||||||
|
// ---------- Redirect utilities ----------
|
||||||
|
/**
 * Rebuild the redirect hops that led to the request behind `resp` by walking
 * `redirectedFrom()` backwards.
 *
 * At most MAX_REDIRECT_LOG hops are collected, starting from the hop nearest
 * to `resp`; at least one hop is returned whenever the request was redirected.
 *
 * NOTE(review): every returned entry carries the status of `resp` itself, not
 * the status of the individual hop.
 *
 * @param {object} resp - Response exposing `request()` and `status()`.
 * @returns {Array<{from: string, to: string, status: number}>} Hops ordered
 *   from the earliest to the latest; empty when the request was not redirected.
 */
function buildRedirectChainForResponse(resp) {
  const request = resp.request();
  const status = resp.status();
  const hops = [];
  let target = request.url();

  for (let source = request.redirectedFrom(); source; source = source.redirectedFrom()) {
    const sourceUrl = source.url();
    // Prepend so the result ends up oldest-first without a final reverse.
    hops.unshift({ from: sourceUrl, to: target, status });
    target = sourceUrl;
    if (hops.length >= MAX_REDIRECT_LOG) break;
  }

  return hops;
}
|
||||||
|
|
||||||
|
// ---------- Core scan ----------
|
||||||
|
/**
 * Load `https://<originDomain>` in a fresh browser context and record the
 * hosts contacted and the redirects observed while the page loads.
 *
 * After `domcontentloaded` the function waits until no request is pending and
 * the network has been quiet for QUIET_WINDOW_MS, or until NAV_TIMEOUT_MS has
 * elapsed, whichever comes first.
 *
 * @param {string} originDomain - Hostname to scan (no scheme, no path).
 * @param {{aborted: boolean}} [signal] - Optional abort signal, polled during
 *   the quiet wait.
 * @returns {Promise<{finalUrl: string, relatedDomains: string[],
 *   redirectChain: Array<{from: string, to: string, status: number}>,
 *   droppedDomains: number}>}
 *   `relatedDomains` is sorted and excludes hosts containing "doubleclick" or
 *   "google". `droppedDomains` counts sightings of hosts that were not
 *   recorded because MAX_DOMAINS distinct hosts had already been stored.
 * @throws {Error} "Aborted" when `signal.aborted` is set during the wait,
 *   "Too many redirects (n)" when more than MAX_REDIRECT_STEPS hops were
 *   logged, or any error raised while opening the page or navigating.
 */
async function scanDomainOnce(originDomain, signal) {
  const startUrl = `https://${originDomain}`;
  const b = await ensureBrowser();
  const context = await b.newContext();

  const seenDomains = new Set();
  const redirectLog = [];
  const seenPairs = new Set(); // "from|to" keys, so a repeated hop is logged once
  const pending = new Set(); // requests that have neither responded nor failed
  let droppedDomains = 0;
  let lastNetChange = Date.now();

  // Record a host, respecting the MAX_DOMAINS budget. Hosts that are already
  // recorded never count as dropped.
  const noteDomain = (url) => {
    const d = extractDomain(url);
    if (!d || seenDomains.has(d)) return;
    if (seenDomains.size < MAX_DOMAINS) seenDomains.add(d);
    else droppedDomains++;
  };

  const onReq = (req) => {
    pending.add(req);
    lastNetChange = Date.now();
    noteDomain(req.url());
  };

  // A request that fails (blocked, DNS error, aborted) never produces a
  // response; without this handler it would stay pending and the quiet wait
  // would always run until NAV_TIMEOUT_MS.
  const onFailed = (req) => {
    pending.delete(req);
    lastNetChange = Date.now();
  };

  const onResp = (resp) => {
    pending.delete(resp.request());
    lastNetChange = Date.now();
    const from = resp.url();
    noteDomain(from);

    const status = resp.status();
    if (status < 300 || status >= 400) return;
    // Only responses that name a target are redirects; this skips
    // 304 Not Modified, which is 3xx but carries no Location header.
    const location = resp.headers()['location'];
    if (!location) return;
    if (redirectLog.length >= MAX_REDIRECT_LOG) return;

    let to;
    try {
      to = new URL(location, from).href; // Location may be relative
    } catch {
      return;
    }
    const key = `${from}|${to}`;
    if (seenPairs.has(key)) return;
    seenPairs.add(key);
    // The status belongs to this very hop: it is the status of the response
    // that issued the redirect.
    redirectLog.push({ from, to, status });
  };

  let page;
  try {
    page = await context.newPage();
    page.on('request', onReq);
    page.on('response', onResp);
    page.on('requestfailed', onFailed);

    // Navigate, then wait for a short period of network silence.
    await page.goto(startUrl, { waitUntil: 'domcontentloaded', timeout: NAV_TIMEOUT_MS });

    const startWait = Date.now();
    while (Date.now() - startWait < NAV_TIMEOUT_MS) {
      if (signal?.aborted) throw new Error('Aborted');
      const quietFor = Date.now() - lastNetChange;
      if (pending.size === 0 && quietFor >= QUIET_WINDOW_MS) break;
      await new Promise((r) => setTimeout(r, 100));
    }

    const finalUrl = page.url();

    // NOTE(review): the log also contains redirects of sub-resources, so this
    // limit is not restricted to the main navigation chain.
    const steps = redirectLog.length;
    if (steps > MAX_REDIRECT_STEPS) throw new Error(`Too many redirects (${steps})`);

    const relatedDomains = Array.from(seenDomains)
      .filter((d) => !d.includes('doubleclick') && !d.includes('google'))
      .sort();

    return {
      finalUrl,
      relatedDomains,
      redirectChain: redirectLog,
      droppedDomains,
    };
  } catch (e) {
    // If the browser died, drop the reference so the next call relaunches it.
    try {
      if (browser && !browser.isConnected()) {
        const dead = browser;
        browser = null;
        await dead.close();
      }
    } catch {}
    throw e;
  } finally {
    if (page) {
      page.off('request', onReq);
      page.off('response', onResp);
      page.off('requestfailed', onFailed);
    }
    // Best-effort: a failure to close the context must not discard a
    // finished result or mask the original error.
    try { await context.close(); } catch {}
  }
}
|
||||||
|
|
||||||
|
// ---------- Routes ----------
|
||||||
app.get('/domains', async (req, res) => {
|
app.get('/domains', async (req, res) => {
|
||||||
const { domain } = req.query;
|
const norm = normalizeDomain(req.query.domain);
|
||||||
if (!domain) {
|
if (!norm) {
|
||||||
res.status(400).json({ error: '"domain" query parameter is required' });
|
res.status(400).json({ error: '"domain" must be a valid hostname' });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Семафор — ограничиваем параллельность
|
||||||
|
const release = await sem.acquire();
|
||||||
|
const ac = new AbortController();
|
||||||
|
const timer = setTimeout(() => ac.abort(), NAV_TIMEOUT_MS * 2); // общий верхний потолок
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const cached = getFromCache(domain);
|
const cached = getFromCache(norm);
|
||||||
if (cached) {
|
if (cached) {
|
||||||
res.json({
|
res.json({
|
||||||
domain,
|
domain: norm,
|
||||||
finalUrl: cached.finalUrl,
|
finalUrl: cached.finalUrl,
|
||||||
relatedDomains: cached.relatedDomains,
|
relatedDomains: cached.relatedDomains,
|
||||||
redirectChain: cached.redirectChain,
|
redirectChain: cached.redirectChain,
|
||||||
|
|
@ -201,23 +314,28 @@ app.get('/domains', async (req, res) => {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await scanDomainOnce(domain);
|
const result = await scanDomainOnce(norm, ac.signal);
|
||||||
putToCache(domain, result);
|
putToCache(norm, result);
|
||||||
|
|
||||||
res.json({
|
res.json({
|
||||||
domain,
|
domain: norm,
|
||||||
finalUrl: result.finalUrl,
|
finalUrl: result.finalUrl,
|
||||||
relatedDomains: result.relatedDomains,
|
relatedDomains: result.relatedDomains,
|
||||||
redirectChain: result.redirectChain,
|
redirectChain: result.redirectChain,
|
||||||
cached: false,
|
cached: false,
|
||||||
|
droppedDomains: result.droppedDomains,
|
||||||
});
|
});
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
res.status(500).json({ error: e.message || 'Internal server error' });
|
res.status(500).json({ error: e.message || 'Internal server error' });
|
||||||
|
} finally {
|
||||||
|
clearTimeout(timer);
|
||||||
|
release();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
app.get('/health', (_req, res) => res.json({ ok: true }));
|
app.get('/health', (_req, res) => res.json({ ok: true }));
|
||||||
|
|
||||||
|
// ---------- Shutdown ----------
|
||||||
process.on('SIGTERM', async () => {
|
process.on('SIGTERM', async () => {
|
||||||
try { if (browser) await browser.close(); } catch {}
|
try { if (browser) await browser.close(); } catch {}
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
|
|
@ -227,7 +345,7 @@ process.on('SIGINT', async () => {
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
app.listen(port, () => {
|
app.listen(PORT, () => {
|
||||||
console.log(`Domain scanner service listening on port ${port}`);
|
console.log(`Domain scanner service listening on port ${PORT}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue