Use headless chromium
Some checks are pending
Build and Push Docker Image / build-and-push (push) Waiting to run
Some checks are pending
Build and Push Docker Image / build-and-push (push) Waiting to run
This commit is contained in:
parent
cdd36afd33
commit
09f054b888
2 changed files with 65 additions and 19 deletions
47
Dockerfile
47
Dockerfile
|
|
@ -1,25 +1,48 @@
|
||||||
FROM node:20-trixie-slim
|
# Base minimal Debian
|
||||||
|
FROM debian:bookworm-slim
|
||||||
|
|
||||||
|
# Prevent tzdata prompts
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
WORKDIR /usr/src/app
|
|
||||||
|
|
||||||
# Базовые утилиты, без лишних рекомендованных пакетов
|
# Install Node.js, Chromium and minimal runtime libs
|
||||||
|
# Note: chromium package on Debian provides /usr/bin/chromium
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
ca-certificates curl gnupg && \
|
ca-certificates curl gnupg \
|
||||||
rm -rf /var/lib/apt/lists/*
|
nodejs npm \
|
||||||
|
chromium \
|
||||||
|
# Minimal GUI/Chromium runtime libs often needed by Playwright Chromium
|
||||||
|
libx11-6 libxcomposite1 libxdamage1 libxrandr2 libxkbcommon0 \
|
||||||
|
libgtk-3-0 libnss3 libdrm2 libgbm1 libasound2 fonts-liberation \
|
||||||
|
# Useful for font rendering
|
||||||
|
fonts-dejavu-core \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# App directory
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install only production deps
|
||||||
COPY package*.json ./
|
COPY package*.json ./
|
||||||
|
ENV CI=true
|
||||||
RUN npm ci --omit=dev
|
RUN npm ci --omit=dev
|
||||||
|
|
||||||
# Ставим только headless shell Chromium и его системные зависимости
|
# Copy source
|
||||||
RUN npx playwright install --with-deps --only-shell && \
|
COPY . .
|
||||||
rm -rf /usr/share/doc /usr/share/man /var/cache/apt/*
|
|
||||||
|
|
||||||
# Копируем минимально нужные исходники
|
# Security: run as non-root
|
||||||
COPY server.js ./
|
RUN useradd -ms /bin/bash nodeuser && chown -R nodeuser:nodeuser /app
|
||||||
# Если используется игнор-лист как файл — раскомментируйте строку:
|
USER nodeuser
|
||||||
COPY ignore-domains.txt ./
|
|
||||||
|
|
||||||
|
# Environment for service
|
||||||
|
ENV PORT=3000 \
|
||||||
|
# Ensure Playwright uses system Chromium and does not download browsers
|
||||||
|
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 \
|
||||||
|
PLAYWRIGHT_BROWSERS_PATH=0 \
|
||||||
|
# Path to the system Chromium binary; server.js reads CHROMIUM_PATH and passes it to chromium.launch() as executablePath
|
||||||
|
CHROMIUM_PATH=/usr/bin/chromium
|
||||||
|
|
||||||
|
# Expose service port
|
||||||
EXPOSE 3000
|
EXPOSE 3000
|
||||||
|
|
||||||
|
# Start the service
|
||||||
CMD ["node", "server.js"]
|
CMD ["node", "server.js"]
|
||||||
|
|
||||||
|
|
|
||||||
37
server.js
37
server.js
|
|
@ -1,9 +1,22 @@
|
||||||
|
// server.js
|
||||||
const express = require('express');
|
const express = require('express');
|
||||||
const { chromium } = require('playwright');
|
const { chromium } = require('playwright');
|
||||||
|
|
||||||
const app = express();
|
const app = express();
|
||||||
const port = process.env.PORT || 3000;
|
const port = process.env.PORT || 3000;
|
||||||
|
|
||||||
|
// Использовать системный Chromium, если задан путь (например, /usr/bin/chromium в Debian)
|
||||||
|
const executablePath = process.env.CHROMIUM_PATH || undefined; // можно оставить undefined, если Chromium в PATH [1][2]
|
||||||
|
|
||||||
|
// Базовый набор флагов для контейнера без systemd/dbus и без install-deps
|
||||||
|
const chromiumArgs = [
|
||||||
|
'--no-sandbox', // запуск без setuid sandbox в контейнере [14]
|
||||||
|
'--disable-setuid-sandbox', // отключение setuid sandbox [14]
|
||||||
|
'--disable-dev-shm-usage', // использовать /tmp вместо /dev/shm (если нет --ipc=host) [15][16]
|
||||||
|
'--disable-gpu', // headless окружение [14]
|
||||||
|
'--no-zygote', // упрощение процессов в контейнере [14]
|
||||||
|
];
|
||||||
|
|
||||||
app.use(express.json());
|
app.use(express.json());
|
||||||
|
|
||||||
function extractDomain(url) {
|
function extractDomain(url) {
|
||||||
|
|
@ -23,12 +36,17 @@ app.get('/domains', async (req, res) => {
|
||||||
|
|
||||||
const url = `https://${domain}`;
|
const url = `https://${domain}`;
|
||||||
const seenDomains = new Set();
|
const seenDomains = new Set();
|
||||||
|
let browser;
|
||||||
|
let context;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const browser = await chromium.launch({
|
browser = await chromium.launch({
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
executablePath, // берётся из CHROMIUM_PATH при наличии [1][2]
|
||||||
|
headless: true, // явный headless режим для контейнера [14]
|
||||||
|
args: chromiumArgs, // флаги для стабильности в Docker [15][14]
|
||||||
});
|
});
|
||||||
const context = await browser.newContext();
|
|
||||||
|
context = await browser.newContext();
|
||||||
const page = await context.newPage();
|
const page = await context.newPage();
|
||||||
|
|
||||||
page.on('request', request => {
|
page.on('request', request => {
|
||||||
|
|
@ -37,15 +55,20 @@ app.get('/domains', async (req, res) => {
|
||||||
});
|
});
|
||||||
|
|
||||||
await page.goto(url, { waitUntil: 'load', timeout: 30000 });
|
await page.goto(url, { waitUntil: 'load', timeout: 30000 });
|
||||||
|
|
||||||
|
// Закрываем контекст и браузер до фильтрации доменов
|
||||||
|
await context.close();
|
||||||
await browser.close();
|
await browser.close();
|
||||||
|
|
||||||
// Фильтрация доменов
|
const filteredDomains = Array.from(seenDomains)
|
||||||
const filteredDomains = Array.from(seenDomains).filter(d =>
|
.filter(d => !d.includes('doubleclick') && !d.includes('google'))
|
||||||
!d.includes('doubleclick') && !d.includes('google')
|
.sort();
|
||||||
).sort();
|
|
||||||
|
|
||||||
res.json({ domains: filteredDomains });
|
res.json({ domains: filteredDomains });
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
// Безопасно закрыть ресурсы при ошибке
|
||||||
|
try { if (context) await context.close(); } catch {}
|
||||||
|
try { if (browser) await browser.close(); } catch {}
|
||||||
res.status(500).json({ error: e.message || 'Internal server error' });
|
res.status(500).json({ error: e.message || 'Internal server error' });
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue