diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 81c94041..770c2f7e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -53,16 +53,19 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- - name: Build image
+ - name: Build backend image
run: docker build -t website-profiling:ci .
+ - name: Build web image
+ run: docker build -t website-profiling-web:ci ./web --build-arg VITE_BFF_BASE_URL=http://localhost:8090
- name: Browser crawl tests in image
run: |
docker run --rm \
website-profiling:ci \
/opt/venv/bin/pytest tests/test_crawl_fetchers.py tests/test_crawler_browser_e2e.py -m browser -q -o addopts=
- - name: Compose smoke (postgres + web)
+ - name: Compose smoke (postgres + fastapi + web)
env:
- WEB_IMAGE: website-profiling:ci
+ BACKEND_IMAGE: website-profiling:ci
+ WEB_IMAGE: website-profiling-web:ci
run: |
docker compose -f docker-compose.pull.yml up -d --wait
curl -fsS http://127.0.0.1:3000/home
@@ -82,6 +85,8 @@ jobs:
cache-dependency-path: web/package-lock.json
- name: Install
run: npm ci
+ - name: Build
+ run: npm run build
- name: Typecheck
run: npm run typecheck
- name: Lint
@@ -98,3 +103,30 @@ jobs:
dotnet-version: '10.0.x'
- name: Test FileService
run: dotnet test services/FileService/FileService.slnx
+
+ bff:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-dotnet@v4
+ with:
+ dotnet-version: '10.0.x'
+ - name: Test BFF
+ run: dotnet test services/Bff/Bff.slnx
+ - name: Generated client drift gate
+ run: |
+ dotnet tool install -g NSwag.ConsoleCore
+ export PATH="$PATH:$HOME/.dotnet/tools"
+ (cd services/Bff && nswag run nswag.json)
+ git diff --exit-code services/Bff/src/Bff.Application/Generated/FastApiClient.g.cs \
+ || (echo "::error::FastApiClient.g.cs is stale — run services/Bff/generate-client.sh and commit." && exit 1)
+
+ data:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-dotnet@v4
+ with:
+ dotnet-version: '10.0.x'
+ - name: Test Data service
+ run: dotnet test services/Data/Data.slnx
diff --git a/.gitignore b/.gitignore
index 5ea03af9..503b94b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-# Project (root) only. Python: src/.gitignore. Next.js: web/.gitignore. .NET: services/FileService/
+# Project (root) only. Python: src/.gitignore. Web UI: web/.gitignore. .NET: services/*/
# Next.js UI: generated pipeline configs from the runner modal (repo root; must match Python cwd for paths)
.website-profiling-ui-*.txt
@@ -33,12 +33,13 @@ pipeline-config.txt
skills-lock.json
crawl_results.csv
commit.*
-
-# .NET FileService — build output and IDE artifacts
-services/FileService/**/bin/
-services/FileService/**/obj/
-services/FileService/.vs/
-services/FileService/**/*.user
-services/FileService/**/*.suo
-services/FileService/**/TestResults/
-services/FileService/**/*.DotSettings.user
\ No newline at end of file
+.cursor/
+.claude/
+# .NET services (FileService, Bff, …) — build output and IDE artifacts
+services/**/bin/
+services/**/obj/
+services/**/.vs/
+services/**/*.user
+services/**/*.suo
+services/**/TestResults/
+services/**/*.DotSettings.user
diff --git a/AGENT.md b/AGENT.md
index e51865f6..fcd3c030 100644
--- a/AGENT.md
+++ b/AGENT.md
@@ -6,16 +6,17 @@ Developer reference for agents and contributors. User-facing overview: [README.m
**LLM / AI:** Settings live in **`llm_config`** table in PostgreSQL. Providers: OpenAI, Google Gemini, Anthropic, Groq, Ollama (`web/src/lib/llmConfigSchema.ts`). Configure only via web UI **AI** tab (`GET/PUT /api/llm-config`, localhost). Never in `pipeline-config.txt` or `--config`.
-**Frontend:** **`web/`** (Next.js) -- server reads PostgreSQL via `/api/report/*`.
+**Frontend:** **`web/`** (Vite + React SPA) — browser calls **`services/Bff/`** for all `/api/*`; BFF proxies to FastAPI, the Data service, and FileService.
**Key paths**
- `src/website_profiling/` -- `cli.py`, `config.py`, `crawl/`, `db/storage.py`, `lighthouse/`, `reporting/`, `analysis/`, `llm/`, `tools/`
+- `services/Bff/` -- .NET BFF (auth, CORS, `/api/*` proxy)
- `services/FileService/` -- .NET PDF + Excel workbook export (HTTP-only; see [README](services/FileService/README.md))
-- `web/app/` -- routes; `web/src/` -- React; pipeline: `PipelineRunnerFab`, `server/pipelineJobs.ts`, `server/pipelineConfig.ts`, `server/llmConfig.ts`, `server/db.ts`
+- `web/src/` -- React SPA (`AppRoutes.tsx`, `views/`, `components/`); pipeline UI: `PipelineRunnerFab`, `PipelineContext`
- `alembic/` -- schema migrations
-**Local dev:** `./local-run` (Postgres in Docker `wp-pg`, FileService on `:8080`, Next.js on host; default `DATABASE_URL`: `postgres://postgres:dev@127.0.0.1:5432/website_profiling`). See `scripts/local-run.sh`. **Local tests:** `./local-test` runs **three** Python coverage gates (core 100%, reporting 100%, tools 100%) plus web checks — mirrors CI **python** and **web** jobs; Docker CI is separate (see `.github/workflows/ci.yml`). `./local-test browser` for `@pytest.mark.browser` integration tests — see `scripts/local-test.sh`. Mocked browser unit tests: `tests/test_browser_fetcher_unit.py`.
+**Local dev:** `./local-run` (Postgres in Docker `wp-pg`, FileService on `:8080`, Data on `:8091`, pipeline worker, FastAPI on `:8001`, BFF on `:8090`, Vite on `:3000`; default `DATABASE_URL`: `postgres://postgres:dev@127.0.0.1:5432/website_profiling`). See `scripts/local-run.sh`. **Local tests:** `./local-test` runs **three** Python coverage gates (core 100%, reporting 100%, tools 100%) plus web and .NET checks — mirrors the CI **python**, **web**, and .NET jobs; Docker CI is separate (see `.github/workflows/ci.yml`). `./local-test browser` for `@pytest.mark.browser` integration tests — see `scripts/local-test.sh`. Mocked browser unit tests: `tests/test_browser_fetcher_unit.py`.
**JavaScript crawl (optional):** Config keys `crawl_render_mode` (`static` | `javascript` | `auto`) and `crawl_js_*` in pipeline config / `pipelineConfigSchema.ts`. JS/auto crawls can capture browser console errors and uncaught exceptions (`crawl_js_capture_console`, stored under `page_analysis.browser`). **Auto mode** uses static-first fetch, pre-parse SPA heuristics (`needs_js_render`), then post-parse low-outlink fallback (`needs_js_render_after_parse`) in `crawler.py`. **Preflight:** `GET /api/crawl/browser-status` (localhost) spawns Python `browser_status()`; Run audit settings/run validation calls it when render mode is `javascript` or `auto`. Browser deps: Playwright from `requirements.txt` (installed by `./local-run setup` and `./local-test`). Runtime needs Chromium on `PATH` or `CHROME_PATH` (Docker sets `CHROME_PATH=/usr/bin/chromium`). Integration tests: `@pytest.mark.browser` — excluded by default in `pytest.ini`; Docker CI runs `tests/test_crawl_fetchers.py` and `tests/test_crawler_browser_e2e.py -m browser`; locally `./local-test browser`.
@@ -26,13 +27,13 @@ Developer reference for agents and contributors. User-facing overview: [README.m
- **`preserve_crawl_history`** (default true): append crawls; `false` truncates crawl tables but restores `report_payload`, Lighthouse, `google_data`, `keyword_data`, `keyword_history`, `keyword_suggest_cache`, and `crawl_runs`
- **`DATABASE_URL`** env: PostgreSQL connection string (required). **`DATA_DIR`**: secrets + shadow config (Docker: `/data`).
- **Pipeline storage** (crawl, edges, nodes, report payload, Lighthouse, keywords, warnings) lives in **PostgreSQL only**. Deliverables use the Export view, `GET /api/report/export`, or MCP `export_*` tools — not files written by the main pipeline step.
-- **Pool tuning:** `DB_POOL_MIN` / `DB_POOL_MAX` (Python), `PGPOOL_MAX` (Node). Bulk crawl writes via `executemany`; optional **`crawl_stream_to_db`** streams rows during fetch. Per-URL raw HTML: `crawl_page_html` table (migration `015`); API `GET/POST /api/crawl/page-html` (localhost).
-- **`web/` APIs:** `/api/report/*` read routes (payload, meta, history — not localhost-guarded; protect with `AUTH_*` when exposed); `/api/run` spawns Python (localhost); `/api/jobs`, `/api/jobs/[id]`, `/api/jobs/[id]/cancel` (localhost); `/api/crawl/browser-status`, `/api/crawl/page-html` (localhost); `/api/pipeline-config` GET/PUT; `/api/llm-config` GET/PUT; `/api/chat` POST (SSE); `/api/chat/sessions` GET/POST; `/api/ollama/status` (localhost); `/api/properties/{id}/google/links/import` POST; `PipelineRunnerFab` saves pipeline + LLM state before each run. Full route list: `web/app/api/**/route.ts`.
+- **Pool tuning:** `DB_POOL_MIN` / `DB_POOL_MAX` (Python). Bulk crawl writes via `executemany`; optional **`crawl_stream_to_db`** streams rows during fetch. Per-URL raw HTML: `crawl_page_html` table (migration `015`); API `GET/POST /api/crawl/page-html`.
+- **Browser API (BFF):** All `/api/*` routes are served by `services/Bff/` (proxied to FastAPI / Data / FileService). Notable: `/api/report/*`, `/api/run`, `/api/jobs/*`, `/api/pipeline-config`, `/api/llm-config`, `/api/chat` (SSE), `/api/integrations/google/*` (OAuth callback on BFF origin). `PipelineRunnerFab` saves pipeline + LLM state before each run. OpenAPI: `web/openapi.json`; BFF client: `services/Bff/src/Bff.Application/Generated/`.
- **MCP:** `python -m website_profiling.mcp` (stdio) or `python -m website_profiling.mcp.http` (remote Streamable HTTP). Configure at **`/mcp`** in the web UI. See `docs/MCP.md`.
- **AI Chat UI:** `/chat` — property-scoped chat with saved sessions (`chat_sessions`, `chat_messages`; migration `012_chat_sessions`).
-- **Job store:** PostgreSQL `pipeline_jobs` when `DATABASE_URL` is set (`pipelineJobsDb.ts` — status, timestamps, truncated logs). In-memory map in `pipelineJobs.ts` holds live log tail and child process handles; stale rows reconciled via `PIPELINE_JOB_STALE_HOURS`.
+- **Job store:** PostgreSQL `pipeline_jobs` (FastAPI); live job status via `/api/jobs/*` through the BFF.
- **Schema head:** `015_crawl_page_html` (recent: `013` link_edges/discovery, `014` job log truncation, `015` per-URL HTML storage).
-- **Docker:** `Dockerfile` + `docker-compose.yml` (postgres + web + FileService); **`docker-compose.prod.yml`** (production + remote MCP on `:8000`); **`docker-compose.pull.yml`** for pre-built images (`WEB_IMAGE`); **`LIGHTHOUSE_CHROME_FLAGS`**
+- **Docker:** Root `Dockerfile` (Python backend); `web/Dockerfile` (Vite SPA + nginx); `docker-compose.yml` (postgres + fastapi + worker + data + bff + web + FileService); **`docker-compose.prod.yml`** (production + optional MCP on `:8000`); **`docker-compose.pull.yml`** for pre-built images (`BACKEND_IMAGE`, `WEB_IMAGE`); **`LIGHTHOUSE_CHROME_FLAGS`**
**Where to edit**
@@ -40,7 +41,7 @@ Developer reference for agents and contributors. User-facing overview: [README.m
|------|--------|
| Crawl | `crawl/crawler.py`, `crawl/fetchers/` |
| Report | `reporting/builder.py`, `reporting/categories.py` |
-| PDF / workbook export | `services/FileService/` (rendering); Next.js proxies in `web/src/server/proxyToFileService.ts` |
+| PDF / workbook export | `services/FileService/` (rendering); BFF routes `/api/report/export` and `/api/report/export-workbook` to FileService |
| DB schema | `alembic/versions/` |
| Local analysis | `analysis/local.py`, `requirements.txt` |
| AI insights (LLM) | `llm/enrich.py`, `llm/agent.py`, `llm_config.py`, `requirements.txt` |
@@ -49,7 +50,7 @@ Developer reference for agents and contributors. User-facing overview: [README.m
| Config / CLI | `config.py` (`load_config`, `load_config_from_db`), `cli.py`, `input.txt.example` |
| UI pipeline schema | `web/src/lib/pipelineConfigSchema.ts` |
| UI LLM schema | `web/src/lib/llmConfigSchema.ts` |
-| UI config I/O | `web/src/server/pipelineConfig.ts`, `web/src/server/llmConfig.ts` |
+| Browser API client | `web/src/lib/publicBase.ts` (`apiUrl`, `apiFetch`, `VITE_BFF_BASE_URL`) |
| D3 charts (custom / compare / overview) | `web/src/components/charts/d3/`, `web/src/lib/viz/` |
| Chart.js charts (standard bar/line/doughnut) | `web/src/utils/chartJsDefaults.ts`, `react-chartjs-2` in views under `web/src/views/`, `web/src/components/searchPerformance/`, `web/src/components/traffic/` |
@@ -86,7 +87,7 @@ The web UI uses **both** Chart.js and D3.js. Pick the library that fits each cha
- Keep chart-library types out of data-prep: use neutral shapes (`BarChartData`, `DualSeriesChartData` in `web/src/lib/viz/types.ts` and `web/src/lib/compareChartData.ts`); convert at the render layer via `web/src/lib/viz/adapters.ts` when needed.
- Migrate page-by-page when D3 is the better fit; do not remove `chart.js` from `package.json` until all consumers are migrated.
-**Company standards:** UI copy in `web/src/strings.json` (Site Audit, Properties, Run audit). Data provenance on `report_meta` in report payload. Docs: `docs/COMPANY_STANDARDS.md`, `docs/GLOSSARY.md`. Migration `003_company_standards` (properties, pipeline_jobs, audit_log). Durable jobs in `web/src/server/pipelineJobsDb.ts`. **Export:** PDF/workbook via FileService (`FILE_SERVICE_URL` on web/MCP; `REPORT_API_URL` on FileService); CSV/JSON via `GET /api/report/export` and `src/website_profiling/tools/export_audit.py`.
+**Company standards:** UI copy in `web/src/strings.json` (Site Audit, Properties, Run audit). Data provenance on `report_meta` in report payload. Docs: `docs/COMPANY_STANDARDS.md`, `docs/GLOSSARY.md`. Migration `003_company_standards` (properties, pipeline_jobs, audit_log). **Export:** PDF/workbook via FileService (`FILE_SERVICE_URL` on BFF/MCP; `REPORT_API_URL` on FileService); CSV/JSON via `GET /api/report/export` and `src/website_profiling/tools/export_audit.py`.
**Common footguns (check before finishing web or DB work)**
@@ -94,15 +95,16 @@ These recur when adding features. Verify explicitly — do not assume tests caug
1. **React context — `useReport` / `ReportProvider`**
- Report views call `useReport()`. That only works inside `ReportAppClient` → `ReportProvider`.
- - **Do:** Render report views via `ReportShell` (wraps `ReportAppClient` internally).
- - **Don't:** Import a view directly in `app/*/page.tsx` without `ReportShell`.
- - Standalone routes under `web/app/` (e.g. `log-analyzer`, `indexation`) are **not** auto-wrapped by `(reports)/layout`.
+ - **Do:** Render report views via `ReportShell` inside `ReportLayout` (`AppRoutes.tsx` → `/:slug`).
+ - **Don't:** Mount a report view outside `ReportAppClient` / `ReportProvider`.
+ - Standalone routes (`/pipeline`, `/chat`, `/write`, etc.) are defined in `web/src/AppRoutes.tsx`, not wrapped by `ReportLayout`.
```tsx
- // ✅
+ // ✅ ReportSlugPage in web/src/pages/ReportSlugPage.tsx
import ReportShell from '@/ReportShell';
- export default function Page() {
- return ;
+ export default function ReportSlugPage() {
+ const { slug } = useParams();
+ return <ReportShell slug={slug} />;
}
```
diff --git a/AGENTS.md b/AGENTS.md
index be63f52f..797c472b 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -4,14 +4,16 @@
This file is the canonical entry point for agents. For full detail see [AGENT.md](AGENT.md).
-**What it is:** Self-hosted SEO crawl and technical audit platform — `python -m src` from repo root. Stack: Python (crawl + analysis + MCP), Next.js (web UI), PostgreSQL.
+**What it is:** Self-hosted SEO crawl and technical audit platform — `python -m src` from repo root. Stack: Python (crawl + analysis + MCP + FastAPI), Vite + React SPA (web UI), .NET BFF (browser API), .NET Data (report reads), PostgreSQL.
**Key paths**
- `src/website_profiling/` — core Python package
- - `cli.py`, `config.py`, `crawl/`, `db/`, `reporting/`, `analysis/`, `llm/`, `tools/`
-- `web/` — Next.js frontend
-- `services/FileService/` — .NET PDF + Excel workbook export (port 8080). HTTP-only via `REPORT_API_URL`; no Postgres. Profiles: `executive|standard|full|premium`. Details: [services/FileService/README.md](services/FileService/README.md). Env: `FILE_SERVICE_URL` (Next.js/MCP), `REPORT_API_URL` (FileService).
+ - `cli.py`, `config.py`, `api/`, `worker/`, `crawl/`, `db/`, `reporting/`, `analysis/`, `llm/`, `tools/`
+- `web/` — Vite + React SPA (static nginx in prod); browser calls `services/Bff/` for all `/api/*`
+- `services/Bff/` — .NET BFF (auth, CORS, proxy to FastAPI + Data + FileService)
+- `services/Data/` — .NET read service (report payloads, portfolio, issue status, filters; port 8091)
+- `services/FileService/` — .NET PDF + Excel workbook export (port 8080). HTTP-only via `REPORT_API_URL`; no Postgres. Profiles: `executive|standard|full|premium`. Details: [services/FileService/README.md](services/FileService/README.md). Env: `FILE_SERVICE_URL` (BFF/MCP), `REPORT_API_URL` (FileService).
- `alembic/` — DB migrations
- `docs/` — documentation index
- `tests/` — pytest suite
@@ -19,8 +21,8 @@ This file is the canonical entry point for agents. For full detail see [AGENT.md
**Run / dev**
```bash
-./local-run # Start Postgres + FileService + Next.js
-./local-test # Run all three coverage gates
+./local-run # Start Postgres + FileService + Data + worker + FastAPI + BFF + Vite dev server
+./local-test # Python + web + .NET tests (CI parity)
python -m src # Run audit pipeline
python -m website_profiling.mcp # Start MCP server (stdio)
```
@@ -35,7 +37,7 @@ python -m website_profiling.mcp # Start MCP server (stdio)
| Report | `src/website_profiling/reporting/` |
| GEO / AEO / Agent readiness | `src/website_profiling/tools/audit_tools/geo/geo_tools.py`, `geo/agent_readiness.py` |
| DB schema | `alembic/versions/` |
-| UI | `web/src/views/`, `web/app/` |
+| UI | `web/src/views/`, `web/src/pages/`, `web/src/AppRoutes.tsx` |
| Charts | D3: `web/src/components/charts/d3/`, `web/src/lib/viz/` · Chart.js: GSC/GA4/Links etc. — see [AGENT.md](AGENT.md) § Charts |
**Charts:** Use **both** Chart.js and D3 — choose per chart (Overview/Compare → D3; standard GSC/GA4 bars → Chart.js). Full rules in [AGENT.md](AGENT.md).
diff --git a/Dockerfile b/Dockerfile
index 94d185bd..ca600ca7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,6 @@
# syntax=docker/dockerfile:1
-# WebsiteProfiling: Next.js UI + FastAPI (port 8001) + Python worker + pipeline.
+# WebsiteProfiling: FastAPI (port 8001) + Python worker + pipeline.
+# Web UI is a separate image: web/Dockerfile (Vite SPA + nginx).
# Build from repository root: docker build -t website-profiling .
# BuildKit cache mounts (default in Docker Desktop) reuse pip/npm downloads across rebuilds.
@@ -32,7 +33,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
- NEXT_TELEMETRY_DISABLED=1 \
WEBSITE_PROFILING_ROOT=/app \
DATA_DIR=/data \
PYTHON=/opt/venv/bin/python \
@@ -57,27 +57,19 @@ RUN --mount=type=cache,target=/root/.npm \
WORKDIR /app
-# Next.js install + build (layer cache)
-COPY web/package.json web/package-lock.json /app/web/
-RUN --mount=type=cache,target=/root/.npm \
- cd /app/web && npm ci
-
# Application source
COPY pytest.ini /app/pytest.ini
COPY src /app/src
COPY tests /app/tests
-COPY web /app/web
COPY alembic /app/alembic
COPY alembic.ini /app/alembic.ini
COPY docker-entrypoint.sh /app/docker-entrypoint.sh
-RUN cd /app/web && npm run build && npm prune --omit=dev
-
ENV NODE_ENV=production
# Persisted data directory (secrets + shadow config)
RUN mkdir -p /data && chmod +x /app/docker-entrypoint.sh
-EXPOSE 3000
+EXPOSE 8001
CMD ["/app/docker-entrypoint.sh"]
diff --git a/README.md b/README.md
index 45b47f6f..070bfd7f 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,8 @@
-
+
+
@@ -40,7 +41,7 @@
# Site Audit
-**Developer-friendly SEO audit platform** — open-source crawl and technical audit tooling built with **Next.js, Python, and PostgreSQL**.
+**Developer-friendly SEO audit platform** — open-source crawl and technical audit tooling built with **React, Python, PostgreSQL, and .NET**. The stack is split into focused services: a **Python FastAPI** backend (crawl, pipeline, chat, integrations), a **.NET BFF** as the browser-facing API gateway, a **.NET Data** read service (report payloads, portfolio, issue status), and a **.NET FileService** for PDF/Excel export.
## Overview
@@ -139,12 +140,23 @@ Also included: **AI chat** over audit data (optional), **Content studio** (write
## Architecture
+```text
+Browser → web (:3000) → bff (:8090) → fastapi (:8001) crawl, pipeline, chat, integrations
+ │ data (:8091) report reads, portfolio, issue status, filters
+ │ files (:8080) PDF + Excel export
+ worker background pipeline jobs (same Python image)
+ postgres audit data store
+```
+
```
WebsiteProfiling/
├── src/website_profiling/ # Python audit engine (CLI: python -m src)
+│ ├── api/ # FastAPI app (uvicorn :8001)
+│ ├── worker/ # Background pipeline job runner
│ ├── crawl/ # Crawler, fetchers, JS rendering
│ ├── reporting/ # Report builder, issue categories
│ ├── analysis/ # On-page / local analysis
+│ ├── content_studio/ # Content writing + live SEO scoring
│ ├── lighthouse/ # Lighthouse runner
│ ├── integrations/ # Google Search Console, GA4, Bing, CrUX
│ ├── llm/ # AI enrich + chat agent
@@ -154,23 +166,26 @@ WebsiteProfiling/
│ ├── commands/ # CLI subcommands
│ ├── cli.py # Pipeline entrypoint
│ └── config.py # Config load (DB + shadow file)
-├── web/ # Next.js UI
-│ ├── app/ # App Router pages + /api routes
+├── web/ # Vite + React SPA (nginx in prod)
+│ ├── src/AppRoutes.tsx # React Router routes
│ ├── src/components/ # React UI components
│ ├── src/views/ # Report views (overview, links, issues, …)
-│ ├── src/server/ # Server-side DB, pipeline jobs, config I/O
+│ ├── src/lib/ # Client helpers, BFF apiUrl/apiFetch
│ └── public/ # Static assets (logo, favicon)
+├── services/Bff/ # .NET BFF — auth + /api/* proxy (port 8090)
+├── services/Data/ # .NET read service — report/portfolio/issue reads (port 8091)
├── services/FileService/ # .NET PDF + Excel workbook export (port 8080)
├── alembic/versions/ # PostgreSQL schema migrations
├── tests/ # pytest suite + fixtures
├── docs/ # Glossary, MCP, ops, brand assets
-├── scripts/ # local-run.sh, local-test.sh helpers
-├── .github/workflows/ # CI (Python + web + browser crawl)
-├── docker-compose.yml # Dev stack (Postgres + web + FileService)
-├── docker-compose.prod.yml # Production stack (requires AUTH_SECRET)
-├── docker-compose.pull.yml # Pre-built WEB_IMAGE
-├── Dockerfile # Production image
+├── scripts/ # local-run.sh, local-test.sh, local-prod.sh
+├── .github/workflows/ # CI (Python, web, .NET, Docker)
+├── docker-compose.yml # Full dev stack (see Getting started)
+├── docker-compose.prod.yml # Production stack (requires AUTH_SECRET; optional MCP profile)
+├── docker-compose.pull.yml # Pre-built BACKEND_IMAGE + WEB_IMAGE smoke layout
+├── Dockerfile # Python backend image (fastapi + worker roles)
├── local-run # Dev setup & start script
+├── local-prod # Production build + preview (no hot reload)
├── local-test # Full test suite (CI parity)
├── requirements.txt # Python dependencies
└── pipeline-config.example.txt
@@ -180,8 +195,11 @@ WebsiteProfiling/
| Path | Purpose |
| ------------------------------------- | ------------------------------------------------------------------------------ |
| `src/website_profiling/` | Crawl, analyze, report, Lighthouse, integrations, AI — run via `python -m src` |
+| `src/website_profiling/api/` | FastAPI HTTP layer — pipeline, chat, integrations, crawl control |
+| `services/Bff/` | Browser API gateway — auth, CORS, routes `/api/*` to FastAPI, Data, FileService |
+| `services/Data/` | .NET read/mutation service for report payloads, portfolio, issue status, saved filters |
| `services/FileService/` | PDF and Excel workbook export — see [services/FileService/README.md](services/FileService/README.md) |
-| `web/app/api/` | REST APIs: report data, pipeline runs, chat (SSE), Google/Bing sync |
+| `web/src/lib/publicBase.ts` | BFF base URL (`VITE_BFF_BASE_URL`) and `apiFetch` / `apiUrl` |
| `web/src/lib/pipelineConfigSchema.ts` | Audit settings schema (UI ↔ PostgreSQL) |
| `alembic/versions/` | Database migrations — run `./local-run migrate` |
| `tests/` | Backend tests; `./local-test browser` for Playwright crawl integration |
@@ -193,28 +211,42 @@ For layout details and common development patterns, see [AGENT.md](AGENT.md).
## Getting started
+### Prerequisites
+
+| Tool | Used for |
+| ---- | -------- |
+| **Docker** | Postgres container (local dev) and full-stack compose |
+| **Python 3.12+** | Audit engine, FastAPI, pipeline worker, tests |
+| **Node 20+** | Vite + React SPA |
+| **.NET SDK 10+** | BFF, Data, and FileService (required for `./local-run`; optional if you only use Docker) |
+
### Docker
-Build and run from source:
+Build and run the full dev stack from source:
```bash
docker compose up --build
```
-Open [http://localhost:3000/home](http://localhost:3000/home). PDF and workbook exports require the **FileService** container (`files`, port 8080).
+Services: **postgres**, **fastapi** (`:8001`, internal), **worker**, **data** (`:8091`, internal), **bff** (`:8090`), **web** (`:3000`), **files** (`:8080`, internal).
+
+Open [http://localhost:3000/home](http://localhost:3000/home). The browser talks only to the **BFF** (`:8090`); the BFF proxies to FastAPI, the Data service (report reads and portfolio routes), and FileService (PDF/workbook export).
-Production deployment: `docker-compose.prod.yml` — set `POSTGRES_USER`, `POSTGRES_PASSWORD`, and `AUTH_SECRET`. Pre-built images: `docker-compose.pull.yml` (`WEB_IMAGE`).
+Production deployment: `docker-compose.prod.yml` — set `POSTGRES_USER`, `POSTGRES_PASSWORD`, `AUTH_SECRET`, `BFF_ALLOWED_ORIGINS`, and `BFF_PUBLIC_URL`. Optional remote MCP: `docker compose -f docker-compose.prod.yml --profile mcp up`. Pre-built images: `docker-compose.pull.yml` (`BACKEND_IMAGE`, `WEB_IMAGE`).
### Local development
```bash
-./local-run setup # First time: Postgres, Python venv, migrations, npm deps
-./local-run # Start DB + FileService + Next.js → http://localhost:3000/home
+./local-run setup # First time: Postgres, Python venv, Playwright/Chromium, migrations, npm deps
+./local-run # Start full dev stack → http://localhost:3000/home
./local-run db # Postgres only (no app)
./local-run migrate # Apply Alembic migrations only
./local-run stop # Stop Postgres container
+./local-prod # Same DB, Vite production build + preview (no hot reload)
```
+`./local-run` starts (in order): **FileService** `:8080`, **Data** `:8091`, **pipeline worker**, **FastAPI** `:8001`, **BFF** `:8090`, and **Vite** `:3000`. Use `localhost` (not `127.0.0.1`) for pipeline APIs so CORS and cookies match the BFF origin.
+
Default local `DATABASE_URL`: `postgres://postgres:dev@127.0.0.1:5432/website_profiling` (Docker Compose dev stack uses `profiling:profiling`).
`requirements.txt` pins direct Python dependencies to versions verified by `./local-test python`. Re-run the full test suite after intentional upgrades.
@@ -233,15 +265,16 @@ Increase `PIPELINE_JOB_STALE_HOURS` for crawls that routinely exceed one hour.
### Testing
```bash
-./local-test # Python + web (matches CI python and web jobs)
+./local-test # Full CI parity: Python + web + .NET (Data, Bff, FileService)
./local-test python # Backend: three 100% coverage gates + browser pytest + CLI smoke
./local-test browser # JS crawl integration tests (skips if Chromium unavailable)
-./local-test web # Frontend: typecheck, lint, vitest
+./local-test web # Frontend: build, typecheck, lint, vitest
+./local-test dotnet # dotnet test Data + Bff + FileService + BFF OpenAPI drift gate
./local-test quick # Fast loop; requires DB already running (no coverage gate)
./local-test all --no-cov # Full run without pytest coverage gate
```
-CI also runs a **Docker** job (image build, browser pytest in container, compose smoke). See [.github/workflows/ci.yml](.github/workflows/ci.yml).
+CI runs separate jobs for **Python** (coverage gates), **web**, **Data**, **Bff**, **FileService**, and **Docker** (image build, browser pytest in container, compose smoke). See [.github/workflows/ci.yml](.github/workflows/ci.yml).
## Configuration
@@ -278,7 +311,7 @@ Ask questions about audit data at [http://localhost:3000/chat](http://localhost:
| **Groq** | API key in AI settings or `GROQ_API_KEY`; official Groq Python SDK; native tool calling with streaming. Default model `openai/gpt-oss-120b`. |
-The agent uses the same **342 read-only audit tools** as the MCP server ([docs/MCP.md](docs/MCP.md)), with **dynamic routing** (~45 tools per turn). Responses stream over SSE (`POST /api/chat`). Sessions persist per property (`chat_sessions` / `chat_messages`).
+The agent uses the same **340 read-only audit tools** as the MCP server ([docs/MCP.md](docs/MCP.md)), with **dynamic routing** (~45 tools per turn). Responses stream over SSE (`POST /api/chat`). Sessions persist per property (`chat_sessions` / `chat_messages`).
**Read-only SQL chat tool (opt-in):** Set `CHAT_SQL_TOOL_ENABLED=true` to expose `get_sql_schema` and `run_sql_query` to the LLM. The agent can then answer arbitrary data questions by generating and executing a single read-only SELECT. Queries are validated by a four-layer guard (regex pre-filter → `sqlglot` AST + table allowlist → `BEGIN TRANSACTION READ ONLY` → optional least-privilege DB role); DELETE/UPDATE/INSERT/DDL and non-allowlisted tables are always blocked. In multi-property deployments, scope-binding CTEs are automatically injected to enforce tenant isolation. See [docs/OPS.md](docs/OPS.md#read-only-sql-chat-tool) for setup including the recommended `audit_readonly` Postgres role and optional RLS configuration.
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index e1ccf357..a7e0ae71 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -1,4 +1,5 @@
-# Production-style layout: postgres + web + worker (same image, different command).
+# Production-style layout: postgres + fastapi + worker + bff + web + files (+ optional mcp).
+# The browser talks only to `bff`; FastAPI and FileService are network-internal.
services:
postgres:
image: postgres:16-alpine
@@ -14,26 +15,19 @@ services:
timeout: 3s
retries: 5
- web:
+ fastapi:
build:
context: .
dockerfile: Dockerfile
+ image: website-profiling:latest
depends_on:
postgres:
condition: service_healthy
- ports:
- - '${WEB_PORT:-3000}:3000'
- - '${FASTAPI_PORT:-8001}:8001'
environment:
+ WP_ROLE: fastapi
WEBSITE_PROFILING_ROOT: /app
DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-website_profiling}
DATA_DIR: /data
- AUTH_SECRET: ${AUTH_SECRET:?set AUTH_SECRET}
- AUTH_PASSWORD: ${AUTH_PASSWORD:-}
- NODE_ENV: production
- FASTAPI_URL: http://127.0.0.1:8001
- FASTAPI_ALLOWED_ORIGINS: ${FASTAPI_ALLOWED_ORIGINS:-http://localhost:3000}
- FILE_SERVICE_URL: http://files:8080
PYTHON: /opt/venv/bin/python
CHROME_PATH: /usr/bin/chromium
LIGHTHOUSE_PATH: /usr/local/bin/lighthouse
@@ -41,50 +35,82 @@ services:
volumes:
- profiling-data:/data
healthcheck:
- test: ['CMD', 'node', '-e', "require('http').get('http://127.0.0.1:3000/api/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
+ test: ['CMD', 'node', '-e', "require('http').get('http://127.0.0.1:8001/api/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
interval: 30s
timeout: 5s
retries: 3
start_period: 30s
worker:
- build:
- context: .
- dockerfile: Dockerfile
+ image: website-profiling:latest
depends_on:
+ fastapi:
+ condition: service_started
postgres:
condition: service_healthy
- command: ['/opt/venv/bin/python', '-m', 'website_profiling.worker']
environment:
+ WP_ROLE: worker
WEBSITE_PROFILING_ROOT: /app
DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-website_profiling}
DATA_DIR: /data
volumes:
- profiling-data:/data
- profiles:
- - worker
files:
build:
context: ./services/FileService
+ environment:
+ REPORT_API_URL: http://fastapi:8001
+ depends_on:
+ fastapi:
+ condition: service_started
+
+ bff:
+ build:
+ context: ./services/Bff
+ depends_on:
+ fastapi:
+ condition: service_started
+ files:
+ condition: service_started
ports:
- - '${FILE_SERVICE_PORT:-8080}:8080'
+ - '${BFF_PORT:-8090}:8090'
environment:
- REPORT_API_URL: http://web:8001
+ FASTAPI_URL: http://fastapi:8001
+ FILE_SERVICE_URL: http://files:8080
+ AUTH_SECRET: ${AUTH_SECRET:?set AUTH_SECRET}
+ AUTH_PASSWORD: ${AUTH_PASSWORD:-}
+ BFF_ALLOWED_ORIGINS: ${BFF_ALLOWED_ORIGINS:?set BFF_ALLOWED_ORIGINS (the public UI origin)}
+ # Cross-site cookie (frontend + BFF on a shared parent domain over HTTPS):
+ BFF_COOKIE_SAMESITE: ${BFF_COOKIE_SAMESITE:-None}
+ BFF_COOKIE_SECURE: ${BFF_COOKIE_SECURE:-true}
+ BFF_COOKIE_DOMAIN: ${BFF_COOKIE_DOMAIN:-}
+
+ web:
+ build:
+ context: ./web
+ args:
+ VITE_BFF_BASE_URL: ${BFF_PUBLIC_URL:?set BFF_PUBLIC_URL (browser-facing BFF origin)}
depends_on:
- web:
+ bff:
condition: service_started
+ ports:
+ - '${WEB_PORT:-3000}:80'
+ healthcheck:
+ test: ['CMD', 'wget', '-qO-', 'http://127.0.0.1/home']
+ interval: 30s
+ timeout: 5s
+ retries: 3
+ start_period: 30s
mcp:
- build:
- context: .
- dockerfile: Dockerfile
+ image: website-profiling:latest
depends_on:
postgres:
condition: service_healthy
files:
condition: service_started
- command: ['python', '-m', 'website_profiling.mcp.http']
+ command: ['/opt/venv/bin/python', '-m', 'website_profiling.mcp.http']
environment:
WEBSITE_PROFILING_ROOT: /app
DATABASE_URL: postgres://${POSTGRES_USER}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-website_profiling}
@@ -96,6 +122,8 @@ services:
WP_MCP_ALLOWED_ORIGINS: ${WP_MCP_ALLOWED_ORIGINS:-}
WP_MCP_DOMAIN: ${WP_MCP_DOMAIN:-core}
WP_PROPERTY_ID: ${WP_PROPERTY_ID:-}
+ profiles:
+ - mcp
ports:
- '${MCP_PORT:-8000}:8000'
diff --git a/docker-compose.pull.yml b/docker-compose.pull.yml
index 67d072ca..40563899 100644
--- a/docker-compose.pull.yml
+++ b/docker-compose.pull.yml
@@ -1,7 +1,9 @@
-# Run a pre-built/pulled image with Postgres (no local docker build).
+# Run pre-built backend image + Vite web image with Postgres (no monolith UI).
# Usage:
-# export WEB_IMAGE=your-registry/website-profiling:tag
-# docker compose -f docker-compose.pull.yml up -d
+# docker build -t website-profiling:ci .
+# docker build -t website-profiling-web:ci ./web --build-arg VITE_BFF_BASE_URL=http://localhost:8090
+# BACKEND_IMAGE=website-profiling:ci WEB_IMAGE=website-profiling-web:ci \
+# docker compose -f docker-compose.pull.yml up -d
services:
postgres:
image: postgres:16-alpine
@@ -17,28 +19,38 @@ services:
timeout: 3s
retries: 5
- web:
- image: ${WEB_IMAGE:-website-profiling:latest}
+ fastapi:
+ image: ${BACKEND_IMAGE:-website-profiling:latest}
depends_on:
postgres:
condition: service_healthy
- ports:
- - "3000:3000"
environment:
+ WP_ROLE: fastapi
WEBSITE_PROFILING_ROOT: /app
DATABASE_URL: postgres://profiling:profiling@postgres:5432/website_profiling
DATA_DIR: /data
PYTHON: /opt/venv/bin/python
- NODE_ENV: production
CHROME_PATH: /usr/bin/chromium
LIGHTHOUSE_PATH: /usr/local/bin/lighthouse
LIGHTHOUSE_CHROME_FLAGS: --headless --no-sandbox --disable-dev-shm-usage --disable-gpu
- FASTAPI_URL: http://127.0.0.1:8001
- FILE_SERVICE_URL: http://files:8080
volumes:
- profiling-data:/data
healthcheck:
- test: ["CMD", "node", "-e", "require('http').get('http://127.0.0.1:3000/home', (r) => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
+ test: ["CMD", "node", "-e", "require('http').get('http://127.0.0.1:8001/api/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
+ interval: 30s
+ timeout: 5s
+ retries: 3
+ start_period: 30s
+
+ web:
+ image: ${WEB_IMAGE:-website-profiling-web:latest}
+ depends_on:
+ fastapi:
+ condition: service_healthy
+ ports:
+ - "3000:80"
+ healthcheck:
+ test: ["CMD", "wget", "-qO-", "http://127.0.0.1/home"]
interval: 30s
timeout: 5s
retries: 3
@@ -47,12 +59,10 @@ services:
files:
build:
context: ./services/FileService
- ports:
- - "8080:8080"
environment:
- REPORT_API_URL: http://web:8001
+ REPORT_API_URL: http://fastapi:8001
depends_on:
- web:
+ fastapi:
condition: service_started
volumes:
diff --git a/docker-compose.yml b/docker-compose.yml
index 02427c6c..07d6bba5 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -13,7 +13,9 @@ services:
timeout: 3s
retries: 5
- web:
+ # FastAPI backend — now its own service (was bundled into `web`). Network-internal:
+ # only the BFF talks to it, so no host port is published.
+ fastapi:
build:
context: .
dockerfile: Dockerfile
@@ -21,61 +23,112 @@ services:
depends_on:
postgres:
condition: service_healthy
- ports:
- - "3000:3000"
- - "8001:8001"
environment:
+ WP_ROLE: fastapi
+ WEBSITE_PROFILING_ROOT: /app
+ DATABASE_URL: postgres://profiling:profiling@postgres:5432/website_profiling
+ DATA_DIR: /data
+ PYTHON: /opt/venv/bin/python
+ CHROME_PATH: /usr/bin/chromium
+ LIGHTHOUSE_PATH: /usr/local/bin/lighthouse
+ LIGHTHOUSE_CHROME_FLAGS: --headless --no-sandbox --disable-dev-shm-usage --disable-gpu
+ FASTAPI_ALLOWED_ORIGINS: "http://localhost:8090"
+ volumes:
+ - profiling-data:/data
+ healthcheck:
+ test: ["CMD", "node", "-e", "require('http').get('http://127.0.0.1:8001/api/health', (r) => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
+ interval: 30s
+ timeout: 5s
+ retries: 3
+ start_period: 20s
+
+ worker:
+ image: website-profiling:latest
+ depends_on:
+ fastapi:
+ condition: service_started
+ postgres:
+ condition: service_healthy
+ environment:
+ WP_ROLE: worker
WEBSITE_PROFILING_ROOT: /app
DATABASE_URL: postgres://profiling:profiling@postgres:5432/website_profiling
DATA_DIR: /data
PYTHON: /opt/venv/bin/python
- NODE_ENV: production
CHROME_PATH: /usr/bin/chromium
LIGHTHOUSE_PATH: /usr/local/bin/lighthouse
LIGHTHOUSE_CHROME_FLAGS: --headless --no-sandbox --disable-dev-shm-usage --disable-gpu
- FASTAPI_URL: http://127.0.0.1:8001
- FASTAPI_ALLOWED_ORIGINS: "http://localhost:3000"
- FILE_SERVICE_URL: http://files:8080
volumes:
- profiling-data:/data
+
+ # .NET read microservice — reads Postgres directly, incrementally replacing FastAPI reads.
+ # Internal: only the BFF reaches it (no published host port).
+ data:
+ build:
+ context: ./services/Data
+ environment:
+ DATABASE_URL: postgres://profiling:profiling@postgres:5432/website_profiling
+ ASPNETCORE_URLS: http://+:8091
+ depends_on:
+ postgres:
+ condition: service_healthy
healthcheck:
- test: ["CMD", "node", "-e", "require('http').get('http://127.0.0.1:3000/home', (r) => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))"]
+ # NOTE(review): this probe needs curl inside the image built from ./services/Data —
+ # confirm the Dockerfile installs it, otherwise the service will never report healthy.
+ test: ["CMD", "curl", "-fsS", "http://127.0.0.1:8091/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 15s
- files:
+ # .NET BFF — the single browser-facing API surface (owns auth + CORS).
+ bff:
build:
- context: ./services/FileService
+ context: ./services/Bff
ports:
- - "8080:8080"
+ - "8090:8090"
environment:
- REPORT_API_URL: http://web:8001
+ FASTAPI_URL: http://fastapi:8001
+ FILE_SERVICE_URL: http://files:8080
+ BFF_ALLOWED_ORIGINS: "http://localhost:3000"
+ DATA_SERVICE_URL: http://data:8091
+ # Comma-separated /api path prefixes served by the Data service (reads + issue/portfolio mutations).
+ # Rollback: remove a prefix here and restart bff.
+ DATA_ROUTES: "/api/report/meta,/api/report/payload,/api/report/history,/api/report/crawl-payload,/api/report/mobile-delta,/api/report/portfolio,/api/portfolio,/api/issues/status,/api/filters"
+ # AUTH_SECRET: set to enable wp_session auth (must match the web service)
+ depends_on:
+ fastapi:
+ condition: service_started
+ files:
+ condition: service_started
+ data:
+ condition: service_started
+
+ # Vite SPA (nginx). The browser calls the BFF (:8090) for all /api/*.
+ web:
+ build:
+ context: ./web
+ args:
+ VITE_BFF_BASE_URL: http://localhost:8090
depends_on:
- web:
+ bff:
condition: service_started
+ ports:
+ - "3000:80"
+ healthcheck:
+ test: ["CMD", "wget", "-qO-", "http://127.0.0.1/home"]
+ interval: 30s
+ timeout: 5s
+ retries: 3
+ start_period: 15s
- # Optional remote MCP (Streamable HTTP). Uncomment and set WP_MCP_TOKEN / WP_MCP_ALLOWED_HOSTS.
- # mcp:
- # build:
- # context: .
- # dockerfile: Dockerfile
- # image: website-profiling:latest
- # depends_on:
- # postgres:
- # condition: service_healthy
- # command: ['python', '-m', 'website_profiling.mcp.http']
- # environment:
- # WEBSITE_PROFILING_ROOT: /app
- # DATABASE_URL: postgres://profiling:profiling@postgres:5432/website_profiling
- # WP_MCP_HTTP_HOST: 0.0.0.0
- # WP_MCP_HTTP_PORT: 8000
- # WP_MCP_TOKEN: ${WP_MCP_TOKEN:-dev-mcp-token}
- # WP_MCP_ALLOWED_HOSTS: localhost,127.0.0.1
- # WP_MCP_DOMAIN: core
- # ports:
- # - "8000:8000"
+ # File export service (PDF/Excel). Internal: only the BFF reaches it.
+ files:
+ build:
+ context: ./services/FileService
+ environment:
+ REPORT_API_URL: http://fastapi:8001
+ depends_on:
+ fastapi:
+ condition: service_started
volumes:
pg-data:
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
index d5ec568b..6b93de80 100644
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -2,13 +2,20 @@
set -e
cd /app
-if [ -z "${DATABASE_URL:-}" ] || [ -z "$(printf '%s' "$DATABASE_URL" | tr -d '[:space:]')" ]; then
- echo "ERROR: DATABASE_URL is required." >&2
- echo " Use docker compose (see README) or pass -e DATABASE_URL=postgres://user:pass@host:5432/db" >&2
- exit 1
-fi
+# Role dispatch. Default "all" runs worker + FastAPI in one container (legacy).
+# Split topology: WP_ROLE=fastapi | worker
+ROLE="${WP_ROLE:-all}"
+
+# Abort (exit 1, hint on stderr) unless DATABASE_URL contains non-whitespace text.
+require_database_url() {
+  # Stripping all whitespace first lets a single test cover unset, empty, and blank values.
+  if [ -n "$(printf '%s' "${DATABASE_URL:-}" | tr -d '[:space:]')" ]; then
+    return 0
+  fi
+  echo "ERROR: DATABASE_URL is required." >&2
+  echo " Use docker compose (see README) or pass -e DATABASE_URL=postgres://user:pass@host:5432/db" >&2
+  exit 1
+}
-/opt/venv/bin/python <<'PY'
+wait_for_db() {
+ /opt/venv/bin/python <<'PY'
import os
import sys
import time
@@ -61,50 +68,59 @@ print(
print(f" Last error: {last_error}", file=sys.stderr)
sys.exit(1)
PY
-
-/opt/venv/bin/alembic upgrade head
-
-WORKER_PID=""
-UVICORN_PID=""
-NPM_PID=""
-
-cleanup() {
- [ -n "$WORKER_PID" ] && kill "$WORKER_PID" 2>/dev/null || true
- [ -n "$UVICORN_PID" ] && kill "$UVICORN_PID" 2>/dev/null || true
- [ -n "$NPM_PID" ] && kill "$NPM_PID" 2>/dev/null || true
}
-trap cleanup TERM INT
-/opt/venv/bin/python -m website_profiling.worker &
-WORKER_PID=$!
+# Apply Alembic migrations up to the latest revision using the image's virtualenv.
+migrate() {
+ /opt/venv/bin/alembic upgrade head
+}
-/opt/venv/bin/uvicorn website_profiling.api.main:app \
- --host 0.0.0.0 --port 8001 --workers 1 &
-UVICORN_PID=$!
+# Serve the FastAPI app with uvicorn on 0.0.0.0:8001 using a single worker.
+# `exec` replaces this shell with uvicorn, so the function never returns.
+start_uvicorn_foreground() {
+ exec /opt/venv/bin/uvicorn website_profiling.api.main:app \
+ --host 0.0.0.0 --port 8001 --workers 1
+}
-# Wait for FastAPI to be ready before starting Next.js (max ~15s)
-i=0
-while [ "$i" -lt 30 ]; do
- if node -e "require('http').get('http://127.0.0.1:8001/api/health',r=>process.exit(r.statusCode===200?0:1)).on('error',()=>process.exit(1))" 2>/dev/null; then
- echo "FastAPI ready (attempt $((i + 1))/30)" >&2
- break
- fi
- sleep 0.5
- i=$((i + 1))
-done
-if [ "$i" -eq 30 ]; then
- echo "WARNING: FastAPI did not respond to /api/health after 15s — continuing anyway" >&2
-fi
-
-cd /app/web
-npm run start -- -H 0.0.0.0 -p 3000 &
-NPM_PID=$!
-
-# Monitor critical processes — exit the container if either npm or uvicorn dies.
-# A dead worker does not break the UI so it is intentionally excluded.
-while kill -0 "$NPM_PID" 2>/dev/null && kill -0 "$UVICORN_PID" 2>/dev/null; do
- sleep 5
-done
-echo "Critical process (npm or uvicorn) exited — shutting down container" >&2
-cleanup
-exit 1
+# Dispatch on ROLE (taken from WP_ROLE, default "all"). Every known role
+# requires DATABASE_URL and waits for the database before doing anything else.
+case "$ROLE" in
+ # API only. Migrations run in this role (and in "all"), not in "worker".
+ fastapi)
+ require_database_url
+ wait_for_db
+ migrate
+ start_uvicorn_foreground
+ ;;
+ # Worker only: exec replaces the shell with the worker process.
+ worker)
+ require_database_url
+ wait_for_db
+ exec /opt/venv/bin/python -m website_profiling.worker
+ ;;
+ # Legacy single-container mode: worker and uvicorn run as background children.
+ all)
+ require_database_url
+ wait_for_db
+ migrate
+
+ WORKER_PID=""
+ UVICORN_PID=""
+
+ # Best-effort kill of whichever children have been started so far.
+ cleanup() {
+ [ -n "$WORKER_PID" ] && kill "$WORKER_PID" 2>/dev/null || true
+ [ -n "$UVICORN_PID" ] && kill "$UVICORN_PID" 2>/dev/null || true
+ }
+ trap cleanup TERM INT
+
+ /opt/venv/bin/python -m website_profiling.worker &
+ WORKER_PID=$!
+
+ /opt/venv/bin/uvicorn website_profiling.api.main:app \
+ --host 0.0.0.0 --port 8001 --workers 1 &
+ UVICORN_PID=$!
+
+ # Only uvicorn is monitored; the loop does not watch the worker PID.
+ while kill -0 "$UVICORN_PID" 2>/dev/null; do
+ sleep 5
+ done
+ echo "FastAPI exited — shutting down container" >&2
+ cleanup
+ exit 1
+ ;;
+ # Any other value is a configuration error.
+ *)
+ echo "ERROR: unknown WP_ROLE '$ROLE' (expected: all | fastapi | worker)" >&2
+ exit 1
+ ;;
+esac
diff --git a/docs/README.md b/docs/README.md
index 132a6705..e8503538 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -16,6 +16,7 @@ This directory contains product, integration, and operations documentation for *
| [MCP.md](MCP.md) | Integrators | Model Context Protocol server configuration and tool reference |
| [OPS.md](OPS.md) | Operators | Scheduled audits, alerts, migrations, production notes |
| [services/FileService/README.md](../services/FileService/README.md) | Developers / operators | PDF and Excel workbook export service |
+| `services/Data/` | Developers | .NET read service — report payloads, portfolio, issue status, saved filters |
---
diff --git a/docs/assets/readme-banner.png b/docs/assets/readme-banner.png
index 6c77d150..cfaadcab 100644
Binary files a/docs/assets/readme-banner.png and b/docs/assets/readme-banner.png differ
diff --git a/docs/assets/seo-feedback-loop.png b/docs/assets/seo-feedback-loop.png
index f243a519..51efaaaa 100644
Binary files a/docs/assets/seo-feedback-loop.png and b/docs/assets/seo-feedback-loop.png differ
diff --git a/scripts/local-prod.sh b/scripts/local-prod.sh
index 37892ad0..eceb7ef3 100755
--- a/scripts/local-prod.sh
+++ b/scripts/local-prod.sh
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
-# Local prod: same Postgres as ./local-run, Next.js build + start (NODE_ENV=production).
+# Local prod: same Postgres as ./local-run, Vite build + preview (NODE_ENV=production).
# Usage: ./local-prod [command]
-# (default) start — DB, migrations, npm run build, npm run start
+# (default) start — DB, migrations, npm run build, npm run preview
# build — npm run build only
# help — show commands
set -euo pipefail
@@ -26,8 +26,73 @@ WEB="$ROOT/web"
LOCAL_RUN="$ROOT/scripts/local-run.sh"
log() { printf '\033[1;36m→\033[0m %s\n' "$*"; }
+warn() { printf '\033[1;33m!\033[0m %s\n' "$*" >&2; }
die() { printf '\033[1;31m✗\033[0m %s\n' "$*" >&2; exit 1; }
+# Signal a process and all of its descendants, children before parents.
+#   $1  PID at the root of the tree (empty = no-op)
+#   $2  signal name without the dash (default: TERM)
+kill_process_tree() {
+  local pid="$1"
+  local sig="${2:-TERM}"
+  local child
+  if [[ -z "$pid" ]]; then
+    return 0
+  fi
+  # Recurse into each direct child reported by pgrep before signalling $pid itself.
+  while read -r child; do
+    if [[ -n "$child" ]]; then
+      kill_process_tree "$child" "$sig"
+    fi
+  done < <(pgrep -P "$pid" 2>/dev/null || true)
+  kill "-$sig" "$pid" 2>/dev/null || true
+}
+
+# Poll every 0.5s until a PID no longer exists.
+#   $1  PID to watch (empty = succeed immediately)
+#   $2  timeout in seconds (default: 10)
+# Returns 0 once the process is gone, 1 if it is still alive when the polls run out.
+wait_for_pid() {
+  local pid="$1"
+  local timeout="${2:-10}"
+  local polls_left
+  if [[ -z "$pid" ]]; then
+    return 0
+  fi
+  # Two polls per second of timeout.
+  polls_left=$((timeout * 2))
+  while ((polls_left > 0)); do
+    if ! kill -0 "$pid" 2>/dev/null; then
+      return 0
+    fi
+    sleep 0.5
+    polls_left=$((polls_left - 1))
+  done
+  return 1
+}
+
+# Stop one background service: TERM its process tree, escalate to KILL after 10s.
+#   $1  display name used in log lines
+#   $2  PID of the service's top-level process (empty = nothing to do)
+stop_service() {
+  local name="$1"
+  local pid="$2"
+  if [[ -z "$pid" ]]; then
+    return 0
+  fi
+  # Process already gone: report it and return.
+  kill -0 "$pid" 2>/dev/null || {
+    wait "$pid" 2>/dev/null || true
+    log "$name already stopped."
+    return 0
+  }
+  log "Stopping $name (PID $pid)..."
+  kill_process_tree "$pid" TERM
+  # 10s grace period, then force-kill and allow 2s more for the process to disappear.
+  wait_for_pid "$pid" 10 || {
+    warn "$name did not exit in time — sending SIGKILL"
+    kill_process_tree "$pid" KILL
+    wait_for_pid "$pid" 2 || true
+  }
+  wait "$pid" 2>/dev/null || true
+  log "$name stopped."
+}
+
+# Remove a background PID from the shell's job table; errors are ignored.
+#   $1  PID to disown (empty = no-op)
+disown_bg() {
+  local pid="$1"
+  if [[ -n "$pid" ]]; then
+    disown "$pid" 2>/dev/null || true
+  fi
+}
+
+# Best-effort stop of the local Postgres container named by $PG_CONTAINER.
+# Does nothing when docker is missing, the daemon is unreachable, or the
+# container is not running.
+stop_postgres() {
+  # Without the docker CLI there is nothing to stop; return quietly.
+  if ! command -v docker >/dev/null 2>&1; then
+    return 0
+  fi
+  if ! docker info >/dev/null 2>&1; then
+    warn "Docker unavailable — skipping Postgres stop"
+    return 0
+  fi
+  # -F compares the name literally; container names may contain '.', a regex metacharacter.
+  if docker ps --format '{{.Names}}' 2>/dev/null | grep -Fqx "$PG_CONTAINER"; then
+    log "Stopping $PG_CONTAINER"
+    # Report success only when `docker stop` actually succeeded.
+    if docker stop "$PG_CONTAINER" >/dev/null 2>&1; then
+      log "Postgres stopped."
+    else
+      warn "Could not stop $PG_CONTAINER"
+    fi
+  fi
+}
+
need_cmd() {
command -v "$1" >/dev/null 2>&1 || die "Missing required command: $1"
}
@@ -42,7 +107,7 @@ cmd_web_deps() {
cmd_build() {
cmd_web_deps
- log "Building Next.js (production)"
+ log "Building Vite SPA (production)"
(cd "$WEB" && npm run build)
}
@@ -63,7 +128,7 @@ cmd_start() {
cmd_web_deps
log "Skipping build (--skip-build)"
fi
- log "Starting Next.js production server (Ctrl+C to stop)"
+ log "Starting Vite preview server (Ctrl+C stops all services including Postgres)"
log "DATABASE_URL=$DATABASE_URL"
log "DATA_DIR=$DATA_DIR"
log "PYTHON=$PYTHON"
@@ -74,37 +139,58 @@ cmd_start() {
WORKER_PID=""
UVICORN_PID=""
NPM_PID=""
+ _CLEANUP_DONE=0
+ set +m
cleanup_prod() {
- [ -n "$WORKER_PID" ] && kill "$WORKER_PID" 2>/dev/null || true
- [ -n "$UVICORN_PID" ] && kill "$UVICORN_PID" 2>/dev/null || true
- [ -n "$NPM_PID" ] && kill "$NPM_PID" 2>/dev/null || true
+ if [[ "$_CLEANUP_DONE" -eq 1 ]]; then
+ return 0
+ fi
+ _CLEANUP_DONE=1
+ trap - INT TERM EXIT
+ set +e
+
+ log "Shutting down local prod stack..."
+ stop_service "Vite preview" "$NPM_PID"
+ NPM_PID=""
+ stop_service "FastAPI" "$UVICORN_PID"
+ UVICORN_PID=""
+ stop_service "pipeline worker" "$WORKER_PID"
+ WORKER_PID=""
+ stop_postgres
+ log "All services stopped."
+ exit 0
}
- trap cleanup_prod INT TERM EXIT
+ trap cleanup_prod EXIT INT TERM
log "Starting pipeline worker"
"$ROOT/.venv/bin/python" -m website_profiling.worker &
WORKER_PID=$!
+ disown_bg "$WORKER_PID"
log "Starting FastAPI on port 8001"
export FASTAPI_URL="http://127.0.0.1:8001"
"$ROOT/.venv/bin/uvicorn" website_profiling.api.main:app \
--host 0.0.0.0 --port 8001 --workers 1 &
UVICORN_PID=$!
+ disown_bg "$UVICORN_PID"
cd "$WEB"
- npm run start -- -H 0.0.0.0 -p 3000 &
+ npm run preview -- --host 0.0.0.0 --port 3000 &
NPM_PID=$!
- wait $NPM_PID
+  disown_bg "$NPM_PID"
+  set +e
+  # A disowned PID is no longer in the shell's job table, so `wait "$NPM_PID"`
+  # would return immediately and the `exit 0` below would fire the EXIT trap,
+  # stopping every service right after startup. Poll for liveness instead so
+  # this shell stays alive until the preview server exits.
+  while kill -0 "$NPM_PID" 2>/dev/null; do
+    sleep 0.5
+  done
+  exit 0
}
cmd_help() {
cat </dev/null || true); do
+ kill_process_tree "$child" "$sig"
+ done
+ kill "-$sig" "$pid" 2>/dev/null || true
+}
+
+# Poll every 0.5s until a PID no longer exists.
+#   $1  PID to watch (empty = succeed immediately)
+#   $2  timeout in seconds (default: 10)
+# Returns 0 once the process is gone, 1 if it is still alive when the polls run out.
+wait_for_pid() {
+  local pid="$1"
+  local timeout="${2:-10}"
+  local polls_left
+  if [[ -z "$pid" ]]; then
+    return 0
+  fi
+  # Two polls per second of timeout.
+  polls_left=$((timeout * 2))
+  while ((polls_left > 0)); do
+    if ! kill -0 "$pid" 2>/dev/null; then
+      return 0
+    fi
+    sleep 0.5
+    polls_left=$((polls_left - 1))
+  done
+  return 1
+}
+
+# Stop one background service: TERM its process tree, escalate to KILL after 10s.
+#   $1  display name used in log lines
+#   $2  PID of the service's top-level process (empty = nothing to do)
+#   $3  optional port; when given it is handed to free_port once the service is down
+stop_service() {
+  local name="$1"
+  local pid="$2"
+  local port="${3:-}"
+  if [[ -z "$pid" ]]; then
+    return 0
+  fi
+  if ! kill -0 "$pid" 2>/dev/null; then
+    wait "$pid" 2>/dev/null || true
+    log "$name already stopped."
+  else
+    log "Stopping $name (PID $pid)..."
+    kill_process_tree "$pid" TERM
+    # 10s grace period, then force-kill and allow 2s more for the process to disappear.
+    if ! wait_for_pid "$pid" 10; then
+      warn "$name did not exit in time — sending SIGKILL"
+      kill_process_tree "$pid" KILL
+      wait_for_pid "$pid" 2 || true
+    fi
+    wait "$pid" 2>/dev/null || true
+    log "$name stopped."
+  fi
+  # Use `if` rather than `[[ ... ]] && ...` as the last statement: the short-circuit
+  # form makes the function return 1 whenever no port was passed.
+  if [[ -n "$port" ]]; then
+    free_port "$port"
+  fi
+}
+
+# Drop a background PID from the job table so bash stays quiet (no "Terminated"
+# notice) when cleanup later kills it. Errors are ignored; an empty PID is a no-op.
+disown_bg() {
+  local pid="$1"
+  if [[ -n "$pid" ]]; then
+    disown "$pid" 2>/dev/null || true
+  fi
+}
+
need_cmd() {
command -v "$1" >/dev/null 2>&1 || die "Missing required command: $1"
}
@@ -153,13 +208,36 @@ cmd_start() {
WORKER_PID=""
UVICORN_PID=""
FILE_SERVICE_PID=""
+ DATA_PID=""
+ BFF_PID=""
+ _CLEANUP_DONE=0
+ set +m
cleanup_local() {
- [ -n "$WORKER_PID" ] && kill "$WORKER_PID" 2>/dev/null || true
- [ -n "$UVICORN_PID" ] && kill "$UVICORN_PID" 2>/dev/null || true
- [ -n "$FILE_SERVICE_PID" ] && kill "$FILE_SERVICE_PID" 2>/dev/null || true
+ if [[ "$_CLEANUP_DONE" -eq 1 ]]; then
+ return 0
+ fi
+ _CLEANUP_DONE=1
+ trap - INT TERM EXIT
+ set +e
+
+ log "Shutting down local dev stack..."
+ # Reverse startup order; Vite (foreground) is already exiting from Ctrl+C.
+ stop_service "BFF" "$BFF_PID" 8090
+ BFF_PID=""
+ stop_service "Data" "$DATA_PID" 8091
+ DATA_PID=""
+ stop_service "FastAPI" "$UVICORN_PID" 8001
+ UVICORN_PID=""
+ stop_service "pipeline worker" "$WORKER_PID"
+ WORKER_PID=""
+ stop_service "FileService" "$FILE_SERVICE_PID" 8080
+ FILE_SERVICE_PID=""
+ stop_postgres
+ log "All services stopped."
+ exit 0
}
- trap cleanup_local INT TERM EXIT
+ trap cleanup_local EXIT INT TERM
if command -v dotnet >/dev/null 2>&1; then
free_port 8080
@@ -170,65 +248,125 @@ cmd_start() {
ASPNETCORE_ENVIRONMENT=Development \
dotnet run --project src/FileService.Api --no-launch-profile) &
FILE_SERVICE_PID=$!
+ disown_bg "$FILE_SERVICE_PID"
+
+ free_port 8091
+ log "Starting Data service on port 8091"
+ (cd "$ROOT/services/Data" && \
+ DATABASE_URL="$DATABASE_URL" \
+ ASPNETCORE_URLS="http://127.0.0.1:8091" \
+ ASPNETCORE_ENVIRONMENT=Development \
+ dotnet run --project src/Data.Api --no-launch-profile) &
+ DATA_PID=$!
+ disown_bg "$DATA_PID"
else
warn "dotnet not found — PDF export requires FileService (see services/FileService/README.md)"
+ warn "dotnet not found — Data service unavailable on port 8091"
fi
log "Starting pipeline worker"
"$VENV/bin/python" -m website_profiling.worker &
WORKER_PID=$!
+ disown_bg "$WORKER_PID"
free_port 8001
log "Starting FastAPI on port 8001"
export FASTAPI_URL="http://127.0.0.1:8001"
- export FASTAPI_ALLOWED_ORIGINS="http://localhost:3000"
+ export FASTAPI_ALLOWED_ORIGINS="http://localhost:8090"
"$VENV/bin/uvicorn" website_profiling.api.main:app \
--host 0.0.0.0 --port 8001 --workers 1 &
UVICORN_PID=$!
+ disown_bg "$UVICORN_PID"
+
+ if command -v dotnet >/dev/null 2>&1; then
+ free_port 8090
+ log "Starting BFF on port 8090"
+ (cd "$ROOT/services/Bff" && \
+ FASTAPI_URL="http://127.0.0.1:8001" \
+ FILE_SERVICE_URL="${FILE_SERVICE_URL:-http://127.0.0.1:8080}" \
+ DATA_SERVICE_URL="http://127.0.0.1:8091" \
+ DATA_ROUTES="${DATA_ROUTES:-/api/report/meta,/api/report/payload,/api/report/history,/api/report/crawl-payload,/api/report/mobile-delta,/api/report/portfolio,/api/portfolio,/api/issues/status,/api/filters}" \
+ BFF_ALLOWED_ORIGINS="http://localhost:3000" \
+ ASPNETCORE_URLS="http://127.0.0.1:8090" \
+ ASPNETCORE_ENVIRONMENT=Development \
+ dotnet run --project src/Bff.Api --no-launch-profile) &
+ BFF_PID=$!
+ disown_bg "$BFF_PID"
+ else
+ warn "dotnet not found — browser API calls need the BFF (see services/Bff/)"
+ fi
- log "Starting Next.js dev server (Ctrl+C to stop)"
+ log "Starting Vite dev server (Ctrl+C stops all services including Postgres)"
log "DATABASE_URL=$DATABASE_URL"
log "DATA_DIR=$DATA_DIR"
log "PYTHON=$PYTHON"
+ log "VITE_BFF_BASE_URL=${VITE_BFF_BASE_URL:-http://localhost:8090}"
log "FILE_SERVICE_URL=${FILE_SERVICE_URL:-http://127.0.0.1:8080}"
+ log "DATA_ROUTES=${DATA_ROUTES:-/api/report/meta,...}"
export FILE_SERVICE_URL="${FILE_SERVICE_URL:-http://127.0.0.1:8080}"
+ export VITE_BFF_BASE_URL="${VITE_BFF_BASE_URL:-http://localhost:8090}"
cd "$WEB"
- # Do not exec — keep this shell alive so the trap kills FileService/worker/uvicorn on Ctrl+C.
+ set +e
npm run dev
+ exit 0
}
+# `stop` subcommand: stop the Postgres container if it is running.
+# Here a missing docker CLI or an unreachable daemon is a hard error (die),
+# whereas stop_postgres on its own treats both as a quiet no-op.
cmd_stop() {
- ensure_docker
+ need_cmd docker
+ if ! docker info >/dev/null 2>&1; then
+ die "Docker is not running. Start Docker Desktop, then retry."
+ fi
if docker ps --format '{{.Names}}' | grep -qx "$PG_CONTAINER"; then
- log "Stopping $PG_CONTAINER"
- docker stop "$PG_CONTAINER" >/dev/null
+ stop_postgres
else
warn "Container $PG_CONTAINER is not running"
fi
}
+# Best-effort stop of the local Postgres container named by $PG_CONTAINER.
+# Does nothing when docker is missing, the daemon is unreachable, or the
+# container is not running.
+stop_postgres() {
+  # Without the docker CLI there is nothing to stop; return quietly.
+  if ! command -v docker >/dev/null 2>&1; then
+    return 0
+  fi
+  if ! docker info >/dev/null 2>&1; then
+    warn "Docker unavailable — skipping Postgres stop"
+    return 0
+  fi
+  # -F compares the name literally; container names may contain '.', a regex metacharacter.
+  if docker ps --format '{{.Names}}' 2>/dev/null | grep -Fqx "$PG_CONTAINER"; then
+    log "Stopping $PG_CONTAINER"
+    # Report success only when `docker stop` actually succeeded.
+    if docker stop "$PG_CONTAINER" >/dev/null 2>&1; then
+      log "Postgres stopped."
+    else
+      warn "Could not stop $PG_CONTAINER"
+    fi
+  fi
+}
+
+# `test` subcommand: hand off to scripts/local-test.sh in `all` mode.
+# `shift` drops the first argument before forwarding the rest — presumably the
+# literal "test" word passed through from main's "$@"; confirm against main().
+# `exec` replaces this process, so control never returns here.
+cmd_test() {
+ shift
+ exec "$ROOT/scripts/local-test.sh" all "$@"
+}
+
cmd_help() {
cat </data)
+ DATA_ROUTES (default: report reads, portfolio, issues status, saved filters)
WP_PG_CONTAINER, WP_PG_PORT, WP_PG_PASSWORD, WP_PG_DB
After start, open: http://localhost:3000/home
Run audits via sidebar "Run audit" (bottom-right FAB).
-Production Next.js (same Postgres, no hot reload): ./local-prod start
+Production build (same Postgres, no hot reload): ./local-prod start
-Run CI-style tests: ./local-test (see ./local-test help). JS crawl integration: ./local-test browser.
+Run CI-style tests: ./local-test or ./local-run test (see ./local-test help).
EOF
}
@@ -239,6 +377,7 @@ main() {
setup) cmd_setup ;;
db) cmd_db ;;
migrate) cmd_migrate ;;
+ test) cmd_test "$@" ;;
stop) cmd_stop ;;
help|-h|--help) cmd_help ;;
*)
diff --git a/scripts/local-test.sh b/scripts/local-test.sh
index b16595d9..dd2c1d7c 100755
--- a/scripts/local-test.sh
+++ b/scripts/local-test.sh
@@ -1,12 +1,13 @@
#!/usr/bin/env bash
# Local test runner — mirrors .github/workflows/ci.yml on your machine.
# Usage: ./local-test [command] [--no-cov]
-# (default) all — Postgres + migrations + Python + web checks
+# (default) all — Postgres + migrations + Python + web + .NET (Data, Bff, FileService)
# python — DB + pytest + CLI smoke only
-# web — typecheck, lint, vitest (no Postgres)
-# quick — pytest --no-cov + web (DB must already be running)
+# web — build, typecheck, lint, vitest (no Postgres)
+# dotnet — dotnet test Data + Bff + FileService + Bff OpenAPI drift gate
+# quick — pytest --no-cov + web + dotnet (DB must already be running)
# help — show commands
-set -euo pipefail
+set -uo pipefail
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT"
@@ -27,41 +28,108 @@ VENV="$ROOT/.venv"
WEB="$ROOT/web"
PYTEST_NO_COV=0
+STEP_PASS=()
+STEP_FAIL=() # entries: "name|detail"
+STEP_SKIP=() # entries: "name|reason"
+
log() { printf '\033[1;36m→\033[0m %s\n' "$*"; }
ok() { printf '\033[1;32m✓\033[0m %s\n' "$*"; }
warn() { printf '\033[1;33m!\033[0m %s\n' "$*" >&2; }
-die() { printf '\033[1;31m✗\033[0m %s\n' "$*" >&2; exit 1; }
+fail_msg() { printf '\033[1;31m✗\033[0m %s\n' "$*" >&2; }
+die() { fail_msg "$*"; exit 1; }
-need_cmd() {
- command -v "$1" >/dev/null 2>&1 || die "Missing required command: $1"
+# Empty the three result accumulators (passed, failed, skipped steps).
+reset_steps() {
+ STEP_PASS=()
+ STEP_FAIL=()
+ STEP_SKIP=()
}
-ensure_docker() {
- need_cmd docker
- if ! docker info >/dev/null 2>&1; then
- die "Docker is not running. Start Docker Desktop, then retry (or: ./local-test quick with DATABASE_URL set)."
+# Run one named step and record its outcome instead of aborting the script.
+#   $1    step name (logged, and stored in the result arrays)
+#   $2..  command and arguments to execute
+# Success appends the name to STEP_PASS; failure appends "name|exit code N" to STEP_FAIL.
+run_step() {
+ local name="$1"
+ shift
+ log "$name"
+ local ec=0
+ # `|| ec=$?` captures the command's exit status so a failure is recorded, not fatal.
+ "$@" || ec=$?
+ if [[ "$ec" -eq 0 ]]; then
+ STEP_PASS+=("$name")
+ else
+ STEP_FAIL+=("$name|exit code $ec")
fi
}
-wait_for_postgres() {
- local i
- for i in $(seq 1 30); do
- if docker exec "$PG_CONTAINER" pg_isready -U "$PG_USER" -d "$PG_DB" >/dev/null 2>&1; then
- return 0
- fi
- sleep 1
- done
- die "Postgres did not become ready in time (container: $PG_CONTAINER)"
+# Record a step as skipped and warn about it.
+#   $1  step name
+#   $2  reason (default: "skipped")
+# Stored in STEP_SKIP as "name|reason".
+skip_step() {
+  local name="$1" reason="${2:-skipped}"
+  local entry="$name|$reason"
+  warn "$name — $reason"
+  STEP_SKIP+=("$entry")
+}
+
+# Print a colourised report of all recorded steps: a banner, then PASSED, FAILED
+# and SKIPPED sections (each only when non-empty), then a one-line verdict.
+# Reads STEP_PASS, STEP_FAIL and STEP_SKIP; does not modify them.
+print_summary() {
+ local total_pass=${#STEP_PASS[@]}
+ local total_fail=${#STEP_FAIL[@]}
+ local total_skip=${#STEP_SKIP[@]}
+ local entry name detail
+
+ printf '\n'
+ printf '\033[1m═══════════════════════════════════════════════════════════════\033[0m\n'
+ printf '\033[1m Test summary\033[0m\n'
+ printf '\033[1m═══════════════════════════════════════════════════════════════\033[0m\n'
+
+ if [[ "$total_pass" -gt 0 ]]; then
+ printf '\n\033[1;32mPASSED (%d)\033[0m\n' "$total_pass"
+ for name in "${STEP_PASS[@]}"; do
+ printf ' \033[1;32m✓\033[0m %s\n' "$name"
+ done
+ fi
+
+ if [[ "$total_fail" -gt 0 ]]; then
+ printf '\n\033[1;31mFAILED (%d)\033[0m\n' "$total_fail"
+ for entry in "${STEP_FAIL[@]}"; do
+ # Entries are "name|detail": split at the first '|'.
+ name="${entry%%|*}"
+ detail="${entry#*|}"
+ printf ' \033[1;31m✗\033[0m %s (%s)\n' "$name" "$detail"
+ done
+ fi
+
+ if [[ "$total_skip" -gt 0 ]]; then
+ printf '\n\033[1;33mSKIPPED (%d)\033[0m\n' "$total_skip"
+ for entry in "${STEP_SKIP[@]}"; do
+ # Entries are "name|reason": split at the first '|'.
+ name="${entry%%|*}"
+ detail="${entry#*|}"
+ printf ' \033[1;33m-\033[0m %s (%s)\n' "$name" "$detail"
+ done
+ fi
+
+ printf '\n\033[1m───────────────────────────────────────────────────────────────\033[0m\n'
+ # Skipped steps do not count against the verdict; only failures do.
+ if [[ "$total_fail" -eq 0 ]]; then
+ ok "All steps passed ($total_pass passed, $total_skip skipped)"
+ else
+ fail_msg "$total_fail failed, $total_pass passed, $total_skip skipped"
+ fi
+ printf '\n'
}
-cmd_db() {
- ensure_docker
+# Print the summary; the return status is 0 only when no step was recorded as failed.
+finish() {
+  print_summary
+  (( ${#STEP_FAIL[@]} == 0 ))
+}
+
+# Succeed if command $1 can be resolved by the shell; prints nothing.
+# Does not exit on failure — callers decide whether a missing tool is fatal.
+need_cmd() {
+ command -v "$1" >/dev/null 2>&1
+}
+
+start_postgres() {
+ need_cmd docker || { warn "docker not found"; return 1; }
+ if ! docker info >/dev/null 2>&1; then
+ warn "Docker is not running"
+ return 1
+ fi
if docker ps -a --format '{{.Names}}' | grep -qx "$PG_CONTAINER"; then
if docker ps --format '{{.Names}}' | grep -qx "$PG_CONTAINER"; then
log "Postgres already running ($PG_CONTAINER)"
else
log "Starting existing container $PG_CONTAINER"
- docker start "$PG_CONTAINER" >/dev/null
+ docker start "$PG_CONTAINER" >/dev/null || return 1
fi
else
log "Creating Postgres container $PG_CONTAINER on port $PG_PORT"
@@ -69,51 +137,57 @@ cmd_db() {
-e "POSTGRES_PASSWORD=$PG_PASSWORD" \
-e "POSTGRES_DB=$PG_DB" \
-p "${PG_PORT}:5432" \
- "$PG_IMAGE" >/dev/null
+ "$PG_IMAGE" >/dev/null || return 1
fi
- wait_for_postgres
- log "DATABASE_URL=$DATABASE_URL"
+ local i
+ for i in $(seq 1 30); do
+ if docker exec "$PG_CONTAINER" pg_isready -U "$PG_USER" -d "$PG_DB" >/dev/null 2>&1; then
+ log "DATABASE_URL=$DATABASE_URL"
+ return 0
+ fi
+ sleep 1
+ done
+ warn "Postgres did not become ready in time (container: $PG_CONTAINER)"
+ return 1
}
-cmd_venv() {
- need_cmd python3
+# Make sure the virtualenv at $VENV exists and has the Python dependencies.
+# Returns 1 (instead of exiting) when python3 is missing or a setup command fails.
+ensure_venv() {
+ need_cmd python3 || { warn "python3 not found"; return 1; }
if [[ ! -x "$VENV/bin/python" ]]; then
log "Creating Python venv at .venv"
- python3 -m venv "$VENV"
+ python3 -m venv "$VENV" || return 1
fi
+ # requirements.txt is installed only when the pytest executable is absent from the venv.
if [[ ! -x "$VENV/bin/pytest" ]]; then
log "Installing Python dependencies"
- "$VENV/bin/pip" install -q -r "$ROOT/requirements.txt"
+ "$VENV/bin/pip" install -q -r "$ROOT/requirements.txt" || return 1
fi
+ return 0
}
-cmd_migrate() {
- [[ -x "$VENV/bin/alembic" ]] || cmd_venv
- log "Applying database migrations (alembic upgrade head)"
+# Apply Alembic migrations up to the latest revision, creating the venv first if
+# alembic is not yet executable there. Returns 1 when the venv cannot be set up.
+run_migrate() {
+ [[ -x "$VENV/bin/alembic" ]] || ensure_venv || return 1
"$VENV/bin/alembic" upgrade head
}
-cmd_web_deps() {
- need_cmd npm
+# Install web dependencies with `npm ci` when $WEB/node_modules does not exist.
+# Returns 1 (instead of exiting) when npm is missing or the install fails.
+ensure_web_deps() {
+ need_cmd npm || { warn "npm not found"; return 1; }
if [[ ! -d "$WEB/node_modules" ]]; then
log "Installing web dependencies (npm ci)"
+ # Subshell keeps the caller's working directory unchanged.
- (cd "$WEB" && npm ci)
+ (cd "$WEB" && npm ci) || return 1
fi
+ return 0
}
run_pytest_core() {
if [[ "$PYTEST_NO_COV" -eq 1 ]]; then
- log "Pytest (tests/ -q -m not browser --no-cov)"
"$VENV/bin/pytest" tests/ -q -m "not browser" --no-cov
else
- log "Pytest (tests/ -q -m not browser, core 100% coverage gate)"
"$VENV/bin/pytest" tests/ -q -m "not browser"
fi
}
run_pytest_reporting() {
[[ "$PYTEST_NO_COV" -eq 1 ]] && return 0
- log "Pytest (reporting coverage gate, 100%)"
"$VENV/bin/pytest" \
tests/reporting/ \
--cov=website_profiling.reporting \
@@ -126,7 +200,6 @@ run_pytest_reporting() {
run_pytest_tools() {
[[ "$PYTEST_NO_COV" -eq 1 ]] && return 0
- log "Pytest (tools coverage gate, 100%)"
"$VENV/bin/pytest" \
tests/tools/ \
tests/clients/ \
@@ -138,87 +211,207 @@ run_pytest_tools() {
-o addopts=
}
-run_pytest() {
- run_pytest_core
- run_pytest_reporting
- run_pytest_tools
-}
-
run_browser_pytest() {
if "$VENV/bin/python" -c "from website_profiling.crawl.fetchers import browser_status; import sys; sys.exit(0 if browser_status().get('ok') else 1)" 2>/dev/null; then
- log "Browser pytest (tests/test_crawl_fetchers.py tests/test_crawler_browser_e2e.py -m browser)"
"$VENV/bin/pytest" tests/test_crawl_fetchers.py tests/test_crawler_browser_e2e.py -m browser -q --no-cov
else
- warn "Chromium unavailable — skipping browser integration tests"
+ return 2
fi
}
-cmd_python() {
- cmd_db
- cmd_venv
- cmd_migrate
- run_pytest
- run_browser_pytest
- log "CLI smoke (python -m src --help)"
+run_cli_smoke() {
"$VENV/bin/python" -m src --help >/dev/null
- ok "Python checks passed"
+}
+
+run_web_build() { (cd "$WEB" && npm run build); }
+run_web_typecheck() { (cd "$WEB" && npm run typecheck); }
+run_web_lint() { (cd "$WEB" && npm run lint); }
+run_web_test() { (cd "$WEB" && npm test); }
+
+dotnet_test_sln() {
+ local service_dir="$1"
+ local slnx="$2"
+ (cd "$ROOT/services/${service_dir}" && dotnet test "$slnx")
+}
+
+run_bff_openapi_drift_gate() {
+ if ! need_cmd dotnet; then
+ return 0
+ fi
+ if ! need_cmd nswag; then
+ if dotnet tool list -g 2>/dev/null | grep -q NSwag.ConsoleCore; then
+ export PATH="$PATH:$HOME/.dotnet/tools"
+ else
+ log "Installing NSwag.ConsoleCore (Bff OpenAPI drift gate)"
+ dotnet tool install -g NSwag.ConsoleCore || return 1
+ export PATH="$PATH:$HOME/.dotnet/tools"
+ fi
+ fi
+ if ! need_cmd nswag; then
+ return 2
+ fi
+ (cd "$ROOT/services/Bff" && nswag run nswag.json) || return 1
+ git diff --exit-code services/Bff/src/Bff.Application/Generated/FastApiClient.g.cs
+}
+
+run_step_or_skip_browser() {
+ local name="Browser pytest (tests/test_crawl_fetchers.py, tests/test_crawler_browser_e2e.py)"
+ log "$name"
+ local ec=0
+ run_browser_pytest || ec=$?
+ if [[ "$ec" -eq 0 ]]; then
+ STEP_PASS+=("$name")
+ elif [[ "$ec" -eq 2 ]]; then
+ skip_step "$name" "Chromium unavailable"
+ else
+ STEP_FAIL+=("$name|exit code $ec")
+ fi
+}
+
+run_step_or_skip_openapi() {
+ local name="Bff OpenAPI drift gate (FastApiClient.g.cs)"
+ log "$name"
+ local ec=0
+ run_bff_openapi_drift_gate || ec=$?
+ if [[ "$ec" -eq 0 ]]; then
+ STEP_PASS+=("$name")
+ elif [[ "$ec" -eq 2 ]]; then
+ skip_step "$name" "nswag not on PATH"
+ else
+ STEP_FAIL+=("$name|exit code $ec — run services/Bff/generate-client.sh and commit")
+ fi
+}
+
+# Step: bring Postgres up via start_postgres, labelled with the container name.
+steps_postgres() {
+  run_step "Postgres ($PG_CONTAINER)" start_postgres
+}
+
+# Step: create the Python venv and install requirements when missing (ensure_venv).
+steps_venv() {
+  run_step "Python venv + dependencies" ensure_venv
+}
+
+# Step: apply database migrations with `alembic upgrade head` (run_migrate).
+steps_migrate() {
+  run_step "Database migrations (alembic upgrade head)" run_migrate
+}
+
+steps_pytest() {
+ if [[ "$PYTEST_NO_COV" -eq 1 ]]; then
+ run_step "Pytest core (tests/ — no coverage)" run_pytest_core
+ skip_step "Pytest reporting coverage gate" "--no-cov"
+ skip_step "Pytest tools coverage gate" "--no-cov"
+ else
+ run_step "Pytest core (tests/ — 100% coverage gate)" run_pytest_core
+ run_step "Pytest reporting coverage gate (tests/reporting/)" run_pytest_reporting
+ run_step "Pytest tools coverage gate (tests/tools/, tests/clients/)" run_pytest_tools
+ fi
+}
+
+# Step: browser integration tests; may be recorded as skipped instead of pass/fail.
+steps_browser() {
+  run_step_or_skip_browser
+}
+
+# Step: CLI smoke test — `python -m src --help` must exit successfully.
+steps_cli_smoke() {
+  run_step "CLI smoke (python -m src --help)" run_cli_smoke
+}
+
+# Step: install web dependencies with `npm ci` when node_modules is absent.
+steps_web_deps() {
+  run_step "Web dependencies (npm ci if needed)" ensure_web_deps
+}
+
+# Web job: dependencies first, then build, typecheck, lint and tests, each as its own
+# step. The steps are issued unconditionally, one after another.
+steps_web() {
+  steps_web_deps
+  run_step "Web build (web/)" run_web_build
+  run_step "Web typecheck (web/)" run_web_typecheck
+  run_step "Web lint (web/)" run_web_lint
+  run_step "Web tests / vitest (web/)" run_web_test
+}
+
+steps_dotnet() {
+ if ! need_cmd dotnet; then
+ skip_step ".NET tests (Data, Bff, FileService)" "dotnet not found"
+ return 0
+ fi
+ run_step "dotnet test Data (services/Data/Data.slnx)" dotnet_test_sln "Data" "Data.slnx"
+ run_step "dotnet test Bff (services/Bff/Bff.slnx)" dotnet_test_sln "Bff" "Bff.slnx"
+ run_step_or_skip_openapi
+ run_step "dotnet test FileService (services/FileService/FileService.slnx)" dotnet_test_sln "FileService" "FileService.slnx"
+}
+
+# Python job, in dependency order: Postgres, venv, migrations, pytest gates,
+# browser tests, CLI smoke.
+steps_python() {
+  steps_postgres
+  steps_venv
+  steps_migrate
+  steps_pytest
+  steps_browser
+  steps_cli_smoke
+}
+
+cmd_python() {
+ reset_steps
+ steps_python
+ finish
}
cmd_browser() {
- cmd_venv
- run_browser_pytest
- ok "Browser pytest finished"
+ reset_steps
+ run_step "Python venv + dependencies" ensure_venv
+ run_step_or_skip_browser
+ finish
}
cmd_web() {
- cmd_web_deps
- log "Web typecheck"
- (cd "$WEB" && npm run typecheck)
- log "Web lint"
- (cd "$WEB" && npm run lint)
- log "Web tests (vitest)"
- (cd "$WEB" && npm test)
- ok "Web checks passed"
+ reset_steps
+ steps_web
+ finish
+}
+
+cmd_dotnet() {
+ reset_steps
+ steps_dotnet
+ finish
}
cmd_all() {
- cmd_python
- cmd_web
- ok "All local tests passed (CI python + web jobs, including reporting/tools gates)"
+ reset_steps
+ steps_python
+ steps_web
+ steps_dotnet
+ finish
}
cmd_quick() {
if [[ -z "${DATABASE_URL:-}" ]]; then
die "DATABASE_URL is not set. Export it or run ./local-test all."
fi
- cmd_venv
- cmd_web_deps
+ reset_steps
warn "quick: assuming Postgres is up and migrated (./local-run db && ./local-run migrate)"
- run_pytest
- log "CLI smoke (python -m src --help)"
- "$VENV/bin/python" -m src --help >/dev/null
- log "Web typecheck"
- (cd "$WEB" && npm run typecheck)
- log "Web lint"
- (cd "$WEB" && npm run lint)
- log "Web tests (vitest)"
- (cd "$WEB" && npm test)
- ok "Quick test run passed"
+ PYTEST_NO_COV=1
+ steps_venv
+ steps_pytest
+ steps_cli_smoke
+ steps_web
+ steps_dotnet
+ finish
}
cmd_help() {
cat < bool:
+ text = path.read_text()
+ original = text
+
+ text = LINK_IMPORT.sub("import { Link } from 'react-router-dom';\n", text)
+ text = DYNAMIC_IMPORT.sub("", text)
+
+ def nav_repl(m: re.Match[str]) -> str:
+ names = [n.strip() for n in m.group(1).split(",")]
+ mapping = {
+ "useRouter": "useNavigate",
+ "usePathname": None, # useLocation
+ "useSearchParams": "useSearchParams",
+ "useParams": "useParams",
+ "notFound": None,
+ "redirect": None,
+ }
+ out: list[str] = []
+ needs_location = "usePathname" in names
+ for n in names:
+ if n == "usePathname":
+ continue
+ if n == "notFound":
+ continue
+ out.append(mapping.get(n, n))
+ imports = list(dict.fromkeys(out))
+ if needs_location:
+ imports = ["useLocation", *imports]
+ return f"import {{ {', '.join(imports)} }} from 'react-router-dom';\n"
+
+ text = NAV_IMPORT.sub(nav_repl, text)
+
+ # Link href -> to
+ text = re.sub(r" useNavigate
+ text = re.sub(r"\bconst router = useRouter\(\)", "const navigate = useNavigate()", text)
+ text = re.sub(r"\bconst router = useNavigate\(\)", "const navigate = useNavigate()", text)
+
+ # usePathname -> useLocation
+ if "useLocation" in text and "usePathname" in text:
+ text = re.sub(
+ r"\bconst pathname = usePathname\(\)",
+ "const { pathname } = useLocation()",
+ text,
+ )
+
+ # useSearchParams destructuring
+ text = re.sub(
+ r"\bconst searchParams = useSearchParams\(\)",
+ "const [searchParams] = useSearchParams()",
+ text,
+ )
+
+ # router.push/replace -> navigate
+ text = re.sub(
+ r"router\.replace\(([^,)]+),\s*\{\s*scroll:\s*false\s*\}\)",
+ r"navigate(\1, { replace: true, preventScrollReset: true })",
+ text,
+ )
+ text = re.sub(r"router\.replace\(", "navigate(", text)
+ text = re.sub(r"router\.push\(", "navigate(", text)
+ text = re.sub(r"router\.back\(", "navigate(-1", text)
+
+    # Known gap: a plain router.replace(path) (without the { scroll: false } option) is
+    # rewritten above to navigate(path), which pushes a history entry instead of
+    # replacing the current one. Once both router.replace and router.push have become
+    # navigate(...), the two cannot be told apart reliably, so add { replace: true } to
+    # the affected calls by hand (ReportShell etc.).
+
+ # goToPipeline(router.push -> goToPipeline(navigate
+ text = text.replace("goToPipeline(router.push", "goToPipeline(navigate")
+
+ # next/dynamic -> lazy
+ text = re.sub(
+ r"const (\w+) = dynamic\(\(\) => import\(([^)]+)\),\s*\{[^}]*loading:[^}]*\}\);",
+ r"const \1 = lazy(() => import(\2));",
+ text,
+ )
+ text = re.sub(
+ r"const (\w+) = dynamic\(\(\) => import\(([^)]+)\),\s*\{[^}]*ssr:\s*false,[^}]*\}\);",
+ r"const \1 = lazy(() => import(\2));",
+ text,
+ )
+
+ # Add lazy import if lazy( used
+ if "lazy(" in text and "from 'react'" in text:
+ if re.search(r"import \{[^}]*\blazy\b", text):
+ pass
+ elif re.search(r"import \{([^}]+)\} from 'react'", text):
+ text = re.sub(
+ r"import \{([^}]+)\} from 'react'",
+ lambda m: f"import {{ {m.group(1).strip()}, lazy }} from 'react'"
+ if "lazy" not in m.group(1)
+ else m.group(0),
+ text,
+ count=1,
+ )
+ else:
+ text = "import { lazy } from 'react';\n" + text
+
+ # process.env.NODE_ENV -> import.meta.env.DEV / PROD
+ text = text.replace("process.env.NODE_ENV !== 'production'", "import.meta.env.DEV")
+ text = text.replace("process.env.NODE_ENV === 'production'", "import.meta.env.PROD")
+
+ if text != original:
+ path.write_text(text)
+ return True
+ return False
+
+
+def main() -> None:
+ changed = 0
+ for path in ROOT.rglob("*"):
+ if path.suffix not in {".ts", ".tsx"}:
+ continue
+ if migrate_file(path):
+ changed += 1
+ print(path.relative_to(ROOT.parent))
+ print(f"Updated {changed} files")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/services/Bff/.dockerignore b/services/Bff/.dockerignore
new file mode 100644
index 00000000..ab41d99e
--- /dev/null
+++ b/services/Bff/.dockerignore
@@ -0,0 +1,4 @@
+**/bin/
+**/obj/
+**/.vs/
+**/TestResults/
diff --git a/services/Bff/Bff.slnx b/services/Bff/Bff.slnx
new file mode 100644
index 00000000..90d068a3
--- /dev/null
+++ b/services/Bff/Bff.slnx
@@ -0,0 +1,10 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/services/Bff/Dockerfile b/services/Bff/Dockerfile
new file mode 100644
index 00000000..54e2bbe0
--- /dev/null
+++ b/services/Bff/Dockerfile
@@ -0,0 +1,16 @@
+# Build stage: restore and publish Bff.Api using the full .NET SDK image.
+FROM mcr.microsoft.com/dotnet/sdk:10.0 AS build
+WORKDIR /src
+# Copy the solution and project files on their own first, so the restore layer
+# below is rebuilt only when one of these files changes.
+COPY Bff.slnx ./
+COPY src/Bff.Domain/Bff.Domain.csproj src/Bff.Domain/
+COPY src/Bff.Application/Bff.Application.csproj src/Bff.Application/
+COPY src/Bff.Api/Bff.Api.csproj src/Bff.Api/
+RUN dotnet restore src/Bff.Api/Bff.Api.csproj
+# Bring in the sources and publish; packages were already restored above.
+COPY src/ src/
+RUN dotnet publish src/Bff.Api/Bff.Api.csproj -c Release -o /app/publish --no-restore
+
+# Runtime stage: ASP.NET runtime image holding only the published output.
+FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime
+WORKDIR /app
+# The app binds HTTP on port 8090 on all interfaces.
+ENV ASPNETCORE_URLS=http://+:8090
+EXPOSE 8090
+COPY --from=build /app/publish .
+ENTRYPOINT ["dotnet", "Bff.Api.dll"]
diff --git a/services/Bff/generate-client.sh b/services/Bff/generate-client.sh
new file mode 100755
index 00000000..8937623a
--- /dev/null
+++ b/services/Bff/generate-client.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Regenerate the typed FastAPI client (src/Bff.Application/Generated/FastApiClient.g.cs)
+# from the committed OpenAPI spec (web/openapi.json).
+#
+# Prerequisite (one-time): dotnet tool install -g NSwag.ConsoleCore
+# The spec itself is produced by: python scripts/generate_openapi.py (run from the repo).
+set -e
+cd "$(dirname "$0")"
+# Global dotnet tools are installed under ~/.dotnet/tools, which is not on PATH in
+# every shell; CI and local-test add it the same way.
+PATH="$PATH:$HOME/.dotnet/tools"
+export PATH
+if ! command -v nswag >/dev/null 2>&1; then
+  echo "nswag not found. Install it with: dotnet tool install -g NSwag.ConsoleCore" >&2
+  exit 1
+fi
+nswag run nswag.json
+echo "Generated src/Bff.Application/Generated/FastApiClient.g.cs"
diff --git a/services/Bff/nswag.json b/services/Bff/nswag.json
new file mode 100644
index 00000000..0cc862c8
--- /dev/null
+++ b/services/Bff/nswag.json
@@ -0,0 +1,32 @@
+{
+ "runtime": "Net100",
+ "defaultVariables": null,
+ "documentGenerator": {
+ "fromDocument": {
+ "url": "../../web/openapi.json"
+ }
+ },
+ "codeGenerators": {
+ "openApiToCSharpClient": {
+ "clientBaseClass": null,
+ "className": "FastApiClient",
+ "operationGenerationMode": "SingleClientFromOperationId",
+ "generateClientInterfaces": true,
+ "generateOptionalParameters": true,
+ "jsonLibrary": "SystemTextJson",
+ "anyType": "object",
+ "dictionaryType": "System.Collections.Generic.Dictionary",
+ "dictionaryInstanceType": "System.Collections.Generic.Dictionary",
+ "arrayType": "System.Collections.Generic.List",
+ "arrayInstanceType": "System.Collections.Generic.List",
+ "namespace": "Bff.Application.Generated",
+ "requiredPropertiesMustBeDefined": true,
+ "generateDataAnnotations": false,
+ "generateExceptionClasses": true,
+ "exceptionClass": "FastApiClientException",
+ "useBaseUrl": false,
+ "generateNullableReferenceTypes": true,
+ "output": "src/Bff.Application/Generated/FastApiClient.g.cs"
+ }
+ }
+}
diff --git a/services/Bff/src/Bff.Api/Auth/AccessControlMiddleware.cs b/services/Bff/src/Bff.Api/Auth/AccessControlMiddleware.cs
new file mode 100644
index 00000000..985719e9
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Auth/AccessControlMiddleware.cs
@@ -0,0 +1,70 @@
+using System.Security.Claims;
+using Bff.Application.Options;
+using Bff.Domain;
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.Extensions.Options;
+
+namespace Bff.Api.Auth;
+
+/// <summary>
+/// Enforces <see cref="AccessPolicy"/> for every request using the principal populated by
+/// <see cref="WpSessionAuthenticationHandler"/>. Emits ProblemDetails (401/403) on denial.
+/// When auth is disabled (no AUTH_SECRET) everything is permitted, matching the TS contract.
+/// </summary>
+public sealed class AccessControlMiddleware(RequestDelegate next, IOptions<AuthOptions> auth)
+{
+    private readonly AuthOptions _auth = auth.Value;
+
+    /// <summary>
+    /// Resolves the access requirement for the request and either calls the next
+    /// middleware or short-circuits with 401 (no authenticated identity) or 403
+    /// (role not allowed for the requirement).
+    /// </summary>
+    public async Task InvokeAsync(HttpContext context)
+    {
+        if (!_auth.Enabled)
+        {
+            await next(context);
+            return;
+        }
+
+        var requirement = AccessPolicy.Resolve(context.Request.Method, context.Request.Path);
+        if (requirement == AccessRequirement.Anonymous)
+        {
+            await next(context);
+            return;
+        }
+
+        var user = context.User;
+        var authenticated = user.Identity?.IsAuthenticated == true;
+        if (!authenticated)
+        {
+            await WriteProblem(context, StatusCodes.Status401Unauthorized, "Authentication required");
+            return;
+        }
+
+        var role = user.FindFirstValue(ClaimTypes.Role);
+        var allowed = requirement switch
+        {
+            AccessRequirement.Read => true, // any authenticated role, including read-only
+            AccessRequirement.Mutate => Roles.CanMutate(role),
+            AccessRequirement.Chat => Roles.CanChat(role),
+            _ => false, // unknown requirement: fail closed
+        };
+
+        if (!allowed)
+        {
+            await WriteProblem(context, StatusCodes.Status403Forbidden, "Forbidden");
+            return;
+        }
+
+        await next(context);
+    }
+
+    /// <summary>Writes a minimal application/problem+json body with the given status and title.</summary>
+    private static Task WriteProblem(HttpContext context, int status, string title)
+    {
+        context.Response.StatusCode = status;
+        context.Response.ContentType = "application/problem+json";
+        var problem = new ProblemDetails
+        {
+            Status = status,
+            Title = title,
+        };
+        return context.Response.WriteAsJsonAsync(problem, problem.GetType(), options: null, contentType: "application/problem+json");
+    }
+}
diff --git a/services/Bff/src/Bff.Api/Auth/AccessPolicy.cs b/services/Bff/src/Bff.Api/Auth/AccessPolicy.cs
new file mode 100644
index 00000000..e4036fe8
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Auth/AccessPolicy.cs
@@ -0,0 +1,60 @@
+namespace Bff.Api.Auth;
+
+public enum AccessRequirement
+{
+    /// <summary>No session required (health, auth endpoints).</summary>
+    Anonymous,
+
+    /// <summary>Any authenticated role, including read-only (viewer/client-readonly).</summary>
+    Read,
+
+    /// <summary>Mutating role required: admin/editor/analyst (TS requireApiAuth).</summary>
+    Mutate,
+
+    /// <summary>Chat: allows client-readonly, blocks viewer (TS requireApiAuthForChat).</summary>
+    Chat,
+}
+
+/// <summary>
+/// Single source of truth for the per-route access policy — the result of the per-route audit
+/// the plan calls for. This replaces the 79 scattered forbiddenIfNotLocal guards + 20 requireApiAuth
+/// calls in the Next.js routes. The localhost guard is intentionally dropped: under the new topology
+/// it is subsumed by auth + the upstreams being network-internal.
+/// </summary>
+/// <remarks>
+/// Default convention (refine specific paths here as needed):
+///   - GET/HEAD under /api  -> Read   (reads were open behind localhost before; now require a session)
+///   - other methods /api   -> Mutate (mirrors the dominant requireApiAuth pattern)
+///   - chat / auth / health -> explicit overrides below
+/// </remarks>
+public static class AccessPolicy
+{
+    // Health + auth handshake endpoints: reachable without a session (exact path match).
+    private static readonly string[] AnonymousApiPaths =
+    {
+        "/api/health",
+        "/api/auth/login",
+        "/api/auth/session",
+        "/api/auth/logout",
+    };
+
+    /// <summary>Maps an HTTP method and request path to the access level it requires.</summary>
+    public static AccessRequirement Resolve(string method, PathString path)
+    {
+        // Non-/api paths (swagger/docs/health) are open.
+        var underApi = path.StartsWithSegments("/api", StringComparison.OrdinalIgnoreCase);
+        if (!underApi || Array.Exists(AnonymousApiPaths, open => Matches(path, open)))
+        {
+            return AccessRequirement.Anonymous;
+        }
+
+        // Chat is a read-only query but allows client-readonly.
+        if (Matches(path, "/api/chat") || Matches(path, "/api/chat/"))
+        {
+            return AccessRequirement.Chat;
+        }
+
+        // GET, HEAD and OPTIONS count as reads; every other method mutates.
+        if (HttpMethods.IsGet(method) || HttpMethods.IsHead(method) || HttpMethods.IsOptions(method))
+        {
+            return AccessRequirement.Read;
+        }
+        return AccessRequirement.Mutate;
+    }
+
+    // Whole-path, case-insensitive comparison (not a prefix match).
+    private static bool Matches(PathString path, string value)
+    {
+        return path.Equals(value, StringComparison.OrdinalIgnoreCase);
+    }
+}
diff --git a/services/Bff/src/Bff.Api/Auth/WpSessionAuthenticationHandler.cs b/services/Bff/src/Bff.Api/Auth/WpSessionAuthenticationHandler.cs
new file mode 100644
index 00000000..86746aec
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Auth/WpSessionAuthenticationHandler.cs
@@ -0,0 +1,63 @@
+using System.Security.Claims;
+using System.Text.Encodings.Web;
+using Bff.Application.Auth;
+using Bff.Application.Options;
+using Bff.Domain;
+using Microsoft.AspNetCore.Authentication;
+using Microsoft.Extensions.Options;
+
+namespace Bff.Api.Auth;
+
+// Constants shared by the wp_session authentication scheme.
+public static class WpSessionDefaults
+{
+    // Authentication scheme name used for the identity and ticket built by the handler.
+    public const string Scheme = "WpSession";
+    // Claim type added (with value "true") to principals issued while auth is disabled.
+    public const string AuthDisabledClaim = "wp:auth_disabled";
+}
+
+/// <summary>
+/// Authenticates requests from the wp_session cookie (verified byte-compatibly with auth.ts).
+/// When auth is disabled (no AUTH_SECRET), every request is authenticated as the default role,
+/// mirroring the TS behaviour where authEnabled() === false permits everything.
+/// </summary>
+// NOTE(review): the generic type arguments were missing from this file. AuthOptions,
+// AuthenticateResult and Claim follow from how the values are used below;
+// AuthenticationSchemeOptions is assumed as the scheme options type — confirm against
+// the AddScheme registration.
+public sealed class WpSessionAuthenticationHandler : AuthenticationHandler<AuthenticationSchemeOptions>
+{
+    private readonly AuthOptions _auth;
+
+    public WpSessionAuthenticationHandler(
+        IOptionsMonitor<AuthenticationSchemeOptions> options,
+        ILoggerFactory logger,
+        UrlEncoder encoder,
+        IOptions<AuthOptions> auth)
+        : base(options, logger, encoder)
+    {
+        _auth = auth.Value;
+    }
+
+    /// <summary>
+    /// Succeeds with the configured default role when auth is disabled, succeeds with the
+    /// role verified from the session cookie when it is valid, and otherwise returns
+    /// NoResult so the request stays unauthenticated.
+    /// </summary>
+    protected override Task<AuthenticateResult> HandleAuthenticateAsync()
+    {
+        if (!_auth.Enabled)
+        {
+            return Task.FromResult(AuthenticateResult.Success(BuildTicket(_auth.DefaultRole, authDisabled: true)));
+        }
+
+        var cookie = Request.Cookies[WpSessionTokens.CookieName];
+        var role = WpSessionTokens.VerifyRole(cookie, _auth.Secret, DateTimeOffset.UtcNow.ToUnixTimeSeconds());
+        if (string.IsNullOrEmpty(role))
+        {
+            return Task.FromResult(AuthenticateResult.NoResult());
+        }
+        return Task.FromResult(AuthenticateResult.Success(BuildTicket(role, authDisabled: false)));
+    }
+
+    /// <summary>Builds a ticket whose identity carries the role claim (plus the auth-disabled marker when set).</summary>
+    private AuthenticationTicket BuildTicket(string role, bool authDisabled)
+    {
+        var claims = new List<Claim> { new(ClaimTypes.Role, role) };
+        if (authDisabled)
+        {
+            claims.Add(new Claim(WpSessionDefaults.AuthDisabledClaim, "true"));
+        }
+        var identity = new ClaimsIdentity(claims, WpSessionDefaults.Scheme);
+        var principal = new ClaimsPrincipal(identity);
+        return new AuthenticationTicket(principal, WpSessionDefaults.Scheme);
+    }
+}
diff --git a/services/Bff/src/Bff.Api/Bff.Api.csproj b/services/Bff/src/Bff.Api/Bff.Api.csproj
new file mode 100644
index 00000000..59624093
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Bff.Api.csproj
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ net10.0
+ enable
+ enable
+
+
+
diff --git a/services/Bff/src/Bff.Api/Endpoints/AuthEndpoints.cs b/services/Bff/src/Bff.Api/Endpoints/AuthEndpoints.cs
new file mode 100644
index 00000000..a4f3b958
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Endpoints/AuthEndpoints.cs
@@ -0,0 +1,132 @@
+using System.Security.Cryptography;
+using System.Text;
+using Bff.Application.Auth;
+using Bff.Application.Options;
+using Bff.Domain;
+using Microsoft.Extensions.Options;
+
+namespace Bff.Api.Endpoints;
+
+/// <summary>
+/// Auth handshake endpoints, moved into the BFF (it now owns setting/verifying the wp_session cookie).
+/// Mirrors web/app/api/auth/login + auth/session.
+/// </summary>
+public static class AuthEndpoints
+{
+    /// <summary>Maps POST /api/auth/login, POST /api/auth/logout and GET /api/auth/session.</summary>
+    public static void MapAuthEndpoints(this IEndpointRouteBuilder app)
+    {
+        // Login: exchange HTTP Basic credentials for a signed session cookie.
+        app.MapPost("/api/auth/login", (HttpContext context, IOptions<AuthOptions> authOptions) =>
+        {
+            var auth = authOptions.Value;
+            if (!auth.Enabled)
+            {
+                return Results.Json(new { ok = true, auth = "disabled" });
+            }
+            if (!ParseBasicAuth(context, auth))
+            {
+                return Results.Json(new { error = "Invalid credentials" }, statusCode: StatusCodes.Status401Unauthorized);
+            }
+
+            var now = DateTimeOffset.UtcNow.ToUnixTimeSeconds();
+            var token = WpSessionTokens.Create(auth.DefaultRole, auth.Secret, now, auth.SessionMaxAgeSeconds);
+            SetSessionCookie(context, token, auth);
+            return Results.Json(new { ok = true });
+        });
+
+        // Logout: overwrite the cookie with an empty value that expired at the Unix epoch.
+        app.MapPost("/api/auth/logout", (HttpContext context, IOptions<AuthOptions> authOptions) =>
+        {
+            var options = BuildCookieOptions(authOptions.Value);
+            options.Expires = DateTimeOffset.UnixEpoch;
+            context.Response.Cookies.Append(WpSessionTokens.CookieName, string.Empty, options);
+            return Results.Json(new { ok = true });
+        });
+
+        // Session: report whether auth is on and what the current cookie grants.
+        app.MapGet("/api/auth/session", (HttpContext context, IOptions<AuthOptions> authOptions) =>
+        {
+            var auth = authOptions.Value;
+            var enabled = auth.Enabled;
+            var role = enabled
+                ? WpSessionTokens.VerifyRole(
+                    context.Request.Cookies[WpSessionTokens.CookieName],
+                    auth.Secret,
+                    DateTimeOffset.UtcNow.ToUnixTimeSeconds())
+                : null;
+            // With auth disabled there is no cookie role; the session is reported as analyst.
+            var effective = role ?? (enabled ? null : Roles.Analyst);
+            return Results.Json(new
+            {
+                authEnabled = enabled,
+                authenticated = !enabled || role is not null,
+                role = effective,
+                canMutate = Roles.CanMutate(effective),
+                @readonly = enabled && role is not null && !Roles.CanMutate(role),
+            });
+        });
+    }
+
+    /// <summary>
+    /// Validates the request's HTTP Basic credentials against the configured user and password.
+    /// Returns false when no password is configured, the header is missing or malformed,
+    /// or the credentials do not match.
+    /// </summary>
+    private static bool ParseBasicAuth(HttpContext context, AuthOptions auth)
+    {
+        const string scheme = "Basic ";
+
+        if (string.IsNullOrEmpty(auth.BasicPassword))
+        {
+            return false;
+        }
+        var header = context.Request.Headers.Authorization.ToString();
+        // The auth-scheme token is case-insensitive (RFC 7235 section 2.1), so accept "basic" too.
+        if (!header.StartsWith(scheme, StringComparison.OrdinalIgnoreCase))
+        {
+            return false;
+        }
+        try
+        {
+            var decoded = Encoding.UTF8.GetString(Convert.FromBase64String(header[scheme.Length..]));
+            var idx = decoded.IndexOf(':');
+            if (idx < 0)
+            {
+                return false;
+            }
+            // Split on the first colon only (RFC 7617: password may contain colons).
+            var user = decoded[..idx];
+            var pass = decoded[(idx + 1)..];
+            // Constant-time compare (matches WpSessionTokens' HMAC check). Hashing
+            // to a fixed length first avoids leaking credential length, and `&`
+            // (not `&&`) ensures both comparisons always run.
+            return FixedTimeStringEquals(user, auth.BasicUser)
+                & FixedTimeStringEquals(pass, auth.BasicPassword);
+        }
+        catch (FormatException)
+        {
+            // Not valid base64.
+            return false;
+        }
+    }
+
+    /// <summary>Compares two strings in constant time via their SHA-256 digests.</summary>
+    private static bool FixedTimeStringEquals(string a, string b) =>
+        CryptographicOperations.FixedTimeEquals(
+            SHA256.HashData(Encoding.UTF8.GetBytes(a)),
+            SHA256.HashData(Encoding.UTF8.GetBytes(b)));
+
+    /// <summary>Appends the session cookie carrying <paramref name="token"/> with the configured lifetime.</summary>
+    private static void SetSessionCookie(HttpContext context, string token, AuthOptions auth)
+    {
+        var options = BuildCookieOptions(auth);
+        options.MaxAge = TimeSpan.FromSeconds(auth.SessionMaxAgeSeconds);
+        context.Response.Cookies.Append(WpSessionTokens.CookieName, token, options);
+    }
+
+    /// <summary>
+    /// Cookie attributes shared by login and logout, so the logout cookie overwrites the
+    /// one login set. Secure is forced on for SameSite=None.
+    /// </summary>
+    private static CookieOptions BuildCookieOptions(AuthOptions auth)
+    {
+        var sameSite = ParseSameSite(auth.CookieSameSite);
+        return new CookieOptions
+        {
+            HttpOnly = true,
+            SameSite = sameSite,
+            Secure = auth.CookieSecure || sameSite == SameSiteMode.None,
+            Path = "/",
+            Domain = string.IsNullOrEmpty(auth.CookieDomain) ? null : auth.CookieDomain,
+        };
+    }
+
+    /// <summary>Parses the configured SameSite value; anything other than none/strict means Lax.</summary>
+    private static SameSiteMode ParseSameSite(string value) => value.Trim().ToLowerInvariant() switch
+    {
+        "none" => SameSiteMode.None,
+        "strict" => SameSiteMode.Strict,
+        _ => SameSiteMode.Lax,
+    };
+}
diff --git a/services/Bff/src/Bff.Api/Endpoints/ProxyEndpoints.cs b/services/Bff/src/Bff.Api/Endpoints/ProxyEndpoints.cs
new file mode 100644
index 00000000..9ada56e8
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Endpoints/ProxyEndpoints.cs
@@ -0,0 +1,143 @@
+using Bff.Api.Forwarding;
+using Bff.Application;
+using Bff.Application.Options;
+using Microsoft.Extensions.Options;
+
+namespace Bff.Api.Endpoints;
+
+/// <summary>
+/// The reverse-proxy surface: every /api/* request is mirrored to FastAPI, with explicit
+/// handling for streaming (chat SSE) and for exports that translate to the FileService.
+/// All near-identical 1:1 routes collapse into one catch-all instead of ~84 hand-written files.
+/// </summary>
+public static class ProxyEndpoints
+{
+    /// <summary>Registers the chat, export and catch-all proxy routes on <paramref name="app"/>.</summary>
+    public static void MapProxyEndpoints(this IEndpointRouteBuilder app)
+    {
+        // Chat: Server-Sent Events stream to FastAPI (upstream route has a trailing slash).
+        app.MapPost("/api/chat", (HttpContext ctx) => (IResult)new ForwardingResult(
+            DependencyInjection.FastApiStreamClient,
+            $"/api/chat/{ctx.Request.QueryString}",
+            disableResponseBuffering: true));
+
+        // Report export: PDF/CSV/JSON are all rendered by the FileService (which reads Postgres
+        // directly). A missing format defaults to csv (matches the old Python default); any other
+        // format is rejected (the Python export route has been removed). Mirrors proxyToFileService.ts.
+        app.MapGet("/api/report/export", (HttpContext ctx) =>
+        {
+            var raw = ctx.Request.Query["format"].ToString();
+            var format = string.IsNullOrEmpty(raw) ? "csv" : raw.ToLowerInvariant();
+            if (format is not ("pdf" or "csv" or "json"))
+            {
+                return Results.Json(
+                    new { error = $"Unsupported export format '{format}'. Use pdf, csv, or json." },
+                    statusCode: 400);
+            }
+            var path = BuildFileServiceReportPath(ctx.Request.Query, format);
+            return path is null
+                ? Results.Json(new { error = "reportId or domain required for export" }, statusCode: 400)
+                : (IResult)new ForwardingResult(DependencyInjection.FileServiceClient, path, disableResponseBuffering: true);
+        });
+
+        // Excel workbook export -> FileService. Mirrors proxyToFileService.ts.
+        app.MapGet("/api/report/export-workbook", (HttpContext ctx) =>
+        {
+            var path = BuildFileServiceWorkbookPath(ctx.Request.Query);
+            return path is null
+                ? Results.Json(new { error = "reportId or domain required for workbook export" }, statusCode: 400)
+                : (IResult)new ForwardingResult(DependencyInjection.FileServiceClient, path, disableResponseBuffering: true);
+        });
+
+        // Sitemap export -> FileService (was Python via the catch-all; now rendered from Postgres).
+        app.MapGet("/api/report/export-sitemap", (HttpContext ctx) =>
+        {
+            var path = BuildFileServiceReportPath(ctx.Request.Query, "sitemap");
+            return path is null
+                ? Results.Json(new { error = "reportId or domain required for sitemap export" }, statusCode: 400)
+                : (IResult)new ForwardingResult(DependencyInjection.FileServiceClient, path, disableResponseBuffering: true);
+        });
+
+        // Catch-all: every other /api/* request -> FastAPI (streamed for remaining export routes),
+        // except paths in the DATA_ROUTES allowlist, which go to the internal Data service
+        // (GET reads + POST/PUT/DELETE mutations on matched prefixes).
+        // Auth still runs in AccessControlMiddleware before this delegate, so routing here doesn't
+        // change which roles may reach a path. Empty allowlist => nothing matches => all FastAPI.
+        app.Map("/api/{**rest}", (HttpContext ctx) =>
+        {
+            var path = ctx.Request.Path.Value ?? string.Empty;
+            // Any path containing "/export" is treated as a streamed download.
+            var streaming = path.Contains("/export", StringComparison.OrdinalIgnoreCase);
+
+            // NOTE(review): the generic type argument on the next line is missing (the call
+            // does not compile as written); restore the options type that exposes DataRoutes.
+            var upstream = ctx.RequestServices.GetRequiredService>().Value;
+            // NOTE(review): this is a plain string-prefix test, so a prefix also matches
+            // longer sibling paths that merely start with it, and an empty entry in
+            // DataRoutes would match every path — confirm the allowlist is normalised.
+            var matchesDataRoute = upstream.DataRoutes.Any(prefix =>
+                path.StartsWith(prefix, StringComparison.OrdinalIgnoreCase));
+            // Only these five methods are routed to the Data service; others fall through to FastAPI.
+            var toData = !streaming
+                && matchesDataRoute
+                && (HttpMethods.IsGet(ctx.Request.Method)
+                    || HttpMethods.IsHead(ctx.Request.Method)
+                    || HttpMethods.IsPost(ctx.Request.Method)
+                    || HttpMethods.IsPut(ctx.Request.Method)
+                    || HttpMethods.IsDelete(ctx.Request.Method));
+
+            var client = toData
+                ? DependencyInjection.DataClient
+                : streaming ? DependencyInjection.FastApiStreamClient : DependencyInjection.FastApiClient;
+
+            return (IResult)new ForwardingResult(
+                client,
+                $"{ctx.Request.Path}{ctx.Request.QueryString}",
+                disableResponseBuffering: streaming);
+        });
+    }
+
+    // Builds the FileService path for a report export. pdf carries profile/branding; csv/json/sitemap
+    // only need disposition. Returns null when neither reportId nor domain is supplied.
+    // reportId takes precedence over domain when both are present.
+    private static string? BuildFileServiceReportPath(IQueryCollection query, string format)
+    {
+        var reportId = query["reportId"].ToString();
+        var domain = query["domain"].ToString();
+        var disposition = Defaulted(query["disposition"].ToString(), "attachment");
+
+        string qs;
+        if (format == "pdf")
+        {
+            var profile = Defaulted(query["profile"].ToString(), "standard");
+            var branding = Defaulted(query["branding"].ToString(), "true");
+            qs = $"?profile={Uri.EscapeDataString(profile)}&disposition={Uri.EscapeDataString(disposition)}&branding={Uri.EscapeDataString(branding)}";
+        }
+        else
+        {
+            qs = $"?disposition={Uri.EscapeDataString(disposition)}";
+        }
+
+        if (!string.IsNullOrEmpty(reportId))
+        {
+            return $"/v1/reports/{Uri.EscapeDataString(reportId)}/{format}{qs}";
+        }
+        if (!string.IsNullOrEmpty(domain))
+        {
+            return $"/v1/reports/by-domain/{Uri.EscapeDataString(domain)}/{format}{qs}";
+        }
+        return null;
+    }
+
+    // Builds the FileService path for a workbook export; only disposition is forwarded.
+    // Returns null when neither reportId nor domain is supplied; reportId takes precedence.
+    private static string? BuildFileServiceWorkbookPath(IQueryCollection query)
+    {
+        var reportId = query["reportId"].ToString();
+        var domain = query["domain"].ToString();
+        var disposition = Defaulted(query["disposition"].ToString(), "attachment");
+        var qs = $"?disposition={Uri.EscapeDataString(disposition)}";
+
+        if (!string.IsNullOrEmpty(reportId))
+        {
+            return $"/v1/reports/{Uri.EscapeDataString(reportId)}/workbook{qs}";
+        }
+        if (!string.IsNullOrEmpty(domain))
+        {
+            return $"/v1/reports/by-domain/{Uri.EscapeDataString(domain)}/workbook{qs}";
+        }
+        return null;
+    }
+
+    // Returns fallback when value is null or empty, otherwise value unchanged.
+    private static string Defaulted(string value, string fallback) =>
+        string.IsNullOrEmpty(value) ? fallback : value;
+}
diff --git a/services/Bff/src/Bff.Api/Forwarding/ForwardingResult.cs b/services/Bff/src/Bff.Api/Forwarding/ForwardingResult.cs
new file mode 100644
index 00000000..16d36968
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Forwarding/ForwardingResult.cs
@@ -0,0 +1,16 @@
+namespace Bff.Api.Forwarding;
+
+/// <summary>An IResult that forwards the current request to a named upstream.</summary>
+/// <param name="clientName">Named HTTP client identifying the upstream.</param>
+/// <param name="pathAndQuery">Path and query string to request on the upstream.</param>
+/// <param name="disableResponseBuffering">True to stream the response without buffering (SSE, exports).</param>
+public sealed class ForwardingResult(string clientName, string pathAndQuery, bool disableResponseBuffering) : IResult
+{
+    /// <summary>
+    /// Resolves the forwarder from the request's services and forwards the request;
+    /// the request-aborted token cancels the upstream call when the client disconnects.
+    /// </summary>
+    public Task ExecuteAsync(HttpContext httpContext)
+    {
+        // The service type was missing here; IUpstreamForwarder is the interface in
+        // this namespace whose ForwardAsync takes exactly these arguments.
+        var forwarder = httpContext.RequestServices.GetRequiredService<IUpstreamForwarder>();
+        return forwarder.ForwardAsync(
+            httpContext,
+            clientName,
+            pathAndQuery,
+            disableResponseBuffering,
+            httpContext.RequestAborted);
+    }
+}
diff --git a/services/Bff/src/Bff.Api/Forwarding/IUpstreamForwarder.cs b/services/Bff/src/Bff.Api/Forwarding/IUpstreamForwarder.cs
new file mode 100644
index 00000000..c2a7b502
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Forwarding/IUpstreamForwarder.cs
@@ -0,0 +1,17 @@
+namespace Bff.Api.Forwarding;
+
+/// <summary>
+/// Generic reverse-proxy primitive: forwards the current request to a named upstream client
+/// and streams the response back. Handles opaque JSON payloads, SSE, and binary exports
+/// uniformly (the upstream Content-Type/Content-Disposition are preserved). Cookies are NOT
+/// forwarded upstream — the BFF terminates auth.
+/// </summary>
+public interface IUpstreamForwarder
+{
+    /// <summary>Forwards the request in <paramref name="context"/> and writes the upstream response to it.</summary>
+    /// <param name="context">Current request/response pair.</param>
+    /// <param name="clientName">Name of the HTTP client that identifies the upstream.</param>
+    /// <param name="pathAndQuery">Path and query string to request on the upstream.</param>
+    /// <param name="disableResponseBuffering">True to disable response buffering for streamed responses.</param>
+    /// <param name="cancellationToken">Cancels the upstream call.</param>
+    Task ForwardAsync(
+        HttpContext context,
+        string clientName,
+        string pathAndQuery,
+        bool disableResponseBuffering,
+        CancellationToken cancellationToken);
+}
diff --git a/services/Bff/src/Bff.Api/Forwarding/UpstreamForwarder.cs b/services/Bff/src/Bff.Api/Forwarding/UpstreamForwarder.cs
new file mode 100644
index 00000000..79806e37
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Forwarding/UpstreamForwarder.cs
@@ -0,0 +1,74 @@
+using Microsoft.AspNetCore.Http.Features;
+
+namespace Bff.Api.Forwarding;
+
+/// <summary>
+/// <see cref="IUpstreamForwarder"/> built on named <see cref="HttpClient"/> instances: replays the
+/// incoming request against the client's base address and copies the upstream status, a small
+/// set of headers, and the body onto the current response.
+/// </summary>
+public sealed class UpstreamForwarder(IHttpClientFactory factory) : IUpstreamForwarder
+{
+    /// <summary>Request headers copied upstream. Nothing else (Host, Cookie, ...) is forwarded.</summary>
+    private static readonly string[] ForwardableRequestHeaders = ["Accept", "Accept-Language"];
+
+    /// <summary>
+    /// Sends the current request to <paramref name="pathAndQuery"/> on the named client and streams
+    /// the upstream response back to the caller.
+    /// </summary>
+    /// <param name="context">The request to replay and the response to write to.</param>
+    /// <param name="clientName">Name of the <see cref="HttpClient"/> to create from the factory.</param>
+    /// <param name="pathAndQuery">Target resolved against the client's <see cref="HttpClient.BaseAddress"/>.</param>
+    /// <param name="disableResponseBuffering">When true, response buffering is disabled before the body is copied.</param>
+    /// <param name="cancellationToken">Cancels the upstream call and the body copy.</param>
+    /// <exception cref="InvalidOperationException">
+    /// The named client has no base address, or <paramref name="pathAndQuery"/> resolves to a
+    /// different scheme/host/port than the configured upstream.
+    /// </exception>
+    public async Task ForwardAsync(
+        HttpContext context,
+        string clientName,
+        string pathAndQuery,
+        bool disableResponseBuffering,
+        CancellationToken cancellationToken)
+    {
+        var client = factory.CreateClient(clientName);
+        var baseAddress = client.BaseAddress
+            ?? throw new InvalidOperationException($"HttpClient '{clientName}' has no BaseAddress configured.");
+        var target = new Uri(baseAddress, pathAndQuery);
+
+        // Uri(base, relative) lets an absolute or network-path ("//host/...") value replace the
+        // authority entirely. Refuse to leave the configured upstream so the forwarder cannot
+        // be steered at an arbitrary host.
+        if (Uri.Compare(baseAddress, target, UriComponents.SchemeAndServer, UriFormat.Unescaped,
+                StringComparison.OrdinalIgnoreCase) != 0)
+        {
+            throw new InvalidOperationException("Resolved upstream target is outside the configured base address.");
+        }
+
+        using var request = new HttpRequestMessage(new HttpMethod(context.Request.Method), target);
+
+        if (HasBody(context.Request.Method))
+        {
+            // The incoming body is streamed through rather than buffered.
+            request.Content = new StreamContent(context.Request.Body);
+            if (!string.IsNullOrEmpty(context.Request.ContentType))
+            {
+                request.Content.Headers.TryAddWithoutValidation("Content-Type", context.Request.ContentType);
+            }
+        }
+
+        // Forward a minimal allowlist of request headers (never Host/Cookie).
+        foreach (var name in ForwardableRequestHeaders)
+        {
+            if (context.Request.Headers.TryGetValue(name, out var values))
+            {
+                request.Headers.TryAddWithoutValidation(name, values.ToArray());
+            }
+        }
+
+        // ResponseHeadersRead: return as soon as headers arrive so the body can be streamed.
+        using var upstream = await client.SendAsync(
+            request,
+            HttpCompletionOption.ResponseHeadersRead,
+            cancellationToken);
+
+        context.Response.StatusCode = (int)upstream.StatusCode;
+
+        if (upstream.Content.Headers.ContentType is not null)
+        {
+            context.Response.ContentType = upstream.Content.Headers.ContentType.ToString();
+        }
+        if (upstream.Content.Headers.TryGetValues("Content-Disposition", out var disposition))
+        {
+            context.Response.Headers["Content-Disposition"] = disposition.ToArray();
+        }
+        // Pass redirects through (e.g. the Google OAuth consent/callback 302s).
+        if (upstream.Headers.Location is not null)
+        {
+            context.Response.Headers["Location"] = upstream.Headers.Location.ToString();
+        }
+
+        if (disableResponseBuffering)
+        {
+            // The feature type must be named explicitly: Features.Get has no non-generic overload.
+            context.Features.Get<IHttpResponseBodyFeature>()?.DisableBuffering();
+        }
+
+        await using var stream = await upstream.Content.ReadAsStreamAsync(cancellationToken);
+        await stream.CopyToAsync(context.Response.Body, cancellationToken);
+    }
+
+    /// <summary>True for the methods whose request body is forwarded: POST, PUT, PATCH and DELETE.</summary>
+    private static bool HasBody(string method) =>
+        HttpMethods.IsPost(method)
+        || HttpMethods.IsPut(method)
+        || HttpMethods.IsPatch(method)
+        || HttpMethods.IsDelete(method);
+}
diff --git a/services/Bff/src/Bff.Api/Infrastructure/UpstreamExceptionHandler.cs b/services/Bff/src/Bff.Api/Infrastructure/UpstreamExceptionHandler.cs
new file mode 100644
index 00000000..85a415ec
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Infrastructure/UpstreamExceptionHandler.cs
@@ -0,0 +1,46 @@
+using Microsoft.AspNetCore.Diagnostics;
+using Microsoft.AspNetCore.Mvc;
+
+namespace Bff.Api.Infrastructure;
+
+/// <summary>
+/// Right-sized error normalization: upstream connection failures → 502, upstream timeouts → 504,
+/// anything else → 500, all as ProblemDetails. 4xx/422 bodies from FastAPI are NOT remapped here —
+/// the forwarder passes them through verbatim so the frontend's existing validation parsing keeps working.
+/// </summary>
+public sealed class UpstreamExceptionHandler(ILogger<UpstreamExceptionHandler> logger) : IExceptionHandler
+{
+    /// <summary>
+    /// Converts an unhandled exception into a ProblemDetails response.
+    /// </summary>
+    /// <param name="context">The failing request.</param>
+    /// <param name="exception">The exception that escaped the pipeline.</param>
+    /// <param name="cancellationToken">Cancels writing the error body.</param>
+    /// <returns>
+    /// <c>true</c> when a response was written; <c>false</c> when the response has already started
+    /// or the client aborted the request, leaving the exception to the next handler.
+    /// </returns>
+    public async ValueTask<bool> TryHandleAsync(
+        HttpContext context,
+        Exception exception,
+        CancellationToken cancellationToken)
+    {
+        // Once the response has started streaming we can't change the status — let it bubble.
+        if (context.Response.HasStarted)
+        {
+            return false;
+        }
+
+        // Client disconnected: not our error to report.
+        if (exception is OperationCanceledException && context.RequestAborted.IsCancellationRequested)
+        {
+            return false;
+        }
+
+        var (status, title, upstreamFault) = exception switch
+        {
+            HttpRequestException => (StatusCodes.Status502BadGateway, "Upstream request failed", true),
+            TaskCanceledException or TimeoutException => (StatusCodes.Status504GatewayTimeout, "Upstream timed out", true),
+            _ => (StatusCodes.Status500InternalServerError, "Internal server error", false),
+        };
+
+        // An unclassified exception is a defect in the BFF itself, so it is logged at Error;
+        // upstream outages stay at Warning.
+        var level = upstreamFault ? LogLevel.Warning : LogLevel.Error;
+        logger.Log(level, exception, "BFF upstream error ({Status}) for {Method} {Path}",
+            status, context.Request.Method, context.Request.Path);
+
+        context.Response.StatusCode = status;
+        var problem = new ProblemDetails
+        {
+            Status = status,
+            Title = title,
+            // Arbitrary exception messages can expose internals, so the catch-all 500 carries
+            // only the generic title; the full exception is still in the log entry above.
+            Detail = upstreamFault ? exception.Message : null,
+        };
+        await context.Response.WriteAsJsonAsync(problem, problem.GetType(), options: null,
+            contentType: "application/problem+json", cancellationToken);
+        return true;
+    }
+}
diff --git a/services/Bff/src/Bff.Api/Program.cs b/services/Bff/src/Bff.Api/Program.cs
new file mode 100644
index 00000000..04b5fd5d
--- /dev/null
+++ b/services/Bff/src/Bff.Api/Program.cs
@@ -0,0 +1,90 @@
+using Bff.Api.Auth;
+using Bff.Api.Endpoints;
+using Bff.Api.Forwarding;
+using Bff.Api.Infrastructure;
+using Bff.Application;
+using Microsoft.AspNetCore.Authentication;
+using Microsoft.OpenApi;
+
+// Composition root for the BFF host: registers services, builds the middleware pipeline,
+// maps the endpoints and runs the app.
+// NOTE(review): AddSingleton(), AddScheme(...), AddExceptionHandler() and UseMiddleware() appear
+// below without type arguments — confirm the intended types against the real file.
+
+// Name under which the single CORS policy is registered and later applied.
+const string CorsPolicy = "bff";
+
+var builder = WebApplication.CreateBuilder(args);
+
+builder.Services.AddBffApplication();
+builder.Services.AddSingleton();
+
+// WpSessionDefaults.Scheme is both the default authentication scheme and the scheme registered here.
+builder.Services
+ .AddAuthentication(WpSessionDefaults.Scheme)
+ .AddScheme(WpSessionDefaults.Scheme, null);
+builder.Services.AddAuthorization();
+
+builder.Services.AddProblemDetails();
+builder.Services.AddExceptionHandler();
+
+// Credentialed CORS limited to the resolved origins; any header and any method is allowed.
+var corsOrigins = ResolveCorsOrigins(builder.Configuration);
+builder.Services.AddCors(options => options.AddPolicy(CorsPolicy, policy =>
+ policy.WithOrigins(corsOrigins)
+ .AllowAnyHeader()
+ .AllowAnyMethod()
+ .AllowCredentials()));
+
+builder.Services.AddEndpointsApiExplorer();
+builder.Services.AddSwaggerGen(options =>
+{
+ options.SwaggerDoc("v1", new OpenApiInfo
+ {
+ Title = "Website Profiling BFF",
+ Version = "v1",
+ Description =
+ "Backend-for-Frontend gateway: the single browser-facing API surface. Owns auth + CORS "
+ + "and proxies to the internal FastAPI and FileService backends.",
+ });
+});
+
+// Large uploads (logs/upload, credentials/upload, page-markdown/extract) — parity with the TS proxy.
+// The cap is 256 MiB.
+builder.WebHost.ConfigureKestrel(options => options.Limits.MaxRequestBodySize = 256L * 1024 * 1024);
+
+var app = builder.Build();
+
+// Registered first so it wraps everything added after it.
+app.UseExceptionHandler();
+
+// Swagger JSON and UI are exposed in Development only; the UI is served under /docs.
+if (app.Environment.IsDevelopment())
+{
+ app.UseSwagger();
+ app.UseSwaggerUI(options =>
+ {
+ options.SwaggerEndpoint("/swagger/v1/swagger.json", "Website Profiling BFF v1");
+ options.RoutePrefix = "docs";
+ });
+}
+
+// CORS before auth so denied (401/403) responses still carry CORS headers for the browser.
+app.UseCors(CorsPolicy);
+app.UseAuthentication();
+app.UseMiddleware();
+
+// Health endpoint: always answers 200 with {"status":"ok"}.
+app.MapGet("/health", () => Results.Ok(new { status = "ok" }))
+ .WithName("HealthCheck")
+ .WithTags("Health");
+
+app.MapAuthEndpoints();
+app.MapProxyEndpoints();
+
+app.Run();
+
+// Resolves the origins allowed by the CORS policy. Precedence:
+//   1. BFF_ALLOWED_ORIGINS environment variable (comma-separated),
+//   2. the Cors:AllowedOrigins configuration array,
+//   3. http://localhost:3000.
+// Never returns an empty array.
+static string[] ResolveCorsOrigins(IConfiguration config)
+{
+    var env = Environment.GetEnvironmentVariable("BFF_ALLOWED_ORIGINS");
+    if (!string.IsNullOrWhiteSpace(env))
+    {
+        var fromEnv = env.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
+        // A value made only of separators/blanks (e.g. ",") splits to nothing. Fall through
+        // instead of configuring a policy that allows no origin at all.
+        if (fromEnv.Length > 0)
+        {
+            return fromEnv;
+        }
+    }
+    // The target type must be named explicitly: Get has no parameterless non-generic overload.
+    var fromConfig = config.GetSection("Cors:AllowedOrigins").Get<string[]>();
+    if (fromConfig is { Length: > 0 })
+    {
+        return fromConfig;
+    }
+    return ["http://localhost:3000"];
+}
+
+// Declares the compiler-generated Program class as public.
+// NOTE(review): presumably so a test project can reference the entry point (e.g. WebApplicationFactory) — confirm.
+public partial class Program;
diff --git a/services/Bff/src/Bff.Api/appsettings.Development.json b/services/Bff/src/Bff.Api/appsettings.Development.json
new file mode 100644
index 00000000..308b3b5a
--- /dev/null
+++ b/services/Bff/src/Bff.Api/appsettings.Development.json
@@ -0,0 +1,22 @@
+{
+ "Upstream": {
+ "DataBaseUrl": "http://127.0.0.1:8091",
+ "DataRoutes": [
+ "/api/report/meta",
+ "/api/report/payload",
+ "/api/report/history",
+ "/api/report/crawl-payload",
+ "/api/report/mobile-delta",
+ "/api/report/portfolio",
+ "/api/portfolio",
+ "/api/issues/status",
+ "/api/filters"
+ ]
+ },
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning"
+ }
+ }
+}
diff --git a/services/Bff/src/Bff.Api/appsettings.json b/services/Bff/src/Bff.Api/appsettings.json
new file mode 100644
index 00000000..c37353ca
--- /dev/null
+++ b/services/Bff/src/Bff.Api/appsettings.json
@@ -0,0 +1,23 @@
+{
+ "Urls": "http://127.0.0.1:8090",
+ "Upstream": {
+ "FastApiBaseUrl": "http://127.0.0.1:8001",
+ "FileServiceBaseUrl": "http://127.0.0.1:8080",
+ "TimeoutSeconds": 120
+ },
+ "Auth": {
+ "CookieSameSite": "Lax",
+ "CookieSecure": false,
+ "DefaultRole": "analyst"
+ },
+ "Cors": {
+ "AllowedOrigins": ["http://localhost:3000"]
+ },
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning"
+ }
+ },
+ "AllowedHosts": "*"
+}
diff --git a/services/Bff/src/Bff.Application/Auth/WpSessionTokens.cs b/services/Bff/src/Bff.Application/Auth/WpSessionTokens.cs
new file mode 100644
index 00000000..cade4c33
--- /dev/null
+++ b/services/Bff/src/Bff.Application/Auth/WpSessionTokens.cs
@@ -0,0 +1,148 @@
+using System.Globalization;
+using System.Security.Cryptography;
+using System.Text;
+
+namespace Bff.Application.Auth;
+
+/// <summary>
+/// Issues and validates the wp_session token so that it stays byte-for-byte interchangeable
+/// with the TypeScript implementation in web/src/server/auth.ts. That compatibility is
+/// load-bearing: sessions issued before the cutover must keep validating, so hex casing,
+/// exp parsing and dot-splitting all follow the TS behaviour.
+///
+/// Token layout: {role}:{exp}.{sig}, where sig is the HMAC-SHA256 of "{role}:{exp}" keyed by
+/// the secret, rendered as hex.
+/// - sig is emitted in lowercase but accepted in either case (Node's hex decoding is
+///   case-insensitive, so signatures are compared as bytes).
+/// - exp is read the way JS parseInt reads it (leading numeric prefix), not by strict parsing.
+/// </summary>
+public static class WpSessionTokens
+{
+    public const string CookieName = "wp_session";
+
+    /// <summary>Lowercase-hex HMAC-SHA256 of <paramref name="payload"/> keyed by <paramref name="secret"/>. Mirrors TS signToken().</summary>
+    public static string Sign(string payload, string secret) =>
+        Convert.ToHexStringLower(ComputeMac(payload, secret));
+
+    /// <summary>
+    /// Builds "{role}:{exp}.{sig}" with exp = now + max age. Mirrors TS createSessionToken().
+    /// Yields the empty string when no secret is configured.
+    /// </summary>
+    public static string Create(string role, string secret, long nowUnixSeconds, int maxAgeSeconds)
+    {
+        if (string.IsNullOrEmpty(secret))
+        {
+            return string.Empty;
+        }
+
+        var body = $"{role}:{nowUnixSeconds + maxAgeSeconds}";
+        return $"{body}.{Sign(body, secret)}";
+    }
+
+    /// <summary>
+    /// Mirrors TS verifySessionToken(): the role carried by a correctly signed, unexpired
+    /// token, or null for anything else (including an empty token or empty secret).
+    /// </summary>
+    public static string? VerifyRole(string? token, string secret, long nowUnixSeconds)
+    {
+        if (string.IsNullOrEmpty(token) || string.IsNullOrEmpty(secret))
+        {
+            return null;
+        }
+
+        // TS requires token.split('.') to produce exactly two pieces.
+        var halves = token.Split('.');
+        if (halves.Length != 2)
+        {
+            return null;
+        }
+        var body = halves[0];
+
+        // Signatures are compared as decoded bytes in fixed time, so hex casing is irrelevant;
+        // undecodable hex or a length mismatch is a plain rejection.
+        if (!TryDecodeHex(halves[1], out var presented))
+        {
+            return null;
+        }
+        var expected = ComputeMac(body, secret);
+        if (presented.Length != expected.Length
+            || !CryptographicOperations.FixedTimeEquals(presented, expected))
+        {
+            return null;
+        }
+
+        // TS: const [role, expStr] = payload.split(':'); Split always yields at least one element.
+        var fields = body.Split(':');
+        var role = fields[0];
+        var expText = fields.Length > 1 ? fields[1] : null;
+
+        // TS: parseInt(expStr || '0', 10), then reject on empty role, NaN, or exp < now.
+        var expiry = JsParseInt(string.IsNullOrEmpty(expText) ? "0" : expText);
+        var accepted = !string.IsNullOrEmpty(role) && expiry is long when && when >= nowUnixSeconds;
+        return accepted ? role : null;
+    }
+
+    /// <summary>
+    /// JavaScript parseInt(str, 10): skips leading whitespace, takes an optional sign, then reads
+    /// the leading run of decimal digits and ignores whatever follows. Null stands in for NaN
+    /// (no digits at all). Example: long.TryParse("123abc") fails, but this returns 123.
+    /// </summary>
+    public static long? JsParseInt(string? input)
+    {
+        if (input is null)
+        {
+            return null;
+        }
+
+        var pos = 0;
+        for (; pos < input.Length && char.IsWhiteSpace(input[pos]); pos++)
+        {
+        }
+
+        var negative = false;
+        if (pos < input.Length && (input[pos] is '+' or '-'))
+        {
+            negative = input[pos] == '-';
+            pos++;
+        }
+
+        var firstDigit = pos;
+        for (; pos < input.Length && (input[pos] is >= '0' and <= '9'); pos++)
+        {
+        }
+        if (pos == firstDigit)
+        {
+            return null; // no digits -> NaN
+        }
+
+        // JS would carry an over-long digit run as a double; exp values are 10-digit unix
+        // seconds, so long suffices, and overflow is pinned to the signed extreme of long.MaxValue.
+        var digits = input[firstDigit..pos];
+        if (long.TryParse(digits, NumberStyles.None, CultureInfo.InvariantCulture, out var magnitude))
+        {
+            return negative ? -magnitude : magnitude;
+        }
+        return negative ? -long.MaxValue : long.MaxValue;
+    }
+
+    /// <summary>Raw HMAC-SHA256 bytes of the UTF-8 payload under the UTF-8 secret.</summary>
+    private static byte[] ComputeMac(string payload, string secret)
+    {
+        using var hmac = new HMACSHA256(Encoding.UTF8.GetBytes(secret));
+        return hmac.ComputeHash(Encoding.UTF8.GetBytes(payload));
+    }
+
+    /// <summary>Decodes hex of either casing; false when the text is not valid hex.</summary>
+    private static bool TryDecodeHex(string hex, out byte[] bytes)
+    {
+        try
+        {
+            bytes = Convert.FromHexString(hex);
+            return true;
+        }
+        catch (FormatException)
+        {
+            bytes = [];
+            return false;
+        }
+    }
+}
diff --git a/services/Bff/src/Bff.Application/Bff.Application.csproj b/services/Bff/src/Bff.Application/Bff.Application.csproj
new file mode 100644
index 00000000..de38ac1e
--- /dev/null
+++ b/services/Bff/src/Bff.Application/Bff.Application.csproj
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+ net10.0
+ enable
+ enable
+
+
+
diff --git a/services/Bff/src/Bff.Application/DependencyInjection.cs b/services/Bff/src/Bff.Application/DependencyInjection.cs
new file mode 100644
index 00000000..668173d2
--- /dev/null
+++ b/services/Bff/src/Bff.Application/DependencyInjection.cs
@@ -0,0 +1,160 @@
+using Bff.Application.Http;
+using Bff.Application.Options;
+using Microsoft.Extensions.DependencyInjection;
+
+namespace Bff.Application;
+
+/// <summary>
+/// Service registration for the BFF application layer: binds the upstream/auth/CORS options
+/// (with environment-variable overrides) and registers the named and typed HttpClients.
+/// </summary>
+public static class DependencyInjection
+{
+ /// <summary>Named HttpClient for normal JSON proxying to FastAPI (with idempotent retry).</summary>
+ public const string FastApiClient = "fastapi";
+
+ /// <summary>Named HttpClient for streaming proxying to FastAPI (SSE/exports) — NO retry/buffering.</summary>
+ public const string FastApiStreamClient = "fastapi-stream";
+
+ /// <summary>Named HttpClient for the FileService (PDF/Excel exports) — streaming, no retry.</summary>
+ public const string FileServiceClient = "fileservice";
+
+ /// <summary>Named HttpClient for the internal Data service (direct-Postgres reads) — idempotent retry.</summary>
+ public const string DataClient = "data";
+
+ /// <summary>
+ /// Registers options and HttpClients on <paramref name="services"/> and returns the same collection.
+ /// Environment variables are applied in PostConfigure, so they win over bound configuration.
+ /// </summary>
+ // NOTE(review): AddOptions(), AddTransient(), AddHttpClient() and AddHttpMessageHandler() appear
+ // below without type arguments — confirm the intended types against the real file.
+ // NOTE(review): no primary handler is configured for any client in this method. HttpClient's
+ // default handler follows redirects automatically, so a caller that expects to relay upstream
+ // 3xx responses would not see them unless AllowAutoRedirect is disabled elsewhere — confirm.
+ public static IServiceCollection AddBffApplication(this IServiceCollection services)
+ {
+ // Upstream options: each variable below replaces the bound value only when set and non-blank.
+ services.AddOptions()
+ .BindConfiguration(UpstreamOptions.SectionName)
+ .PostConfigure(o =>
+ {
+ var fastapi = Environment.GetEnvironmentVariable("FASTAPI_URL");
+ if (!string.IsNullOrWhiteSpace(fastapi))
+ {
+ o.FastApiBaseUrl = fastapi.Trim();
+ }
+ var files = Environment.GetEnvironmentVariable("FILE_SERVICE_URL");
+ if (!string.IsNullOrWhiteSpace(files))
+ {
+ o.FileServiceBaseUrl = files.Trim();
+ }
+ var data = Environment.GetEnvironmentVariable("DATA_SERVICE_URL");
+ if (!string.IsNullOrWhiteSpace(data))
+ {
+ o.DataBaseUrl = data.Trim();
+ }
+ // DATA_ROUTES is a comma-separated list; blank entries are dropped and each entry is trimmed.
+ var dataRoutes = Environment.GetEnvironmentVariable("DATA_ROUTES");
+ if (!string.IsNullOrWhiteSpace(dataRoutes))
+ {
+ o.DataRoutes = dataRoutes
+ .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
+ }
+ });
+
+ services.AddOptions()
+ .BindConfiguration(AuthOptions.SectionName)
+ .PostConfigure(o =>
+ {
+ // AUTH_SECRET takes precedence; SESSION_SECRET is consulted only when AUTH_SECRET is unset.
+ var secret = Environment.GetEnvironmentVariable("AUTH_SECRET")
+ ?? Environment.GetEnvironmentVariable("SESSION_SECRET");
+ if (!string.IsNullOrWhiteSpace(secret))
+ {
+ o.Secret = secret.Trim();
+ }
+ var user = Environment.GetEnvironmentVariable("AUTH_USER");
+ if (!string.IsNullOrWhiteSpace(user))
+ {
+ o.BasicUser = user.Trim();
+ }
+ // Unlike the other variables, AUTH_PASSWORD overrides whenever it is set at all,
+ // even to an empty or blank value (which trims to "").
+ var pass = Environment.GetEnvironmentVariable("AUTH_PASSWORD");
+ if (pass is not null)
+ {
+ o.BasicPassword = pass.Trim();
+ }
+ var role = Environment.GetEnvironmentVariable("AUTH_DEFAULT_ROLE");
+ if (!string.IsNullOrWhiteSpace(role))
+ {
+ o.DefaultRole = role.Trim();
+ }
+ var sameSite = Environment.GetEnvironmentVariable("BFF_COOKIE_SAMESITE");
+ if (!string.IsNullOrWhiteSpace(sameSite))
+ {
+ o.CookieSameSite = sameSite.Trim();
+ }
+ // Only "true" (any casing) enables the flag; every other non-blank value turns it off.
+ var secure = Environment.GetEnvironmentVariable("BFF_COOKIE_SECURE");
+ if (!string.IsNullOrWhiteSpace(secure))
+ {
+ o.CookieSecure = secure.Trim().Equals("true", StringComparison.OrdinalIgnoreCase);
+ }
+ var domain = Environment.GetEnvironmentVariable("BFF_COOKIE_DOMAIN");
+ if (!string.IsNullOrWhiteSpace(domain))
+ {
+ o.CookieDomain = domain.Trim();
+ }
+ });
+
+ services.AddOptions()
+ .BindConfiguration(BffCorsOptions.SectionName)
+ .PostConfigure(o =>
+ {
+ var origins = Environment.GetEnvironmentVariable("BFF_ALLOWED_ORIGINS");
+ if (!string.IsNullOrWhiteSpace(origins))
+ {
+ o.AllowedOrigins = origins
+ .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
+ }
+ });
+
+ services.AddTransient();
+
+ // Timeouts on the non-streaming clients are floored at 5 seconds regardless of configuration.
+ services.AddHttpClient(FastApiClient)
+ .ConfigureHttpClient((sp, client) =>
+ {
+ var opts = GetUpstream(sp);
+ client.BaseAddress = NormalizeBase(opts.FastApiBaseUrl);
+ client.Timeout = TimeSpan.FromSeconds(Math.Max(5, opts.TimeoutSeconds));
+ })
+ .AddHttpMessageHandler();
+
+ // Internal Data service (direct-Postgres reads + issue/portfolio/filter mutations).
+ // GET/HEAD retry is safe; POST/PUT/DELETE are forwarded without retry.
+ // NOTE(review): assumes DataBaseUrl is a non-empty absolute URL. The base appsettings.json in
+ // this change sets no Upstream:DataBaseUrl — verify UpstreamOptions supplies a default.
+ services.AddHttpClient(DataClient)
+ .ConfigureHttpClient((sp, client) =>
+ {
+ var opts = GetUpstream(sp);
+ client.BaseAddress = NormalizeBase(opts.DataBaseUrl);
+ client.Timeout = TimeSpan.FromSeconds(Math.Max(5, opts.TimeoutSeconds));
+ })
+ .AddHttpMessageHandler();
+
+ services.AddHttpClient(FastApiStreamClient)
+ .ConfigureHttpClient((sp, client) =>
+ {
+ client.BaseAddress = NormalizeBase(GetUpstream(sp).FastApiBaseUrl);
+ client.Timeout = Timeout.InfiniteTimeSpan; // SSE/streaming: do not cut the body
+ });
+
+ services.AddHttpClient(FileServiceClient)
+ .ConfigureHttpClient((sp, client) =>
+ {
+ var opts = GetUpstream(sp);
+ client.BaseAddress = NormalizeBase(opts.FileServiceBaseUrl);
+ client.Timeout = TimeSpan.FromSeconds(Math.Max(5, opts.TimeoutSeconds));
+ });
+
+ // Typed FastAPI client generated from web/openapi.json (NSwag). The bulk of the gateway
+ // proxies opaque payloads via the generic forwarder; this typed client is available for
+ // aggregation/composition endpoints that need to read upstream responses by shape.
+ services.AddHttpClient()
+ .ConfigureHttpClient((sp, client) =>
+ {
+ var opts = GetUpstream(sp);
+ client.BaseAddress = NormalizeBase(opts.FastApiBaseUrl);
+ client.Timeout = TimeSpan.FromSeconds(Math.Max(5, opts.TimeoutSeconds));
+ })
+ .AddHttpMessageHandler();
+
+ return services;
+ }
+
+ // Reads the current UpstreamOptions value from the service provider.
+ private static UpstreamOptions GetUpstream(IServiceProvider sp) =>
+ sp.GetRequiredService>().Value;
+
+ // Ensures the base URL ends in exactly one '/', so relative paths resolve beneath it
+ // instead of replacing its last path segment.
+ private static Uri NormalizeBase(string url) => new(url.TrimEnd('/') + "/");
+}
diff --git a/services/Bff/src/Bff.Application/Generated/FastApiClient.g.cs b/services/Bff/src/Bff.Application/Generated/FastApiClient.g.cs
new file mode 100644
index 00000000..afe2dcdb
--- /dev/null
+++ b/services/Bff/src/Bff.Application/Generated/FastApiClient.g.cs
@@ -0,0 +1,13145 @@
+//----------------------
+//
+// Generated using the NSwag toolchain v14.7.1.0 (NJsonSchema v11.6.1.0 (Newtonsoft.Json v13.0.0.0)) (http://NSwag.org)
+//
+//----------------------
+
+#nullable enable
+
+#pragma warning disable 108 // Disable "CS0108 '{derivedDto}.ToJson()' hides inherited member '{dtoBase}.ToJson()'. Use the new keyword if hiding was intended."
+#pragma warning disable 114 // Disable "CS0114 '{derivedDto}.RaisePropertyChanged(String)' hides inherited member 'dtoBase.RaisePropertyChanged(String)'. To make the current member override that implementation, add the override keyword. Otherwise add the new keyword."
+#pragma warning disable 472 // Disable "CS0472 The result of the expression is always 'false' since a value of type 'Int32' is never equal to 'null' of type 'Int32?'
+#pragma warning disable 612 // Disable "CS0612 '...' is obsolete"
+#pragma warning disable 649 // Disable "CS0649 Field is never assigned to, and will always have its default value null"
+#pragma warning disable 1573 // Disable "CS1573 Parameter '...' has no matching param tag in the XML comment for ...
+#pragma warning disable 1591 // Disable "CS1591 Missing XML comment for publicly visible type or member ..."
+#pragma warning disable 8073 // Disable "CS8073 The result of the expression is always 'false' since a value of type 'T' is never equal to 'null' of type 'T?'"
+#pragma warning disable 3016 // Disable "CS3016 Arrays as attribute arguments is not CLS-compliant"
+#pragma warning disable 8600 // Disable "CS8600 Converting null literal or possible null value to non-nullable type"
+#pragma warning disable 8602 // Disable "CS8602 Dereference of a possibly null reference"
+#pragma warning disable 8603 // Disable "CS8603 Possible null reference return"
+#pragma warning disable 8604 // Disable "CS8604 Possible null reference argument for parameter"
+#pragma warning disable 8625 // Disable "CS8625 Cannot convert null literal to non-nullable reference type"
+#pragma warning disable 8765 // Disable "CS8765 Nullability of type of parameter doesn't match overridden member (possibly because of nullability attributes)."
+
+namespace Bff.Application.Generated
+{
+ using System = global::System;
+
+ [System.CodeDom.Compiler.GeneratedCode("NSwag", "14.7.1.0 (NJsonSchema v11.6.1.0 (Newtonsoft.Json v13.0.0.0))")]
+ public partial interface IFastApiClient
+ {
+
+ /// A cancellation token that can be used by other objects or threads to receive notice of cancellation.
+ ///
+ /// Health Check
+ ///
+ /// Successful Response
+ /// A server side error occurred.
+ System.Threading.Tasks.Task