diff --git a/AGENT.md b/AGENT.md
index cc1ac0bd..62427d68 100644
--- a/AGENT.md
+++ b/AGENT.md
@@ -43,14 +43,47 @@ Developer reference for agents and contributors. User-facing overview: [README.m
| Local analysis | `analysis/local.py`, `requirements.txt` |
| AI insights (LLM) | `llm/enrich.py`, `llm/agent.py`, `llm_config.py`, `requirements.txt` |
| Audit query tools (MCP + chat) | `tools/audit_tools/`, `mcp/server.py`, `mcp/http_server.py`, `commands/chat_cmd.py` |
-| Agent readiness checks | `tools/audit_tools/agent_readiness.py`, `tools/audit_tools/_aeo_helpers.py` |
+| Agent readiness checks | `tools/audit_tools/geo/agent_readiness.py`, `tools/audit_tools/_aeo_helpers.py` |
| Config / CLI | `config.py` (`load_config`, `load_config_from_db`), `cli.py`, `input.txt.example` |
| UI pipeline schema | `web/src/lib/pipelineConfigSchema.ts` |
| UI LLM schema | `web/src/lib/llmConfigSchema.ts` |
| UI config I/O | `web/src/server/pipelineConfig.ts`, `web/src/server/llmConfig.ts` |
+| D3 charts (custom / compare / overview) | `web/src/components/charts/d3/`, `web/src/lib/viz/` |
+| Chart.js charts (standard bar/line/doughnut) | `web/src/utils/chartJsDefaults.ts`, `react-chartjs-2` in views under `web/src/views/`, `web/src/components/searchPerformance/`, `web/src/components/traffic/` |
Schema changes: add Alembic migration (`alembic revision`).
+**Charts — Chart.js + D3 (hybrid)**
+
+The web UI uses **both** Chart.js and D3.js. Pick the library that fits each chart; do not migrate everything to one stack.
+
+| Prefer **Chart.js** when… | Prefer **D3** when… |
+|---------------------------|---------------------|
+| Standard bar, line, or doughnut with typical legend/tooltip/responsive canvas | Custom layout (grouped compare bars, dual lines with null gaps, arc gauges) |
+| Quick add with minimal custom SVG | Tight theme control via CSS vars (`--chart-grid`, `--chart-title`, etc.) |
+| Page already on Chart.js (GSC, GA4, Links, Content Analytics) | Reusing shared components in `web/src/components/charts/d3/` |
+| Chart.js plugins or defaults are enough | Neutral data types + adapters in `web/src/lib/viz/` |
+
+**Decision rule:** If a D3 component already exists (`D3GroupedBarChart`, `D3DualLineChart`, `D3VerticalBarChart`, `D3DonutChart`, compact charts, `arcGauge.ts`), reuse it. If it is a one-off standard chart on a Chart.js page, stay on Chart.js unless D3 clearly wins.
+
+**Current split (indicative)**
+
+| Area | Library |
+|------|---------|
+| Overview dashboard (`/dashboard`) | D3 |
+| Compare (`/compare`) | D3 |
+| Content analytics — Analytics tab (`/content-analytics?tab=analytics`) | D3 |
+| GSC / GA4 / scatter (`GscCharts`, `Ga4Charts`) | Chart.js |
+| Links explorer, Content Analytics, Text Content Analysis | Chart.js |
+| Score rings, distribution donuts, compact sparklines | D3 |
+
+**Conventions (both stacks)**
+
+- Wrap charts in `ChartPanel`, `ChartAccessibleFallback`, and/or `ChartCard` where applicable.
+- Theme helpers live in `web/src/utils/chartJsDefaults.ts` (`getGridColor`, `getChartTitleColor`, `truncateChartLabel`) — use them from D3 as well as Chart.js.
+- Keep chart-library types out of data-prep: use neutral shapes (`BarChartData`, `DualSeriesChartData` in `web/src/lib/viz/types.ts` and `web/src/lib/compareChartData.ts`); convert at the render layer via `web/src/lib/viz/adapters.ts` when needed.
+- Migrate page-by-page when D3 is the better fit; do not remove `chart.js` from `package.json` until all consumers are migrated.
+
**Company standards:** UI copy in `web/src/strings.json` (Site Audit, Properties, Run audit). Data provenance on `report_meta` in report payload. Docs: `docs/COMPANY_STANDARDS.md`, `docs/GLOSSARY.md`. Migration `003_company_standards` (properties, pipeline_jobs, audit_log). Durable jobs in `web/src/server/pipelineJobsDb.ts`. Export: `GET /api/report/export`, `src/website_profiling/tools/export_audit.py`.
**Common footguns (check before finishing web or DB work)**
diff --git a/AGENTS.md b/AGENTS.md
index e2d8d6d8..878fe63e 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -32,8 +32,11 @@ python -m website_profiling.mcp # Start MCP server (stdio)
|------|-------|
| Crawl | `src/website_profiling/crawl/` |
| Report | `src/website_profiling/reporting/` |
-| GEO / AEO / Agent readiness | `src/website_profiling/tools/audit_tools/geo_tools.py`, `agent_readiness.py` |
+| GEO / AEO / Agent readiness | `src/website_profiling/tools/audit_tools/geo/geo_tools.py`, `geo/agent_readiness.py` |
| DB schema | `alembic/versions/` |
| UI | `web/src/views/`, `web/app/` |
+| Charts | D3: `web/src/components/charts/d3/`, `web/src/lib/viz/` · Chart.js: GSC/GA4/Links etc. — see [AGENT.md](AGENT.md) § Charts |
+
+**Charts:** Use **both** Chart.js and D3 — choose per chart (Overview/Compare → D3; standard GSC/GA4 bars → Chart.js). Full rules in [AGENT.md](AGENT.md).
**Common pitfalls:** See [AGENT.md](AGENT.md) for the full footguns checklist (React context, Python local imports, psycopg dict rows, coverage gates).
diff --git a/README.md b/README.md
index d67dbc2e..3c02d82a 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@
Quick start ·
+ Feedback loop ·
Features ·
Limitations ·
Structure ·
@@ -50,8 +51,33 @@ Site Audit is a **developer-friendly SEO audit** tool: self-hosted, transparent,
- Content writing and optimization with live SEO scoring
- Search Console, GA4, and Bing Webmaster integration
- Agency portfolio management and run comparison
+- **Closed-loop SEO workflow** — audit, report, feed data to IDE agents via MCP, fix in code, review and compare
- Optional AI-assisted analysis over audit data via MCP-compatible tools
+## SEO feedback loop
+
+Site Audit is built for a **continuous improve-and-verify cycle**, not one-off dashboard checks. Crawl your site, generate reports, expose audit data to AI agents in **Cursor, Claude Code, or Copilot** via [340 MCP tools](docs/MCP.md), fix issues in your repository, then **review** the next run to compare health scores and issue deltas.
+
+```text
+Audit → Report → MCP → Fix → Review → (repeat)
+```
+
+
+![SEO feedback loop: Audit → Report → MCP → Fix → Review](docs/assets/seo-feedback-loop.png)
+
+
+**How each step maps to the product**
+
+| Step | What you do | In Site Audit |
+|------|-------------|---------------|
+| **Audit** | Crawl and score the site | Pipeline (`python -m src`), Lighthouse, on-page checks |
+| **Report** | Export and prioritize fixes | PDF/HTML/CSV exports, issue board, fix roadmap |
+| **MCP** | Pull audit context into your IDE | `python -m website_profiling.mcp` — read-only tools for Cursor / Claude Desktop |
+| **Fix** | Ship changes in your codebase | Your PR workflow (MCP does not write to the site) |
+| **Review** | Prove improvement | Compare runs, category deltas, GSC metric changes |
+
+See [docs/MCP.md](docs/MCP.md) for MCP setup and example prompts (e.g. compare two reports, export issue diffs).
+
## Scope and limitations
Site Audit focuses on **honest, self-hosted technical SEO**. It is not a drop-in replacement for every paid SaaS data product.
@@ -93,7 +119,7 @@ Site Audit focuses on **honest, self-hosted technical SEO**. It is not a drop-in
-Also included: **AI chat** over audit data (optional), **Content studio** (write & optimize with live SEO scoring), **340 MCP tools** (local stdio or remote Streamable HTTP), image SEO, GEO/AEO readiness, keyword explorer (GSC + on-site), backlinks (GSC Links import), compare runs, and portfolio management for agencies.
+Also included: **AI chat** over audit data (optional), **Content studio** (write & optimize with live SEO scoring), **340 MCP tools** (local stdio or remote Streamable HTTP), image SEO, GEO/AEO readiness, keyword explorer (GSC + on-site), backlinks (GSC Links import), compare runs, portfolio management for agencies, and the **agent-driven feedback loop** above.
diff --git a/alembic/versions/024_app_settings.py b/alembic/versions/024_app_settings.py
new file mode 100644
index 00000000..f427784d
--- /dev/null
+++ b/alembic/versions/024_app_settings.py
@@ -0,0 +1,30 @@
+"""Add app_settings table for generic application-level key-value settings.
+
+Used to persist appearance customisations (custom color palette, etc.) and
+any future app-level preferences that have no dedicated table.
+
+Revision ID: 024_app_settings
+Revises: 023_crawl_page_markdown
+"""
+from __future__ import annotations
+
+from alembic import op
+
+revision = "024_app_settings"
+down_revision = "023_crawl_page_markdown"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+ op.execute("""
+ CREATE TABLE app_settings (
+ key TEXT NOT NULL PRIMARY KEY,
+ value TEXT NOT NULL DEFAULT '',
+ updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+ )
+ """)
+
+
+def downgrade() -> None:
+ op.execute("DROP TABLE IF EXISTS app_settings")
diff --git a/docs/README.md b/docs/README.md
index 94c08965..24dcbe1c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -25,6 +25,7 @@ Marketing and README assets are stored in [assets/](assets/):
| Asset | Purpose |
|-------|---------|
| `readme-banner.png` | README header banner |
+| `seo-feedback-loop.png` | SEO feedback loop diagram (Audit → Report → MCP → Fix → Review) |
| `social-preview.png` | Application screenshot for README and social previews |
| `banner.svg` | Source artwork for the banner |
| `logo.svg`, `logo-icon.svg` | Product logo and icon |
diff --git a/docs/assets/seo-feedback-loop.png b/docs/assets/seo-feedback-loop.png
new file mode 100644
index 00000000..f243a519
Binary files /dev/null and b/docs/assets/seo-feedback-loop.png differ
diff --git a/local-prod b/local-prod
new file mode 100755
index 00000000..c96cbba4
--- /dev/null
+++ b/local-prod
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+exec "$(cd "$(dirname "$0")" && pwd)/scripts/local-prod.sh" "$@"
diff --git a/scripts/local-prod.sh b/scripts/local-prod.sh
new file mode 100755
index 00000000..61cd1ab9
--- /dev/null
+++ b/scripts/local-prod.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# Local prod: same Postgres as ./local-run, Next.js build + start (NODE_ENV=production).
+# Usage: ./local-prod [command]
+# (default) start [--skip-build] — DB, migrations, npm run build (skipped with --skip-build), npm run start
+# build — npm run build only
+# help — show commands
+set -euo pipefail
+
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+cd "$ROOT"
+
+PG_CONTAINER="${WP_PG_CONTAINER:-wp-pg}"
+PG_PORT="${WP_PG_PORT:-5432}"
+PG_USER="${WP_PG_USER:-postgres}"
+PG_PASSWORD="${WP_PG_PASSWORD:-dev}"
+PG_DB="${WP_PG_DB:-website_profiling}"
+
+export DATABASE_URL="${DATABASE_URL:-postgres://${PG_USER}:${PG_PASSWORD}@127.0.0.1:${PG_PORT}/${PG_DB}}"
+export DATA_DIR="${DATA_DIR:-$ROOT/data}"
+export PYTHON="${PYTHON:-$ROOT/.venv/bin/python}"
+export WEBSITE_PROFILING_ROOT="$ROOT"
+export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$ROOT/src"
+export NODE_ENV=production
+
+WEB="$ROOT/web"
+LOCAL_RUN="$ROOT/scripts/local-run.sh"
+
+log() { printf '\033[1;36m→\033[0m %s\n' "$*"; }
+die() { printf '\033[1;31m✗\033[0m %s\n' "$*" >&2; exit 1; }
+
+need_cmd() {
+ command -v "$1" >/dev/null 2>&1 || die "Missing required command: $1"
+}
+
+cmd_web_deps() {
+ need_cmd npm
+ if [[ ! -d "$WEB/node_modules" ]]; then
+ log "Installing web dependencies (npm ci)"
+ (cd "$WEB" && npm ci)
+ fi
+}
+
+cmd_build() {
+ cmd_web_deps
+ log "Building Next.js (production)"
+ (cd "$WEB" && npm run build)
+}
+
+cmd_start() {
+ local skip_build=0
+ for arg in "$@"; do
+ case "$arg" in
+ --skip-build) skip_build=1 ;;
+ esac
+ done
+
+ mkdir -p "$DATA_DIR"
+ log "Ensuring Postgres and migrations (via ./local-run migrate)"
+ "$LOCAL_RUN" migrate
+ if [[ "$skip_build" -eq 0 ]]; then
+ cmd_build
+ else
+ cmd_web_deps
+ log "Skipping build (--skip-build)"
+ fi
+ log "Starting Next.js production server (Ctrl+C to stop)"
+ log "DATABASE_URL=$DATABASE_URL"
+ log "DATA_DIR=$DATA_DIR"
+ log "PYTHON=$PYTHON"
+ log "NODE_ENV=$NODE_ENV"
+ cd "$WEB"
+ export DATABASE_URL DATA_DIR PYTHON WEBSITE_PROFILING_ROOT PYTHONPATH NODE_ENV
+ exec npm run start
+}
+
+cmd_help() {
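+ cat <<EOF
+Usage: ./local-prod [command]
+
+Commands:
+ start [--skip-build] DB, migrations, npm run build, npm run start (default)
+ build npm run build only
+ help show commands
+
+Environment overrides (optional):
+ DATABASE_URL (default: local Postgres on 127.0.0.1)
+ PYTHON (default: .venv/bin/python)
+ DATA_DIR (default: <repo>/data)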
+ AUTH_SECRET (optional — enables login when set)
+ WP_PG_CONTAINER, WP_PG_PORT, WP_PG_PASSWORD, WP_PG_DB
+
+After start, open: http://localhost:3000/home
+Use localhost (not 127.0.0.1) for pipeline APIs.
+
+Dev mode with hot reload: ./local-run start
+EOF
+}
+
+main() {
+ local cmd="${1:-start}"
+ case "$cmd" in
+ start)
+ shift || true
+ cmd_start "$@"
+ ;;
+ build) cmd_build ;;
+ help|-h|--help) cmd_help ;;
+ *)
+ die "Unknown command: $cmd (try: ./local-prod help)"
+ ;;
+ esac
+}
+
+main "$@"
diff --git a/scripts/local-run.sh b/scripts/local-run.sh
index e9e79df5..4f84fea5 100755
--- a/scripts/local-run.sh
+++ b/scripts/local-run.sh
@@ -172,6 +172,8 @@ Environment overrides (optional):
After start, open: http://localhost:3000/home
Run audits via sidebar "Run audit" (bottom-right FAB).
+Production Next.js (same Postgres, no hot reload): ./local-prod start
+
Run CI-style tests: ./local-test (see ./local-test help). JS crawl integration: ./local-test browser.
EOF
}
diff --git a/src/website_profiling/analysis/local.py b/src/website_profiling/analysis/local.py
index 1b557860..f457c4f9 100644
--- a/src/website_profiling/analysis/local.py
+++ b/src/website_profiling/analysis/local.py
@@ -41,7 +41,11 @@ def _cfg_int(cfg: dict[str, str] | None, key: str, default: int) -> int:
def _tokenize_simhash(text: str) -> list[str]:
- return re.findall(r"[a-z0-9]{3,}", text.lower())
+ # `[^\W_]` is word chars minus underscore: identical to the old `[a-z0-9]`
+ # for ASCII (input is lowercased) but ALSO matches Unicode letters/digits, so
+ # CJK / Cyrillic / Arabic / Greek pages no longer tokenize to nothing and
+ # collapse to SimHash 0 (which falsely clustered them all as duplicates).
+ return re.findall(r"[^\W_]{3,}", text.lower(), re.UNICODE)
def _stable_token_hash(token: str) -> int:
@@ -123,6 +127,11 @@ def compute_duplicate_groups(
bucket: dict[int, list[str]] = defaultdict(list)
for u, h in url_to_sh.items():
+ # SimHash 0 means "no tokenizable content", not "identical content".
+ # Bucketing those together unioned every untokenizable page as a single
+ # giant duplicate group — skip them.
+ if h == 0:
+ continue
bucket[h].append(u)
fuzz = _import_rapidfuzz()
@@ -163,7 +172,9 @@ def union(a: str, b: str, method: str) -> None:
union(base, m, "simhash")
if hamming_max > 0 and len(urls) <= simhash_max_urls:
- sh_list = [(u, url_to_sh[u]) for u in urls]
+ # Exclude SimHash-0 (untokenizable) pages — every pair of them has
+ # Hamming distance 0 and would be wrongly merged as duplicates.
+ sh_list = [(u, url_to_sh[u]) for u in urls if url_to_sh[u] != 0]
for i, (u1, h1) in enumerate(sh_list):
for u2, h2 in sh_list[i + 1 :]:
if _hamming(h1, h2) <= hamming_max:
diff --git a/src/website_profiling/analysis/page.py b/src/website_profiling/analysis/page.py
index e9185a97..e400eb70 100644
--- a/src/website_profiling/analysis/page.py
+++ b/src/website_profiling/analysis/page.py
@@ -91,7 +91,7 @@ def walk(obj: object) -> bool:
"corporation",
"store",
"restaurant",
- "professionalService",
+ "professionalservice",
"newsmediaorganization",
})
_CONTACT_CAP = 10
diff --git a/src/website_profiling/cli.py b/src/website_profiling/cli.py
index f5ea79c7..d9bf3742 100644
--- a/src/website_profiling/cli.py
+++ b/src/website_profiling/cli.py
@@ -9,6 +9,7 @@
enrich_cmd,
google_cmd,
gsc_links_cmd,
+ help_cmd,
keywords_cmd,
lighthouse_cmd,
page_coach_cmd,
@@ -46,6 +47,8 @@ def main() -> None:
chat_cmd.run(cfg, args)
elif args.command == "page-markdown":
page_markdown_cmd.run(cfg, args)
+ elif args.command == "help":
+ help_cmd.run(cfg, args)
else:
pipeline_cmd.run(cfg, args)
diff --git a/src/website_profiling/commands/config_resolve.py b/src/website_profiling/commands/config_resolve.py
index 2204f4ad..cae05ec3 100644
--- a/src/website_profiling/commands/config_resolve.py
+++ b/src/website_profiling/commands/config_resolve.py
@@ -281,6 +281,7 @@ def build_parser() -> argparse.ArgumentParser:
"page-coach",
"chat",
"page-markdown",
+ "help",
],
help="Run only this step (default: run all steps according to config)",
)
@@ -394,7 +395,7 @@ def build_parser() -> argparse.ArgumentParser:
"--stdin-json",
action="store_true",
dest="stdin_json",
- help="For 'chat' command: read JSON payload from stdin and emit NDJSON events.",
+ help="For 'chat' and 'help' commands: read JSON payload from stdin and emit NDJSON events.",
)
parser.add_argument(
"--resume-run-id",
diff --git a/src/website_profiling/commands/help_cmd.py b/src/website_profiling/commands/help_cmd.py
new file mode 100644
index 00000000..492aed9e
--- /dev/null
+++ b/src/website_profiling/commands/help_cmd.py
@@ -0,0 +1,41 @@
+"""CLI: help --stdin-json — single-turn help chat (NDJSON events on stdout)."""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+
+from ..text_sanitize import sanitize_unicode_deep
+from ..llm.help_agent import run_help_turn
+
+
+def run(_cfg: dict, args: argparse.Namespace) -> None:
+ if not getattr(args, "stdin_json", False):
+ print("Error: help requires --stdin-json", file=sys.stderr)
+ sys.exit(1)
+
+ try:
+ payload = json.load(sys.stdin)
+ except json.JSONDecodeError as e:
+ print(json.dumps({"type": "error", "message": f"Invalid stdin JSON: {e}"}))
+ sys.exit(1)
+
+ messages = payload.get("messages") or []
+ if not isinstance(messages, list):
+ messages = []
+
+ def on_event(event: dict) -> None:
+ print(json.dumps(sanitize_unicode_deep(event), default=str), flush=True)
+
+ try:
+ result = run_help_turn(messages, on_event=on_event)
+ except Exception as e:
+ msg = str(e).strip() or type(e).__name__
+ print(json.dumps({"type": "error", "message": msg}), flush=True)
+ sys.exit(1)
+
+ if not result.get("ok"):
+ err = result.get("error", "Help agent failed")
+ print(json.dumps({"type": "error", "message": err}), flush=True)
+ sys.exit(1)
+ sys.exit(0)
diff --git a/src/website_profiling/commands/page_coach_cmd.py b/src/website_profiling/commands/page_coach_cmd.py
index f0984f12..b3fa87a6 100644
--- a/src/website_profiling/commands/page_coach_cmd.py
+++ b/src/website_profiling/commands/page_coach_cmd.py
@@ -8,6 +8,22 @@
from .config_resolve import resolve_config
+def _parse_ref(raw: str) -> tuple[str | None, int | None]:
+ """Parse a 'type:id' env value, tolerating a missing/non-numeric id.
+
+ The bare ``":" in raw`` guard does not guarantee the right-hand side is an
+ integer (e.g. "live:" or "snapshot:abc" from an unvalidated request body),
+ so coerce defensively rather than letting int() raise and crash the command.
+ """
+ if ":" not in raw:
+ return None, None
+ type_part, _, id_part = raw.partition(":")
+ try:
+ return type_part, int(id_part)
+ except ValueError:
+ return None, None
+
+
def run(cfg: dict, cwd: str, args: argparse.Namespace) -> None:
from ..llm.page_coach import run_page_coach
@@ -19,15 +35,8 @@ def run(cfg: dict, cwd: str, args: argparse.Namespace) -> None:
import os
refresh = bool(getattr(args, "refresh", False))
- current_type = current_id = baseline_type = baseline_id = None
- cur_env = os.environ.get("WP_PAGE_COACH_CURRENT", "")
- if ":" in cur_env:
- parts = cur_env.split(":", 1)
- current_type, current_id = parts[0], int(parts[1])
- base_env = os.environ.get("WP_PAGE_COACH_BASELINE", "")
- if ":" in base_env:
- parts = base_env.split(":", 1)
- baseline_type, baseline_id = parts[0], int(parts[1])
+ current_type, current_id = _parse_ref(os.environ.get("WP_PAGE_COACH_CURRENT", ""))
+ baseline_type, baseline_id = _parse_ref(os.environ.get("WP_PAGE_COACH_BASELINE", ""))
result = run_page_coach(
url,
diff --git a/src/website_profiling/content_analysis/batch.py b/src/website_profiling/content_analysis/batch.py
index 7e77a686..5770adbb 100644
--- a/src/website_profiling/content_analysis/batch.py
+++ b/src/website_profiling/content_analysis/batch.py
@@ -35,11 +35,16 @@ def _analyze_row(
url = row.get("url")
if not url or not html:
return None
- fields = analyze_page_html(
- str(html),
- excerpt_max_chars=excerpt_max_chars,
- strategy=strategy,
- )
+ try:
+ fields = analyze_page_html(
+ str(html),
+ excerpt_max_chars=excerpt_max_chars,
+ strategy=strategy,
+ )
+ except Exception:
+ # A single page whose HTML breaks the analysis stack must not abort the
+ # whole run (mirrors page_markdown.batch._extract_row); skip it instead.
+ return None
return {"url": str(url).rstrip("/"), **fields}
diff --git a/src/website_profiling/content_studio/agent.py b/src/website_profiling/content_studio/agent.py
index 5aa0254d..438cfea3 100644
--- a/src/website_profiling/content_studio/agent.py
+++ b/src/website_profiling/content_studio/agent.py
@@ -77,10 +77,15 @@ def _inject_missing_tools(
ctx: ContentStudioContext,
called: set[str],
ollama_format: bool,
+ tool_events: list[dict[str, Any]],
) -> None:
for name in sorted(REQUIRED_CONTENT_STUDIO_TOOLS - called):
result = sanitize_unicode_deep(dispatch_content_studio_tool(name, ctx))
called.add(name)
+ # Record the result for tool_events here too, so the caller need not
+ # dispatch each missing tool a second time (score_content_draft opens a
+ # DB session and re-parses the HTML on every dispatch).
+ tool_events.append({"name": name, "args": {}, "result": result})
if ollama_format:
openai_messages.append({
"role": "tool",
@@ -223,15 +228,9 @@ def run_content_studio_analyze(
}
if called:
- _inject_missing_tools(openai_messages, ctx, called, ollama_format)
- for name in sorted(REQUIRED_CONTENT_STUDIO_TOOLS):
- if any(e["name"] == name for e in tool_events):
- continue
- tool_events.append({
- "name": name,
- "args": {},
- "result": dispatch_content_studio_tool(name, ctx),
- })
+ # Populates both the model messages and tool_events in one dispatch
+ # pass (previously every missing tool was dispatched twice).
+ _inject_missing_tools(openai_messages, ctx, called, ollama_format, tool_events)
continue
break
diff --git a/src/website_profiling/crawl/crawler.py b/src/website_profiling/crawl/crawler.py
index bdc71636..1a1a3d9c 100644
--- a/src/website_profiling/crawl/crawler.py
+++ b/src/website_profiling/crawl/crawler.py
@@ -462,7 +462,7 @@ def crawl(
limit=crawl_limit,
)
emit_phase_start("crawl", message="Crawling pages")
- futures = []
+ futures: dict = {} # future -> dequeued url (so an errored fetch keeps its url)
db_writer: Optional[CrawlDbWriter] = None
pages_crawled = 0
self._db_writer = None
@@ -496,7 +496,7 @@ def crawl(
continue
if not self.frontier.mark_visited(url):
continue
- futures.append(ex.submit(self.worker, url))
+ futures[ex.submit(self.worker, url)] = url
can_submit_more = (
not self.queue.empty()
@@ -509,13 +509,16 @@ def crawl(
# returns immediately if a future is already done.
wait(futures, return_when=FIRST_COMPLETED)
- remaining = []
- for f in futures:
+ remaining: dict = {}
+ for f, f_url in futures.items():
if f.done():
try:
res = f.result()
except Exception:
- res = empty_crawl_row(status="error")
+ # Keep the dequeued url so the error row is persisted
+ # to the DB consistently with the non-streaming path
+ # (a url-less row is silently dropped from streaming).
+ res = empty_crawl_row(url=f_url, status="error")
if self.store_outlinks:
res["outlink_targets"] = "[]"
self.results.append(res)
@@ -529,7 +532,7 @@ def crawl(
pbar.update(1)
progress_tracker.maybe_emit(pages_crawled, page_url)
else:
- remaining.append(f)
+ remaining[f] = f_url
futures = remaining
# Check for pause request (SIGUSR1) or Windows file-based signal.
diff --git a/src/website_profiling/crawl/fetchers/hybrid.py b/src/website_profiling/crawl/fetchers/hybrid.py
index df4d6df2..8e7102fd 100644
--- a/src/website_profiling/crawl/fetchers/hybrid.py
+++ b/src/website_profiling/crawl/fetchers/hybrid.py
@@ -2,6 +2,7 @@
from __future__ import annotations
+import threading
from typing import Callable, Optional
from .base import FetchResult, PageFetcher
@@ -19,10 +20,17 @@ def __init__(
self._static = static
self._browser_factory = browser_factory
self._browser_instance: Optional[PageFetcher] = None
+ self._browser_lock = threading.Lock()
def _get_browser(self) -> PageFetcher:
+ # Double-checked locking: crawler worker threads share one HybridFetcher,
+ # so an unsynchronized check-then-act would let two threads each build a
+ # BrowserFetcher (launching a Chromium process + daemon thread); the
+ # second assignment orphans the first, leaking it for the process lifetime.
if self._browser_instance is None:
- self._browser_instance = self._browser_factory()
+ with self._browser_lock:
+ if self._browser_instance is None:
+ self._browser_instance = self._browser_factory()
return self._browser_instance
def fetch(self, url: str) -> FetchResult:
diff --git a/src/website_profiling/crawl/sitemap.py b/src/website_profiling/crawl/sitemap.py
index 39bab8bc..301b4677 100644
--- a/src/website_profiling/crawl/sitemap.py
+++ b/src/website_profiling/crawl/sitemap.py
@@ -20,6 +20,11 @@ def _origin(start_url: str) -> str:
return f"{parsed.scheme}://{parsed.netloc}"
+def _same_origin(url: str, origin: str) -> bool:
+ """True when *url* is non-empty and shares its netloc (host[:port]) with *origin*."""
+ return bool(url) and urlparse(url).netloc == urlparse(origin).netloc
+
+
def _sitemap_urls_from_robots(text: str) -> list[str]:
urls: list[str] = []
for line in text.splitlines():
@@ -84,7 +89,12 @@ def discover_sitemap_urls(
try:
r = sess.get(f"{origin}/robots.txt", timeout=timeout)
if r.status_code == 200 and r.text:
- sitemap_queue.extend(_sitemap_urls_from_robots(r.text))
+ # Only follow same-origin sitemaps: robots.txt (or a MITM of it)
+ # can advertise arbitrary hosts, which would otherwise let the
+ # crawler issue requests off the audited origin (SSRF / scope escape).
+ sitemap_queue.extend(
+ s for s in _sitemap_urls_from_robots(r.text) if _same_origin(s, origin)
+ )
except Exception:
pass
@@ -102,10 +112,12 @@ def discover_sitemap_urls(
continue
pages, nested = _parse_sitemap_xml(r.text, sm_url)
for n in nested:
- if n not in seen_sitemaps:
+ # Nested entries are attacker-controllable;
+ # never queue an off-origin sitemap for fetching.
+ if n not in seen_sitemaps and _same_origin(n, origin):
sitemap_queue.append(n)
for page in pages:
- if urlparse(page).netloc != urlparse(origin).netloc:
+ if not _same_origin(page, origin):
continue
if page not in seen_pages:
seen_pages.add(page)
diff --git a/src/website_profiling/db/crawl_store.py b/src/website_profiling/db/crawl_store.py
index b86ef579..5546368e 100644
--- a/src/website_profiling/db/crawl_store.py
+++ b/src/website_profiling/db/crawl_store.py
@@ -331,10 +331,17 @@ def _write_crawl_rows(conn: Connection, rows: list[tuple]) -> None:
else:
normalized.append(row)
try:
- _executemany(conn, _CRAWL_INSERT_SQL, normalized, page_size=_CRAWL_BATCH_SIZE)
+ # Savepoint so that a failure (e.g. a legacy schema missing the
+ # fetch_method column) rolls back ONLY this insert and leaves the
+ # transaction usable. Without it the legacy fallback below runs inside an
+ # aborted transaction, raises "current transaction is aborted", and
+ # silently writes nothing.
+ with conn.transaction():
+ _executemany(conn, _CRAWL_INSERT_SQL, normalized, page_size=_CRAWL_BATCH_SIZE)
except Exception:
legacy = [(r[0], r[1], r[2], r[3], r[5]) for r in normalized]
- _executemany(conn, _CRAWL_INSERT_SQL_LEGACY, legacy, page_size=_CRAWL_BATCH_SIZE)
+ with conn.transaction():
+ _executemany(conn, _CRAWL_INSERT_SQL_LEGACY, legacy, page_size=_CRAWL_BATCH_SIZE)
def write_crawl_batch(
diff --git a/src/website_profiling/db/historical.py b/src/website_profiling/db/historical.py
index 96984516..0b6f84a6 100644
--- a/src/website_profiling/db/historical.py
+++ b/src/website_profiling/db/historical.py
@@ -83,9 +83,14 @@ def read_historical_data() -> dict[str, list]:
with db_session() as conn:
for table in tables:
try:
- with conn.cursor() as cur:
- cur.execute(SQL("SELECT * FROM {}").format(Identifier(table)))
- result[table] = [dict(row) for row in cur.fetchall()]
+ # Savepoint per table: a failed read (e.g. a table missing on
+ # an under-migrated DB) otherwise aborts the whole transaction,
+ # making every *later* table read fail too and silently dropping
+ # all remaining preserved history. Roll back just this read.
+ with conn.transaction():
+ with conn.cursor() as cur:
+ cur.execute(SQL("SELECT * FROM {}").format(Identifier(table)))
+ result[table] = [dict(row) for row in cur.fetchall()]
except Exception as e:
console_print(
f" Warning: could not read historical table '{table}': {e}",
diff --git a/src/website_profiling/integrations/ai_citations/_types.py b/src/website_profiling/integrations/ai_citations/_types.py
index b72fbdc8..f5e30ea6 100644
--- a/src/website_profiling/integrations/ai_citations/_types.py
+++ b/src/website_profiling/integrations/ai_citations/_types.py
@@ -34,13 +34,25 @@ def to_dict(self) -> dict[str, Any]:
}
+def _strip_www(domain: str) -> str:
+ """Lowercase *domain* and remove a leading ``www.`` prefix.
+
+ ``str.lstrip("www.")`` strips the *character set* {'w', '.'}, not the
+ prefix — so ``wired.com`` becomes ``ired.com`` and ``w3.org`` becomes
+ ``3.org``, corrupting any domain that starts with 'w'. Use removeprefix
+ semantics instead.
+ """
+ d = domain.lower()
+ return d[4:] if d.startswith("www.") else d
+
+
def _domain_in_sources(domain: str, sources: list[str]) -> bool:
- needle = domain.lower().lstrip("www.").split("/")[0]
+ needle = _strip_www(domain).split("/")[0]
return any(needle in s.lower() for s in sources)
def _detect_competitors(sources: list[str], domain: str) -> list[str]:
- own = domain.lower().lstrip("www.").split("/")[0]
+ own = _strip_www(domain).split("/")[0]
seen: set[str] = set()
competitors: list[str] = []
for s in sources:
@@ -63,5 +75,5 @@ def _parametric_prompt(query: str, brand: str, domain: str) -> str:
def _parametric_brand_check(brand: str, domain: str, answer: str) -> tuple[bool, bool]:
brand_mentioned = brand.lower() in answer.lower()
- domain_cited = domain.lower().lstrip("www.").split("/")[0] in answer.lower()
+ domain_cited = _strip_www(domain).split("/")[0] in answer.lower()
return brand_mentioned, domain_cited
diff --git a/src/website_profiling/integrations/google/gsc_inspection.py b/src/website_profiling/integrations/google/gsc_inspection.py
index 97bb88b0..abe1cffe 100644
--- a/src/website_profiling/integrations/google/gsc_inspection.py
+++ b/src/website_profiling/integrations/google/gsc_inspection.py
@@ -49,7 +49,7 @@ def inspect_url(creds: Any, site_url: str, url: str) -> dict[str, Any]:
"issues": [
str(i.get("issueMessage") or i.get("severity") or i)
for i in (rich.get("issues") or [])[:5]
- if isinstance(i, dict) or i
+ if isinstance(i, dict)
],
},
"provenance": "Search Console",
diff --git a/src/website_profiling/integrations/google/suggest.py b/src/website_profiling/integrations/google/suggest.py
index 1a643c95..7e45d0f4 100644
--- a/src/website_profiling/integrations/google/suggest.py
+++ b/src/website_profiling/integrations/google/suggest.py
@@ -131,15 +131,18 @@ def batch_expand(
Returns { seed: { "web": [...], "youtube": [...], "questions": [...] } }
Uses concurrent requests and PostgreSQL cache (keyword_suggest_cache).
"""
- result: dict[str, dict[str, list[str]]] = {
- seed: {s: [] for s in sources} for seed in seeds
- }
+ result: dict[str, dict[str, list[str]]] = {}
tasks_to_fetch: list[tuple[str, str, str, str]] = []
for seed in seeds:
if not seed or not seed.strip():
continue
seed = seed.strip().lower()
+ # Key `result` by the NORMALIZED seed. The cache-hit path and worker path
+ # below both index `result[seed]` with this lowercased value, so keying
+ # by the raw seed would raise KeyError for any mixed-case seed on a cache
+ # hit (the common steady-state path), aborting the enrichment pipeline.
+ result.setdefault(seed, {s: [] for s in sources})
for source in sources:
# Check cache
if cache_conn is not None:
diff --git a/src/website_profiling/lighthouse/runner.py b/src/website_profiling/lighthouse/runner.py
index 39eaf525..afedd9df 100644
--- a/src/website_profiling/lighthouse/runner.py
+++ b/src/website_profiling/lighthouse/runner.py
@@ -334,8 +334,16 @@ def run_lighthouse_audit(
val = median_metrics.get(_cat_key[cat_id])
category_scores[cat_id] = round(val * 100) if val is not None else None
- # Merge top failures from run with worst performance score
- worst_run = min(runs, key=lambda r: (r["performance_score"] is None, -(r["performance_score"] or 0)))
+ # Merge top failures from the run with the WORST (lowest) performance score.
+ # Negating the score made `min` pick the highest (best) score; and `or 0`
+ # would mis-rank a legitimate 0.0 score. None scores sort last.
+ worst_run = min(
+ runs,
+ key=lambda r: (
+ r["performance_score"] is None,
+ r["performance_score"] if r["performance_score"] is not None else 1.0,
+ ),
+ )
top_failures = worst_run.get("top_failures") or []
lcp_ok = median_metrics["lcp_ms"] is not None and median_metrics["lcp_ms"] <= LCP_GOOD_MS
diff --git a/src/website_profiling/llm/agent.py b/src/website_profiling/llm/agent.py
index c0bca030..25d0d0a2 100644
--- a/src/website_profiling/llm/agent.py
+++ b/src/website_profiling/llm/agent.py
@@ -9,7 +9,7 @@
from ..llm_config import llm_is_enabled, load_llm_config_from_db
from ..text_sanitize import sanitize_unicode_deep, strip_surrogates
from ..tools.audit_tools import AuditToolContext
-from ..tools.audit_tools.crawl_actions import CHAT_CRAWL_TOOL
+from ..tools.audit_tools.crawl.crawl_actions import CHAT_CRAWL_TOOL
from ..tools.audit_tools.registry import (
TOOL_DEFINITIONS,
_normalize_tool_args,
@@ -98,7 +98,7 @@ def _max_tool_rounds(cfg: dict[str, str]) -> int:
- When SQL is needed: call get_sql_schema first to discover tables and foreign keys, then run_sql_query with a single read-only SELECT.
- Only SELECT is allowed — the tool rejects INSERT/UPDATE/DELETE/DDL.
- The tool automatically scopes queries to the active property; you do not need to add a property_id filter manually. For crawl data, scope is applied through crawl_runs.
-- Use row_cap intentionally: set a small value (10–50) for row listings and omit it (default 200) for aggregates.
+- Use row_cap intentionally: set a small value (10-50) for row listings and omit it (default 200) for aggregates.
- Keep results concise — use LIMIT, GROUP BY, and aggregate functions. Avoid SELECT *.
- Never tell the user you cannot run SQL if run_sql_query is loaded — use it.
diff --git a/src/website_profiling/llm/audit_summary.py b/src/website_profiling/llm/audit_summary.py
index 39157896..551582df 100644
--- a/src/website_profiling/llm/audit_summary.py
+++ b/src/website_profiling/llm/audit_summary.py
@@ -5,6 +5,8 @@
from ..scoring import round_half_up
+_PRIORITY_RANK = {"Critical": 0, "High": 1, "Medium": 2, "Low": 3}
+
def rank_issues_by_traffic(
categories: list[dict[str, Any]],
@@ -37,7 +39,10 @@ def rank_issues_by_traffic(
"gsc_clicks": clicks,
"traffic_weight": clicks,
})
- ranked.sort(key=lambda x: (-x.get("traffic_weight", 0), x.get("priority", "Medium")))
+ # Tiebreak by severity rank (not the raw string, which sorts Low before Medium alphabetically).
+ ranked.sort(
+ key=lambda x: (-x.get("traffic_weight", 0), _PRIORITY_RANK.get(x.get("priority", "Medium"), 99))
+ )
return ranked
diff --git a/src/website_profiling/llm/chat_narrative.py b/src/website_profiling/llm/chat_narrative.py
index f813fa56..54364920 100644
--- a/src/website_profiling/llm/chat_narrative.py
+++ b/src/website_profiling/llm/chat_narrative.py
@@ -53,8 +53,9 @@ def _normalize_string_list(value: Any, field: str, errors: list[str]) -> list[st
if not isinstance(value, list):
errors.append(f"{field} must be an array")
return []
- if len(value) > MAX_ITEMS:
- errors.append(f"{field} has more than {MAX_ITEMS} items")
+ # Over-length lists are silently capped to MAX_ITEMS below (the `break`), not flagged
+ # as a validation error — doing so would discard an otherwise-valid response and force
+ # a wasteful repair pass (or an outright failure) on common >5-item LLM output.
out: list[str] = []
for i, item in enumerate(value):
if not isinstance(item, str):
diff --git a/src/website_profiling/llm/dashboard_ai.py b/src/website_profiling/llm/dashboard_ai.py
index 9ff07051..2f78f914 100644
--- a/src/website_profiling/llm/dashboard_ai.py
+++ b/src/website_profiling/llm/dashboard_ai.py
@@ -58,7 +58,11 @@ def generate_dashboard_ai(
user = json.dumps(payload, indent=2, default=str)[:10_000]
raw = client.complete_json(DASHBOARD_AI_SYSTEM, user)
result = raw if isinstance(raw, dict) and raw else parse_json_response(str(raw))
- result["ok"] = True
+ if not isinstance(result, dict) or not result:
+ return {"ok": False, "error": "AI returned no parseable output."}
+ # Don't force success: keep an explicit ok/error the model may have
+ # returned instead of masking a failure as a successful generation.
+ result.setdefault("ok", True)
return result
except Exception as exc:
return {"ok": False, "error": str(exc)}
diff --git a/src/website_profiling/llm/help_agent.py b/src/website_profiling/llm/help_agent.py
new file mode 100644
index 00000000..2f78f17d
--- /dev/null
+++ b/src/website_profiling/llm/help_agent.py
@@ -0,0 +1,136 @@
+"""Help agent — single-turn LLM call for setup and usage questions. No tools, no property context."""
+from __future__ import annotations
+
+import json
+from typing import Any, Callable
+
+from ..llm_config import llm_is_enabled, load_llm_config_from_db
+from ..text_sanitize import sanitize_unicode_deep
+from .base import get_llm_client
+
+_HELP_SYSTEM_PROMPT = """You are the Site Audit Help Assistant, embedded in a self-hosted SEO audit platform.
+You help users set up the tool, configure credentials, and understand features.
+Answer only questions about this application. Keep answers concise (under 200 words unless step-by-step setup is needed).
+
+## Quick start
+- Run locally: `docker compose up --build` from the repo root.
+- Set `DATABASE_URL` env var pointing at a PostgreSQL instance.
+- Access the UI at http://localhost:3000.
+
+## Credential & integration setup
+
+### Google Search Console / Analytics
+1. Create a Google Cloud project with the Search Console API and Google Analytics Data API enabled.
+2. Set up an OAuth consent screen (External, add yourself as test user).
+3. Create OAuth 2.0 credentials (Web Application, redirect URI: http://localhost:3000/api/integrations/google/callback).
+4. In the app, go to /docs/integrations/google and follow the step-by-step guide.
+5. Alternatively, use a Service Account JSON for headless/server deployments.
+
+### AI providers (LLM)
+Go to /secrets (or the gear icon in AI Chat sidebar) and enter your API key:
+- OpenAI: get key at platform.openai.com → API keys
+- Anthropic: get key at console.anthropic.com
+- Groq: get key at console.groq.com
+- Google Gemini: get key at aistudio.google.com
+- Ollama (local, free): install Ollama, run `ollama pull <model>`, set base URL to http://localhost:11434
+Full guide: /docs/integrations/ai
+
+### Bing Webmaster Tools
+1. Sign in at bing.com/webmasters and add your site.
+2. Go to Settings → API Access → Generate API Key.
+3. In the app, add the key to the pipeline config under "Bing Webmaster API key".
+Full guide: /docs/integrations/bing
+
+### SERP API
+1. Sign up at a SERP provider (e.g. ValueSERP, SerpApi).
+2. Copy your API key and add it to pipeline config under "SERP API key".
+Full guide: /docs/integrations/serp
+
+### MCP server (for Cursor / Claude Desktop / AI agents)
+- Stdio: `python -m website_profiling.mcp` — add to your IDE's MCP config.
+- HTTP: `python -m website_profiling.mcp.http` — remote Streamable HTTP on port 8000.
+- Scope tools with `WP_MCP_DOMAIN=core|crawl|google|links|full`.
+Full guide: /docs/integrations/mcp
+
+### Crawl authentication (basic auth / cookies)
+Set crawler HTTP credentials in pipeline config: `crawler_http_auth_user`, `crawler_http_auth_pass`, or paste cookies. Guide: /docs/integrations/crawl-auth
+
+### Import GSC links
+Export links from Google Search Console and upload via /docs/integrations/gsc-links.
+
+## Features overview
+- **/home** — landing page; start a new audit from here.
+- **/pipeline** or Run audit button — configure and run a crawl + report.
+- **/chat** — AI assistant over audit data (requires LLM configured + a completed audit).
+- **/docs** — all integration guides.
+- **/secrets** — manage API keys (AI providers, Google, Bing, SERP).
+- **/write** — Content Studio: write and score SEO content with live keyword targeting.
+- **/mcp** — MCP server settings and tool scoping.
+- Reports — after an audit, browse issues, links, keywords, Lighthouse scores, GSC data.
+
+## Common workflows
+1. First audit: Run audit → choose a preset → enter your site URL → click Run.
+2. Enable AI: /secrets → AI tab → choose provider → enter API key → enable → save.
+3. Connect GSC: /docs/integrations/google → complete OAuth flow → select property.
+4. Use MCP with Cursor: add `python -m website_profiling.mcp` to Cursor's MCP settings.
+
+Respond helpfully based on the above. If the user asks about something unrelated to this application, politely say this assistant only covers the Site Audit platform and direct them to /docs."""
+
+
+def _emit(on_event: Callable[[dict], None] | None, event: dict) -> None:
+    """Deliver *event* to *on_event*, passing it through ``sanitize_unicode_deep`` first.
+
+    Does nothing when no callback was supplied (``on_event`` is falsy).
+    """
+    if not on_event:
+        return
+    cleaned = sanitize_unicode_deep(event)
+    on_event(cleaned)
+
+
+def run_help_turn(
+    messages: list[dict[str, str]],
+    *,
+    on_event: Callable[[dict], None] | None = None,
+) -> dict[str, Any]:
+    """Run a single help chat turn — no tools, no property context.
+
+    The help system prompt is sent first, followed by every dict entry of
+    *messages* (non-dict entries are dropped; a missing ``role`` defaults to
+    ``"user"`` and a missing ``content`` to ``""``). The client is called with
+    an empty tool list.
+
+    Events delivered through *on_event*:
+      * ``token`` — each streamed chunk, or the whole reply once when the
+        client produced no streamed tokens;
+      * ``done`` — after a successful call;
+      * ``error`` — when AI is disabled, the client cannot be built, or the
+        call raises.
+
+    Returns ``{"ok": True}`` on success, otherwise
+    ``{"ok": False, "error": <message>}``.
+    """
+
+    def _fail(event_message: str, error: str) -> dict[str, Any]:
+        # Report the failure to the listener and build the matching result.
+        _emit(on_event, {"type": "error", "message": event_message})
+        return {"ok": False, "error": error}
+
+    cfg = load_llm_config_from_db()
+    if not llm_is_enabled(cfg):
+        return _fail(
+            "AI is not enabled. Configure a provider and API key at /secrets, "
+            "then enable AI in pipeline settings.",
+            "AI disabled",
+        )
+
+    try:
+        client = get_llm_client(cfg)
+    except ValueError as exc:
+        reason = str(exc)
+        return _fail(reason, reason)
+
+    conversation: list[dict[str, Any]] = [
+        {"role": "system", "content": _HELP_SYSTEM_PROMPT},
+    ]
+    for entry in messages:
+        if not isinstance(entry, dict):
+            continue
+        conversation.append(
+            {"role": entry.get("role", "user"), "content": entry.get("content", "")}
+        )
+
+    streamed: list[str] = []
+
+    def on_token(token: str) -> None:
+        # Record the chunk before forwarding it so the buffered-reply fallback
+        # below knows streaming took place.
+        streamed.append(token)
+        _emit(on_event, {"type": "token", "text": token})
+
+    try:
+        result = client.chat_with_tools(conversation, tools=[], on_token=on_token)
+        # If the client buffered instead of streaming, emit the full content now.
+        if not streamed:
+            if result.content:
+                _emit(on_event, {"type": "token", "text": result.content})
+        _emit(on_event, {"type": "done", "message": ""})
+        return {"ok": True}
+    except Exception as exc:
+        # Fall back to the exception class name when the message is blank.
+        detail = str(exc).strip() or type(exc).__name__
+        return _fail(detail, detail)
diff --git a/src/website_profiling/llm/providers/anthropic.py b/src/website_profiling/llm/providers/anthropic.py
index 499927e2..6505e415 100644
--- a/src/website_profiling/llm/providers/anthropic.py
+++ b/src/website_profiling/llm/providers/anthropic.py
@@ -91,18 +91,20 @@ def complete_json(self, system: str, user: str) -> dict[str, Any]:
except ImportError as e:
raise ImportError("pip install -r requirements.txt") from e
- client = anthropic.Anthropic(api_key=self._api_key, timeout=self._timeout)
- msg = client.messages.create(
- model=self._model,
- max_tokens=4096,
- system=system + "\nRespond with valid JSON only.",
- messages=[{"role": "user", "content": user}],
- )
- parts = []
- for block in msg.content:
- if getattr(block, "type", None) == "text":
- parts.append(block.text)
- return parse_json_response("\n".join(parts))
+ # Use the client as a context manager so its underlying httpx connection
+ # pool is closed; otherwise every call leaks sockets across the agent loop.
+ with anthropic.Anthropic(api_key=self._api_key, timeout=self._timeout) as client:
+ msg = client.messages.create(
+ model=self._model,
+ max_tokens=4096,
+ system=system + "\nRespond with valid JSON only.",
+ messages=[{"role": "user", "content": user}],
+ )
+ parts = []
+ for block in msg.content:
+ if getattr(block, "type", None) == "text":
+ parts.append(block.text)
+ return parse_json_response("\n".join(parts))
def chat_with_tools(
self,
@@ -121,7 +123,6 @@ def chat_with_tools(
system, anthropic_messages = _to_anthropic_messages(messages)
anthropic_tools = _to_anthropic_tools(tools)
- client = anthropic.Anthropic(api_key=self._api_key, timeout=self._timeout)
kwargs: dict[str, Any] = {
"model": self._model,
"max_tokens": 4096,
@@ -130,51 +131,54 @@ def chat_with_tools(
"tools": anthropic_tools,
}
- if on_token:
+ # Context-manage the client so its httpx connection pool is closed on
+ # every path (the non-streaming branch closed nothing before).
+ with anthropic.Anthropic(api_key=self._api_key, timeout=self._timeout) as client:
+ if on_token:
+ content_parts: list[str] = []
+ tool_calls: list[ToolCall] = []
+ with client.messages.stream(**kwargs) as stream:
+ for event in stream:
+ if event.type == "content_block_delta" and hasattr(event.delta, "text"):
+ text = event.delta.text
+ content_parts.append(text)
+ on_token(text)
+ if event.type == "content_block_start" and getattr(event.content_block, "type", None) == "tool_use":
+ block = event.content_block
+ tool_calls.append(
+ ToolCall(id=block.id, name=block.name, arguments={}),
+ )
+ if event.type == "content_block_delta" and getattr(event.delta, "type", None) == "input_json_delta":
+ if tool_calls:
+ partial = getattr(event.delta, "partial_json", "") or ""
+ prev = tool_calls[-1].arguments.get("_partial", "")
+ tool_calls[-1].arguments["_partial"] = prev + partial
+ final = stream.get_final_message()
+ for tc in tool_calls:
+ partial = tc.arguments.pop("_partial", "")
+ if partial:
+ try:
+ tc.arguments = json.loads(partial)
+ except json.JSONDecodeError:
+ tc.arguments = {}
+ text_parts = []
+ for block in final.content:
+ if getattr(block, "type", None) == "text":
+ text_parts.append(block.text)
+ return ChatResult(content="".join(content_parts) or "".join(text_parts), tool_calls=tool_calls)
+
+ msg = client.messages.create(**kwargs)
content_parts: list[str] = []
- tool_calls: list[ToolCall] = []
- with client.messages.stream(**kwargs) as stream:
- for event in stream:
- if event.type == "content_block_delta" and hasattr(event.delta, "text"):
- text = event.delta.text
- content_parts.append(text)
- on_token(text)
- if event.type == "content_block_start" and getattr(event.content_block, "type", None) == "tool_use":
- block = event.content_block
- tool_calls.append(
- ToolCall(id=block.id, name=block.name, arguments={}),
- )
- if event.type == "content_block_delta" and getattr(event.delta, "type", None) == "input_json_delta":
- if tool_calls:
- partial = getattr(event.delta, "partial_json", "") or ""
- prev = tool_calls[-1].arguments.get("_partial", "")
- tool_calls[-1].arguments["_partial"] = prev + partial
- final = stream.get_final_message()
- for tc in tool_calls:
- partial = tc.arguments.pop("_partial", "")
- if partial:
- try:
- tc.arguments = json.loads(partial)
- except json.JSONDecodeError:
- tc.arguments = {}
- text_parts = []
- for block in final.content:
+ tool_calls = []
+ for block in msg.content:
if getattr(block, "type", None) == "text":
- text_parts.append(block.text)
- return ChatResult(content="".join(content_parts) or "".join(text_parts), tool_calls=tool_calls)
-
- msg = client.messages.create(**kwargs)
- content_parts: list[str] = []
- tool_calls = []
- for block in msg.content:
- if getattr(block, "type", None) == "text":
- content_parts.append(block.text)
- if getattr(block, "type", None) == "tool_use":
- tool_calls.append(
- ToolCall(
- id=block.id,
- name=block.name,
- arguments=dict(block.input) if isinstance(block.input, dict) else {},
- ),
- )
- return ChatResult(content="".join(content_parts), tool_calls=tool_calls)
+ content_parts.append(block.text)
+ if getattr(block, "type", None) == "tool_use":
+ tool_calls.append(
+ ToolCall(
+ id=block.id,
+ name=block.name,
+ arguments=dict(block.input) if isinstance(block.input, dict) else {},
+ ),
+ )
+ return ChatResult(content="".join(content_parts), tool_calls=tool_calls)
diff --git a/src/website_profiling/mcp/http_server.py b/src/website_profiling/mcp/http_server.py
index 88f4981f..753e8d22 100644
--- a/src/website_profiling/mcp/http_server.py
+++ b/src/website_profiling/mcp/http_server.py
@@ -101,7 +101,15 @@ def _origin_allowed(origin: str, allowed_origins: list[str]) -> bool:
return True
if pattern.startswith("http://") or pattern.startswith("https://"):
continue
- if origin_host == pattern or origin_host.endswith(f".{pattern.removeprefix('*.')}"):
+ if pattern.startswith("*."):
+ # Wildcard: match the apex and any subdomain.
+ if origin_host == pattern[2:] or origin_host.endswith(pattern[1:]):
+ return True
+ continue
+ # Bare hostname: exact match only. A non-wildcard pattern must NOT be
+ # widened into a ".pattern" suffix match, or `example.com` would also
+ # allow `evil.example.com`.
+ if origin_host == pattern:
return True
return False
@@ -179,7 +187,17 @@ async def __call__(self, scope: dict[str, Any], receive: Any, send: Any) -> None
return
origin = headers.get("origin", "")
- if settings.allowed_origins and not _origin_allowed(origin, settings.allowed_origins):
+ if settings.allowed_origins:
+ if not _origin_allowed(origin, settings.allowed_origins):
+ await _reject_request(send, 403, "Origin not allowed for remote MCP")
+ return
+ elif origin.strip() and not _host_allowed(_origin_host(origin), settings.allowed_hosts):
+ # No explicit allowed_origins configured. Transport-level Origin /
+ # DNS-rebinding protection is delegated to this middleware (see
+ # _transport_security_settings), so a request carrying a browser
+ # Origin header must at least be same-host as an allowed host;
+ # otherwise an unconfigured deployment performs no Origin check at
+ # all. Non-browser clients send no Origin and are unaffected.
await _reject_request(send, 403, "Origin not allowed for remote MCP")
return
diff --git a/src/website_profiling/mcp/server.py b/src/website_profiling/mcp/server.py
index 86090ba3..0d2145a8 100644
--- a/src/website_profiling/mcp/server.py
+++ b/src/website_profiling/mcp/server.py
@@ -75,6 +75,22 @@ def _mcp_domain() -> str:
return (os.environ.get("WP_MCP_DOMAIN") or "core").strip().lower()
+def _load_disabled_tools() -> frozenset[str]:
+    """Return the tool names stored under ``mcp_disabled_tools`` in ``pipeline_config``.
+
+    The stored value is decoded as JSON and only the string entries of a JSON
+    list are kept. A missing or empty row, a non-list value, or any exception
+    raised while reading or decoding yields an empty frozenset.
+    """
+    query = "SELECT value FROM pipeline_config WHERE key = 'mcp_disabled_tools'"
+    nothing = frozenset()
+    try:
+        with db_session() as conn:
+            record = conn.execute(query).fetchone()
+        raw = record[0] if record else None
+        if not raw:
+            return nothing
+        decoded = json.loads(raw)
+        if not isinstance(decoded, list):
+            return nothing
+        return frozenset(str(entry) for entry in decoded if isinstance(entry, str))
+    except Exception:  # noqa: BLE001
+        # Best effort: a failed lookup is treated as "nothing disabled".
+        return nothing
+
+
def _tools_catalog_json(domain: str | None = None) -> str:
effective = (domain or _mcp_domain()).strip().lower() or "core"
exposed = mcp_tool_names(effective)
@@ -172,10 +188,13 @@ def create_server(domain: str | None = None):
@server.list_tools()
async def list_tools() -> list[Tool]:
+ disabled = _load_disabled_tools()
out: list[Tool] = []
for spec in TOOL_DEFINITIONS:
if spec["name"] not in exposed:
continue
+ if spec["name"] in disabled:
+ continue
out.append(
Tool(
name=spec["name"],
@@ -187,12 +206,19 @@ async def list_tools() -> list[Tool]:
@server.call_tool()
async def call_tool(name: str, arguments: dict[str, Any] | None) -> list[TextContent]:
+ disabled = _load_disabled_tools()
if name not in exposed:
result = {
"error": f"tool not exposed in MCP domain {effective_domain}: {name}",
"hint": "Connect WP_MCP_DOMAIN=full or the domain server that includes this tool.",
}
return [TextContent(type="text", text=json.dumps(result, indent=2, default=str))]
+ if name in disabled:
+ result = {
+ "error": f"tool '{name}' has been disabled via Risk Settings.",
+ "hint": "Enable it on the /risk-settings page to use this tool.",
+ }
+ return [TextContent(type="text", text=json.dumps(result, indent=2, default=str))]
args = dict(arguments or {})
ctx = _merge_context(args)
result = dispatch_tool(name, args, context=ctx)
diff --git a/src/website_profiling/parsing/seo.py b/src/website_profiling/parsing/seo.py
index 724eafc0..d1e59f6c 100644
--- a/src/website_profiling/parsing/seo.py
+++ b/src/website_profiling/parsing/seo.py
@@ -103,13 +103,16 @@ def parse_seo_extended(html_text: str, base_url: str) -> dict:
if not val or base_scheme != "https":
continue
val = str(val).strip().lower()
- if val.startswith("http://"):
- out["mixed_content_count"] += 1
- elif attr == "srcset":
+ if attr == "srcset":
+ # srcset is a comma-separated candidate list; count EVERY insecure
+ # candidate. The generic startswith() below would match the whole
+ # string once when the FIRST candidate is http:// and miss the rest.
for part in val.split(","):
- part = part.strip().split()[0] if part.strip() else ""
- if part.startswith("http://"):
+ tok = part.strip().split()[0] if part.strip() else ""
+ if tok.startswith("http://"):
out["mixed_content_count"] += 1
+ elif val.startswith("http://"):
+ out["mixed_content_count"] += 1
return out
def parse_resources(html_text: str, base_url: str) -> dict:
"""
diff --git a/src/website_profiling/reporting/builder.py b/src/website_profiling/reporting/builder.py
index c9fe9338..c4c29ef1 100644
--- a/src/website_profiling/reporting/builder.py
+++ b/src/website_profiling/reporting/builder.py
@@ -700,7 +700,7 @@ def run_simple_report(
except Exception:
pass
try:
- from ..tools.audit_tools.llm_tools import get_portfolio_summary
+ from ..tools.audit_tools.integrations.llm_tools import get_portfolio_summary
from ..tools.audit_tools.context import AuditToolContext
portfolio = get_portfolio_summary(conn, AuditToolContext(property_id=property_id), {})
diff --git a/src/website_profiling/reporting/builder_sections/content_urls.py b/src/website_profiling/reporting/builder_sections/content_urls.py
index 0cbc5420..7d7f0a01 100644
--- a/src/website_profiling/reporting/builder_sections/content_urls.py
+++ b/src/website_profiling/reporting/builder_sections/content_urls.py
@@ -18,6 +18,17 @@
)
+def _int_or_zero(value: Any) -> int:
+    """Coerce *value* to int, treating NaN / None / non-numeric / infinite as 0.
+
+    ``int(pd.to_numeric(x, errors="coerce") or 0)`` is unsafe: a NaN result is
+    truthy in Python, so ``NaN or 0`` evaluates to ``NaN`` and ``int(NaN)``
+    raises ValueError — crashing the whole report build on a single bad cell.
+
+    Checking ``pd.notna`` alone is still not enough: ``±inf`` is "not NA" yet
+    ``int(inf)`` raises OverflowError, and a non-scalar cell can make the
+    truth test or ``int()`` raise ValueError/TypeError. Every such case falls
+    back to 0 so one bad cell never aborts the build.
+
+    Args:
+        value: Raw cell value (number, numeric string, None, NaN, ...).
+
+    Returns:
+        The value truncated to ``int``, or 0 when it cannot be converted.
+    """
+    num = pd.to_numeric(value, errors="coerce")
+    try:
+        return int(num) if pd.notna(num) else 0
+    except (OverflowError, TypeError, ValueError):
+        return 0
+
+
def build_content_url_lists(
df: pd.DataFrame,
success_df_urls: pd.DataFrame,
@@ -106,7 +117,7 @@ def build_content_url_lists(
missing_alt.append({
"url": str(u).strip(),
"images_without_alt": int(alt_missing.loc[i]),
- "images_total": int(pd.to_numeric(row.get("images_total"), errors="coerce") or 0),
+ "images_total": _int_or_zero(row.get("images_total")),
})
missing_lazy: list[dict[str, Any]] = []
@@ -122,7 +133,7 @@ def build_content_url_lists(
missing_lazy.append({
"url": str(u).strip(),
"img_without_lazy": int(lazy_missing.loc[i]),
- "images_total": int(pd.to_numeric(row.get("images_total"), errors="coerce") or 0),
+ "images_total": _int_or_zero(row.get("images_total")),
})
if "img_without_dimensions" in success_df_urls.columns:
dim_missing = pd.to_numeric(success_df_urls["img_without_dimensions"], errors="coerce").fillna(0).astype(int)
@@ -135,7 +146,7 @@ def build_content_url_lists(
missing_dimensions.append({
"url": str(u).strip(),
"img_without_dimensions": int(dim_missing.loc[i]),
- "images_total": int(pd.to_numeric(row.get("images_total"), errors="coerce") or 0),
+ "images_total": _int_or_zero(row.get("images_total")),
})
title_short: list[dict[str, Any]] = []
diff --git a/src/website_profiling/reporting/builder_sections/links.py b/src/website_profiling/reporting/builder_sections/links.py
index ec59587e..86e9ce81 100644
--- a/src/website_profiling/reporting/builder_sections/links.py
+++ b/src/website_profiling/reporting/builder_sections/links.py
@@ -135,8 +135,11 @@ def _bool_col(col):
rec["content_security_policy"] = _str_col("content_security_policy")
# Content analysis
- rec["reading_level"] = round(float(pd.to_numeric(row.get("reading_level") if "reading_level" in df.columns else None, errors="coerce") or 0.0), 1)
- rec["content_html_ratio"] = round(float(pd.to_numeric(row.get("content_html_ratio") if "content_html_ratio" in df.columns else None, errors="coerce") or 0.0), 2)
+ # NaN is truthy, so `pd.to_numeric(...) or 0.0` does NOT fall back; guard with pd.isna.
+ _rl_num = pd.to_numeric(row.get("reading_level") if "reading_level" in df.columns else None, errors="coerce")
+ rec["reading_level"] = round(float(0.0 if pd.isna(_rl_num) else _rl_num), 1)
+ _chr_num = pd.to_numeric(row.get("content_html_ratio") if "content_html_ratio" in df.columns else None, errors="coerce")
+ rec["content_html_ratio"] = round(float(0.0 if pd.isna(_chr_num) else _chr_num), 2)
rec["top_keywords"] = _str_col("top_keywords")
rec["content_excerpt"] = _str_col("content_excerpt") if "content_excerpt" in df.columns else ""
diff --git a/src/website_profiling/reporting/compare_payload.py b/src/website_profiling/reporting/compare_payload.py
index ec54dc0b..f9ed4b1f 100644
--- a/src/website_profiling/reporting/compare_payload.py
+++ b/src/website_profiling/reporting/compare_payload.py
@@ -233,7 +233,10 @@ def build_link_metric_deltas(current: dict[str, Any], baseline: dict[str, Any])
"delta": delta,
})
out.sort(key=lambda x: abs(x.get("delta") or 0), reverse=True)
- return out[:_LINK_METRIC_CAP]
+ # Return the full list; callers slice and report truncation accurately
+ # (capping here hid the real total and produced a false "truncated" flag at
+ # exactly the cap).
+ return out
def _redirect_key(r: dict[str, Any]) -> str:
@@ -614,8 +617,9 @@ def build_full_compare(
truncated_sections["issue_deltas"] = True
issue_deltas = issue_deltas[:_ISSUE_DELTA_CAP]
link_metrics = build_link_metric_deltas(current, baseline)
- if len(link_metrics) >= _LINK_METRIC_CAP:
+ if len(link_metrics) > _LINK_METRIC_CAP:
truncated_sections["link_metric_deltas"] = True
+ link_metrics = link_metrics[:_LINK_METRIC_CAP]
google = build_google_metrics(current, baseline)
return {
"current_report_id": current_report_id,
diff --git a/src/website_profiling/reporting/issue_impact.py b/src/website_profiling/reporting/issue_impact.py
index 70ee30a2..fd14f1de 100644
--- a/src/website_profiling/reporting/issue_impact.py
+++ b/src/website_profiling/reporting/issue_impact.py
@@ -62,7 +62,12 @@ def enrich_categories_with_traffic_impact(
ga4_sess = 0.0
if url:
for path_key, ga in sessions_by_path.items():
- if url.endswith(path_key.rstrip("/")) or path_key in url:
+ key = path_key.rstrip("/")
+ # Skip the homepage "/" key (rstrip -> ""), which would make
+ # url.endswith("") match every issue. Match GA4 paths as URL suffixes.
+ if not key:
+ continue
+ if url.endswith(key):
ga4_sess = max(ga4_sess, float(ga.get("ga4_sessions") or 0))
issue["gsc_clicks"] = gsc.get("gsc_clicks", 0)
issue["gsc_impressions"] = gsc.get("gsc_impressions", 0)
diff --git a/src/website_profiling/reporting/optional_audits.py b/src/website_profiling/reporting/optional_audits.py
index f95c79f4..6c10a948 100644
--- a/src/website_profiling/reporting/optional_audits.py
+++ b/src/website_profiling/reporting/optional_audits.py
@@ -118,6 +118,10 @@ def spell_check_issues(df: pd.DataFrame, *, max_pages: int = 50) -> tuple[list[d
if not words:
continue
unknown = spell.unknown(words[:120])
+ # Count every page actually spell-checked, not only flagged ones, so
+ # max_pages bounds the expensive spell.unknown() work (the cap previously
+ # incremented only when an issue was appended).
+ checked += 1
if len(unknown) >= 3:
url = str(row.get("url") or "")
sample = ", ".join(sorted(unknown)[:5])
@@ -127,7 +131,6 @@ def spell_check_issues(df: pd.DataFrame, *, max_pages: int = 50) -> tuple[list[d
priority="Low",
recommendation="Review title, H1, and visible copy for typos.",
))
- checked += 1
return issues[:20], None
@@ -149,6 +152,9 @@ def html_validation_issues(df: pd.DataFrame, *, max_pages: int = 30) -> tuple[li
if len(html) < 100:
continue
url = str(row.get("url") or "")
+ # Count every page actually parsed, not only flagged ones, so max_pages
+ # bounds the expensive HTML parse/scan (was incremented only on warnings).
+ checked += 1
warnings: list[str] = []
if use_parser:
try:
@@ -172,7 +178,6 @@ def html_validation_issues(df: pd.DataFrame, *, max_pages: int = 30) -> tuple[li
priority="Low",
recommendation="Fix markup validation issues that may affect parsing or accessibility.",
))
- checked += 1
return issues, use_parser
@@ -235,6 +240,11 @@ def wayback_issues(df: pd.DataFrame, *, max_lookups: int = 15) -> list[dict]:
))
looked += 1
continue
+ # Every uncached 404 here triggers a Wayback network request; count it
+ # against max_lookups whether or not a snapshot is found (and even if the
+ # request fails). Previously only snapshots-found counted, so a site full
+ # of snapshot-less 404s issued one request per 404 with no effective cap.
+ looked += 1
try:
resp = requests.get(
"https://archive.org/wayback/available",
@@ -254,7 +264,6 @@ def wayback_issues(df: pd.DataFrame, *, max_lookups: int = 15) -> list[dict]:
priority="Low",
recommendation="Review whether redirect or content restoration is appropriate.",
))
- looked += 1
except Exception:
with _WAYBACK_LOCK:
_WAYBACK_CACHE[cache_key] = False
diff --git a/src/website_profiling/reporting/pdf/normalize.py b/src/website_profiling/reporting/pdf/normalize.py
index 94cc4c7c..b2d7547c 100644
--- a/src/website_profiling/reporting/pdf/normalize.py
+++ b/src/website_profiling/reporting/pdf/normalize.py
@@ -212,8 +212,12 @@ def normalize_issue_for_pdf(
str(row.get("recommendation") or "").strip() if include_recommendation else None
)
- # Detect Lighthouse rows (audit-id only, no human label)
- is_lh, audit_id = _is_lighthouse_row(raw_message, [])
+ # Detect Lighthouse rows (audit-id only, no human label). Pass the row's own
+ # tags so tag-based detection actually works (was hardcoded to [], making the
+ # `"lighthouse" in tags` branch dead).
+ is_lh, audit_id = _is_lighthouse_row(
+ raw_message, [str(t).lower() for t in (row.get("tags") or [])]
+ )
if is_lh and audit_id:
headline = _lh_label(audit_id)
else:
diff --git a/src/website_profiling/reporting/pdf/render/reportlab.py b/src/website_profiling/reporting/pdf/render/reportlab.py
index e20ae699..1ae98bb2 100644
--- a/src/website_profiling/reporting/pdf/render/reportlab.py
+++ b/src/website_profiling/reporting/pdf/render/reportlab.py
@@ -253,7 +253,10 @@ def _render_stat_grid(block: StatGridBlock, st: dict) -> list:
from reportlab.platypus import Spacer, Table
if not block.chips:
return []
- n = block.columns
+ # Never build more table cells than declared column widths: if a block has
+ # more chips than columns, widen the grid to fit them (ReportLab errors at
+ # build time on a cell/colWidths mismatch). Unchanged when chips <= columns.
+ n = max(block.columns, len(block.chips))
col_w = _col_w_in(n)
row: list = []
for chip in block.chips:
diff --git a/src/website_profiling/tools/audit_tools/backlinks/__init__.py b/src/website_profiling/tools/audit_tools/backlinks/__init__.py
new file mode 100644
index 00000000..369be20f
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/backlinks/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — backlinks domain."""
diff --git a/src/website_profiling/tools/audit_tools/backlink_lists.py b/src/website_profiling/tools/audit_tools/backlinks/backlink_lists.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/backlink_lists.py
rename to src/website_profiling/tools/audit_tools/backlinks/backlink_lists.py
index c7662b9f..858f7033 100644
--- a/src/website_profiling/tools/audit_tools/backlink_lists.py
+++ b/src/website_profiling/tools/audit_tools/backlinks/backlink_lists.py
@@ -7,9 +7,9 @@
from psycopg import Connection
-from ...common import strip_www_prefix
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from ....common import strip_www_prefix
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
def _load_links(scoped: AuditToolContext, conn: Connection) -> dict[str, Any] | None:
diff --git a/src/website_profiling/tools/audit_tools/backlinks.py b/src/website_profiling/tools/audit_tools/backlinks/backlinks.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/backlinks.py
rename to src/website_profiling/tools/audit_tools/backlinks/backlinks.py
index b63ac491..b322c275 100644
--- a/src/website_profiling/tools/audit_tools/backlinks.py
+++ b/src/website_profiling/tools/audit_tools/backlinks/backlinks.py
@@ -5,9 +5,9 @@
from psycopg import Connection
-from ...integrations.google.gsc_links_store import read_gsc_links_status
-from ._slice import cap_list, parse_limit, payload_dict_slice
-from .context import AuditToolContext
+from ....integrations.google.gsc_links_store import read_gsc_links_status
+from .._slice import cap_list, parse_limit, payload_dict_slice
+from ..context import AuditToolContext
def get_gsc_links_summary(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/compare/__init__.py b/src/website_profiling/tools/audit_tools/compare/__init__.py
new file mode 100644
index 00000000..6bb6ba93
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/compare/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — compare domain."""
diff --git a/src/website_profiling/tools/audit_tools/compare.py b/src/website_profiling/tools/audit_tools/compare/compare.py
similarity index 89%
rename from src/website_profiling/tools/audit_tools/compare.py
rename to src/website_profiling/tools/audit_tools/compare/compare.py
index 7453fb82..9d3090fc 100644
--- a/src/website_profiling/tools/audit_tools/compare.py
+++ b/src/website_profiling/tools/audit_tools/compare/compare.py
@@ -5,9 +5,9 @@
from psycopg import Connection
-from ...db.report_store import read_report_payload
-from ...reporting.compare_payload import build_full_compare
-from .context import AuditToolContext
+from ....db.report_store import read_report_payload
+from ....reporting.compare_payload import build_full_compare
+from ..context import AuditToolContext
def compare_reports(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/compare_helpers.py b/src/website_profiling/tools/audit_tools/compare/compare_helpers.py
similarity index 94%
rename from src/website_profiling/tools/audit_tools/compare_helpers.py
rename to src/website_profiling/tools/audit_tools/compare/compare_helpers.py
index 4005fa6e..8db078b7 100644
--- a/src/website_profiling/tools/audit_tools/compare_helpers.py
+++ b/src/website_profiling/tools/audit_tools/compare/compare_helpers.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ...db.report_store import read_report_payload
-from .context import AuditToolContext
+from ....db.report_store import read_report_payload
+from ..context import AuditToolContext
def _row_id(row: Any) -> Any:
diff --git a/src/website_profiling/tools/audit_tools/compare_list_tools.py b/src/website_profiling/tools/audit_tools/compare/compare_list_tools.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/compare_list_tools.py
rename to src/website_profiling/tools/audit_tools/compare/compare_list_tools.py
index b95492f6..62f7c1fb 100644
--- a/src/website_profiling/tools/audit_tools/compare_list_tools.py
+++ b/src/website_profiling/tools/audit_tools/compare/compare_list_tools.py
@@ -5,15 +5,15 @@
from psycopg import Connection
-from ...reporting.compare_payload import (
+from ....reporting.compare_payload import (
build_issue_deltas,
build_lighthouse_url_deltas,
build_url_set_diff,
)
-from ._slice import cap_list, parse_limit
+from .._slice import cap_list, parse_limit
from .compare_helpers import load_compare_pair
-from .context import AuditToolContext
-from .google_lists import _gsc_rows, _index_gsc_rows, _num
+from ..context import AuditToolContext
+from ..google.google_lists import _gsc_rows, _index_gsc_rows, _num
def _compare_meta(current_rid: int | None, baseline_rid: int | None, current: dict, baseline: dict) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/compare_slices.py b/src/website_profiling/tools/audit_tools/compare/compare_slices.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/compare_slices.py
rename to src/website_profiling/tools/audit_tools/compare/compare_slices.py
index 576ee951..1bfd5408 100644
--- a/src/website_profiling/tools/audit_tools/compare_slices.py
+++ b/src/website_profiling/tools/audit_tools/compare/compare_slices.py
@@ -6,7 +6,7 @@
from psycopg import Connection
-from ...reporting.compare_payload import (
+from ....reporting.compare_payload import (
build_category_scores,
build_content_metrics,
build_duplicate_deltas,
@@ -24,9 +24,9 @@
build_url_set_diff,
_score_from_categories,
)
-from ._slice import cap_list, parse_limit
+from .._slice import cap_list, parse_limit
from .compare_helpers import load_compare_pair
-from .context import AuditToolContext
+from ..context import AuditToolContext
def _compare_meta(current_rid: int | None, baseline_rid: int | None, current: dict, baseline: dict) -> dict[str, Any]:
@@ -264,7 +264,7 @@ def compare_geo_score_deltas(conn: Connection, ctx: AuditToolContext, args: dict
return err
assert current is not None and baseline is not None
- from .geo_tools import (
+ from ..geo.geo_tools import (
_fetch_llms_txt,
_fetch_ai_discovery,
_score_meta_signals,
diff --git a/src/website_profiling/tools/audit_tools/content/__init__.py b/src/website_profiling/tools/audit_tools/content/__init__.py
new file mode 100644
index 00000000..a66693cb
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/content/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — content domain."""
diff --git a/src/website_profiling/tools/audit_tools/content.py b/src/website_profiling/tools/audit_tools/content/content.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/content.py
rename to src/website_profiling/tools/audit_tools/content/content.py
index a69c5ca9..375d2645 100644
--- a/src/website_profiling/tools/audit_tools/content.py
+++ b/src/website_profiling/tools/audit_tools/content/content.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ._slice import cap_list, parse_limit, payload_dict_slice, payload_field
-from .context import AuditToolContext
+from .._slice import cap_list, parse_limit, payload_dict_slice, payload_field
+from ..context import AuditToolContext
def get_content_analytics(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/content_lists.py b/src/website_profiling/tools/audit_tools/content/content_lists.py
similarity index 95%
rename from src/website_profiling/tools/audit_tools/content_lists.py
rename to src/website_profiling/tools/audit_tools/content/content_lists.py
index 3fdf7ff0..7c418ad7 100644
--- a/src/website_profiling/tools/audit_tools/content_lists.py
+++ b/src/website_profiling/tools/audit_tools/content/content_lists.py
@@ -6,8 +6,8 @@
from psycopg import Connection
-from ._slice import _parse_page_analysis, _row_schema_types_list, cap_list, parse_limit, payload_dict_slice
-from .context import AuditToolContext
+from .._slice import _parse_page_analysis, _row_schema_types_list, cap_list, parse_limit, payload_dict_slice
+from ..context import AuditToolContext
_ARTICLE_TYPES = frozenset({"article", "newsarticle", "blogposting", "scholarlyarticle"})
_ARTICLE_URL_HINTS = ("/blog/", "/news/", "/article/", "/post/", "/posts/")
@@ -116,8 +116,12 @@ def list_pages_containing_keyword(conn: Connection, ctx: AuditToolContext, args:
def list_pages_by_word_count_band(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
scoped = ctx.with_args(args)
try:
- min_wc = int(args.get("min_word_count") or 0)
- max_wc = int(args.get("max_word_count") or 10_000)
+ raw_min = args.get("min_word_count")
+ raw_max = args.get("max_word_count")
+ # Use None-checks, not `or`: an explicit max_word_count of 0 is falsy and
+ # would otherwise be silently replaced by the 10000 default.
+ min_wc = int(raw_min) if raw_min is not None else 0
+ max_wc = int(raw_max) if raw_max is not None else 10_000
except (TypeError, ValueError):
min_wc, max_wc = 0, 10_000
df = scoped.load_crawl_df(conn)
diff --git a/src/website_profiling/tools/audit_tools/core/__init__.py b/src/website_profiling/tools/audit_tools/core/__init__.py
new file mode 100644
index 00000000..d2668bb1
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/core/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — core domain."""
diff --git a/src/website_profiling/tools/audit_tools/data_coverage.py b/src/website_profiling/tools/audit_tools/core/data_coverage.py
similarity index 96%
rename from src/website_profiling/tools/audit_tools/data_coverage.py
rename to src/website_profiling/tools/audit_tools/core/data_coverage.py
index 773663e5..4e2776cc 100644
--- a/src/website_profiling/tools/audit_tools/data_coverage.py
+++ b/src/website_profiling/tools/audit_tools/core/data_coverage.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ...db.property_store import get_property_by_id
-from .context import AuditToolContext
+from ....db.property_store import get_property_by_id
+from ..context import AuditToolContext
def _check(name: str, populated: bool, hint: str = "") -> dict[str, Any]:
@@ -110,7 +110,7 @@ def get_data_coverage_report(conn: Connection, ctx: AuditToolContext, args: dict
bool(payload.get("log_analysis") or payload.get("access_log_summary")),
"Upload access logs in Integrations for log list tools.",
))
- from ...integrations.google.store import read_prior_google_snapshot
+ from ....integrations.google.store import read_prior_google_snapshot
prior_google = read_prior_google_snapshot(conn, scoped.property_id, skip=1) if scoped.property_id else None
checks.append(_check(
"prior_google_snapshot",
diff --git a/src/website_profiling/tools/audit_tools/payload_extras.py b/src/website_profiling/tools/audit_tools/core/payload_extras.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/payload_extras.py
rename to src/website_profiling/tools/audit_tools/core/payload_extras.py
index 0b91d641..b7e1edf4 100644
--- a/src/website_profiling/tools/audit_tools/payload_extras.py
+++ b/src/website_profiling/tools/audit_tools/core/payload_extras.py
@@ -6,8 +6,8 @@
from psycopg import Connection
-from ._slice import _parse_page_analysis, cap_list, parse_limit, payload_dict_slice
-from .context import AuditToolContext
+from .._slice import _parse_page_analysis, cap_list, parse_limit, payload_dict_slice
+from ..context import AuditToolContext
def get_rich_results_summary(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/router_tools.py b/src/website_profiling/tools/audit_tools/core/router_tools.py
similarity index 93%
rename from src/website_profiling/tools/audit_tools/router_tools.py
rename to src/website_profiling/tools/audit_tools/core/router_tools.py
index fffd6304..cad76394 100644
--- a/src/website_profiling/tools/audit_tools/router_tools.py
+++ b/src/website_profiling/tools/audit_tools/core/router_tools.py
@@ -5,13 +5,13 @@
from psycopg import Connection
-from ._slice import parse_limit
-from .context import AuditToolContext
-from .tool_domains import classify_tool_domain
+from .._slice import parse_limit
+from ..context import AuditToolContext
+from ..tool_domains import classify_tool_domain
def search_audit_tools(_conn: Connection, _ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
- from .registry import search_tools
+ from ..registry import search_tools
query = str(args.get("query") or args.get("q") or "").strip()
limit = parse_limit(args.get("limit"), 10, 50)
@@ -27,7 +27,7 @@ def search_audit_tools(_conn: Connection, _ctx: AuditToolContext, args: dict[str
def list_tool_domains(_conn: Connection, _ctx: AuditToolContext, _args: dict[str, Any]) -> dict[str, Any]:
- from .registry import list_domains_catalog, tools_catalog_by_domain
+ from ..registry import list_domains_catalog, tools_catalog_by_domain
catalog = list_domains_catalog()
by_domain = tools_catalog_by_domain()
@@ -39,7 +39,7 @@ def list_tool_domains(_conn: Connection, _ctx: AuditToolContext, _args: dict[str
def _dispatch(name: str, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
"""Dispatch one workflow step on its own pooled connection (safe to run in parallel)."""
- from .registry import dispatch_tool
+ from ..registry import dispatch_tool
try:
return dispatch_tool(name, args, context=ctx)
@@ -57,7 +57,7 @@ def _run_steps(
are not safe to share across threads), so independent read-only steps run in
parallel like Claude Code's parallel tool calls.
"""
- from ...concurrency import map_parallel, tool_concurrency
+ from ....concurrency import map_parallel, tool_concurrency
return map_parallel(
plan,
@@ -116,7 +116,7 @@ def run_keyword_workflow(_conn: Connection, ctx: AuditToolContext, args: dict[st
def run_domain_agent(_conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
"""Run a short scripted sequence of tools within one domain (subagent-style), in parallel."""
- from .registry import search_tools, tool_names_for_domain, tool_meta
+ from ..registry import search_tools, tool_names_for_domain, tool_meta
scoped = ctx.with_args(args)
task = str(args.get("task") or "").strip()
diff --git a/src/website_profiling/tools/audit_tools/sql_query.py b/src/website_profiling/tools/audit_tools/core/sql_query.py
similarity index 93%
rename from src/website_profiling/tools/audit_tools/sql_query.py
rename to src/website_profiling/tools/audit_tools/core/sql_query.py
index 4dcc3382..6ee26daf 100644
--- a/src/website_profiling/tools/audit_tools/sql_query.py
+++ b/src/website_profiling/tools/audit_tools/core/sql_query.py
@@ -26,9 +26,9 @@
import sqlglot.expressions as exp
from psycopg import Connection
-from ...db._common import _sanitize_for_json
-from ...db.pool import readonly_session
-from .context import AuditToolContext
+from ....db._common import _sanitize_for_json
+from ....db.pool import readonly_session
+from ..context import AuditToolContext
logger = logging.getLogger(__name__)
@@ -287,6 +287,7 @@ def _check_table_refs(ast: exp.Expression) -> None:
# node.db holds the schema qualifier (e.g. "information_schema" for
# information_schema.tables); node.catalog holds the catalog prefix.
schema_name = str(node.db or "").lower().strip('"').strip("'")
+ catalog_name = str(node.catalog or "").lower().strip('"').strip("'")
# Block system-catalog schemas — they leak metadata about denied tables.
if schema_name in _BLOCKED_SCHEMAS:
@@ -295,6 +296,19 @@ def _check_table_refs(ast: exp.Expression) -> None:
"Use the get_sql_schema tool to discover available tables."
)
+ # Reject every other schema/catalog qualifier. Tenant-scoping CTEs are
+ # injected under the *unqualified* table name, and a Postgres CTE does
+ # NOT shadow a schema-qualified reference — so `public.google_data`
+ # resolves to the real base table and would bypass scope binding,
+ # leaking every tenant's rows. All allowlisted tables live in the
+ # default schema and must be referenced unqualified.
+ if schema_name or catalog_name:
+ qualified = ".".join(p for p in (catalog_name, schema_name, table_name) if p)
+ raise ReadOnlyViolation(
+ f"Schema-qualified table reference '{qualified}' is not permitted; "
+ "reference tables by their unqualified name."
+ )
+
if not table_name:
continue
@@ -491,14 +505,24 @@ def _inject_scope_ctes(sql: str, stmt: exp.Expression, property_id: int) -> str:
# Merge with any existing WITH clause (regex-based, because sqlglot parses
# WITH ... SELECT as exp.Select with a nested exp.With, not exp.With itself).
- if re.match(r"\s*WITH\s", sql, re.IGNORECASE):
+ stripped = sql.strip()
+ if re.match(r"(?i)^WITH\s+RECURSIVE\b", stripped):
+ # A WITH RECURSIVE query cannot simply absorb our non-recursive scope
+ # CTEs: they self-shadow their base tables (e.g.
+ # `crawl_runs AS (SELECT * FROM crawl_runs ...)`), which Postgres would
+ # mis-read as a malformed recursive term and reject. Wrap the whole
+ # query in a subquery instead — the outer scope CTEs remain visible
+ # inside it, so the user's table references still resolve to the scoped
+ # CTEs while the recursive CTE keeps its required `WITH RECURSIVE` form.
+ return f"WITH {cte_block}\nSELECT * FROM (\n{stripped}\n) _scoped"
+ if re.match(r"(?i)^WITH\s", stripped):
return re.sub(
r"(?i)^\s*WITH\s+",
f"WITH {cte_block},\n",
- sql.strip(),
+ stripped,
count=1,
)
- return f"WITH {cte_block}\n{sql.strip()}"
+ return f"WITH {cte_block}\n{stripped}"
# ---------------------------------------------------------------------------
diff --git a/src/website_profiling/tools/audit_tools/crawl/__init__.py b/src/website_profiling/tools/audit_tools/crawl/__init__.py
new file mode 100644
index 00000000..62e5e068
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/crawl/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — crawl domain."""
diff --git a/src/website_profiling/tools/audit_tools/crawl.py b/src/website_profiling/tools/audit_tools/crawl/crawl.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/crawl.py
rename to src/website_profiling/tools/audit_tools/crawl/crawl.py
index 1bfc536f..ee518b26 100644
--- a/src/website_profiling/tools/audit_tools/crawl.py
+++ b/src/website_profiling/tools/audit_tools/crawl/crawl.py
@@ -5,9 +5,9 @@
from psycopg import Connection
-from ...integrations.google.page_lookup import slice_from_google_row
-from ._slice import _parse_page_analysis, cap_list, parse_limit, payload_dict_slice
-from .context import AuditToolContext
+from ....integrations.google.page_lookup import slice_from_google_row
+from .._slice import _parse_page_analysis, cap_list, parse_limit, payload_dict_slice
+from ..context import AuditToolContext
_PAGE_LIMIT_MAX = 30
@@ -100,7 +100,7 @@ def get_internal_links(conn: Connection, ctx: AuditToolContext, args: dict[str,
if not url:
return {"error": "url is required"}
- from ...db.crawl_store import read_edges
+ from ....db.crawl_store import read_edges
payload = scoped.load_payload(conn)
run_id = payload.get("crawl_run_id")
diff --git a/src/website_profiling/tools/audit_tools/crawl_actions.py b/src/website_profiling/tools/audit_tools/crawl/crawl_actions.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/crawl_actions.py
rename to src/website_profiling/tools/audit_tools/crawl/crawl_actions.py
index 5d977ce9..9379148e 100644
--- a/src/website_profiling/tools/audit_tools/crawl_actions.py
+++ b/src/website_profiling/tools/audit_tools/crawl/crawl_actions.py
@@ -6,19 +6,19 @@
from psycopg import Connection
-from ...crawl_presets import (
+from ....crawl_presets import (
CRAWL_PRESET_PATCHES,
DEFAULT_CRAWL_PRESET_ID,
apply_crawl_preset,
)
-from ...db.config_store import read_pipeline_config
-from ...db.property_store import (
+from ....db.config_store import read_pipeline_config
+from ....db.property_store import (
canonical_domain_from_start_url,
derive_property_name,
get_property_by_id,
)
-from ...llm_config import load_llm_config_from_db
-from .context import AuditToolContext
+from ....llm_config import load_llm_config_from_db
+from ..context import AuditToolContext
CHAT_CRAWL_TOOL = "prepare_audit_run"
diff --git a/src/website_profiling/tools/audit_tools/crawl_lists.py b/src/website_profiling/tools/audit_tools/crawl/crawl_lists.py
similarity index 99%
rename from src/website_profiling/tools/audit_tools/crawl_lists.py
rename to src/website_profiling/tools/audit_tools/crawl/crawl_lists.py
index d14c2a0f..48145f17 100644
--- a/src/website_profiling/tools/audit_tools/crawl_lists.py
+++ b/src/website_profiling/tools/audit_tools/crawl/crawl_lists.py
@@ -6,9 +6,9 @@
import pandas as pd
from psycopg import Connection
-from ...reporting.categories import REDIRECT_CHAIN_LONG
-from ._slice import _parse_page_analysis, cap_list, parse_limit
-from .context import AuditToolContext
+from ....reporting.categories import REDIRECT_CHAIN_LONG
+from .._slice import _parse_page_analysis, cap_list, parse_limit
+from ..context import AuditToolContext
_REDIRECT_CHAIN_MIN = REDIRECT_CHAIN_LONG
diff --git a/src/website_profiling/tools/audit_tools/crawl_metrics.py b/src/website_profiling/tools/audit_tools/crawl/crawl_metrics.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/crawl_metrics.py
rename to src/website_profiling/tools/audit_tools/crawl/crawl_metrics.py
index 50ee55db..beae8d1e 100644
--- a/src/website_profiling/tools/audit_tools/crawl_metrics.py
+++ b/src/website_profiling/tools/audit_tools/crawl/crawl_metrics.py
@@ -5,7 +5,7 @@
from psycopg import Connection
-from .context import AuditToolContext
+from ..context import AuditToolContext
def _percentile(values: list[float], pct: float) -> float | None:
diff --git a/src/website_profiling/tools/audit_tools/export/__init__.py b/src/website_profiling/tools/audit_tools/export/__init__.py
new file mode 100644
index 00000000..466faf4c
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/export/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — export domain."""
diff --git a/src/website_profiling/tools/audit_tools/export_extras.py b/src/website_profiling/tools/audit_tools/export/export_extras.py
similarity index 84%
rename from src/website_profiling/tools/audit_tools/export_extras.py
rename to src/website_profiling/tools/audit_tools/export/export_extras.py
index 093350f5..1217cebc 100644
--- a/src/website_profiling/tools/audit_tools/export_extras.py
+++ b/src/website_profiling/tools/audit_tools/export/export_extras.py
@@ -5,10 +5,10 @@
from psycopg import Connection
-from ...integrations.google.rich_results import validate_urls
-from ...tools.export_sitemap import build_sitemap_xml
-from ..export_artifacts import save_artifact
-from .context import AuditToolContext
+from ....integrations.google.rich_results import validate_urls
+from ....tools.export_sitemap import build_sitemap_xml
+from ...export_artifacts import save_artifact
+from ..context import AuditToolContext
def export_sitemap_xml(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -36,7 +36,7 @@ def validate_rich_results(conn: Connection, ctx: AuditToolContext, args: dict[st
site_url = str(args.get("site_url") or args.get("gsc_site_url") or "").strip() or None
if ctx.property_id:
try:
- from ...integrations.google.auth import build_credentials
+ from ....integrations.google.auth import build_credentials
creds = build_credentials(ctx.property_id)
except Exception:
diff --git a/src/website_profiling/tools/audit_tools/export_tools.py b/src/website_profiling/tools/audit_tools/export/export_tools.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/export_tools.py
rename to src/website_profiling/tools/audit_tools/export/export_tools.py
index 5c2ec4fb..fa8d77ec 100644
--- a/src/website_profiling/tools/audit_tools/export_tools.py
+++ b/src/website_profiling/tools/audit_tools/export/export_tools.py
@@ -5,21 +5,21 @@
from psycopg import Connection
-from ..export_artifacts import (
+from ...export_artifacts import (
dicts_to_csv,
rows_from_tool_result,
save_artifact,
)
-from ..export_compare import export_compare_issues_csv
-from ..export_audit import (
+from ...export_compare import export_compare_issues_csv
+from ...export_audit import (
export_audit_csv,
export_audit_html,
export_audit_json,
export_audit_pdf,
)
-from ._slice import parse_limit
-from .compare_helpers import load_compare_pair
-from .context import AuditToolContext
+from .._slice import parse_limit
+from ..compare.compare_helpers import load_compare_pair
+from ..context import AuditToolContext
_EXPORT_FORMATS = {"pdf", "html", "csv", "json"}
_MIME = {
@@ -194,7 +194,7 @@
def _dispatch(name: str, args: dict[str, Any], ctx: AuditToolContext, conn: Connection) -> dict[str, Any]:
- from .registry import dispatch_tool
+ from ..registry import dispatch_tool
return dispatch_tool(name, args, context=ctx, conn=conn)
diff --git a/src/website_profiling/tools/audit_tools/geo/__init__.py b/src/website_profiling/tools/audit_tools/geo/__init__.py
new file mode 100644
index 00000000..0f2458f6
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/geo/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — geo domain."""
diff --git a/src/website_profiling/tools/audit_tools/agent_readiness.py b/src/website_profiling/tools/audit_tools/geo/agent_readiness.py
similarity index 99%
rename from src/website_profiling/tools/audit_tools/agent_readiness.py
rename to src/website_profiling/tools/audit_tools/geo/agent_readiness.py
index 544244dc..c2094510 100644
--- a/src/website_profiling/tools/audit_tools/agent_readiness.py
+++ b/src/website_profiling/tools/audit_tools/geo/agent_readiness.py
@@ -20,7 +20,7 @@
import requests
from psycopg import Connection
-from ._aeo_helpers import (
+from .._aeo_helpers import (
count_tokens,
detect_copy_for_ai,
is_doc_like_url,
@@ -28,8 +28,8 @@
score_content_structure_aeo,
strip_html_to_text,
)
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
from .geo_tools import _base_url, _fetch_llms_txt, _score_meta_signals, _score_robots_ai_access
_DEFAULT_MAX_TOKENS = 25_000
diff --git a/src/website_profiling/tools/audit_tools/geo_citability.py b/src/website_profiling/tools/audit_tools/geo/geo_citability.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/geo_citability.py
rename to src/website_profiling/tools/audit_tools/geo/geo_citability.py
index 5e481b0e..57159b78 100644
--- a/src/website_profiling/tools/audit_tools/geo_citability.py
+++ b/src/website_profiling/tools/audit_tools/geo/geo_citability.py
@@ -23,9 +23,9 @@
from psycopg import Connection
-from ._slice import _row_schema_types_list
-from .context import AuditToolContext
-from ...content_analysis.reading_level import flesch_kincaid_grade
+from .._slice import _row_schema_types_list
+from ..context import AuditToolContext
+from ....content_analysis.reading_level import flesch_kincaid_grade
_STAT_PATTERN = re.compile(r"\b\d[\d,]*\.?\d*\s*(?:%|percent|million|billion|thousand|k\b)", re.I)
diff --git a/src/website_profiling/tools/audit_tools/geo_detectors.py b/src/website_profiling/tools/audit_tools/geo/geo_detectors.py
similarity index 99%
rename from src/website_profiling/tools/audit_tools/geo_detectors.py
rename to src/website_profiling/tools/audit_tools/geo/geo_detectors.py
index 83002f90..9008860c 100644
--- a/src/website_profiling/tools/audit_tools/geo_detectors.py
+++ b/src/website_profiling/tools/audit_tools/geo/geo_detectors.py
@@ -11,8 +11,8 @@
from psycopg import Connection
-from ._slice import _row_schema_types_list, cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import _row_schema_types_list, cap_list, parse_limit
+from ..context import AuditToolContext
# ---------------------------------------------------------------------------
# Negative signals detection
diff --git a/src/website_profiling/tools/audit_tools/geo_list_tools.py b/src/website_profiling/tools/audit_tools/geo/geo_list_tools.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/geo_list_tools.py
rename to src/website_profiling/tools/audit_tools/geo/geo_list_tools.py
index 199f2b49..1f9e021a 100644
--- a/src/website_profiling/tools/audit_tools/geo_list_tools.py
+++ b/src/website_profiling/tools/audit_tools/geo/geo_list_tools.py
@@ -8,8 +8,8 @@
import requests
from psycopg import Connection
-from ._slice import _row_schema_types_list, cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import _row_schema_types_list, cap_list, parse_limit
+from ..context import AuditToolContext
from .geo_tools import _base_url, _fetch_llms_txt, _has_faq_schema, _score_robots_ai_access
_HOWTO_TYPES = frozenset({"howto", "how-to"})
@@ -120,7 +120,9 @@ def _agent_access(agent: str) -> str:
if not applicable:
return "default"
for allows, disallows in applicable:
- root_blocked = "/" in disallows or "" in disallows
+ # A bare `Disallow:` (empty value) is the canonical allow-all directive,
+ # not a block; only `Disallow: /` blocks the whole site.
+ root_blocked = "/" in disallows
root_allowed = "/" in allows
if root_blocked and not root_allowed:
return "blocked"
diff --git a/src/website_profiling/tools/audit_tools/geo_tools.py b/src/website_profiling/tools/audit_tools/geo/geo_tools.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/geo_tools.py
rename to src/website_profiling/tools/audit_tools/geo/geo_tools.py
index ade76f42..b4f442aa 100644
--- a/src/website_profiling/tools/audit_tools/geo_tools.py
+++ b/src/website_profiling/tools/audit_tools/geo/geo_tools.py
@@ -24,8 +24,8 @@
import requests
from psycopg import Connection
-from ._slice import _row_schema_types_list, cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import _row_schema_types_list, cap_list, parse_limit
+from ..context import AuditToolContext
_FAQ_TYPES = frozenset({"faqpage", "qapage", "question"})
_QA_URL_HINTS = ("/faq", "/faqs", "/help", "/support", "/questions")
@@ -418,22 +418,29 @@ def get_geo_readiness_score(conn: Connection, ctx: AuditToolContext, args: dict[
with ThreadPoolExecutor(max_workers=5) as pool:
futs = {pool.submit(fn, domain): key for key, fn in http_tasks.items()}
for fut in as_completed(futs):
- http_results[futs[fut]] = fut.result()
-
- llms = http_results["llms"]
+ key = futs[fut]
+ try:
+ http_results[key] = fut.result()
+ except Exception:
+ # A failing/raising live-HTTP task must degrade to a 0 sub-score,
+ # not crash the whole composite score (mirrors
+ # get_agent_readiness_score).
+ http_results[key] = {}
+
+ llms = http_results.get("llms", {})
llms_depth = llms.get("depth", {}) if llms.get("found") else {}
llms_raw = llms_depth.get("depth_score", 0) if llms.get("found") else 0
- robots_result = http_results["robots"]
+ robots_result = http_results.get("robots", {})
robots_raw = robots_result.get("robots_score", 0)
- meta_result = http_results["meta"]
+ meta_result = http_results.get("meta", {})
meta_raw = meta_result.get("meta_score", 0)
- freshness_result = http_results["freshness"]
+ freshness_result = http_results.get("freshness", {})
freshness_raw = freshness_result.get("freshness_score", 0)
- discovery_result = http_results["discovery"]
+ discovery_result = http_results.get("discovery", {})
discovery_raw = discovery_result.get("discovery_score", 0)
total_score = round(
diff --git a/src/website_profiling/tools/audit_tools/google/__init__.py b/src/website_profiling/tools/audit_tools/google/__init__.py
new file mode 100644
index 00000000..fe859fe6
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/google/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — google domain."""
diff --git a/src/website_profiling/tools/audit_tools/google.py b/src/website_profiling/tools/audit_tools/google/google.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/google.py
rename to src/website_profiling/tools/audit_tools/google/google.py
index 5043131b..74df39f9 100644
--- a/src/website_profiling/tools/audit_tools/google.py
+++ b/src/website_profiling/tools/audit_tools/google/google.py
@@ -5,9 +5,9 @@
from psycopg import Connection
-from ...integrations.google.page_lookup import slice_from_google_row
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from ....integrations.google.page_lookup import slice_from_google_row
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
def get_google_summary(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -113,7 +113,7 @@ def get_ga4_page_metrics(conn: Connection, ctx: AuditToolContext, args: dict[str
ga4 = data.get("ga4") if isinstance(data.get("ga4"), dict) else {}
if not ga4:
return {"error": "no GA4 data", "missing": True}
- from ...integrations.google.normalize import url_to_path
+ from ....integrations.google.normalize import url_to_path
def _ga4_path_key(raw: str) -> str:
text = str(raw or "").strip()
@@ -139,7 +139,7 @@ def _ga4_path_key(raw: str) -> str:
def get_gsc_ctr_opportunity_pages(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
"""Pages with high impressions but CTR below industry curve at their position."""
- from ...integrations.google.keyword_enrich import ctr_as_fraction, industry_ctr
+ from ....integrations.google.keyword_enrich import ctr_as_fraction, industry_ctr
scoped = ctx.with_args(args)
data = scoped.load_google(conn)
diff --git a/src/website_profiling/tools/audit_tools/google_lists.py b/src/website_profiling/tools/audit_tools/google/google_lists.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/google_lists.py
rename to src/website_profiling/tools/audit_tools/google/google_lists.py
index fac019aa..3f9a4944 100644
--- a/src/website_profiling/tools/audit_tools/google_lists.py
+++ b/src/website_profiling/tools/audit_tools/google/google_lists.py
@@ -5,11 +5,11 @@
from psycopg import Connection
-from ...integrations.google.keyword_enrich import ctr_as_fraction, industry_ctr
-from ...integrations.google.normalize import normalize_url, url_to_path
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
-from .insight_helpers import blend_landing_pages, provenance_block, traffic_health_ratio, _num
+from ....integrations.google.keyword_enrich import ctr_as_fraction, industry_ctr
+from ....integrations.google.normalize import normalize_url, url_to_path
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
+from ..insight.insight_helpers import blend_landing_pages, provenance_block, traffic_health_ratio, _num
def _gsc_ga4_blobs(raw: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -24,7 +24,7 @@ def _load_google_pair(ctx: AuditToolContext, conn: Connection) -> tuple[dict[str
if prior is not None or ctx.property_id is None:
return current, prior
try:
- from ...integrations.google.store import read_prior_google_snapshot
+ from ....integrations.google.store import read_prior_google_snapshot
prior = read_prior_google_snapshot(conn, ctx.property_id, skip=1)
except Exception:
@@ -41,7 +41,7 @@ def _load_google_pair(ctx: AuditToolContext, conn: Connection) -> tuple[dict[str
)
rows = cur.fetchall() or []
if len(rows) >= 2:
- from ...db.storage import _parse_row_json
+ from ....db.storage import _parse_row_json
prior_data = _parse_row_json(rows[1])
prior = prior_data if isinstance(prior_data, dict) else None
diff --git a/src/website_profiling/tools/audit_tools/images/__init__.py b/src/website_profiling/tools/audit_tools/images/__init__.py
new file mode 100644
index 00000000..c6b372ff
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/images/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — images domain."""
diff --git a/src/website_profiling/tools/audit_tools/image_tools.py b/src/website_profiling/tools/audit_tools/images/image_tools.py
similarity index 94%
rename from src/website_profiling/tools/audit_tools/image_tools.py
rename to src/website_profiling/tools/audit_tools/images/image_tools.py
index c1aea621..51391f89 100644
--- a/src/website_profiling/tools/audit_tools/image_tools.py
+++ b/src/website_profiling/tools/audit_tools/images/image_tools.py
@@ -6,9 +6,9 @@
import pandas as pd
from psycopg import Connection
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
-from .crawl_lists import _content_urls_list, _filter_crawl_pages
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
+from ..crawl.crawl_lists import _content_urls_list, _filter_crawl_pages
IMAGE_LIGHTHOUSE_AUDIT_IDS = frozenset({
"uses-optimized-images",
@@ -77,9 +77,11 @@ def get_image_audit_summary(conn: Connection, ctx: AuditToolContext, args: dict[
if not payload:
return {"error": "no report found"}
df = scoped.load_crawl_df(conn)
- pages_missing_alt = 0
- pages_missing_lazy = 0
- pages_missing_dims = 0
+ # None = "not computed from the crawl DataFrame" (so we fall back to
+ # content_urls); a real 0 must survive instead of being treated as falsy.
+ pages_missing_alt: int | None = None
+ pages_missing_lazy: int | None = None
+ pages_missing_dims: int | None = None
images_total = 0
if df is not None and not df.empty and "status" in df.columns:
work = df[df["status"].astype(str).str.match(r"2\d{2}", na=False)]
@@ -100,9 +102,9 @@ def get_image_audit_summary(conn: Connection, ctx: AuditToolContext, args: dict[
content_urls = payload.get("content_urls") if isinstance(payload.get("content_urls"), dict) else {}
og_missing = social.get("og_image_missing") if isinstance(social.get("og_image_missing"), list) else []
return {
- "pages_missing_alt": pages_missing_alt or len(content_urls.get("missing_alt") or []),
- "pages_without_lazy_images": pages_missing_lazy or len(content_urls.get("missing_lazy") or []),
- "pages_missing_image_dimensions": pages_missing_dims or len(content_urls.get("missing_dimensions") or []),
+ "pages_missing_alt": pages_missing_alt if pages_missing_alt is not None else len(content_urls.get("missing_alt") or []),
+ "pages_without_lazy_images": pages_missing_lazy if pages_missing_lazy is not None else len(content_urls.get("missing_lazy") or []),
+ "pages_missing_image_dimensions": pages_missing_dims if pages_missing_dims is not None else len(content_urls.get("missing_dimensions") or []),
"images_total_crawled": images_total,
"og_image_coverage_pct": social.get("og_image_coverage_pct"),
"og_image_missing_count": len(og_missing),
diff --git a/src/website_profiling/tools/audit_tools/indexation/__init__.py b/src/website_profiling/tools/audit_tools/indexation/__init__.py
new file mode 100644
index 00000000..591706b4
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/indexation/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — indexation domain."""
diff --git a/src/website_profiling/tools/audit_tools/indexation_lists.py b/src/website_profiling/tools/audit_tools/indexation/indexation_lists.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/indexation_lists.py
rename to src/website_profiling/tools/audit_tools/indexation/indexation_lists.py
index bdca54be..f3761390 100644
--- a/src/website_profiling/tools/audit_tools/indexation_lists.py
+++ b/src/website_profiling/tools/audit_tools/indexation/indexation_lists.py
@@ -5,11 +5,11 @@
from psycopg import Connection
-from ...integrations.google.normalize import normalize_url, url_to_path
-from ...reporting.categories import REDIRECT_CHAIN_LONG
-from ._slice import _parse_page_analysis, cap_list, parse_limit
-from .context import AuditToolContext
-from .ops import _load_log_analysis
+from ....integrations.google.normalize import normalize_url, url_to_path
+from ....reporting.categories import REDIRECT_CHAIN_LONG
+from .._slice import _parse_page_analysis, cap_list, parse_limit
+from ..context import AuditToolContext
+from ..ops.ops import _load_log_analysis
_REDIRECT_CHAIN_MIN = REDIRECT_CHAIN_LONG
diff --git a/src/website_profiling/tools/audit_tools/indexation_tools.py b/src/website_profiling/tools/audit_tools/indexation/indexation_tools.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/indexation_tools.py
rename to src/website_profiling/tools/audit_tools/indexation/indexation_tools.py
index 8661187d..87ca210c 100644
--- a/src/website_profiling/tools/audit_tools/indexation_tools.py
+++ b/src/website_profiling/tools/audit_tools/indexation/indexation_tools.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
_GAP_TYPES = frozenset({"sitemap_only", "crawled_not_in_sitemap", "gsc_not_crawled"})
diff --git a/src/website_profiling/tools/audit_tools/international.py b/src/website_profiling/tools/audit_tools/indexation/international.py
similarity index 90%
rename from src/website_profiling/tools/audit_tools/international.py
rename to src/website_profiling/tools/audit_tools/indexation/international.py
index 86e7c330..1a88396d 100644
--- a/src/website_profiling/tools/audit_tools/international.py
+++ b/src/website_profiling/tools/audit_tools/indexation/international.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from .context import AuditToolContext
-from ._slice import payload_dict_slice
+from ..context import AuditToolContext
+from .._slice import payload_dict_slice
def get_hreflang_summary(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/insight/__init__.py b/src/website_profiling/tools/audit_tools/insight/__init__.py
new file mode 100644
index 00000000..ab052b43
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/insight/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — insight domain."""
diff --git a/src/website_profiling/tools/audit_tools/insight_helpers.py b/src/website_profiling/tools/audit_tools/insight/insight_helpers.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/insight_helpers.py
rename to src/website_profiling/tools/audit_tools/insight/insight_helpers.py
index e38d09ff..3dfaac34 100644
--- a/src/website_profiling/tools/audit_tools/insight_helpers.py
+++ b/src/website_profiling/tools/audit_tools/insight/insight_helpers.py
@@ -3,7 +3,7 @@
from typing import Any
-from ...integrations.google.normalize import normalize_url, url_to_path
+from ....integrations.google.normalize import normalize_url, url_to_path
def provenance_block(
diff --git a/src/website_profiling/tools/audit_tools/insight_tools.py b/src/website_profiling/tools/audit_tools/insight/insight_tools.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/insight_tools.py
rename to src/website_profiling/tools/audit_tools/insight/insight_tools.py
index 1d9357b2..98c7b5d1 100644
--- a/src/website_profiling/tools/audit_tools/insight_tools.py
+++ b/src/website_profiling/tools/audit_tools/insight/insight_tools.py
@@ -5,9 +5,9 @@
from psycopg import Connection
-from ...integrations.google.page_lookup import slice_from_google_row
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from ....integrations.google.page_lookup import slice_from_google_row
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
from .insight_helpers import (
blend_landing_pages,
composite_page_score,
@@ -15,7 +15,7 @@
provenance_block,
traffic_health_ratio,
)
-from .report import list_issues
+from ..report.report import list_issues
def _gsc_ga4_blobs(raw: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
diff --git a/src/website_profiling/tools/audit_tools/integrations/__init__.py b/src/website_profiling/tools/audit_tools/integrations/__init__.py
new file mode 100644
index 00000000..a90247f8
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/integrations/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — integrations domain."""
diff --git a/src/website_profiling/tools/audit_tools/integration_tools.py b/src/website_profiling/tools/audit_tools/integrations/integration_tools.py
similarity index 95%
rename from src/website_profiling/tools/audit_tools/integration_tools.py
rename to src/website_profiling/tools/audit_tools/integrations/integration_tools.py
index 627a0903..1daa997c 100644
--- a/src/website_profiling/tools/audit_tools/integration_tools.py
+++ b/src/website_profiling/tools/audit_tools/integrations/integration_tools.py
@@ -5,11 +5,11 @@
from psycopg import Connection
-from ...db.property_store import get_property_by_id
-from ...integrations.google.auth import build_credentials
-from ...integrations.google.gsc_inspection import inspect_url
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from ....db.property_store import get_property_by_id
+from ....integrations.google.auth import build_credentials
+from ....integrations.google.gsc_inspection import inspect_url
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
def _property_google_config(conn: Connection, property_id: int | None) -> tuple[dict[str, Any] | None, Any, str]:
@@ -72,14 +72,14 @@ def get_bing_index_status(conn: Connection, ctx: AuditToolContext, args: dict[st
prop = get_property_by_id(conn, scoped.property_id)
if not prop:
return {"error": "property not found", "missing": True}
- from ...db.config_store import read_pipeline_config
+ from ....db.config_store import read_pipeline_config
known, _ = read_pipeline_config(conn)
api_key = str(known.get("bing_webmaster_api_key") or "").strip()
site_url = str(prop.get("gsc_site_url") or prop.get("canonical_domain") or "").strip()
if not api_key:
return {"error": "bing_webmaster_api_key not configured in audit settings", "missing": True}
- from ...integrations.bing.webmaster import _bing_json_get
+ from ....integrations.bing.webmaster import _bing_json_get
raw = _bing_json_get("GetUrlInfo", api_key, siteUrl=site_url, url=url)
if raw.get("error"):
@@ -172,7 +172,7 @@ def check_ai_citations_live(conn: Connection, ctx: AuditToolContext, args: dict[
brand_name = brand or domain or "this brand"
query = f"What is {brand_name}? Can you tell me about their main products or services?"
- from ...integrations.ai_citations import check_citations, resolve_api_key
+ from ....integrations.ai_citations import check_citations, resolve_api_key
key = resolve_api_key(provider, api_key)
if not key:
diff --git a/src/website_profiling/tools/audit_tools/llm_tools.py b/src/website_profiling/tools/audit_tools/integrations/llm_tools.py
similarity index 92%
rename from src/website_profiling/tools/audit_tools/llm_tools.py
rename to src/website_profiling/tools/audit_tools/integrations/llm_tools.py
index d69f97da..a01ede9e 100644
--- a/src/website_profiling/tools/audit_tools/llm_tools.py
+++ b/src/website_profiling/tools/audit_tools/integrations/llm_tools.py
@@ -6,13 +6,13 @@
from psycopg import Connection
-from ...db._common import _row_field
-from ...db.property_store import list_properties_public
-from ...integrations.google.suggest import batch_expand
-from ...llm.content_brief import generate_content_brief as build_content_brief
-from ...llm.page_coach import run_page_coach
-from ._slice import parse_limit
-from .context import AuditToolContext
+from ....db._common import _row_field
+from ....db.property_store import list_properties_public
+from ....integrations.google.suggest import batch_expand
+from ....llm.content_brief import generate_content_brief as build_content_brief
+from ....llm.page_coach import run_page_coach
+from .._slice import parse_limit
+from ..context import AuditToolContext
def generate_content_brief(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -131,7 +131,7 @@ def expand_keywords(conn: Connection, ctx: AuditToolContext, args: dict[str, Any
def _llm_disabled_response() -> dict[str, Any]:
- from ...llm_config import load_llm_config_from_db, llm_is_enabled
+ from ....llm_config import load_llm_config_from_db, llm_is_enabled
cfg = load_llm_config_from_db()
if not llm_is_enabled(cfg):
@@ -140,8 +140,8 @@ def _llm_disabled_response() -> dict[str, Any]:
def generate_issue_fix(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
- from ...llm.issue_fixes import generate_issue_fix_suggestion
- from ...llm_config import load_llm_config_from_db
+ from ....llm.issue_fixes import generate_issue_fix_suggestion
+ from ....llm_config import load_llm_config_from_db
err = _llm_disabled_response()
if err:
@@ -163,7 +163,7 @@ def generate_issue_fix(conn: Connection, ctx: AuditToolContext, args: dict[str,
def summarize_category_for_client(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
- from .issues import get_category_issues
+ from ..issues.issues import get_category_issues
category_id = str(args.get("category_id") or "").strip()
if not category_id:
@@ -188,8 +188,8 @@ def summarize_category_for_client(conn: Connection, ctx: AuditToolContext, args:
}
err = _llm_disabled_response()
if not err:
- from ...llm.base import get_llm_client, parse_json_response
- from ...llm_config import load_llm_config_from_db
+ from ....llm.base import get_llm_client, parse_json_response
+ from ....llm_config import load_llm_config_from_db
cfg = load_llm_config_from_db()
try:
@@ -210,8 +210,8 @@ def summarize_category_for_client(conn: Connection, ctx: AuditToolContext, args:
def prioritize_fix_roadmap(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
- from ...reporting.issue_impact import sort_issues_by_impact
- from .report import _iter_category_issues
+ from ....reporting.issue_impact import sort_issues_by_impact
+ from ..report.report import _iter_category_issues
scoped = ctx.with_args(args)
payload = scoped.load_payload(conn)
@@ -240,7 +240,7 @@ def prioritize_fix_roadmap(conn: Connection, ctx: AuditToolContext, args: dict[s
def analyze_serp_snippet_for_url(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
- from ...integrations.google.page_lookup import slice_from_google_row
+ from ....integrations.google.page_lookup import slice_from_google_row
scoped = ctx.with_args(args)
url = str(args.get("url") or "").strip()
@@ -268,8 +268,8 @@ def analyze_serp_snippet_for_url(conn: Connection, ctx: AuditToolContext, args:
base["note"] = err.get("error")
base["provenance"] = "Crawl"
return base
- from ...llm.base import get_llm_client
- from ...llm_config import load_llm_config_from_db
+ from ....llm.base import get_llm_client
+ from ....llm_config import load_llm_config_from_db
cfg = load_llm_config_from_db()
client = get_llm_client(cfg)
@@ -314,8 +314,8 @@ def draft_llms_txt(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]
]
err = _llm_disabled_response()
if not err:
- from ...llm.base import get_llm_client
- from ...llm_config import load_llm_config_from_db
+ from ....llm.base import get_llm_client
+ from ....llm_config import load_llm_config_from_db
try:
client = get_llm_client(load_llm_config_from_db())
@@ -342,8 +342,8 @@ def generate_schema(conn: Connection, ctx: AuditToolContext, args: dict[str, Any
url = str(args.get("url") or "").strip()
payload = scoped.load_payload(conn)
domain = str(scoped.resolve_property_domain(conn) or "")
- site_name = str(payload.get("site_name") if payload else None or domain or "Site")
- from .geo_tools import _base_url as _mk_base
+ site_name = str((payload.get("site_name") if payload else None) or domain or "Site")  # payload may be None when no report exists; keep the guard so .get() is never called on None
+ from ..geo.geo_tools import _base_url as _mk_base
base_url = _mk_base(domain) if domain else url
def _website_schema() -> dict[str, Any]:
@@ -429,8 +429,8 @@ def _article_schema() -> dict[str, Any]:
err = _llm_disabled_response()
if not err:
- from ...llm.base import get_llm_client
- from ...llm_config import load_llm_config_from_db
+ from ....llm.base import get_llm_client
+ from ....llm_config import load_llm_config_from_db
try:
client = get_llm_client(load_llm_config_from_db())
@@ -454,11 +454,11 @@ def _article_schema() -> dict[str, Any]:
def generate_robots_txt(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
"""Generate a robots.txt that explicitly allows all major AI citation bots."""
- from .geo_list_tools import _AI_BOT_TIERS
+ from ..geo.geo_list_tools import _AI_BOT_TIERS
scoped = ctx.with_args(args)
domain = str(scoped.resolve_property_domain(conn) or "")
- from .geo_tools import _base_url as _mk_base
+ from ..geo.geo_tools import _base_url as _mk_base
base_url = _mk_base(domain) if domain else ""
lines = ["# robots.txt — generated by Site Audit", ""]
@@ -527,8 +527,8 @@ def generate_geo_fix_bundle(conn: Connection, ctx: AuditToolContext, args: dict[
org_schema_result = generate_schema(conn, scoped, {**args, "schema_type": "Organization"})
from concurrent.futures import ThreadPoolExecutor, as_completed
- from .geo_tools import _fetch_llms_txt, _fetch_ai_discovery, _score_meta_signals
- from .geo_list_tools import _parse_robots_txt, _parse_robots_access
+ from ..geo.geo_tools import _fetch_llms_txt, _fetch_ai_discovery, _score_meta_signals
+ from ..geo.geo_list_tools import _parse_robots_txt, _parse_robots_access
with ThreadPoolExecutor(max_workers=3) as _pool:
_f_llms = _pool.submit(_fetch_llms_txt, domain)
@@ -543,7 +543,7 @@ def generate_geo_fix_bundle(conn: Connection, ctx: AuditToolContext, args: dict[
missing_files.append("llms.txt")
robots_text = _parse_robots_txt(domain)
access_map = _parse_robots_access(robots_text) if robots_text else {}
- from .geo_list_tools import _AI_BOT_TIERS
+ from ..geo.geo_list_tools import _AI_BOT_TIERS
citation_bots = [b for b, t in _AI_BOT_TIERS.items() if t == "citation"]
if any(access_map.get(b.lower()) == "blocked" for b in citation_bots):
missing_files.append("robots.txt (AI bots blocked)")
diff --git a/src/website_profiling/tools/audit_tools/issues/__init__.py b/src/website_profiling/tools/audit_tools/issues/__init__.py
new file mode 100644
index 00000000..644c309c
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/issues/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — issues domain."""
diff --git a/src/website_profiling/tools/audit_tools/issue_lists.py b/src/website_profiling/tools/audit_tools/issues/issue_lists.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/issue_lists.py
rename to src/website_profiling/tools/audit_tools/issues/issue_lists.py
index edd870f9..7d0ee53c 100644
--- a/src/website_profiling/tools/audit_tools/issue_lists.py
+++ b/src/website_profiling/tools/audit_tools/issues/issue_lists.py
@@ -7,16 +7,16 @@
import pandas as pd
from psycopg import Connection
-from ...reporting.categories._helpers import (
+from ....reporting.categories._helpers import (
RESPONSE_TIME_SLOW_MS,
TITLE_LEN_MAX,
TITLE_LEN_MIN,
_hreflang_issues,
_orphan_hub_suggestions,
)
-from ...reporting.categories.accessibility import contrast_issues_from_sources
-from ._slice import _parse_page_analysis, _row_schema_types_list, cap_list, parse_limit
-from .context import AuditToolContext
+from ....reporting.categories.accessibility import contrast_issues_from_sources
+from .._slice import _parse_page_analysis, _row_schema_types_list, cap_list, parse_limit
+from ..context import AuditToolContext
_READING_LEVEL_HIGH = 12.0
_VERY_THIN_WORDS = 100
diff --git a/src/website_profiling/tools/audit_tools/issues.py b/src/website_profiling/tools/audit_tools/issues/issues.py
similarity index 88%
rename from src/website_profiling/tools/audit_tools/issues.py
rename to src/website_profiling/tools/audit_tools/issues/issues.py
index 3c13f9a3..3e027d92 100644
--- a/src/website_profiling/tools/audit_tools/issues.py
+++ b/src/website_profiling/tools/audit_tools/issues/issues.py
@@ -5,9 +5,9 @@
from psycopg import Connection
-from ...reporting.terminology import category_display_name
-from .context import AuditToolContext
-from .report import _health_score, _iter_category_issues, list_issues
+from ....reporting.terminology import category_display_name
+from ..context import AuditToolContext
+from ..report.report import _health_score, _iter_category_issues, list_issues
def list_issues_by_category(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/keywords/__init__.py b/src/website_profiling/tools/audit_tools/keywords/__init__.py
new file mode 100644
index 00000000..fff4bad8
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/keywords/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — keywords domain."""
diff --git a/src/website_profiling/tools/audit_tools/keyword_lists.py b/src/website_profiling/tools/audit_tools/keywords/keyword_lists.py
similarity index 96%
rename from src/website_profiling/tools/audit_tools/keyword_lists.py
rename to src/website_profiling/tools/audit_tools/keywords/keyword_lists.py
index 72ca6b4f..0c766489 100644
--- a/src/website_profiling/tools/audit_tools/keyword_lists.py
+++ b/src/website_profiling/tools/audit_tools/keywords/keyword_lists.py
@@ -6,11 +6,11 @@
from psycopg import Connection
-from ...integrations.google.keyword_enrich import opportunity_clicks
-from ...integrations.google.keyword_store import read_keyword_snapshots_for_property
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
-from .insight_helpers import _num
+from ....integrations.google.keyword_enrich import opportunity_clicks
+from ....integrations.google.keyword_store import read_keyword_snapshots_for_property
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
+from ..insight.insight_helpers import _num
def _require_property(ctx: AuditToolContext) -> dict[str, Any] | None:
@@ -460,7 +460,15 @@ def _near(row: dict[str, Any]) -> bool:
def list_keywords_high_impression_zero_click(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
- min_impressions = parse_limit(args.get("min_impressions"), 100, 1_000_000)
+ # A free-form impressions threshold, NOT a pagination limit: parse_limit would
+ # clamp it to >= 1 and <= 1_000_000, silently rejecting an explicit 0 and
+ # capping large thresholds. Parse directly and clamp only to >= 0.
+ raw_min_impr = args.get("min_impressions")
+ try:
+ min_impressions = int(raw_min_impr) if raw_min_impr is not None else 100
+ except (TypeError, ValueError, OverflowError):  # OverflowError: int(float("inf")), e.g. a JSON `Infinity` argument
+ min_impressions = 100
+ min_impressions = max(0, min_impressions)
def _zero_click(row: dict[str, Any]) -> bool:
return int(_num(row.get("gsc_clicks"))) == 0 and int(_num(row.get("gsc_impressions"))) >= min_impressions
diff --git a/src/website_profiling/tools/audit_tools/keywords.py b/src/website_profiling/tools/audit_tools/keywords/keywords.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/keywords.py
rename to src/website_profiling/tools/audit_tools/keywords/keywords.py
index e6cd45c8..35e4eb01 100644
--- a/src/website_profiling/tools/audit_tools/keywords.py
+++ b/src/website_profiling/tools/audit_tools/keywords/keywords.py
@@ -5,9 +5,9 @@
from psycopg import Connection
-from ...integrations.google.keyword_store import read_keyword_history
-from ._slice import cap_list, parse_limit, payload_field
-from .context import AuditToolContext
+from ....integrations.google.keyword_store import read_keyword_history
+from .._slice import cap_list, parse_limit, payload_field
+from ..context import AuditToolContext
_KEYWORD_LIMIT_DEFAULT = 20
_KEYWORD_LIMIT_MAX = 50
diff --git a/src/website_profiling/tools/audit_tools/links/__init__.py b/src/website_profiling/tools/audit_tools/links/__init__.py
new file mode 100644
index 00000000..caed0239
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/links/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — links domain."""
diff --git a/src/website_profiling/tools/audit_tools/link_lists.py b/src/website_profiling/tools/audit_tools/links/link_lists.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/link_lists.py
rename to src/website_profiling/tools/audit_tools/links/link_lists.py
index 67138512..c4aa8dae 100644
--- a/src/website_profiling/tools/audit_tools/link_lists.py
+++ b/src/website_profiling/tools/audit_tools/links/link_lists.py
@@ -6,9 +6,9 @@
from psycopg import Connection
-from ...common import strip_www_prefix
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from ....common import strip_www_prefix
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
def _norm_url(url: str) -> str:
diff --git a/src/website_profiling/tools/audit_tools/links.py b/src/website_profiling/tools/audit_tools/links/links.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/links.py
rename to src/website_profiling/tools/audit_tools/links/links.py
index f131d0fa..6a45a640 100644
--- a/src/website_profiling/tools/audit_tools/links.py
+++ b/src/website_profiling/tools/audit_tools/links/links.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ._slice import cap_list, parse_limit, payload_field
-from .context import AuditToolContext
+from .._slice import cap_list, parse_limit, payload_field
+from ..context import AuditToolContext
def list_orphan_pages(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -127,7 +127,7 @@ def get_link_rel_summary(conn: Connection, ctx: AuditToolContext, args: dict[str
summary = payload.get("link_rel_summary")
if isinstance(summary, dict):
return summary
- from ...reporting.link_edges_report import summarize_link_rel
+ from ....reporting.link_edges_report import summarize_link_rel
edges = payload.get("link_edges") or []
return summarize_link_rel(edges if isinstance(edges, list) else [])
diff --git a/src/website_profiling/tools/audit_tools/onpage/__init__.py b/src/website_profiling/tools/audit_tools/onpage/__init__.py
new file mode 100644
index 00000000..4abd127e
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/onpage/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — onpage domain."""
diff --git a/src/website_profiling/tools/audit_tools/onpage.py b/src/website_profiling/tools/audit_tools/onpage/onpage.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/onpage.py
rename to src/website_profiling/tools/audit_tools/onpage/onpage.py
index 7b6279cf..6d846c38 100644
--- a/src/website_profiling/tools/audit_tools/onpage.py
+++ b/src/website_profiling/tools/audit_tools/onpage/onpage.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
_CONTENT_BUCKETS = frozenset({
"missing_h1",
diff --git a/src/website_profiling/tools/audit_tools/ops/__init__.py b/src/website_profiling/tools/audit_tools/ops/__init__.py
new file mode 100644
index 00000000..4af473ca
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/ops/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — ops domain."""
diff --git a/src/website_profiling/tools/audit_tools/ops.py b/src/website_profiling/tools/audit_tools/ops/ops.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/ops.py
rename to src/website_profiling/tools/audit_tools/ops/ops.py
index 8ba3939c..853424e7 100644
--- a/src/website_profiling/tools/audit_tools/ops.py
+++ b/src/website_profiling/tools/audit_tools/ops/ops.py
@@ -6,11 +6,11 @@
from psycopg import Connection
-from ...db._common import _row_field
-from ...db.property_store import get_property_by_id
-from ...tools.alert_checker import check_all_alerts
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from ....db._common import _row_field
+from ....db.property_store import get_property_by_id
+from ....tools.alert_checker import check_all_alerts
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
def get_integration_alerts(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -53,7 +53,7 @@ def get_google_integration_status(conn: Connection, ctx: AuditToolContext, args:
google = scoped.load_google(conn)
gsc_links_status = None
try:
- from ...integrations.google.gsc_links_store import read_gsc_links_status
+ from ....integrations.google.gsc_links_store import read_gsc_links_status
gsc_links_status = read_gsc_links_status(conn, int(scoped.property_id))
except Exception:
gsc_links_status = None
diff --git a/src/website_profiling/tools/audit_tools/workflow.py b/src/website_profiling/tools/audit_tools/ops/workflow.py
similarity index 68%
rename from src/website_profiling/tools/audit_tools/workflow.py
rename to src/website_profiling/tools/audit_tools/ops/workflow.py
index 23ec6327..2ab38f1a 100644
--- a/src/website_profiling/tools/audit_tools/workflow.py
+++ b/src/website_profiling/tools/audit_tools/ops/workflow.py
@@ -5,9 +5,9 @@
from psycopg import Connection
-from ...db._common import _row_field
-from ._slice import parse_limit
-from .context import AuditToolContext
+from ....db._common import _row_field
+from .._slice import parse_limit
+from ..context import AuditToolContext
def list_issue_workflow(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -16,19 +16,27 @@ def list_issue_workflow(conn: Connection, ctx: AuditToolContext, args: dict[str,
return {"error": "property_id is required"}
limit = parse_limit(args.get("limit"), 50, 50)
status_filter = str(args.get("status") or "").strip()
+ # Filter on status in SQL (not after LIMIT in Python). Applying it post-LIMIT
+ # would let the most-recent N rows be consumed by other statuses and return
+ # zero matches even when matching rows exist.
+ params: list[Any] = [scoped.property_id]
+ status_clause = ""
+ if status_filter:
+ status_clause = "AND status = %s"
+ params.append(status_filter)
+ params.append(limit)
cur = conn.execute(
- """SELECT issue_key, url, category, priority, message, status, assignee, note, updated_at
+ f"""SELECT issue_key, url, category, priority, message, status, assignee, note, updated_at
FROM issue_status
WHERE property_id = %s
+ {status_clause}
ORDER BY updated_at DESC
LIMIT %s""",
- (scoped.property_id, limit),
+ tuple(params),
)
rows = []
for row in cur.fetchall() or []:
st = str(_row_field(row, "status", index=5) or "")
- if status_filter and st != status_filter:
- continue
updated = _row_field(row, "updated_at", index=8)
rows.append({
"issue_key": _row_field(row, "issue_key", index=0),
diff --git a/src/website_profiling/tools/audit_tools/performance/__init__.py b/src/website_profiling/tools/audit_tools/performance/__init__.py
new file mode 100644
index 00000000..69448329
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/performance/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — performance domain."""
diff --git a/src/website_profiling/tools/audit_tools/lighthouse.py b/src/website_profiling/tools/audit_tools/performance/lighthouse.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/lighthouse.py
rename to src/website_profiling/tools/audit_tools/performance/lighthouse.py
index 0d8a5462..26e48dd4 100644
--- a/src/website_profiling/tools/audit_tools/lighthouse.py
+++ b/src/website_profiling/tools/audit_tools/performance/lighthouse.py
@@ -5,9 +5,9 @@
from psycopg import Connection
-from ...db.lighthouse_store import read_lighthouse_page_summaries, read_lighthouse_summary
-from ._slice import cap_list, parse_limit, payload_dict_slice
-from .context import AuditToolContext
+from ....db.lighthouse_store import read_lighthouse_page_summaries, read_lighthouse_summary
+from .._slice import cap_list, parse_limit, payload_dict_slice
+from ..context import AuditToolContext
def get_lighthouse_summary(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -234,7 +234,7 @@ def list_lighthouse_poor_best_practices_pages(conn: Connection, ctx: AuditToolCo
def list_lighthouse_cwv_failures(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
- from ...lighthouse.runner import CLS_GOOD, LCP_GOOD_MS, TBT_GOOD_MS
+ from ....lighthouse.runner import CLS_GOOD, LCP_GOOD_MS, TBT_GOOD_MS
scoped = ctx.with_args(args)
payload = scoped.load_payload(conn)
diff --git a/src/website_profiling/tools/audit_tools/portfolio/__init__.py b/src/website_profiling/tools/audit_tools/portfolio/__init__.py
new file mode 100644
index 00000000..f98f7aa4
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/portfolio/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — portfolio domain."""
diff --git a/src/website_profiling/tools/audit_tools/charts.py b/src/website_profiling/tools/audit_tools/portfolio/charts.py
similarity index 96%
rename from src/website_profiling/tools/audit_tools/charts.py
rename to src/website_profiling/tools/audit_tools/portfolio/charts.py
index edc64197..85cb6a61 100644
--- a/src/website_profiling/tools/audit_tools/charts.py
+++ b/src/website_profiling/tools/audit_tools/portfolio/charts.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
def get_crawl_summary(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -71,7 +71,7 @@ def get_outlink_distribution(conn: Connection, ctx: AuditToolContext, args: dict
def get_issue_priority_breakdown(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
"""Chart-friendly issue counts by priority (for chat visualization)."""
- from .report import get_report_summary
+ from ..report.report import get_report_summary
summary = get_report_summary(conn, ctx, args)
if summary.get("error"):
diff --git a/src/website_profiling/tools/audit_tools/health.py b/src/website_profiling/tools/audit_tools/portfolio/health.py
similarity index 92%
rename from src/website_profiling/tools/audit_tools/health.py
rename to src/website_profiling/tools/audit_tools/portfolio/health.py
index 80b98fa8..8050391b 100644
--- a/src/website_profiling/tools/audit_tools/health.py
+++ b/src/website_profiling/tools/audit_tools/portfolio/health.py
@@ -6,8 +6,8 @@
from psycopg import Connection
-from ...db._common import _row_field
-from .context import AuditToolContext
+from ....db._common import _row_field
+from ..context import AuditToolContext
def get_health_history(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -63,16 +63,16 @@ def get_health_history(conn: Connection, ctx: AuditToolContext, args: dict[str,
def list_report_history(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
scoped = ctx.with_args(args)
- from ._slice import parse_limit as _parse_limit
+ from .._slice import parse_limit as _parse_limit
limit = _parse_limit(args.get("limit"), 20, 50)
clauses: list[str] = []
params: list[Any] = []
domain = scoped.resolve_property_domain(conn)
- if scoped.property_id is not None:
- clauses.append("canonical_domain = %s")
- params.append(domain)
- if not clauses and domain:
+ # Only filter when a domain actually resolved. Previously an unresolvable
+ # property (domain == "") produced `WHERE canonical_domain = ''`, which matches
+ # no rows — silently returning empty history instead of recent reports.
+ if domain:
clauses.append("canonical_domain = %s")
params.append(domain)
where = f"WHERE {' AND '.join(clauses)}" if clauses else ""
diff --git a/src/website_profiling/tools/audit_tools/properties.py b/src/website_profiling/tools/audit_tools/portfolio/properties.py
similarity index 93%
rename from src/website_profiling/tools/audit_tools/properties.py
rename to src/website_profiling/tools/audit_tools/portfolio/properties.py
index 9a3a126e..3e38363e 100644
--- a/src/website_profiling/tools/audit_tools/properties.py
+++ b/src/website_profiling/tools/audit_tools/portfolio/properties.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ...db.property_store import get_property_by_id, list_properties_public
-from .context import AuditToolContext
+from ....db.property_store import get_property_by_id, list_properties_public
+from ..context import AuditToolContext
def _public_property_row(prop: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/property_profile.py b/src/website_profiling/tools/audit_tools/portfolio/property_profile.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/property_profile.py
rename to src/website_profiling/tools/audit_tools/portfolio/property_profile.py
index 4cdce4ba..92f9685f 100644
--- a/src/website_profiling/tools/audit_tools/property_profile.py
+++ b/src/website_profiling/tools/audit_tools/portfolio/property_profile.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
def _site_level_or_error(payload: dict[str, Any]) -> tuple[dict[str, Any] | None, dict[str, Any] | None]:
diff --git a/src/website_profiling/tools/audit_tools/registry.py b/src/website_profiling/tools/audit_tools/registry.py
index e1866915..f1fdaff8 100644
--- a/src/website_profiling/tools/audit_tools/registry.py
+++ b/src/website_profiling/tools/audit_tools/registry.py
@@ -8,14 +8,14 @@
from ...db.storage import db_session
-from .backlink_lists import (
+from .backlinks.backlink_lists import (
get_anchor_text_distribution,
list_backlinks_by_anchor_text,
list_backlinks_from_domain,
list_backlinks_to_url,
list_referring_domains,
)
-from .compare_list_tools import (
+from .compare.compare_list_tools import (
list_compare_lighthouse_regressions,
list_compare_new_issues,
list_compare_new_urls,
@@ -23,7 +23,7 @@
list_compare_resolved_issues,
list_compare_traffic_losers,
)
-from .content_lists import (
+from .content.content_lists import (
get_text_content_analysis,
list_amp_validation_issues,
list_duplicate_content_pairs,
@@ -35,18 +35,18 @@
list_schema_errors_by_type,
list_spell_check_issues,
)
-from .geo_list_tools import (
+from .geo.geo_list_tools import (
get_robots_ai_access_score,
list_pages_ai_citation_signals,
list_pages_missing_howto_schema,
list_pages_missing_llms_txt_reference,
list_robots_blocked_ai_crawlers,
)
-from .geo_citability import (
+from .geo.geo_citability import (
get_citability_score,
get_citability_for_url,
)
-from .geo_detectors import (
+from .geo.geo_detectors import (
detect_prompt_injection,
get_content_decay_signals,
get_multimodal_readiness,
@@ -54,7 +54,7 @@
get_rag_chunk_readiness,
get_topic_authority,
)
-from .agent_readiness import (
+from .geo.agent_readiness import (
get_agents_md_status,
get_skill_md_status,
get_agent_permissions_status,
@@ -68,7 +68,7 @@
get_agent_readiness_score,
generate_agent_readiness_bundle,
)
-from .google_lists import (
+from .google.google_lists import (
compare_gsc_periods,
get_ga4_path_trend,
get_gsc_page_trend,
@@ -90,7 +90,7 @@
list_gsc_queries_by_clicks,
list_gsc_queries_by_impressions,
)
-from .indexation_lists import (
+from .indexation.indexation_lists import (
list_crawl_urls_not_in_sitemap,
list_hreflang_reciprocal_gaps,
list_indexation_indexed_not_submitted,
@@ -102,7 +102,7 @@
list_redirect_chains_by_length,
list_sitemap_urls_not_in_crawl,
)
-from .issue_lists import (
+from .issues.issue_lists import (
list_hreflang_issue_pages,
list_lighthouse_failure_cls,
list_lighthouse_failure_inp,
@@ -122,7 +122,7 @@
list_pages_title_too_short,
list_pages_very_thin_content,
)
-from .keyword_lists import (
+from .keywords.keyword_lists import (
get_keyword_opportunity_score,
get_keyword_serp_snapshot,
list_cannibalisation_queries,
@@ -144,14 +144,14 @@
list_semantic_cluster_pages,
list_semantic_cluster_queries,
)
-from .link_lists import (
+from .links.link_lists import (
list_internal_links_from_url,
list_internal_links_to_url,
list_links_by_rel_nofollow,
list_outbound_links,
list_pagerank_low_pages,
)
-from .backlinks import (
+from .backlinks.backlinks import (
get_backlinks_velocity,
get_bing_backlinks_summary,
get_competitor_link_gap,
@@ -161,7 +161,7 @@
get_gsc_sample_links,
get_third_party_links_overlay,
)
-from .charts import (
+from .portfolio.charts import (
get_crawl_summary,
get_domain_link_distribution,
get_issue_priority_breakdown,
@@ -170,15 +170,15 @@
get_title_length_distribution,
get_top_crawled_pages,
)
-from .data_coverage import get_data_coverage_report
-from .insight_tools import (
+from .core.data_coverage import get_data_coverage_report
+from .insight.insight_tools import (
get_issue_to_traffic_map,
get_landing_page_blended_table,
get_landing_page_full_diagnosis,
get_opportunity_matrix,
get_traffic_health_check,
)
-from .router_tools import (
+from .core.router_tools import (
list_tool_domains,
run_domain_agent,
run_insight_workflow,
@@ -186,8 +186,8 @@
run_technical_workflow,
search_audit_tools,
)
-from .compare import compare_reports
-from .compare_slices import (
+from .compare.compare import compare_reports
+from .compare.compare_slices import (
compare_category_deltas,
compare_content_metrics,
compare_duplicate_deltas,
@@ -206,8 +206,8 @@
compare_tech_deltas,
compare_url_set_diff,
)
-from .crawl_metrics import get_asset_weight_summary, get_readability_summary
-from .content import (
+from .crawl.crawl_metrics import get_asset_weight_summary, get_readability_summary
+from .content.content import (
get_content_analytics,
get_content_duplicates,
get_duplicate_cluster,
@@ -217,7 +217,7 @@
list_thin_content_pages,
)
from .context import AuditToolContext
-from .crawl_lists import (
+from .crawl.crawl_lists import (
get_axe_audit_summary,
get_heading_outline_for_url,
get_top_pages_by_pagerank,
@@ -238,7 +238,7 @@
list_pages_with_mixed_content,
list_robots_blocked_urls,
)
-from .geo_tools import (
+from .geo.geo_tools import (
get_aeo_content_signals_for_url,
get_ai_discovery_status,
get_eeat_signals_summary,
@@ -249,7 +249,7 @@
get_llms_txt_status,
list_pages_missing_faq_schema,
)
-from .crawl import (
+from .crawl.crawl import (
get_browser_diagnostics_summary,
get_crawl_links_table,
get_crawl_segments,
@@ -272,7 +272,7 @@
search_pages,
search_pages_advanced,
)
-from .google import (
+from .google.google import (
get_ga4_by_channel,
get_ga4_by_device,
get_ga4_daily_trend,
@@ -286,7 +286,7 @@
get_gsc_top_pages,
get_gsc_top_queries,
)
-from .integration_tools import (
+from .integrations.integration_tools import (
check_ai_citation_presence,
check_ai_citations_live,
get_bing_index_status,
@@ -294,11 +294,11 @@
get_gsc_url_inspection,
get_serp_feature_overlay,
)
-from .health import get_category_health_history, get_health_history, list_report_history
-from .indexation_tools import get_indexation_coverage, get_indexation_url_join, list_indexation_gaps
-from .international import get_hreflang_summary, get_language_summary
-from .issues import get_category_issues, list_issues_by_category
-from .keywords import (
+from .portfolio.health import get_category_health_history, get_health_history, list_report_history
+from .indexation.indexation_tools import get_indexation_coverage, get_indexation_url_join, list_indexation_gaps
+from .indexation.international import get_hreflang_summary, get_language_summary
+from .issues.issues import get_category_issues, list_issues_by_category
+from .keywords.keywords import (
get_brand_keyword_split,
get_keyword_cannibalisation,
get_keyword_history,
@@ -314,7 +314,7 @@
list_keywords_ctr_opportunity,
search_keywords,
)
-from .lighthouse import (
+from .performance.lighthouse import (
get_crux_summary,
get_lighthouse_diagnostics,
get_lighthouse_for_url,
@@ -326,14 +326,14 @@
list_lighthouse_poor_seo_pages,
list_slow_pages,
)
-from .export_tools import (
+from .export.export_tools import (
export_audit_report,
export_compare_csv,
export_list_as_csv,
list_export_formats,
)
-from .export_extras import export_sitemap_xml, validate_rich_results
-from .image_tools import (
+from .export.export_extras import export_sitemap_xml, validate_rich_results
+from .images.image_tools import (
get_image_audit_summary,
list_images_needing_attention,
list_largest_images,
@@ -343,7 +343,7 @@
list_site_image_urls,
list_unoptimized_images,
)
-from .llm_tools import (
+from .integrations.llm_tools import (
analyze_serp_snippet_for_url,
draft_llms_txt,
expand_keywords,
@@ -358,7 +358,7 @@
prioritize_fix_roadmap,
summarize_category_for_client,
)
-from .payload_extras import (
+from .core.payload_extras import (
get_competitor_keyword_gap,
get_pagination_audit_summary,
get_portfolio_benchmark,
@@ -366,7 +366,7 @@
get_site_anchor_text_summary,
list_rich_results_failures,
)
-from .onpage import (
+from .onpage.onpage import (
list_content_url_issues,
list_pages_meta_desc_too_long,
list_pages_meta_desc_too_short,
@@ -377,7 +377,7 @@
list_pages_noindex,
list_seo_onpage_issues,
)
-from .links import (
+from .links.links import (
get_inlink_anchors,
get_link_graph_summary,
get_link_rel_summary,
@@ -388,8 +388,8 @@
list_nofollow_internal_links,
list_orphan_pages,
)
-from .crawl_actions import prepare_audit_run
-from .ops import (
+from .crawl.crawl_actions import prepare_audit_run
+from .ops.ops import (
get_google_integration_status,
get_integration_alerts,
get_latest_log_analysis,
@@ -402,14 +402,14 @@
list_log_only_paths,
list_log_uploads,
)
-from .properties import get_property, list_properties
-from .property_profile import (
+from .portfolio.properties import get_property, list_properties
+from .portfolio.property_profile import (
get_ads_txt_status,
get_contact_intelligence,
get_security_txt_status,
list_subdomains,
)
-from .report import (
+from .report.report import (
get_category_scores,
get_critical_issues,
get_executive_summary,
@@ -420,7 +420,7 @@
list_top_impact_issues,
search_issues,
)
-from .report_extras import (
+from .report.report_extras import (
get_audit_recommendations,
get_category_recommendations,
get_ml_errors,
@@ -428,14 +428,14 @@
list_audit_categories,
list_issues_with_ai_fixes,
)
-from .schema import get_schema_coverage, list_pages_without_schema, search_pages_by_schema_type
-from .sql_query import get_sql_schema, run_sql_query
-from .security import (
+from .schema.schema import get_schema_coverage, list_pages_without_schema, search_pages_by_schema_type
+from .core.sql_query import get_sql_schema, run_sql_query
+from .security.security import (
get_security_findings,
get_security_findings_summary,
list_security_findings_by_type,
)
-from .tech import get_tech_stack_summary, list_pages_by_technology
+from .tech.tech import get_tech_stack_summary, list_pages_by_technology
from .tool_catalog import TOOL_DEFINITIONS
from .tool_domains import (
TIER_0_TOOLS,
@@ -447,7 +447,7 @@
tool_names_for_tier as _meta_tool_names_for_tier,
tools_by_domain,
)
-from .workflow import list_issue_workflow
+from .ops.workflow import list_issue_workflow
ToolHandler = Callable[[Connection, AuditToolContext, dict[str, Any]], dict[str, Any]]
diff --git a/src/website_profiling/tools/audit_tools/report/__init__.py b/src/website_profiling/tools/audit_tools/report/__init__.py
new file mode 100644
index 00000000..a0d40616
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/report/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — report domain."""
diff --git a/src/website_profiling/tools/audit_tools/report.py b/src/website_profiling/tools/audit_tools/report/report.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/report.py
rename to src/website_profiling/tools/audit_tools/report/report.py
index cf38b173..3b34b19f 100644
--- a/src/website_profiling/tools/audit_tools/report.py
+++ b/src/website_profiling/tools/audit_tools/report/report.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ...reporting.terminology import category_display_name
-from .context import AuditToolContext
+from ....reporting.terminology import category_display_name
+from ..context import AuditToolContext
_PRIORITY_ORDER = {"Critical": 0, "High": 1, "Medium": 2, "Low": 3}
_ISSUE_LIMIT_DEFAULT = 20
@@ -50,7 +50,7 @@ def _iter_category_issues(payload: dict[str, Any]) -> list[dict[str, Any]]:
return rows
-from ...scoring import round_half_up
+from ....scoring import round_half_up
def _health_score(payload: dict[str, Any]) -> int | None:
@@ -134,7 +134,7 @@ def list_issues(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -
sort_mode = str(args.get("sort") or "").strip().lower()
if sort_mode == "impact":
- from ...reporting.issue_impact import sort_issues_by_impact
+ from ....reporting.issue_impact import sort_issues_by_impact
issues = sort_issues_by_impact(issues)
@@ -182,7 +182,7 @@ def search_issues(conn: Connection, ctx: AuditToolContext, args: dict[str, Any])
sort_mode = str(args.get("sort") or "").strip().lower()
if sort_mode == "impact":
- from ...reporting.issue_impact import sort_issues_by_impact
+ from ....reporting.issue_impact import sort_issues_by_impact
issues = sort_issues_by_impact(issues)
diff --git a/src/website_profiling/tools/audit_tools/report_extras.py b/src/website_profiling/tools/audit_tools/report/report_extras.py
similarity index 98%
rename from src/website_profiling/tools/audit_tools/report_extras.py
rename to src/website_profiling/tools/audit_tools/report/report_extras.py
index 2906d150..4ebf6f49 100644
--- a/src/website_profiling/tools/audit_tools/report_extras.py
+++ b/src/website_profiling/tools/audit_tools/report/report_extras.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
def get_audit_recommendations(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/schema/__init__.py b/src/website_profiling/tools/audit_tools/schema/__init__.py
new file mode 100644
index 00000000..43b099ff
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/schema/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — schema domain."""
diff --git a/src/website_profiling/tools/audit_tools/schema.py b/src/website_profiling/tools/audit_tools/schema/schema.py
similarity index 92%
rename from src/website_profiling/tools/audit_tools/schema.py
rename to src/website_profiling/tools/audit_tools/schema/schema.py
index 1582e171..eee54cbc 100644
--- a/src/website_profiling/tools/audit_tools/schema.py
+++ b/src/website_profiling/tools/audit_tools/schema/schema.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ._slice import crawl_filter, parse_limit
-from .context import AuditToolContext
+from .._slice import crawl_filter, parse_limit
+from ..context import AuditToolContext
def get_schema_coverage(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
@@ -22,7 +22,7 @@ def get_schema_coverage(conn: Connection, ctx: AuditToolContext, args: dict[str,
has = str(rec.get("has_schema") or "").lower() in ("true", "1", "yes")
if has:
with_schema += 1
- from ._slice import _row_schema_types_list # noqa: PLC0415
+ from .._slice import _row_schema_types_list # noqa: PLC0415
for t in _row_schema_types_list(rec):
type_counts[t] = type_counts.get(t, 0) + 1
diff --git a/src/website_profiling/tools/audit_tools/security/__init__.py b/src/website_profiling/tools/audit_tools/security/__init__.py
new file mode 100644
index 00000000..6651ac4b
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/security/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — security domain."""
diff --git a/src/website_profiling/tools/audit_tools/security.py b/src/website_profiling/tools/audit_tools/security/security.py
similarity index 97%
rename from src/website_profiling/tools/audit_tools/security.py
rename to src/website_profiling/tools/audit_tools/security/security.py
index 59b10a72..60fa7b42 100644
--- a/src/website_profiling/tools/audit_tools/security.py
+++ b/src/website_profiling/tools/audit_tools/security/security.py
@@ -5,8 +5,8 @@
from psycopg import Connection
-from ._slice import cap_list, parse_limit
-from .context import AuditToolContext
+from .._slice import cap_list, parse_limit
+from ..context import AuditToolContext
def get_security_findings(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/audit_tools/tech/__init__.py b/src/website_profiling/tools/audit_tools/tech/__init__.py
new file mode 100644
index 00000000..c80c1dcd
--- /dev/null
+++ b/src/website_profiling/tools/audit_tools/tech/__init__.py
@@ -0,0 +1 @@
+"""Audit tools — tech domain."""
diff --git a/src/website_profiling/tools/audit_tools/tech.py b/src/website_profiling/tools/audit_tools/tech/tech.py
similarity index 96%
rename from src/website_profiling/tools/audit_tools/tech.py
rename to src/website_profiling/tools/audit_tools/tech/tech.py
index 1354fbb8..e781b2b8 100644
--- a/src/website_profiling/tools/audit_tools/tech.py
+++ b/src/website_profiling/tools/audit_tools/tech/tech.py
@@ -6,8 +6,8 @@
from psycopg import Connection
-from .context import AuditToolContext
-from ._slice import cap_list, parse_limit, payload_dict_slice
+from ..context import AuditToolContext
+from .._slice import cap_list, parse_limit, payload_dict_slice
def get_tech_stack_summary(conn: Connection, ctx: AuditToolContext, args: dict[str, Any]) -> dict[str, Any]:
diff --git a/src/website_profiling/tools/keywords.py b/src/website_profiling/tools/keywords.py
index 00095b0e..9a5288b2 100644
--- a/src/website_profiling/tools/keywords.py
+++ b/src/website_profiling/tools/keywords.py
@@ -322,8 +322,8 @@ def run_keyword_pipeline(
if not high_value:
high_value = scored[:10]
summary_lines = [
- "Quick-wins (high score, lower difficulty): " + ", ".join([x["keyword"] for x in quick_wins[:5]]) or "none",
- "High-value targets (volume): " + ", ".join([x["keyword"] for x in high_value[:5]]) or "none",
+ "Quick-wins (high score, lower difficulty): " + (", ".join([x["keyword"] for x in quick_wins[:5]]) or "none"),
+ "High-value targets (volume): " + (", ".join([x["keyword"] for x in high_value[:5]]) or "none"),
]
human_summary = " ".join(summary_lines)
diff --git a/src/website_profiling/tools/schedule_runner.py b/src/website_profiling/tools/schedule_runner.py
index ed2b7120..ad030176 100644
--- a/src/website_profiling/tools/schedule_runner.py
+++ b/src/website_profiling/tools/schedule_runner.py
@@ -13,24 +13,53 @@ def _cron_dow(now: datetime) -> int:
return (now.weekday() + 1) % 7
+def _cron_field_matches(field: str, value: int) -> bool:
+    """True if *value* matches cron *field*: '*' or a comma list of integers ('01' == 1)."""
+    if field == "*":
+        return True
+    tokens = (part.strip() for part in field.split(","))  # compared as ints, not text
+    return any(tok.isdecimal() and int(tok) == value for tok in tokens)
+
+
def _cron_matches(cron_expr: str, now: datetime) -> bool:
- """Minimal cron matcher: 'MIN HOUR * * DOW' (UTC, single values only)."""
+ """Minimal cron matcher (UTC): 'MIN HOUR DOM MONTH DOW'.
+
+ MIN and HOUR take '*' or one integer; DOM, MONTH and DOW take '*' or a comma
+ list. DOM and MONTH are honoured (previously ignored, so ``0 9 1 * *`` fired
+ every day). When DOM and DOW are both restricted the day matches if EITHER
+ matches (cron OR-semantics); an unparseable field fails closed (no match).
+ """
parts = cron_expr.strip().split()
if len(parts) != 5:
return False
- minute, hour, _dom, _month, dow = parts
+ minute, hour, dom, month, dow = parts
try:
if minute != "*" and int(minute) != now.minute:
return False
if hour != "*" and int(hour) != now.hour:
return False
+ # Validate the remaining fields up front so a malformed token fails closed.
+ for field in (dom, month, dow):
+ for part in field.split(","):
+ part = part.strip()
+ if part and part != "*":
+ int(part)
except ValueError:
return False
- if dow != "*":
- cron_dow = _cron_dow(now)
- allowed = {part.strip() for part in dow.split(",") if part.strip()}
- if str(cron_dow) not in allowed:
- return False
+
+ if not _cron_field_matches(month, now.month):
+ return False
+
+ dom_restricted = dom != "*"
+ dow_restricted = dow != "*"
+ dom_ok = _cron_field_matches(dom, now.day)
+ dow_ok = _cron_field_matches(dow, _cron_dow(now))
+ if dom_restricted and dow_restricted:
+ return dom_ok or dow_ok
+ if dom_restricted:
+ return dom_ok
+ if dow_restricted:
+ return dow_ok
return True
diff --git a/src/website_profiling/tools/warnings.py b/src/website_profiling/tools/warnings.py
index 71b1b752..b4ee1cae 100644
--- a/src/website_profiling/tools/warnings.py
+++ b/src/website_profiling/tools/warnings.py
@@ -5,6 +5,7 @@
"""
import json
import os
+import re
import sys
from typing import Any
@@ -235,6 +236,15 @@
]
+def _phrase_in_text(phrase: str, text: str) -> bool:
+    """Whole-word/phrase match so short tokens don't match inside other words.
+
+    Substring matching mis-mapped warnings ('bandwidth' contains 'width' → image-aspect-ratio/CLS).
+    Uses lookarounds, not word-boundary anchors, so phrases edged by punctuation still match.
+    """
+    return re.search(rf"(?<!\w){re.escape(phrase)}(?!\w)", text) is not None
+
+
def _resolve_entry(audit_id: str, title: str | None, help_text: str | None) -> dict[str, Any]:
"""Get mapping entry for audit id or phrase match."""
aid = (audit_id or "").strip().lower()
@@ -242,7 +252,9 @@ def _resolve_entry(audit_id: str, title: str | None, help_text: str | None) -> d
return dict(AUDIT_MAP[aid])
text = f"{title or ''} {help_text or ''}".lower()
for phrase, mapped_id in PHRASE_TO_ID:
- if phrase in text or phrase in aid:
+ # `aid` is a structured audit id, so substring there is intentional; the
+ # free-text title/help must match on word boundaries.
+ if _phrase_in_text(phrase, text) or phrase in aid:
return dict(AUDIT_MAP.get(mapped_id, DEFAULT_ENTRY))
return dict(DEFAULT_ENTRY)
diff --git a/tests/content_studio/test_agent.py b/tests/content_studio/test_agent.py
index 5a23caa4..9e1d1d6b 100644
--- a/tests/content_studio/test_agent.py
+++ b/tests/content_studio/test_agent.py
@@ -66,12 +66,15 @@ def test_inject_missing_tools_appends_results() -> None:
ctx = sample_ctx()
messages: list[dict] = []
called = {"get_draft_seo_score"}
- _inject_missing_tools(messages, ctx, called, ollama_format=False)
+ events: list[dict] = []
+ _inject_missing_tools(messages, ctx, called, ollama_format=False, tool_events=events)
assert any(m.get("role") == "tool" for m in messages)
assert called == REQUIRED_CONTENT_STUDIO_TOOLS
+ # tool_events is populated in the same pass (no second dispatch needed).
+ assert {e["name"] for e in events} == REQUIRED_CONTENT_STUDIO_TOOLS - {"get_draft_seo_score"}
messages_ollama: list[dict] = []
called_ollama = {"get_draft_seo_score"}
- _inject_missing_tools(messages_ollama, ctx, called_ollama, ollama_format=True)
+ _inject_missing_tools(messages_ollama, ctx, called_ollama, ollama_format=True, tool_events=[])
assert any(m.get("tool_name") for m in messages_ollama)
diff --git a/tests/reporting/test_optional_audits.py b/tests/reporting/test_optional_audits.py
index f3830275..91453042 100644
--- a/tests/reporting/test_optional_audits.py
+++ b/tests/reporting/test_optional_audits.py
@@ -65,6 +65,30 @@ def test_html_validation_issues_missing_html5lib():
assert isinstance(issues, list)
+def test_wayback_issues_bounds_requests_for_snapshotless_404s(monkeypatch):
+ # Regression: snapshot-less 404s must count against max_lookups so the number
+ # of external Wayback requests stays bounded (previously only snapshots-found
+ # incremented the counter, so the cap did nothing for the common no-snapshot case).
+ import website_profiling.reporting.optional_audits as oa
+
+ oa._WAYBACK_CACHE.clear()
+ calls = {"n": 0}
+
+ class _Resp:
+ def json(self):
+ return {"archived_snapshots": {}}
+
+ def fake_get(*_a, **_k):
+ calls["n"] += 1
+ return _Resp()
+
+ monkeypatch.setattr(oa.requests, "get", fake_get)
+ df = pd.DataFrame([{"url": f"https://x.com/{i}", "status": "404"} for i in range(20)])
+ issues = oa.wayback_issues(df, max_lookups=5)
+ assert calls["n"] == 5 # bounded by max_lookups, not 20
+ assert issues == []
+
+
def test_apply_optional_audits_spell_skipped_without_package(capsys):
categories = [
{"id": "technical_seo", "name": "Technical", "issues": [], "recommendations": []},
diff --git a/tests/reporting/test_pdf_branch_coverage.py b/tests/reporting/test_pdf_branch_coverage.py
index 255628de..cd8d1c2c 100644
--- a/tests/reporting/test_pdf_branch_coverage.py
+++ b/tests/reporting/test_pdf_branch_coverage.py
@@ -359,6 +359,21 @@ def test_reportlab_empty_executive_and_top_issues(self):
assert _rl_render_executive_panel(cover, st) == []
assert _render_top_issues_table([], st) == []
+ def test_reportlab_stat_grid_more_chips_than_columns(self):
+ # chips > columns must not crash: colWidths must match the cell count.
+ st = _make_styles()
+ block = StatGridBlock(
+ id="s",
+ columns=2,
+ chips=[
+ StatChip(label="A", value="1", tone="high"),
+ StatChip(label="B", value="2", tone="medium"),
+ StatChip(label="C", value="3", tone="low"),
+ ],
+ )
+ out = _flowables_for_block(block, st)
+ assert out # renders a table flowable instead of raising at build time
+
def test_reportlab_empty_optional_blocks(self):
st = _make_styles()
assert _flowables_for_block(KpiRowBlock(id="k", items=[]), st) == []
diff --git a/tests/reporting/test_reporting_gaps.py b/tests/reporting/test_reporting_gaps.py
index bee58784..b184f1cf 100644
--- a/tests/reporting/test_reporting_gaps.py
+++ b/tests/reporting/test_reporting_gaps.py
@@ -50,6 +50,18 @@ def test_compute_impact_score_defaults():
assert compute_impact_score("Unknown") >= 1
+def test_issue_impact_skips_homepage_ga4_path():
+ """GA4 path '/' becomes '' after rstrip('/'); it must not match every issue URL via endswith('')."""
+ categories = [{
+ "issues": [{"url": "https://example.com/about", "priority": "Medium"}],
+ }]
+ google = {"ga4": {"pages": [{"path": "/", "sessions": 999}]}}  # only the homepage path carries sessions
+ enrich_categories_with_traffic_impact(categories, google)  # result is read back from `categories`
+ issue = categories[0]["issues"][0]
+ assert issue["ga4_sessions"] == 0  # homepage sessions are not attributed to /about
+ assert issue["impact_score"] == compute_impact_score("Medium")  # same score as the priority alone yields
+
+
def test_issue_impact_handles_invalid_rows():
assert enrich_categories_with_traffic_impact([], []) == []
enrich_categories_with_traffic_impact(
diff --git a/tests/test_agent_readiness.py b/tests/test_agent_readiness.py
index c31a1621..8ff33fc4 100644
--- a/tests/test_agent_readiness.py
+++ b/tests/test_agent_readiness.py
@@ -13,7 +13,7 @@
score_content_structure_aeo,
strip_html_to_text,
)
-from website_profiling.tools.audit_tools.agent_readiness import (
+from website_profiling.tools.audit_tools.geo.agent_readiness import (
_fetch_agents_md,
_fetch_agent_permissions,
_fetch_skill_md,
diff --git a/tests/test_analysis_crawl_stores_edge_unit.py b/tests/test_analysis_crawl_stores_edge_unit.py
index 584d5085..29a79f22 100644
--- a/tests/test_analysis_crawl_stores_edge_unit.py
+++ b/tests/test_analysis_crawl_stores_edge_unit.py
@@ -23,6 +23,12 @@ def test_common_mixed_content_srcset_and_links_serialized_fallback() -> None:
ext = parse_seo_extended(html, "https://secure.com/page")
assert ext["mixed_content_count"] >= 1
+ # Regression: an all-http:// srcset must count EVERY insecure candidate, not
+ # just the first (the generic startswith() previously matched once and skipped
+ # the per-candidate loop).
+ all_http = ''
+ assert parse_seo_extended(all_http, "https://secure.com")["mixed_content_count"] == 2
+
assert parse_links_serialized("[unclosed") == ["[unclosed"]
@@ -127,6 +133,32 @@ def test_analysis_local_duplicate_and_language_paths(monkeypatch) -> None:
assert payload["ml_errors"] == ["x"]
+def test_duplicate_detection_skips_empty_simhash(monkeypatch) -> None:
+ """Pages whose SimHash is 0 (untokenizable) must not be clustered together as duplicates."""
+ from website_profiling.analysis import local
+
+ monkeypatch.setattr(
+ local, "_import_rapidfuzz", lambda: types.SimpleNamespace(token_set_ratio=lambda a, b: 0)  # stub: ratio is always 0
+ )
+ # Force the two "blank" pages to SimHash 0 and the two real pages to share a hash.
+ monkeypatch.setattr(local, "simhash_64", lambda fp: 0 if "blank" in fp else 999)
+ # Fingerprints must be >= 20 chars to be considered (see compute_duplicate_groups).
+ df = pd.DataFrame(
+ [
+ {"url": "https://a.com/e1", "status": "200", "content_type": "text/html", "title": "blank placeholder page number one"},
+ {"url": "https://a.com/e2", "status": "200", "content_type": "text/html", "title": "blank placeholder page number two"},
+ {"url": "https://a.com/d1", "status": "200", "content_type": "text/html", "title": "real duplicate content body text here"},
+ {"url": "https://a.com/d2", "status": "200", "content_type": "text/html", "title": "real duplicate content body text here"},
+ ]
+ )
+ _groups, mapping, _w = local.compute_duplicate_groups(df, {"enable_duplicate_detection": "true"})
+ # The real duplicates are grouped together (chained comparison: equal ids AND the id is not None)...
+ assert mapping.get("https://a.com/d1") == mapping.get("https://a.com/d2") is not None
+ # ...but the two SimHash-0 pages are NOT (they were skipped, not bucketed).
+ assert "https://a.com/e1" not in mapping
+ assert "https://a.com/e2" not in mapping
+
+
def test_browser_diagnostics_pandas_and_aggregate_paths() -> None:
from website_profiling.crawl.fetchers.browser_diagnostics import (
_parse_page_analysis_cell,
diff --git a/tests/test_chat_narrative.py b/tests/test_chat_narrative.py
index 5b764146..ba0a8485 100644
--- a/tests/test_chat_narrative.py
+++ b/tests/test_chat_narrative.py
@@ -37,7 +37,8 @@ def test_validate_chat_narrative_caps_items() -> None:
"power_insights": items,
"recommended_actions": ["one"],
})
- assert any("more than" in e for e in errors)
+ # Over-length is silently capped, not treated as a validation error.
+ assert not errors
assert len(narrative["power_insights"]) == 5
diff --git a/tests/test_commands_page_gsc_unit.py b/tests/test_commands_page_gsc_unit.py
index 56acfd5f..5cd7c57f 100644
--- a/tests/test_commands_page_gsc_unit.py
+++ b/tests/test_commands_page_gsc_unit.py
@@ -330,6 +330,34 @@ def fake_run(url, cfg, **kwargs):
assert out["ok"] is True
+def test_page_coach_cmd_malformed_env_does_not_crash(monkeypatch, capsys) -> None:
+ # A non-numeric or empty id (e.g. from an unvalidated request body) must
+ # degrade to None instead of raising ValueError and crashing the command.
+ from website_profiling.commands import page_coach_cmd
+
+ captured: dict = {}  # filled by fake_run with the kwargs it receives
+
+ def fake_run(url, cfg, **kwargs):
+ captured["kwargs"] = kwargs
+ return {"ok": True, "suggestions": []}
+
+ monkeypatch.setitem(
+ sys.modules,
+ "website_profiling.llm.page_coach",
+ types.SimpleNamespace(run_page_coach=fake_run),  # stand-in module exposing only run_page_coach
+ )
+ monkeypatch.setenv("WP_PAGE_COACH_CURRENT", "live:abc")  # non-numeric text after the colon
+ monkeypatch.setenv("WP_PAGE_COACH_BASELINE", "snapshot:")  # nothing after the colon
+
+ args = argparse.Namespace(url="https://example.com/page", refresh=False)
+ with pytest.raises(SystemExit) as exc:
+ page_coach_cmd.run({"start_url": "https://example.com"}, "/tmp", args)
+ assert exc.value.code == 0  # the command still exits successfully
+ assert captured["kwargs"]["current_type"] is None
+ assert captured["kwargs"]["current_id"] is None
+ assert captured["kwargs"]["baseline_id"] is None
+
+
def test_page_coach_cmd_failure_exit(monkeypatch, capsys) -> None:
from website_profiling.commands import page_coach_cmd
diff --git a/tests/test_common_analysis_commands_db_unit.py b/tests/test_common_analysis_commands_db_unit.py
index d2444668..82644927 100644
--- a/tests/test_common_analysis_commands_db_unit.py
+++ b/tests/test_common_analysis_commands_db_unit.py
@@ -929,6 +929,16 @@ class BadConn:
def cursor(self):
raise RuntimeError("cursor fail")
+ def transaction(self):  # test double: returns a do-nothing context manager
+ class CM:
+ def __enter__(self_non):
+ return None
+
+ def __exit__(self_non, _t, _v, _tb):
+ return False  # falsy: an exception raised inside the with-block propagates
+
+ return CM()
+
class Ctx:
def __enter__(self):
return BadConn()
diff --git a/tests/test_content_analysis_coverage.py b/tests/test_content_analysis_coverage.py
index e61be94b..c75ddadc 100644
--- a/tests/test_content_analysis_coverage.py
+++ b/tests/test_content_analysis_coverage.py
@@ -92,6 +92,27 @@ def test_analyze_run_html_parallel_workers(monkeypatch: pytest.MonkeyPatch) -> N
assert len(out) == 2
+def test_analyze_run_html_skips_failing_page(monkeypatch: pytest.MonkeyPatch) -> None:
+ # A page whose analysis raises must be skipped rather than aborting the whole batch.
+ rows = [
+ {"url": "https://good.com", "html": "good"},
+ {"url": "https://bad.com", "html": "bad"},
+ ]
+
+ def fake_analyze(html, **_k):
+ if "bad" in html:
+ raise ValueError("boom")  # simulated per-page analysis failure
+ return {"word_count": 2}
+
+ monkeypatch.setattr(ca_batch, "analyze_page_html", fake_analyze)
+ monkeypatch.setattr(ca_batch, "iter_html_pages", lambda *_a, **_k: iter(rows))
+ out1 = ca_batch.analyze_run_html(MagicMock(), 1, workers=1)  # workers=1 (presumably the serial path — confirm)
+ assert [r["url"] for r in out1] == ["https://good.com"]
+ monkeypatch.setattr(ca_batch, "iter_html_pages", lambda *_a, **_k: iter(rows))  # NOTE(review): the lambda already builds a fresh iterator per call, so this re-patch looks redundant
+ out2 = ca_batch.analyze_run_html(MagicMock(), 1, workers=2)  # workers=2 (presumably the parallel path — confirm)
+ assert [r["url"] for r in out2] == ["https://good.com"]
+
+
def test_iter_html_pages_paginates(monkeypatch: pytest.MonkeyPatch) -> None:
chunks = [
[{"url": "https://a.com", "html": "a"}] * 500,
diff --git a/tests/test_crawler_deep.py b/tests/test_crawler_deep.py
index e51f341d..841a665a 100644
--- a/tests/test_crawler_deep.py
+++ b/tests/test_crawler_deep.py
@@ -386,7 +386,9 @@ def _boom(_url):
df = c.crawl(show_progress=False)
assert len(df) == 1
assert df.iloc[0]["status"] == "error"
- assert df.iloc[0]["url"] is None
+ # The error row now carries the dequeued URL (so it persists to the DB in
+ # streaming mode too), rather than being url-less and silently dropped.
+ assert df.iloc[0]["url"] and "site.com" in str(df.iloc[0]["url"])
assert "outlink_targets" in df.columns
@@ -445,6 +447,55 @@ def raise_if_failed(self) -> None:
assert writer.enqueued and writer.enqueued[0]["url"] == "https://site.com"
+def test_crawl_streams_error_rows_to_db_writer(monkeypatch):
+ # Regression: an errored fetch must still be persisted to the DB in streaming
+ # mode (it previously produced a url-less error row that was silently dropped).
+ import website_profiling.crawl.crawler as mod
+
+ monkeypatch.setattr(
+ "website_profiling.crawl.sitemap.discover_sitemap_urls",
+ lambda *_a, **_k: [],  # sitemap discovery yields no URLs
+ )
+
+ enqueued: list[dict] = []  # every record handed to FakeDbWriter.enqueue
+
+ class FakeDbWriter:
+ def __init__(self, *_a, **_k) -> None:
+ pass
+
+ def start(self) -> None:
+ pass
+
+ def enqueue(self, record: dict) -> None:
+ enqueued.append(record)
+
+ def finish(self) -> None:
+ pass
+
+ def join(self) -> None:
+ return None
+
+ def raise_if_failed(self) -> None:
+ return None
+
+ monkeypatch.setattr(mod, "_CrawlDbWriter", FakeDbWriter)  # replace the module's DB writer with the recorder
+ c = mod.Crawler(
+ start_url="https://site.com",
+ ignore_robots=True,
+ use_wappalyzer=False,
+ concurrency=1,
+ max_pages=1,
+ )
+
+ def _boom(_url):
+ raise RuntimeError("worker exploded")
+
+ monkeypatch.setattr(c, "worker", _boom)  # every fetch attempt raises
+ c.crawl(show_progress=False, stream_crawl_run_id=7, stream_batch_size=100)
+ assert enqueued and enqueued[0]["status"] == "error"  # the error row reached the writer
+ assert "site.com" in str(enqueued[0]["url"])  # and it carries a URL
+
+
def test_run_crawler_writes_json(monkeypatch, tmp_path):
import website_profiling.crawl.crawler as mod
diff --git a/tests/test_fetchers_sitemap_config_unit.py b/tests/test_fetchers_sitemap_config_unit.py
index d53511ff..43f93436 100644
--- a/tests/test_fetchers_sitemap_config_unit.py
+++ b/tests/test_fetchers_sitemap_config_unit.py
@@ -297,6 +297,54 @@ def close(self):
assert all("other.com" not in u for u in urls)
+def test_discover_sitemap_rejects_offorigin_nested_and_robots(monkeypatch):
+ # robots.txt and nested sitemap-index entries are attacker-controllable;
+ # an off-origin sitemap URL must never be fetched (SSRF / scope escape).
+ from website_profiling.crawl.sitemap import discover_sitemap_urls
+
+ fetched: list[str] = []  # every URL requested through FakeSession.get
+
+ class FakeResp:
+ def __init__(self, code, text):
+ self.status_code = code
+ self.text = text
+
+ class FakeSession:
+ headers = {}
+
+ def get(self, url, timeout=0):
+ fetched.append(url)
+ if url.endswith("/robots.txt"):
+ return FakeResp(
+ 200,
+ "Sitemap: https://evil.com/evil.xml\n"
+ "Sitemap: https://example.com/index.xml\n",
+ )
+ if url.endswith("/index.xml"):
+ return FakeResp(
+ 200,
+ """
+ https://evil.com/nested.xml
+ https://example.com/pages.xml""",
+ )
+ if url.endswith("/pages.xml"):
+ return FakeResp(
+ 200,
+ """
+ https://example.com/p1""",
+ )
+ return FakeResp(404, "")  # any other URL answers 404 with an empty body
+
+ def close(self):
+ pass
+
+ monkeypatch.setattr("website_profiling.crawl.sitemap.requests.Session", lambda: FakeSession())
+ urls = discover_sitemap_urls("https://example.com", max_urls=10)
+ assert "https://example.com/p1" in urls  # the same-origin chain is still followed
+ # Neither the robots-advertised nor the nested off-origin sitemap was fetched.
+ assert all("evil.com" not in u for u in fetched)
+
+
def test_pip_install_browser_requirements_runs_subprocess(monkeypatch, tmp_path):
from website_profiling.crawl.fetchers import browser_deps
diff --git a/tests/test_help_agent.py b/tests/test_help_agent.py
new file mode 100644
index 00000000..785e5942
--- /dev/null
+++ b/tests/test_help_agent.py
@@ -0,0 +1,170 @@
+"""Unit tests for the help agent."""
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from website_profiling.llm.help_agent import run_help_turn
+
+
+def _make_fake_client(content: str = "Sure, here's how.", tokens: list[str] | None = None):  # MagicMock client with a scripted chat_with_tools
+ client = MagicMock()
+ result = MagicMock()
+ result.content = content
+ result.tool_calls = []  # the scripted reply never requests a tool call
+
+ def fake_chat(messages, tools, on_token=None):
+ if tokens and on_token:  # replay tokens only when both tokens and a callback are given
+ for t in tokens:
+ on_token(t)
+ return result
+
+ client.chat_with_tools.side_effect = fake_chat
+ return client
+
+
+def test_help_turn_disabled_llm() -> None:  # llm_is_enabled() is False: the turn fails and emits an error event
+ events: list[dict] = []
+
+ with patch("website_profiling.llm.help_agent.load_llm_config_from_db", return_value={}):
+ with patch("website_profiling.llm.help_agent.llm_is_enabled", return_value=False):
+ result = run_help_turn(
+ [{"role": "user", "content": "help"}],
+ on_event=events.append,  # collect every emitted event
+ )
+
+ assert result["ok"] is False
+ assert any(e["type"] == "error" for e in events)
+
+
+def test_help_turn_streams_tokens() -> None:  # each streamed token becomes one "token" event, followed by one "done"
+ events: list[dict] = []
+ fake_client = _make_fake_client(tokens=["Hello", " world"])
+
+ with patch("website_profiling.llm.help_agent.load_llm_config_from_db", return_value={"llm_provider": "openai"}):
+ with patch("website_profiling.llm.help_agent.llm_is_enabled", return_value=True):
+ with patch("website_profiling.llm.help_agent.get_llm_client", return_value=fake_client):
+ result = run_help_turn(
+ [{"role": "user", "content": "help"}],
+ on_event=events.append,
+ )
+
+ assert result["ok"] is True
+ token_events = [e for e in events if e["type"] == "token"]
+ assert len(token_events) == 2  # one event per streamed token, nothing extra
+ assert token_events[0]["text"] == "Hello"
+ assert token_events[1]["text"] == " world"
+ done_events = [e for e in events if e["type"] == "done"]
+ assert len(done_events) == 1
+
+
+def test_help_turn_no_tools_passed() -> None:  # chat_with_tools must receive an empty tools list
+ fake_client = _make_fake_client()
+
+ with patch("website_profiling.llm.help_agent.load_llm_config_from_db", return_value={"llm_provider": "openai"}):
+ with patch("website_profiling.llm.help_agent.llm_is_enabled", return_value=True):
+ with patch("website_profiling.llm.help_agent.get_llm_client", return_value=fake_client):
+ run_help_turn([{"role": "user", "content": "help"}])
+
+ call_args = fake_client.chat_with_tools.call_args
+ # tools is either the second positional argument or the "tools" keyword argument
+ tools_arg = call_args[0][1] if len(call_args[0]) > 1 else call_args[1].get("tools", None)
+ assert tools_arg == []
+
+
+def test_help_turn_buffered_content_emitted() -> None:
+ """When on_token is never called (buffered provider), the content is emitted as a token event."""
+ events: list[dict] = []
+ fake_client = _make_fake_client(content="Buffered response", tokens=None)  # tokens=None: on_token is never invoked
+
+ with patch("website_profiling.llm.help_agent.load_llm_config_from_db", return_value={"llm_provider": "openai"}):
+ with patch("website_profiling.llm.help_agent.llm_is_enabled", return_value=True):
+ with patch("website_profiling.llm.help_agent.get_llm_client", return_value=fake_client):
+ result = run_help_turn(
+ [{"role": "user", "content": "help"}],
+ on_event=events.append,
+ )
+
+ assert result["ok"] is True
+ token_texts = [e["text"] for e in events if e["type"] == "token"]
+ assert "Buffered response" in token_texts  # NOTE(review): membership only; does not prove it was emitted exactly once
+
+
+def test_help_turn_provider_error() -> None:  # an exception from chat_with_tools surfaces as an error event
+ events: list[dict] = []
+ bad_client = MagicMock()
+ bad_client.chat_with_tools.side_effect = ValueError("Connection refused")
+
+ with patch("website_profiling.llm.help_agent.load_llm_config_from_db", return_value={"llm_provider": "openai"}):
+ with patch("website_profiling.llm.help_agent.llm_is_enabled", return_value=True):
+ with patch("website_profiling.llm.help_agent.get_llm_client", return_value=bad_client):
+ result = run_help_turn(
+ [{"role": "user", "content": "help"}],
+ on_event=events.append,
+ )
+
+ assert result["ok"] is False
+ error_events = [e for e in events if e["type"] == "error"]
+ assert any("Connection refused" in e.get("message", "") for e in error_events)  # the original message is preserved
+
+
+def test_help_turn_unknown_provider() -> None:  # a ValueError from get_llm_client surfaces as an error event
+ events: list[dict] = []
+
+ with patch("website_profiling.llm.help_agent.load_llm_config_from_db", return_value={"llm_provider": "unknown"}):
+ with patch("website_profiling.llm.help_agent.llm_is_enabled", return_value=True):
+ with patch(
+ "website_profiling.llm.help_agent.get_llm_client",
+ side_effect=ValueError("Unknown LLM provider: unknown"),  # client construction itself fails
+ ):
+ result = run_help_turn(
+ [{"role": "user", "content": "help"}],
+ on_event=events.append,
+ )
+
+ assert result["ok"] is False
+ assert any("Unknown LLM provider" in e.get("message", "") for e in events if e["type"] == "error")
+
+
+def test_help_turn_system_prompt_in_messages() -> None:
+ """The system prompt must be the first message sent to the client."""
+ captured: list[list] = []  # one entry per chat_with_tools call: the messages list it received
+ fake_client = _make_fake_client()
+ fake_client.chat_with_tools.side_effect = lambda msgs, tools, on_token=None: (
+ captured.append(msgs) or MagicMock(content="ok", tool_calls=[])  # append() returns None, so the MagicMock is the result
+ )
+
+ with patch("website_profiling.llm.help_agent.load_llm_config_from_db", return_value={"llm_provider": "openai"}):
+ with patch("website_profiling.llm.help_agent.llm_is_enabled", return_value=True):
+ with patch("website_profiling.llm.help_agent.get_llm_client", return_value=fake_client):
+ run_help_turn([{"role": "user", "content": "How do I add my API key?"}])
+
+ assert captured
+ messages_sent = captured[0]
+ assert messages_sent[0]["role"] == "system"
+ assert "help" in messages_sent[0]["content"].lower() or "credential" in messages_sent[0]["content"].lower()  # loose check: the prompt mentions either keyword
+
+
+def test_help_turn_no_event_callback() -> None:
+ """run_help_turn must succeed when called without an on_event callback."""
+ fake_client = _make_fake_client(tokens=["Hi"])  # the client still streams a token
+
+ with patch("website_profiling.llm.help_agent.load_llm_config_from_db", return_value={"llm_provider": "openai"}):
+ with patch("website_profiling.llm.help_agent.llm_is_enabled", return_value=True):
+ with patch("website_profiling.llm.help_agent.get_llm_client", return_value=fake_client):
+ result = run_help_turn([{"role": "user", "content": "hi"}])  # on_event deliberately omitted
+
+ assert result["ok"] is True
+
+
+@pytest.mark.parametrize("messages", [[], None])
+def test_help_turn_empty_messages(messages) -> None:  # an empty conversation still yields ok=True
+ fake_client = _make_fake_client()
+
+ with patch("website_profiling.llm.help_agent.load_llm_config_from_db", return_value={"llm_provider": "openai"}):
+ with patch("website_profiling.llm.help_agent.llm_is_enabled", return_value=True):
+ with patch("website_profiling.llm.help_agent.get_llm_client", return_value=fake_client):
+ result = run_help_turn(messages or [])  # NOTE(review): `or []` turns the None case into [], so both cases pass the same input
+
+ assert result["ok"] is True
diff --git a/tests/test_help_cmd.py b/tests/test_help_cmd.py
new file mode 100644
index 00000000..90a7e5b4
--- /dev/null
+++ b/tests/test_help_cmd.py
@@ -0,0 +1,131 @@
+"""CLI help command tests."""
+from __future__ import annotations
+
+import argparse
+import io
+import json
+from unittest.mock import patch
+
+import pytest
+
+from website_profiling.commands import help_cmd
+
+
+def test_help_cmd_requires_stdin_json() -> None:  # stdin_json=False: the command exits with code 1
+ with pytest.raises(SystemExit) as exc:
+ help_cmd.run({}, argparse.Namespace(stdin_json=False))
+ assert exc.value.code == 1
+
+
+def test_help_cmd_invalid_stdin_json(capsys) -> None:  # unparseable stdin: exit code 1 and "error" on stdout
+ with patch("sys.stdin", io.StringIO("not-json")):
+ with pytest.raises(SystemExit) as exc:
+ help_cmd.run({}, argparse.Namespace(stdin_json=True))
+ assert exc.value.code == 1
+ assert "error" in capsys.readouterr().out
+
+
+def test_help_cmd_success(capsys) -> None:  # happy path: exit code 0 and a single run_help_turn call
+ payload = json.dumps({"messages": [{"role": "user", "content": "How do I set up Google?"}]})
+ with patch("sys.stdin", io.StringIO(payload)):
+ with patch(
+ "website_profiling.commands.help_cmd.run_help_turn",
+ return_value={"ok": True},
+ ) as mock_turn:
+ with pytest.raises(SystemExit) as exc:
+ help_cmd.run({}, argparse.Namespace(stdin_json=True))
+ assert exc.value.code == 0
+ mock_turn.assert_called_once()
+ # The parsed messages list is forwarded unchanged as the first positional argument.
+ call_args = mock_turn.call_args
+ assert call_args[0][0] == [{"role": "user", "content": "How do I set up Google?"}]
+
+
+def test_help_cmd_no_property_id_in_payload(capsys) -> None:
+ """The help command must not forward property_id or any audit context."""
+ payload = json.dumps({"messages": [], "property_id": 99})  # extra key that must be ignored
+ with patch("sys.stdin", io.StringIO(payload)):
+ with patch(
+ "website_profiling.commands.help_cmd.run_help_turn",
+ return_value={"ok": True},
+ ) as mock_turn:
+ with pytest.raises(SystemExit):
+ help_cmd.run({}, argparse.Namespace(stdin_json=True))
+ # Exactly one positional argument (messages) is passed. NOTE(review): keyword arguments are not inspected.
+ call_args = mock_turn.call_args
+ assert len(call_args[0]) == 1  # only the messages positional argument
+
+
+def test_help_cmd_streams_token_events(capsys) -> None:  # events passed to on_event are written to stdout
+ payload = json.dumps({"messages": [{"role": "user", "content": "help"}]})
+
+ def fake_turn(_messages, on_event=None):
+ if on_event:
+ on_event({"type": "token", "text": "Hello!"})  # emit one token through the command's callback
+ return {"ok": True}
+
+ with patch("sys.stdin", io.StringIO(payload)):
+ with patch("website_profiling.commands.help_cmd.run_help_turn", side_effect=fake_turn):
+ with pytest.raises(SystemExit) as exc:
+ help_cmd.run({}, argparse.Namespace(stdin_json=True))
+ assert exc.value.code == 0
+ out = capsys.readouterr().out
+ assert "token" in out
+ assert "Hello!" in out
+
+
+def test_help_cmd_agent_failure(capsys) -> None:  # ok=False from the agent: exit code 1 and the error text on stdout
+ payload = json.dumps({"messages": []})
+ with patch("sys.stdin", io.StringIO(payload)):
+ with patch(
+ "website_profiling.commands.help_cmd.run_help_turn",
+ return_value={"ok": False, "error": "AI disabled"},
+ ):
+ with pytest.raises(SystemExit) as exc:
+ help_cmd.run({}, argparse.Namespace(stdin_json=True))
+ assert exc.value.code == 1
+ assert "AI disabled" in capsys.readouterr().out
+
+
+def test_help_cmd_exception(capsys) -> None:  # an exception from the agent: exit code 1 and its message on stdout
+ payload = json.dumps({"messages": []})
+ with patch("sys.stdin", io.StringIO(payload)):
+ with patch(
+ "website_profiling.commands.help_cmd.run_help_turn",
+ side_effect=RuntimeError("boom"),
+ ):
+ with pytest.raises(SystemExit) as exc:
+ help_cmd.run({}, argparse.Namespace(stdin_json=True))
+ assert exc.value.code == 1
+ assert "boom" in capsys.readouterr().out
+
+
+def test_help_cmd_sanitizes_events(capsys) -> None:  # a lone surrogate in event text must not reach stdout
+ payload = json.dumps({"messages": []})
+
+ def fake_turn(_messages, on_event=None):
+ if on_event:
+ on_event({"type": "token", "text": "bad\udc9d"})  # \udc9d is an unpaired low surrogate
+ return {"ok": True}
+
+ with patch("sys.stdin", io.StringIO(payload)):
+ with patch("website_profiling.commands.help_cmd.run_help_turn", side_effect=fake_turn):
+ with pytest.raises(SystemExit) as exc:
+ help_cmd.run({}, argparse.Namespace(stdin_json=True))
+ assert exc.value.code == 0
+ out = capsys.readouterr().out
+ assert "\udc9d" not in out  # the raw surrogate character is absent from the output
+ assert "token" in out  # but the event itself was still written
+
+
+def test_help_cmd_ignores_invalid_messages(capsys) -> None:  # a non-list "messages" value is replaced by []
+ payload = json.dumps({"messages": "not-a-list"})
+ with patch("sys.stdin", io.StringIO(payload)):
+ with patch(
+ "website_profiling.commands.help_cmd.run_help_turn",
+ return_value={"ok": True},
+ ) as mock_turn:
+ with pytest.raises(SystemExit) as exc:
+ help_cmd.run({}, argparse.Namespace(stdin_json=True))
+ assert exc.value.code == 0
+ assert mock_turn.call_args[0][0] == []  # the agent received an empty message list
diff --git a/tests/test_historical_keywords_crawl_store_unit.py b/tests/test_historical_keywords_crawl_store_unit.py
index c9e86eb3..b4bc4966 100644
--- a/tests/test_historical_keywords_crawl_store_unit.py
+++ b/tests/test_historical_keywords_crawl_store_unit.py
@@ -283,6 +283,61 @@ def test_write_nodes_variants(monkeypatch):
assert captured["n"] == 1
+def test_historical_read_table_failure_isolated(monkeypatch, capsys):
+ """A failing table read must be isolated: it is reported and must not poison later reads."""
+ from website_profiling.db import historical as h
+
+ class IsoConn:
+ def __init__(self):
+ self.n = 0  # number of execute() calls seen across all cursors of this connection
+
+ def cursor(self_conn):
+ class IsoCursor:
+ def execute(self_cur, _sql):
+ self_conn.n += 1
+ if self_conn.n == 1:  # only the very first statement fails
+ raise RuntimeError("relation does not exist")
+ return None
+
+ def fetchall(self_cur):
+ return [{"id": 7}]
+
+ cur = IsoCursor()
+
+ class CM:
+ def __enter__(self_non):
+ return cur
+
+ def __exit__(self_non, _t, _v, _tb):
+ return False  # falsy: exceptions propagate out of the with-block
+
+ return CM()
+
+ def transaction(self_conn):  # do-nothing context manager standing in for a transaction
+ class CM:
+ def __enter__(self_non):
+ return None
+
+ def __exit__(self_non, _t, _v, _tb):
+ return False
+
+ return CM()
+
+ class Ctx:
+ def __enter__(self):
+ return IsoConn()
+
+ def __exit__(self, _t, _v, _tb):
+ return False
+
+ monkeypatch.setattr(h, "db_session", lambda: Ctx())
+ data = h.read_historical_data()
+ # The failed table is left empty (assumes report_payload is read first), but later tables were still read.
+ assert data["report_payload"] == []
+ assert any(rows for t, rows in data.items() if t != "report_payload")
+ assert "could not read historical table" in capsys.readouterr().err  # the failure is reported on stderr
+
+
def test_historical_read_and_restore(monkeypatch):
from website_profiling.db import historical as h
@@ -318,6 +373,16 @@ def execute(self, _sql, _p=None):
def commit(self):
self.commits += 1
+ def transaction(self):  # test double: returns a do-nothing context manager
+ class CM:
+ def __enter__(self_non):
+ return None
+
+ def __exit__(self_non, _t, _v, _tb):
+ return False  # falsy: an exception raised inside the with-block propagates
+
+ return CM()
+
class Ctx:
def __enter__(self):
return HistConn()
diff --git a/tests/test_mcp_http_server.py b/tests/test_mcp_http_server.py
index 62097411..c33fd390 100644
--- a/tests/test_mcp_http_server.py
+++ b/tests/test_mcp_http_server.py
@@ -542,10 +542,20 @@ def test_origin_allowed_url_and_hostname_patterns() -> None:
"https://audit.example.com",
["https://audit.example.com"],
)
+ # A bare hostname pattern matches the exact host only...
assert http_server._origin_allowed(
- "https://app.example.com",
+ "https://example.com",
["example.com"],
)
+ # ...and must NOT be widened into a subdomain wildcard.
+ assert not http_server._origin_allowed(
+ "https://evil.example.com",
+ ["example.com"],
+ )
+ # Explicit wildcard patterns match the apex and any subdomain.
+ assert http_server._origin_allowed("https://app.example.com", ["*.example.com"])
+ assert http_server._origin_allowed("https://example.com", ["*.example.com"])
+ assert not http_server._origin_allowed("https://app.other.com", ["*.example.com"])
assert not http_server._origin_allowed(
"https://evil.example.net",
["https://audit.example.com"],
@@ -587,6 +597,78 @@ async def capture_send(message: dict) -> None:
asyncio.run(run())
+def test_remote_access_middleware_origin_fallback_rejects_cross_host() -> None:
+ # With no explicit allowed_origins, a browser Origin whose host is not an
+ # allowed host must still be rejected (transport-level Origin protection is
+ # delegated to the middleware).
+ app = AsyncMock()
+ middleware = http_server.RemoteAccessMiddleware(app)
+
+ async def run() -> None:
+ sent: list[dict] = []  # ASGI messages the middleware sends back
+
+ async def capture_send(message: dict) -> None:
+ sent.append(message)
+
+ with patch(
+ "website_profiling.mcp.http_server.load_mcp_http_settings",
+ return_value=McpHttpSettings(
+ token="secret-token",
+ allowed_hosts=["audit.example.com"],
+ allowed_origins=[],  # empty: no explicit origin allow-list
+ ),
+ ):
+ await middleware(
+ {
+ "type": "http",
+ "headers": [
+ (b"host", b"audit.example.com"),
+ (b"authorization", b"Bearer secret-token"),  # matches the configured token
+ (b"origin", b"https://evil.example.net"),  # host is not in allowed_hosts
+ ],
+ },
+ AsyncMock(),
+ capture_send,
+ )
+
+ assert sent[0]["status"] == 403
+ app.assert_not_called()  # the wrapped app never saw the request
+
+ asyncio.run(run())
+
+
+def test_remote_access_middleware_origin_fallback_allows_same_host() -> None:
+ # A same-host browser Origin is allowed even without explicit allowed_origins.
+ app = AsyncMock()
+ middleware = http_server.RemoteAccessMiddleware(app)
+
+ async def run() -> None:
+ with patch(
+ "website_profiling.mcp.http_server.load_mcp_http_settings",
+ return_value=McpHttpSettings(
+ token="secret-token",
+ allowed_hosts=["audit.example.com"],
+ allowed_origins=[],  # empty: no explicit origin allow-list
+ ),
+ ):
+ await middleware(
+ {
+ "type": "http",
+ "headers": [
+ (b"host", b"audit.example.com"),
+ (b"authorization", b"Bearer secret-token"),
+ (b"origin", b"https://audit.example.com"),  # same host as the allowed host
+ ],
+ },
+ AsyncMock(),
+ AsyncMock(),
+ )
+
+ app.assert_called_once()  # the request was forwarded to the wrapped app
+
+ asyncio.run(run())
+
+
def test_transport_security_public_env_only() -> None:
with patch(
"website_profiling.mcp.http_server.load_mcp_http_settings",
@@ -601,7 +683,7 @@ def test_host_allowed_wildcard_nomatch_then_exact() -> None:
def test_origin_allowed_http_nomatch_then_hostname() -> None:
assert http_server._origin_allowed(
- "https://app.example.com",
+ "https://example.com",
["https://other.example.com", "example.com"],
)
diff --git a/tests/test_mcp_server_helpers.py b/tests/test_mcp_server_helpers.py
index 08212e6b..c6c3b74c 100644
--- a/tests/test_mcp_server_helpers.py
+++ b/tests/test_mcp_server_helpers.py
@@ -187,6 +187,80 @@ async def __aexit__(self, *_args):
assert "current_mcp_domain" in domains_text
+def test_load_disabled_tools_from_db() -> None:  # a JSON array in the fetched row becomes a frozenset of tool names
+ mock_conn = MagicMock()
+ mock_conn.execute.return_value.fetchone.return_value = (
+ json.dumps(["list_properties", "get_report_summary"]),  # single-column row holding a JSON-encoded list
+ )
+ with patch("website_profiling.mcp.server.db_session") as mock_db:
+ mock_db.return_value.__enter__.return_value = mock_conn
+ disabled = mcp_server._load_disabled_tools()
+ assert disabled == frozenset({"list_properties", "get_report_summary"})
+
+
+def test_load_disabled_tools_on_error() -> None:  # when db_session raises, the result is an empty frozenset
+ with patch("website_profiling.mcp.server.db_session", side_effect=RuntimeError("no db")):
+ assert mcp_server._load_disabled_tools() == frozenset()
+
+
+def test_mcp_disabled_tools_excluded_from_list_and_call(monkeypatch) -> None:  # a disabled tool is neither listed nor callable
+ captured: dict[str, object] = {}  # handlers registered through FakeServer's decorators
+
+ class FakeServer:
+ def __init__(self, name: str) -> None:
+ captured["name"] = name
+
+ def list_tools(self):
+ def decorator(fn):
+ captured["list_tools"] = fn
+ return fn
+ return decorator
+
+ def call_tool(self):
+ def decorator(fn):
+ captured["call_tool"] = fn
+ return fn
+ return decorator
+
+ def list_resources(self):
+ def decorator(fn):
+ return fn
+ return decorator
+
+ def read_resource(self):
+ def decorator(fn):
+ return fn
+ return decorator
+
+ fake_server_mod = MagicMock()
+ fake_server_mod.Server = FakeServer
+ fake_types_mod = MagicMock()
+ fake_types_mod.Tool = lambda **kwargs: kwargs  # types become plain dicts so the test can index them
+ fake_types_mod.TextContent = lambda **kwargs: kwargs
+ fake_types_mod.Resource = lambda **kwargs: kwargs
+
+ monkeypatch.setitem(sys.modules, "mcp", MagicMock())  # stub the mcp package in sys.modules
+ monkeypatch.setitem(sys.modules, "mcp.server", fake_server_mod)
+ monkeypatch.setitem(sys.modules, "mcp.types", fake_types_mod)
+
+ with patch.dict(os.environ, {"WP_MCP_DOMAIN": "full"}, clear=False):
+ with patch(
+ "website_profiling.mcp.server._load_disabled_tools",
+ return_value=frozenset({"list_properties"}),  # exactly one tool is disabled
+ ):
+ mcp_server.create_server()
+ tools = asyncio.run(captured["list_tools"]()) # type: ignore[arg-type]
+ blocked = asyncio.run(captured["call_tool"]("list_properties", {})) # type: ignore[arg-type]
+
+ tool_names = {t["name"] for t in tools}
+ assert "list_properties" not in tool_names
+ assert len(tool_names) >= 337  # NOTE(review): hard-coded lower bound on the tool count — confirm it is kept in sync
+
+ payload = json.loads(blocked[0]["text"])
+ assert "disabled via Risk Settings" in payload["error"]
+ assert "/risk-settings" in payload["hint"]
+
+
def test_mcp_call_tool_rejects_tools_outside_domain(monkeypatch) -> None:
captured: dict[str, object] = {}
diff --git a/tests/tools/test_agent_readiness_coverage.py b/tests/tools/test_agent_readiness_coverage.py
index 1bbd2133..d8c5a0c5 100644
--- a/tests/tools/test_agent_readiness_coverage.py
+++ b/tests/tools/test_agent_readiness_coverage.py
@@ -7,7 +7,7 @@
import pandas as pd
import pytest
-from website_profiling.tools.audit_tools import agent_readiness as ar_mod
+from website_profiling.tools.audit_tools.geo import agent_readiness as ar_mod
from website_profiling.tools.audit_tools.context import AuditToolContext as Ctx
from website_profiling.tools.audit_tools._aeo_helpers import (
count_tokens,
@@ -582,12 +582,12 @@ def raise_on_call(domain):
with patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), \
patch.object(Ctx, "load_crawl_df", return_value=_empty_df()), \
- patch("website_profiling.tools.audit_tools.agent_readiness._fetch_agents_md", side_effect=raise_on_call), \
- patch("website_profiling.tools.audit_tools.agent_readiness._fetch_llms_txt", side_effect=raise_on_call), \
- patch("website_profiling.tools.audit_tools.agent_readiness._score_robots_ai_access", side_effect=raise_on_call), \
- patch("website_profiling.tools.audit_tools.agent_readiness._fetch_skill_md", side_effect=raise_on_call), \
- patch("website_profiling.tools.audit_tools.agent_readiness._fetch_agent_permissions", side_effect=raise_on_call), \
- patch("website_profiling.tools.audit_tools.agent_readiness._score_meta_signals", side_effect=raise_on_call):
+ patch("website_profiling.tools.audit_tools.geo.agent_readiness._fetch_agents_md", side_effect=raise_on_call), \
+ patch("website_profiling.tools.audit_tools.geo.agent_readiness._fetch_llms_txt", side_effect=raise_on_call), \
+ patch("website_profiling.tools.audit_tools.geo.agent_readiness._score_robots_ai_access", side_effect=raise_on_call), \
+ patch("website_profiling.tools.audit_tools.geo.agent_readiness._fetch_skill_md", side_effect=raise_on_call), \
+ patch("website_profiling.tools.audit_tools.geo.agent_readiness._fetch_agent_permissions", side_effect=raise_on_call), \
+ patch("website_profiling.tools.audit_tools.geo.agent_readiness._score_meta_signals", side_effect=raise_on_call):
result = ar_mod.get_agent_readiness_score(conn, ctx, {})
assert "percentage" in result
diff --git a/tests/tools/test_audit_tools.py b/tests/tools/test_audit_tools.py
index 7f9da2e9..fd4db666 100644
--- a/tests/tools/test_audit_tools.py
+++ b/tests/tools/test_audit_tools.py
@@ -9,7 +9,7 @@
from website_profiling.tools.audit_tools import AuditToolContext, dispatch_tool
from website_profiling.tools.audit_tools.context import AuditToolContext as Ctx
from website_profiling.tools.audit_tools.registry import TOOL_DEFINITIONS, openai_tools_schema
-from website_profiling.tools.audit_tools.report import (
+from website_profiling.tools.audit_tools.report.report import (
get_category_scores,
get_report_summary,
list_issues,
@@ -93,7 +93,7 @@ def test_dispatch_via_db_session() -> None:
with patch("website_profiling.tools.audit_tools.registry.db_session") as mock_sess:
mock_sess.return_value.__enter__.return_value = conn
with patch(
- "website_profiling.tools.audit_tools.properties.list_properties_public",
+ "website_profiling.tools.audit_tools.portfolio.properties.list_properties_public",
return_value=[],
):
result = dispatch_tool("list_properties", {})
@@ -193,20 +193,20 @@ def test_get_report_summary_and_categories() -> None:
def test_properties_tools() -> None:
conn = MagicMock()
with patch(
- "website_profiling.tools.audit_tools.properties.list_properties_public",
+ "website_profiling.tools.audit_tools.portfolio.properties.list_properties_public",
return_value=[{"id": 1}],
):
assert dispatch_tool("list_properties", {}, conn=conn)["count"] == 1
with patch(
- "website_profiling.tools.audit_tools.properties.get_property_by_id",
+ "website_profiling.tools.audit_tools.portfolio.properties.get_property_by_id",
return_value=None,
):
missing = dispatch_tool("get_property", {"property_id": 9}, conn=conn)
assert "not found" in missing["error"]
with patch(
- "website_profiling.tools.audit_tools.properties.get_property_by_id",
+ "website_profiling.tools.audit_tools.portfolio.properties.get_property_by_id",
return_value={"id": 1, "name": "ex.com", "canonical_domain": "ex.com"},
):
ok = dispatch_tool("get_property", {"property_id": 1}, conn=conn)
@@ -229,7 +229,7 @@ def test_crawl_tools() -> None:
with patch.object(Ctx, "load_crawl_df", return_value=pd.DataFrame()), patch.object(
Ctx, "load_payload", return_value=_sample_payload(),
), patch(
- "website_profiling.tools.audit_tools.crawl.slice_from_google_row",
+ "website_profiling.tools.audit_tools.crawl.crawl.slice_from_google_row",
return_value={"gsc": {"clicks": 1}},
):
detail = dispatch_tool(
@@ -339,7 +339,7 @@ def test_lighthouse_keywords_google_health() -> None:
with patch.object(Ctx, "load_crawl_df", return_value=df), patch.object(
Ctx, "load_payload", return_value=_sample_payload(),
), patch(
- "website_profiling.tools.audit_tools.crawl.slice_from_google_row",
+ "website_profiling.tools.audit_tools.crawl.crawl.slice_from_google_row",
return_value={},
):
found = dispatch_tool(
diff --git a/tests/tools/test_audit_tools_batch100_coverage.py b/tests/tools/test_audit_tools_batch100_coverage.py
index 27572834..e310e6f6 100644
--- a/tests/tools/test_audit_tools_batch100_coverage.py
+++ b/tests/tools/test_audit_tools_batch100_coverage.py
@@ -8,17 +8,17 @@
import pytest
import requests
-from website_profiling.tools.audit_tools import crawl as crawl_mod
+from website_profiling.tools.audit_tools.crawl import crawl as crawl_mod
from website_profiling.tools.audit_tools.context import AuditToolContext as Ctx
-from website_profiling.tools.audit_tools import issue_lists as issue_mod
-from website_profiling.tools.audit_tools import google_lists as google_mod
-from website_profiling.tools.audit_tools import keyword_lists as kw_mod
-from website_profiling.tools.audit_tools import backlink_lists as bl_mod
-from website_profiling.tools.audit_tools import content_lists as content_mod
-from website_profiling.tools.audit_tools import link_lists as link_mod
-from website_profiling.tools.audit_tools import indexation_lists as idx_mod
-from website_profiling.tools.audit_tools import compare_list_tools as cmp_mod
-from website_profiling.tools.audit_tools import geo_list_tools as geo_list_mod
+from website_profiling.tools.audit_tools.issues import issue_lists as issue_mod
+from website_profiling.tools.audit_tools.google import google_lists as google_mod
+from website_profiling.tools.audit_tools.keywords import keyword_lists as kw_mod
+from website_profiling.tools.audit_tools.backlinks import backlink_lists as bl_mod
+from website_profiling.tools.audit_tools.content import content_lists as content_mod
+from website_profiling.tools.audit_tools.links import link_lists as link_mod
+from website_profiling.tools.audit_tools.indexation import indexation_lists as idx_mod
+from website_profiling.tools.audit_tools.compare import compare_list_tools as cmp_mod
+from website_profiling.tools.audit_tools.geo import geo_list_tools as geo_list_mod
@pytest.fixture
@@ -723,7 +723,7 @@ def test_keyword_lists_all_paths(conn: MagicMock, ctx: Ctx) -> None:
prior = _prior_keywords()
payload = _payload()
with patch.object(Ctx, "load_keywords", return_value=kw), patch.object(Ctx, "load_payload", return_value=payload), patch(
- "website_profiling.tools.audit_tools.keyword_lists.read_keyword_snapshots_for_property",
+ "website_profiling.tools.audit_tools.keywords.keyword_lists.read_keyword_snapshots_for_property",
return_value=[kw, prior],
):
assert kw_mod.list_keyword_rank_improvements(conn, ctx, {})["total"] >= 1
@@ -740,6 +740,8 @@ def test_keyword_lists_all_paths(conn: MagicMock, ctx: Ctx) -> None:
assert kw_mod.get_keyword_opportunity_score(conn, ctx, {"keyword": "widgets"})["opportunity_score"] > 0
assert kw_mod.list_keywords_near_page_one(conn, ctx, {})["total"] >= 1
assert kw_mod.list_keywords_high_impression_zero_click(conn, ctx, {})["total"] >= 1
+ # Non-numeric threshold must fall back to the default (no crash).
+ assert kw_mod.list_keywords_high_impression_zero_click(conn, ctx, {"min_impressions": "bad"})["total"] >= 0
assert kw_mod.list_keywords_by_competition_band(conn, ctx, {})["total"] >= 1
assert kw_mod.get_keyword_serp_snapshot(conn, ctx, {"keyword": "widgets"})["keyword"] == "widgets"
assert kw_mod.list_keywords_with_ai_overview(conn, ctx, {})["total"] >= 1
@@ -751,7 +753,7 @@ def test_keyword_lists_all_paths(conn: MagicMock, ctx: Ctx) -> None:
assert kw_mod.list_cannibalisation_queries(conn, ctx, {})["missing"] is True
with patch.object(Ctx, "load_keywords", return_value={"rows": []}), patch(
- "website_profiling.tools.audit_tools.keyword_lists.read_keyword_snapshots_for_property",
+ "website_profiling.tools.audit_tools.keywords.keyword_lists.read_keyword_snapshots_for_property",
return_value=[{"rows": []}],
):
assert kw_mod.list_keyword_rank_improvements(conn, ctx, {})["missing"] is True
@@ -889,12 +891,12 @@ def test_indexation_lists_all_paths(conn: MagicMock, ctx: Ctx) -> None:
no_prop = Ctx(property_id=None, report_id=1)
assert idx_mod.list_log_paths_by_hits(conn, no_prop, {})["error"]
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=None):
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=None):
assert idx_mod.list_log_paths_by_hits(conn, ctx, {})["missing"] is True
log = _log_row()
orphan_payload = {**payload, "orphan_urls": ["https://ex.com/orphan"]}
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=log), patch.object(
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=log), patch.object(
Ctx, "load_payload", return_value=orphan_payload,
):
assert idx_mod.list_log_paths_by_hits(conn, ctx, {})["total"] >= 1
@@ -902,7 +904,7 @@ def test_indexation_lists_all_paths(conn: MagicMock, ctx: Ctx) -> None:
assert idx_mod.list_log_googlebot_low_crawl(conn, ctx, {"min_hits": "bad"})["total"] >= 0
assert idx_mod.list_log_orphan_high_traffic(conn, ctx, {"min_hits": "bad"})["total"] >= 0
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=log), patch.object(
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=log), patch.object(
Ctx, "load_payload", return_value={"orphan_urls": []},
):
assert idx_mod.list_log_orphan_high_traffic(conn, ctx, {})["note"]
@@ -917,7 +919,7 @@ def test_indexation_lists_all_paths(conn: MagicMock, ctx: Ctx) -> None:
def test_compare_list_tools_all_paths(conn: MagicMock, ctx: Ctx) -> None:
err = {"error": "baseline required"}
- with patch("website_profiling.tools.audit_tools.compare_list_tools.load_compare_pair", return_value=(None, None, None, None, err)):
+ with patch("website_profiling.tools.audit_tools.compare.compare_list_tools.load_compare_pair", return_value=(None, None, None, None, err)):
assert cmp_mod.list_compare_new_issues(conn, ctx, {})["error"]
assert cmp_mod.list_compare_resolved_issues(conn, ctx, {})["error"]
assert cmp_mod.list_compare_new_urls(conn, ctx, {})["error"]
@@ -926,7 +928,7 @@ def test_compare_list_tools_all_paths(conn: MagicMock, ctx: Ctx) -> None:
assert cmp_mod.list_compare_traffic_losers(conn, ctx, {})["error"]
current, baseline = _compare_current(), _compare_baseline()
- with patch("website_profiling.tools.audit_tools.compare_list_tools.load_compare_pair", return_value=(current, baseline, 2, 1, None)):
+ with patch("website_profiling.tools.audit_tools.compare.compare_list_tools.load_compare_pair", return_value=(current, baseline, 2, 1, None)):
assert cmp_mod.list_compare_new_issues(conn, ctx, {})["total"] >= 0
assert cmp_mod.list_compare_resolved_issues(conn, ctx, {})["total"] >= 0
assert cmp_mod.list_compare_new_urls(conn, ctx, {})["total"] >= 1
@@ -938,7 +940,7 @@ def test_compare_list_tools_all_paths(conn: MagicMock, ctx: Ctx) -> None:
no_google.pop("google")
base_no_google = dict(baseline)
base_no_google.pop("google")
- with patch("website_profiling.tools.audit_tools.compare_list_tools.load_compare_pair", return_value=(no_google, base_no_google, 2, 1, None)), patch.object(
+ with patch("website_profiling.tools.audit_tools.compare.compare_list_tools.load_compare_pair", return_value=(no_google, base_no_google, 2, 1, None)), patch.object(
Ctx, "load_google_full", return_value=None,
), patch.object(Ctx, "load_google", return_value=None):
assert cmp_mod.list_compare_traffic_losers(conn, ctx, {})["missing"] is True
@@ -956,14 +958,14 @@ def test_geo_list_tools_all_paths(conn: MagicMock, ctx: Ctx) -> None:
assert geo_list_mod.list_pages_ai_citation_signals(conn, ctx, {"min_score": "bad"})["total"] >= 1
with patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch(
- "website_profiling.tools.audit_tools.geo_list_tools._fetch_llms_txt",
+ "website_profiling.tools.audit_tools.geo.geo_list_tools._fetch_llms_txt",
return_value={"found": False},
):
assert geo_list_mod.list_pages_missing_llms_txt_reference(conn, ctx, {})["missing"] is True
llms = {"found": True, "url": "https://ex.com/llms.txt", "preview": "https://ex.com/\nMore docs"}
with patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch(
- "website_profiling.tools.audit_tools.geo_list_tools._fetch_llms_txt", return_value=llms,
+ "website_profiling.tools.audit_tools.geo.geo_list_tools._fetch_llms_txt", return_value=llms,
), patch.object(Ctx, "load_payload", return_value=payload), patch.object(Ctx, "load_crawl_df", return_value=df):
missing = geo_list_mod.list_pages_missing_llms_txt_reference(conn, ctx, {})
assert missing["total"] >= 1
@@ -983,11 +985,11 @@ def test_geo_list_tools_all_paths(conn: MagicMock, ctx: Ctx) -> None:
):
assert geo_list_mod.list_robots_blocked_ai_crawlers(conn, ctx, {})["missing"] is True
- with patch("website_profiling.tools.audit_tools.geo_list_tools.requests.get", side_effect=requests.RequestException("fail")):
+ with patch("website_profiling.tools.audit_tools.geo.geo_list_tools.requests.get", side_effect=requests.RequestException("fail")):
assert geo_list_mod._parse_robots_txt("ex.com") == ""
mock_resp = MagicMock(status_code=404, text="")
- with patch("website_profiling.tools.audit_tools.geo_list_tools.requests.get", return_value=mock_resp):
+ with patch("website_profiling.tools.audit_tools.geo.geo_list_tools.requests.get", return_value=mock_resp):
assert geo_list_mod._parse_robots_txt("ex.com") == ""
@@ -1143,7 +1145,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
# keyword_lists gaps
kw = _keyword_data()
with patch.object(Ctx, "load_keywords", return_value=None), patch(
- "website_profiling.tools.audit_tools.keyword_lists.read_keyword_snapshots_for_property",
+ "website_profiling.tools.audit_tools.keywords.keyword_lists.read_keyword_snapshots_for_property",
return_value=[kw],
):
cur, prior = kw_mod._load_keyword_pair(ctx, conn)
@@ -1175,7 +1177,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
assert kw_mod._pair_delta_tool(conn, ctx, {}, builder=lambda a, b: [], item_key="keywords")["missing"] is True
with patch.object(Ctx, "load_keywords", return_value={"rows": []}), patch(
- "website_profiling.tools.audit_tools.keyword_lists.read_keyword_snapshots_for_property",
+ "website_profiling.tools.audit_tools.keywords.keyword_lists.read_keyword_snapshots_for_property",
return_value=[],
):
assert kw_mod._load_keyword_pair(ctx, conn) == ({"rows": []}, None)
@@ -1203,7 +1205,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
assert bl_mod.list_backlinks_to_url(conn, ctx, {"url": "https://ex.com"})["missing"] is True
assert bl_mod.list_backlinks_from_domain(conn, ctx, {"domain": "x.com"})["missing"] is True
assert bl_mod.get_anchor_text_distribution(conn, ctx, {})["missing"] is True
- with patch("website_profiling.tools.audit_tools.backlink_lists.urlparse", side_effect=ValueError("bad")):
+ with patch("website_profiling.tools.audit_tools.backlinks.backlink_lists.urlparse", side_effect=ValueError("bad")):
assert bl_mod._norm_domain("bad://") == "bad://"
# content_lists gaps
@@ -1287,7 +1289,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
"status_counts": {"500": 12, "503": 3},
},
}
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=log_non_list), patch.object(
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=log_non_list), patch.object(
Ctx, "load_payload", return_value=_payload(),
):
assert idx_mod.list_log_paths_by_hits(conn, ctx, {})["total"] == 0
@@ -1305,7 +1307,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
"googlebot_paths": [{"path": "/orphan", "hits": 0}],
},
}
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=log_row), patch.object(
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=log_row), patch.object(
Ctx, "load_payload", return_value=orphan_log,
):
assert idx_mod.list_log_orphan_high_traffic(conn, ctx, {})["total"] >= 1
@@ -1326,7 +1328,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
gaps = idx_mod.list_hreflang_reciprocal_gaps(conn, ctx, {})
assert gaps["total"] >= 1
- with patch("website_profiling.tools.audit_tools.indexation_lists.url_to_path", side_effect=RuntimeError("bad")):
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists.url_to_path", side_effect=RuntimeError("bad")):
assert idx_mod._norm_path("https://ex.com/x") == "https://ex.com/x"
# compare_list_tools line 156 (skip non-losers)
@@ -1338,7 +1340,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
"report_generated_at": "2026-05-01",
"google": {"gsc_full": {"pages": [{"page": "https://ex.com/win", "clicks": 10, "impressions": 50}]}},
}
- with patch("website_profiling.tools.audit_tools.compare_list_tools.load_compare_pair", return_value=(winner_current, winner_baseline, 2, 1, None)):
+ with patch("website_profiling.tools.audit_tools.compare.compare_list_tools.load_compare_pair", return_value=(winner_current, winner_baseline, 2, 1, None)):
losers = cmp_mod.list_compare_traffic_losers(conn, ctx, {})
assert losers["total"] == 0
@@ -1356,7 +1358,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
assert geo_list_mod._parse_robots_txt("") == ""
ok_resp = MagicMock(status_code=200, text="User-agent: *\nAllow: /\n# comment\nUser-agent: ClaudeBot\nDisallow: /")
- with patch("website_profiling.tools.audit_tools.geo_list_tools.requests.get", return_value=ok_resp):
+ with patch("website_profiling.tools.audit_tools.geo.geo_list_tools.requests.get", return_value=ok_resp):
robots = geo_list_mod._parse_robots_txt("ex.com")
assert "User-agent" in robots
assert geo_list_mod._agent_blocked(robots, "ClaudeBot") is True
@@ -1467,12 +1469,12 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
idx_mod.list_indexation_submitted_not_indexed(conn, ctx, {})
idx_mod.list_crawl_urls_not_in_sitemap(conn, ctx, {})
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=_log_row()), patch.object(
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=_log_row()), patch.object(
Ctx, "load_payload", return_value=_payload(),
):
assert idx_mod.list_log_googlebot_low_crawl(conn, ctx, {"min_hits": "bad"})["paths"] is not None
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=_log_row()), patch.object(
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=_log_row()), patch.object(
Ctx, "load_payload", return_value=None,
):
assert idx_mod.list_log_orphan_high_traffic(conn, ctx, {})["error"]
@@ -1630,7 +1632,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
assert idx_mod.list_indexation_indexed_not_submitted(conn, ctx, {})["error"]
assert idx_mod.list_crawl_urls_not_in_sitemap(conn, ctx, {})["error"]
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=_log_row()):
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=_log_row()):
assert idx_mod.list_log_5xx_paths(conn, Ctx(property_id=None, report_id=1), {})["error"]
assert idx_mod.list_log_googlebot_low_crawl(conn, Ctx(property_id=None, report_id=1), {})["error"]
@@ -1644,12 +1646,12 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
**_payload(),
"links": [{"url": "https://ex.com/crawled"}, {"url": "https://ex.com/popular"}],
}
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=bot_log), patch.object(
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=bot_log), patch.object(
Ctx, "load_payload", return_value=bot_payload,
):
assert idx_mod.list_log_googlebot_low_crawl(conn, ctx, {"min_hits": 20, "max_googlebot_hits": 5})["total"] == 0
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=_log_row()):
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=_log_row()):
assert idx_mod.list_log_orphan_high_traffic(conn, Ctx(property_id=None, report_id=1), {})["error"]
orphan_payload = {
@@ -1661,7 +1663,7 @@ def test_batch100_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
"top_paths": ["skip", {"path": "/orphan", "hits": 50}],
},
}
- with patch("website_profiling.tools.audit_tools.indexation_lists._load_log_analysis", return_value=orphan_log2), patch.object(
+ with patch("website_profiling.tools.audit_tools.indexation.indexation_lists._load_log_analysis", return_value=orphan_log2), patch.object(
Ctx, "load_payload", return_value=orphan_payload,
):
assert idx_mod.list_log_orphan_high_traffic(conn, ctx, {})["total"] >= 1
diff --git a/tests/tools/test_audit_tools_coverage.py b/tests/tools/test_audit_tools_coverage.py
index ec482165..41a95b25 100644
--- a/tests/tools/test_audit_tools_coverage.py
+++ b/tests/tools/test_audit_tools_coverage.py
@@ -9,18 +9,18 @@
from website_profiling.tools.audit_tools import _slice, dispatch_tool
from website_profiling.tools.audit_tools.context import AuditToolContext as Ctx
-from website_profiling.tools.audit_tools import compare as compare_mod
-from website_profiling.tools.audit_tools import crawl as crawl_mod
-from website_profiling.tools.audit_tools import keywords as kw_mod
-from website_profiling.tools.audit_tools import report as report_mod
-from website_profiling.tools.audit_tools import health as health_mod
-from website_profiling.tools.audit_tools import google as google_mod
-from website_profiling.tools.audit_tools import lighthouse as lh_mod
-from website_profiling.tools.audit_tools import links as links_mod
-from website_profiling.tools.audit_tools import backlinks as bl_mod
-from website_profiling.tools.audit_tools import content as content_mod
-from website_profiling.tools.audit_tools import issues as issues_mod
-from website_profiling.tools.audit_tools import schema as schema_mod
+from website_profiling.tools.audit_tools.compare import compare as compare_mod
+from website_profiling.tools.audit_tools.crawl import crawl as crawl_mod
+from website_profiling.tools.audit_tools.keywords import keywords as kw_mod
+from website_profiling.tools.audit_tools.report import report as report_mod
+from website_profiling.tools.audit_tools.portfolio import health as health_mod
+from website_profiling.tools.audit_tools.google import google as google_mod
+from website_profiling.tools.audit_tools.performance import lighthouse as lh_mod
+from website_profiling.tools.audit_tools.links import links as links_mod
+from website_profiling.tools.audit_tools.backlinks import backlinks as bl_mod
+from website_profiling.tools.audit_tools.content import content as content_mod
+from website_profiling.tools.audit_tools.issues import issues as issues_mod
+from website_profiling.tools.audit_tools.schema import schema as schema_mod
def test_slice_edge_cases() -> None:
@@ -111,14 +111,14 @@ def test_crawl_lighthouse_links(conn: MagicMock | None = None) -> None:
with patch.object(Ctx, "load_payload", return_value={}):
assert links_mod.list_orphan_pages(conn, ctx, {})["error"]
with patch.object(Ctx, "load_payload", return_value={"lighthouse_by_url": {}}), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_page_summaries", return_value={},
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_page_summaries", return_value={},
):
assert lh_mod.get_lighthouse_for_url(conn, ctx, {"url": "https://ex.com"})["error"]
def test_security_workflow_backlinks() -> None:
- from website_profiling.tools.audit_tools import security as sec_mod
- from website_profiling.tools.audit_tools import workflow as wf_mod
+ from website_profiling.tools.audit_tools.security import security as sec_mod
+ from website_profiling.tools.audit_tools.ops import workflow as wf_mod
conn = MagicMock()
payload = {"security_findings": [{"url": "u", "severity": "High", "finding_type": "x", "message": "m"}]}
@@ -132,8 +132,15 @@ def test_security_workflow_backlinks() -> None:
row = ("k", "u", "c", "Low", "m", "open", None, None, now)
conn.execute = MagicMock(return_value=MagicMock(fetchall=MagicMock(return_value=[row])))
ctx2 = Ctx(property_id=1)
- assert wf_mod.list_issue_workflow(conn, ctx2, {"status": "closed"})["count"] == 0
+ # The status filter is now applied in SQL (before LIMIT), not in Python, so the
+ # predicate must be present in the query and bound as a parameter.
+ wf_mod.list_issue_workflow(conn, ctx2, {"status": "closed"})
+ called_sql, called_params = conn.execute.call_args[0][0], conn.execute.call_args[0][1]
+ assert "AND status = %s" in called_sql
+ assert "closed" in called_params
+ # No status filter → no status clause; the row is returned as-is.
assert wf_mod.list_issue_workflow(conn, ctx2, {"limit": "bad"})["count"] == 1
+ assert "AND status = %s" not in conn.execute.call_args[0][0]
with patch.object(Ctx, "load_gsc_links", return_value=None):
assert bl_mod.get_gsc_links_summary(conn, Ctx(property_id=1), {})["missing"]
@@ -164,17 +171,17 @@ def test_misc_dispatch() -> None:
conn = MagicMock()
ctx = Ctx(property_id=None)
assert dispatch_tool("get_integration_alerts", {}, context=ctx, conn=conn)["error"]
- with patch("website_profiling.tools.audit_tools.ops.check_all_alerts", return_value=[]):
+ with patch("website_profiling.tools.audit_tools.ops.ops.check_all_alerts", return_value=[]):
assert dispatch_tool("get_integration_alerts", {"property_id": 1}, context=Ctx(property_id=1), conn=conn)["count"] == 0
def test_remaining_module_paths() -> None:
- from website_profiling.tools.audit_tools import backlinks as bl
- from website_profiling.tools.audit_tools import links as links_mod
- from website_profiling.tools.audit_tools import content as ct
- from website_profiling.tools.audit_tools import indexation_tools as idx
- from website_profiling.tools.audit_tools import international as intl
- from website_profiling.tools.audit_tools import tech as tech_mod
+ from website_profiling.tools.audit_tools.backlinks import backlinks as bl
+ from website_profiling.tools.audit_tools.links import links as links_mod
+ from website_profiling.tools.audit_tools.content import content as ct
+ from website_profiling.tools.audit_tools.indexation import indexation_tools as idx
+ from website_profiling.tools.audit_tools.indexation import international as intl
+ from website_profiling.tools.audit_tools.tech import tech as tech_mod
conn = MagicMock()
ctx = Ctx(property_id=1)
@@ -246,9 +253,9 @@ def test_remaining_module_paths() -> None:
lh_payload = {"lighthouse_summary": None, "lighthouse_diagnostics": "x", "lighthouse_by_url": "x", "crux_summary": {"ok": True}}
with patch.object(Ctx, "load_payload", return_value=lh_payload), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_summary", return_value={"p": 1},
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_summary", return_value={"p": 1},
), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_page_summaries", return_value={"u": {"performance": 30}},
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_page_summaries", return_value={"u": {"performance": 30}},
):
assert lh_mod.get_lighthouse_summary(conn, ctx, {})["pages_audited"] == 1
assert lh_mod.get_lighthouse_diagnostics(conn, ctx, {})["total"] == 0
@@ -292,19 +299,19 @@ def test_remaining_module_paths() -> None:
assert health_mod.list_report_history(fake2, Ctx(property_id=1), {"limit": "bad"})["count"] == 0
assert bl.get_gsc_links_import_status(conn, Ctx(property_id=1), {}) # patched in expanded tests
- with patch("website_profiling.tools.audit_tools.backlinks.read_gsc_links_status", return_value={"hasData": False}):
+ with patch("website_profiling.tools.audit_tools.backlinks.backlinks.read_gsc_links_status", return_value={"hasData": False}):
assert bl.get_gsc_links_import_status(conn, Ctx(property_id=1), {})["hasData"] is False
def test_new_tools_coverage() -> None:
- from website_profiling.tools.audit_tools import backlinks as bl_mod
- from website_profiling.tools.audit_tools import charts as charts_mod
- from website_profiling.tools.audit_tools import indexation_tools as idx_mod
- from website_profiling.tools.audit_tools import compare_helpers as ch_mod
- from website_profiling.tools.audit_tools import compare_slices as cs_mod
- from website_profiling.tools.audit_tools import onpage as onpage_mod
- from website_profiling.tools.audit_tools import ops as ops_mod
- from website_profiling.tools.audit_tools import report_extras as rex_mod
+ from website_profiling.tools.audit_tools.backlinks import backlinks as bl_mod
+ from website_profiling.tools.audit_tools.portfolio import charts as charts_mod
+ from website_profiling.tools.audit_tools.indexation import indexation_tools as idx_mod
+ from website_profiling.tools.audit_tools.compare import compare_helpers as ch_mod
+ from website_profiling.tools.audit_tools.compare import compare_slices as cs_mod
+ from website_profiling.tools.audit_tools.onpage import onpage as onpage_mod
+ from website_profiling.tools.audit_tools.ops import ops as ops_mod
+ from website_profiling.tools.audit_tools.report import report_extras as rex_mod
from website_profiling.reporting.compare_payload import build_url_set_diff
conn = MagicMock()
@@ -391,7 +398,7 @@ def test_new_tools_coverage() -> None:
fetchall=MagicMock(return_value=[{"captured_at": datetime.now(timezone.utc), "referring_domains": 5, "top_domains": []}]),
fetchone=MagicMock(return_value={"schedule_cron": "0 9 * * 1", "alert_webhook_url": "u", "alert_email": "a@b.com"}),
))
- with patch("website_profiling.tools.audit_tools.ops.get_property_by_id", return_value={"google_refresh_token": "t"}), patch.object(
+ with patch("website_profiling.tools.audit_tools.ops.ops.get_property_by_id", return_value={"google_refresh_token": "t"}), patch.object(
Ctx, "load_google", return_value={"fetched_at": "2026-01-01"},
), patch("website_profiling.integrations.google.gsc_links_store.read_gsc_links_status", return_value={"hasData": True}):
assert ops_mod.get_property_ops(conn, ctx, {})["has_schedule"]
@@ -427,7 +434,7 @@ def test_new_tools_coverage() -> None:
def _read_pair(_conn: MagicMock, rid: int) -> dict:
return cur_p if int(rid) == 1 else base_p
- with patch("website_profiling.tools.audit_tools.compare_helpers.read_report_payload", side_effect=_read_pair):
+ with patch("website_profiling.tools.audit_tools.compare.compare_helpers.read_report_payload", side_effect=_read_pair):
assert cs_mod.compare_url_set_diff(conn, ctx, {"baseline_report_id": 2})["new_count"] >= 1
assert cs_mod.compare_issue_deltas(conn, ctx, {"baseline_report_id": 2})["issue_deltas"] == []
assert cs_mod.compare_category_deltas(conn, ctx, {"baseline_report_id": 2})["category_scores"] == []
@@ -438,7 +445,7 @@ def _read_pair(_conn: MagicMock, rid: int) -> dict:
err = ch_mod.load_compare_pair(conn, ctx, {})
assert err[4]["error"] == "baseline_report_id is required"
- with patch("website_profiling.tools.audit_tools.compare_helpers.read_report_payload", side_effect=[{"a": 1}, None]):
+ with patch("website_profiling.tools.audit_tools.compare.compare_helpers.read_report_payload", side_effect=[{"a": 1}, None]):
err2 = ch_mod.load_compare_pair(conn, Ctx(report_id=1), {"baseline_report_id": 2})
assert "not found" in err2[4]["error"]
@@ -497,7 +504,7 @@ def _read_pair(_conn: MagicMock, rid: int) -> dict:
conn_rid = MagicMock()
conn_rid.execute = MagicMock(return_value=MagicMock(fetchone=MagicMock(return_value={"id": 7})))
- with patch("website_profiling.tools.audit_tools.compare_helpers.read_report_payload", side_effect=[{"links": []}, {"links": []}]):
+ with patch("website_profiling.tools.audit_tools.compare.compare_helpers.read_report_payload", side_effect=[{"links": []}, {"links": []}]):
ok = ch_mod.load_compare_pair(conn_rid, Ctx(report_id=None), {"baseline_report_id": 2})
assert ok[4] is None
assert ok[2] == 7
@@ -524,7 +531,7 @@ def _read_pair(_conn: MagicMock, rid: int) -> dict:
assert ops_mod.get_property_ops(ops_conn, ctx, {})["error"] == "property not found"
assert ops_mod.get_latest_log_analysis(ops_conn, ctx, {})["missing"]
- with patch("website_profiling.tools.audit_tools.ops.get_property_by_id", return_value=None):
+ with patch("website_profiling.tools.audit_tools.ops.ops.get_property_by_id", return_value=None):
assert ops_mod.get_google_integration_status(conn, ctx, {})["error"] == "property not found"
with patch.object(Ctx, "load_gsc_links", return_value=None):
@@ -546,7 +553,7 @@ def _read_pair(_conn: MagicMock, rid: int) -> dict:
assert ch_mod._row_id({"id": 9}) == 9
assert ch_mod._row_id((8,)) == 8
- with patch("website_profiling.tools.audit_tools.compare_helpers.read_report_payload", return_value=None):
+ with patch("website_profiling.tools.audit_tools.compare.compare_helpers.read_report_payload", return_value=None):
err_cur_only = ch_mod.load_compare_pair(conn, Ctx(report_id=3), {"baseline_report_id": 2})
assert "not found" in err_cur_only[4]["error"]
@@ -586,14 +593,14 @@ def _read_pair(_conn: MagicMock, rid: int) -> dict:
ops_row.keys = MagicMock(return_value=["schedule_cron", "alert_webhook_url", "alert_email"])
ops_conn3 = MagicMock()
ops_conn3.execute = MagicMock(return_value=MagicMock(fetchone=MagicMock(return_value=ops_row)))
- with patch("website_profiling.tools.audit_tools.ops._row_field", side_effect=lambda row, key, index=0: (
+ with patch("website_profiling.tools.audit_tools.ops.ops._row_field", side_effect=lambda row, key, index=0: (
None, "https://hooks.example/alerts", None
)[index]):
ops = ops_mod.get_property_ops(ops_conn3, ctx, {})
assert ops["has_alert_webhook"] is True
assert ops["has_schedule"] is False
- with patch("website_profiling.tools.audit_tools.ops.get_property_by_id", return_value={"google_refresh_token": "t"}), patch(
+ with patch("website_profiling.tools.audit_tools.ops.ops.get_property_by_id", return_value={"google_refresh_token": "t"}), patch(
"website_profiling.integrations.google.gsc_links_store.read_gsc_links_status", side_effect=RuntimeError("db"),
), patch.object(Ctx, "load_google", return_value={}):
status = ops_mod.get_google_integration_status(conn, ctx, {})
@@ -639,7 +646,7 @@ def _read_pair(_conn: MagicMock, rid: int) -> dict:
assert bl_mod.get_backlinks_velocity(bl_conn2, ctx, {})["count"] == 0
with patch.object(Ctx, "load_google", return_value={"ga4": {"top_pages": []}, "fetched_at": "t"}), patch(
- "website_profiling.tools.audit_tools.google.slice_from_google_row", return_value={"ga4": {"sessions": 3}},
+ "website_profiling.tools.audit_tools.google.google.slice_from_google_row", return_value={"ga4": {"sessions": 3}},
):
assert google_mod.get_ga4_page_metrics(conn, ctx, {"path": "/x"})["metrics"]["sessions"] == 3
@@ -653,7 +660,7 @@ def _read_pair(_conn: MagicMock, rid: int) -> dict:
def test_property_profile_tools() -> None:
- from website_profiling.tools.audit_tools import property_profile as pp_mod
+ from website_profiling.tools.audit_tools.portfolio import property_profile as pp_mod
conn = MagicMock()
ctx = Ctx(property_id=1)
diff --git a/tests/tools/test_audit_tools_dispatch_coverage.py b/tests/tools/test_audit_tools_dispatch_coverage.py
index 175ada41..1f725932 100644
--- a/tests/tools/test_audit_tools_dispatch_coverage.py
+++ b/tests/tools/test_audit_tools_dispatch_coverage.py
@@ -149,10 +149,10 @@ def test_content_and_lighthouse_paths(conn: MagicMock, ctx: Ctx) -> None:
},
}
with patch.object(Ctx, "load_payload", return_value=lh_payload), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_page_summaries",
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_page_summaries",
return_value={"https://ex.com/x": {"performance": 40, "scores": {"performance": 40}}},
), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_summary",
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_summary",
return_value={"human_summary": "db"},
):
summary = dispatch_tool("get_lighthouse_summary", {}, context=ctx, conn=conn)
@@ -164,7 +164,7 @@ def test_content_and_lighthouse_paths(conn: MagicMock, ctx: Ctx) -> None:
def test_compare_slices_and_llm(conn: MagicMock, ctx: Ctx) -> None:
payload = _payload()
- with patch("website_profiling.tools.audit_tools.compare_helpers.read_report_payload", return_value=payload):
+ with patch("website_profiling.tools.audit_tools.compare.compare_helpers.read_report_payload", return_value=payload):
for name in (
"compare_duplicate_deltas",
"compare_tech_deltas",
@@ -175,7 +175,7 @@ def test_compare_slices_and_llm(conn: MagicMock, ctx: Ctx) -> None:
result = dispatch_tool(name, {"baseline_report_id": 1}, context=ctx, conn=conn)
assert "error" not in result, name
- with patch("website_profiling.tools.audit_tools.llm_tools.run_page_coach", return_value={"coach": "ok"}):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.run_page_coach", return_value={"coach": "ok"}):
assert dispatch_tool("get_page_coach", {"url": "https://ex.com"}, context=ctx, conn=conn)["coach"] == "ok"
assert dispatch_tool("get_page_coach", {}, context=ctx, conn=conn)["error"]
assert dispatch_tool("generate_content_brief", {}, context=ctx, conn=conn)["error"]
@@ -187,7 +187,7 @@ def test_compare_slices_and_llm(conn: MagicMock, ctx: Ctx) -> None:
brief = dispatch_tool("generate_content_brief", {"keyword": "widgets", "gaps": ["gap"]}, context=ctx, conn=conn)
assert brief["matched_rows"] == 1
- with patch("website_profiling.tools.audit_tools.llm_tools.batch_expand", return_value={}), patch.object(
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.batch_expand", return_value={}), patch.object(
Ctx, "load_keywords", return_value=None,
):
expanded = dispatch_tool(
@@ -204,7 +204,7 @@ def test_compare_slices_and_llm(conn: MagicMock, ctx: Ctx) -> None:
"report_id": 1,
"issue_counts": json.dumps({"High": 2}),
})))
- with patch("website_profiling.tools.audit_tools.llm_tools.list_properties_public", return_value=[{"id": 1, "name": "Ex"}]):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.list_properties_public", return_value=[{"id": 1, "name": "Ex"}]):
portfolio = dispatch_tool("get_portfolio_summary", {}, conn=conn)
assert portfolio["count"] == 1
assert portfolio["properties"][0]["issue_counts"]["High"] == 2
@@ -227,7 +227,7 @@ def test_ops_log_paths(conn: MagicMock, ctx: Ctx) -> None:
"analysis": {"top_paths": [{"path": "/"}], "parsed_lines": 10, "googlebot_hits": 2},
"uploaded_at": datetime.now(timezone.utc).isoformat(),
}
- with patch("website_profiling.tools.audit_tools.ops._load_log_analysis", return_value=log_row):
+ with patch("website_profiling.tools.audit_tools.ops.ops._load_log_analysis", return_value=log_row):
assert dispatch_tool("get_log_analysis_by_id", {"upload_id": 1}, context=ctx, conn=conn)["upload_id"] == 1
assert dispatch_tool("get_latest_log_analysis", {}, context=ctx, conn=conn)["filename"] == "access.log"
assert dispatch_tool("get_log_top_paths", {}, context=ctx, conn=conn)["total"] == 1
@@ -235,13 +235,13 @@ def test_ops_log_paths(conn: MagicMock, ctx: Ctx) -> None:
assert dispatch_tool("list_crawl_only_paths", {}, context=ctx, conn=conn)["total"] == 0
assert dispatch_tool("get_log_googlebot_stats", {}, context=ctx, conn=conn)["googlebot_hits"] == 2
- with patch("website_profiling.tools.audit_tools.ops._load_log_analysis", return_value=None):
+ with patch("website_profiling.tools.audit_tools.ops.ops._load_log_analysis", return_value=None):
assert dispatch_tool("get_log_top_paths", {}, context=ctx, conn=conn)["missing"]
def test_tech_lighthouse_charts_keywords(conn: MagicMock, ctx: Ctx) -> None:
- from website_profiling.tools.audit_tools import tech as tech_mod
- from website_profiling.tools.audit_tools import lighthouse as lh_mod
+ from website_profiling.tools.audit_tools.tech import tech as tech_mod
+ from website_profiling.tools.audit_tools.performance import lighthouse as lh_mod
assert tech_mod.list_pages_by_technology(conn, ctx, {})["error"]
with patch.object(Ctx, "load_payload", return_value={}):
@@ -275,10 +275,10 @@ def test_tech_lighthouse_charts_keywords(conn: MagicMock, ctx: Ctx) -> None:
"lighthouse_human_summary": "inline",
}
with patch.object(Ctx, "load_payload", return_value=lh_data), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_page_summaries",
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_page_summaries",
return_value={"https://ex.com/a": {"performance": 20}},
), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_summary",
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_summary",
return_value={"human_summary": "db"},
):
out = lh_mod.get_lighthouse_summary(conn, ctx, {})
@@ -301,7 +301,7 @@ def test_tech_lighthouse_charts_keywords(conn: MagicMock, ctx: Ctx) -> None:
assert lh_mod.list_lighthouse_poor_best_practices_pages(conn, ctx, {})["total"] >= 1
assert lh_mod.list_lighthouse_cwv_failures(conn, ctx, {})["total"] >= 0
- with patch("website_profiling.tools.audit_tools.report.get_report_summary", return_value={
+ with patch("website_profiling.tools.audit_tools.report.report.get_report_summary", return_value={
"issue_counts": {"Critical": 2, "High": 1, "bad": "x"},
"total_issues": 3,
"health_score": 70,
@@ -320,7 +320,7 @@ def test_tech_lighthouse_charts_keywords(conn: MagicMock, ctx: Ctx) -> None:
"serp_overlay_count": 1,
}
with patch.object(Ctx, "load_keywords", return_value=kw_payload), patch(
- "website_profiling.tools.audit_tools.keywords.read_keyword_history",
+ "website_profiling.tools.audit_tools.keywords.keywords.read_keyword_history",
return_value=[{"keyword": "x", "position": 4}],
):
assert dispatch_tool("get_striking_distance_keywords", {}, context=ctx, conn=conn)["keywords"]
@@ -389,7 +389,7 @@ def test_security_google_lighthouse_and_portfolio(conn: MagicMock, ctx: Ctx, tmp
assert dispatch_tool("get_gsc_top_pages", {}, context=ctx, conn=conn)["error"]
assert dispatch_tool("get_gsc_page_query_slice", {"url": "https://ex.com"}, context=ctx, conn=conn)["error"]
with patch.object(Ctx, "load_google", return_value=google_data), patch(
- "website_profiling.tools.audit_tools.google.slice_from_google_row",
+ "website_profiling.tools.audit_tools.google.google.slice_from_google_row",
return_value={"gsc": {"clicks": 1}, "ga4": {"sessions": 2}},
):
assert dispatch_tool("get_gsc_top_pages", {}, context=ctx, conn=conn)["total"] == 1
@@ -426,7 +426,7 @@ def test_security_google_lighthouse_and_portfolio(conn: MagicMock, ctx: Ctx, tmp
assert dispatch_tool("list_keywords_by_impressions", {"min_impressions": "x"}, context=ctx, conn=conn)["error"]
assert dispatch_tool("list_keywords_by_position", {"min_position": 1, "max_position": 10}, context=ctx, conn=conn)["total"] == 1
- with patch("website_profiling.tools.audit_tools.llm_tools.list_properties_public", return_value=["bad", {"id": None}]):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.list_properties_public", return_value=["bad", {"id": None}]):
assert dispatch_tool("get_portfolio_summary", {}, conn=conn)["count"] == 0
conn.execute = MagicMock(return_value=MagicMock(fetchone=MagicMock(return_value={
@@ -435,7 +435,7 @@ def test_security_google_lighthouse_and_portfolio(conn: MagicMock, ctx: Ctx, tmp
"report_id": 3,
"issue_counts": "not-json",
})))
- with patch("website_profiling.tools.audit_tools.llm_tools.list_properties_public", return_value=[{"id": 3, "name": "c"}]):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.list_properties_public", return_value=[{"id": 3, "name": "c"}]):
portfolio = dispatch_tool("get_portfolio_summary", {}, conn=conn)
assert portfolio["count"] == 1
assert portfolio["properties"][0]["issue_counts"] == {}
@@ -470,10 +470,10 @@ def test_security_google_lighthouse_and_portfolio(conn: MagicMock, ctx: Ctx, tmp
"lighthouse_by_url": {"skip": "bad"},
}
with patch.object(Ctx, "load_payload", return_value=lh_summary_payload), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_page_summaries",
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_page_summaries",
return_value={"https://ex.com/a": "bad"},
), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_summary",
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_summary",
return_value=None,
):
assert dispatch_tool("get_lighthouse_summary", {}, context=ctx, conn=conn)["pages_audited"] == 1
diff --git a/tests/tools/test_audit_tools_expanded.py b/tests/tools/test_audit_tools_expanded.py
index 97413316..ffa2eaea 100644
--- a/tests/tools/test_audit_tools_expanded.py
+++ b/tests/tools/test_audit_tools_expanded.py
@@ -225,19 +225,19 @@ def test_all_payload_tools(conn: MagicMock, ctx: AuditToolContext) -> None:
), patch.object(Ctx, "load_google", return_value=payload["google"]), patch.object(
Ctx, "load_gsc_links", return_value=gsc_links,
), patch(
- "website_profiling.tools.audit_tools.backlinks.read_gsc_links_status",
+ "website_profiling.tools.audit_tools.backlinks.backlinks.read_gsc_links_status",
return_value={"hasData": True},
), patch(
- "website_profiling.tools.audit_tools.keywords.read_keyword_history",
+ "website_profiling.tools.audit_tools.keywords.keywords.read_keyword_history",
return_value=[{"fetched_at": "2026-06-07", "position": 5}],
), patch(
- "website_profiling.tools.audit_tools.ops.check_all_alerts",
+ "website_profiling.tools.audit_tools.ops.ops.check_all_alerts",
return_value=[{"type": "health_drop"}],
), patch(
- "website_profiling.tools.audit_tools.crawl.slice_from_google_row",
+ "website_profiling.tools.audit_tools.crawl.crawl.slice_from_google_row",
return_value={"queries": []},
), patch(
- "website_profiling.tools.audit_tools.ops.get_property_by_id",
+ "website_profiling.tools.audit_tools.ops.ops.get_property_by_id",
return_value={"id": 1, "google_refresh_token": "tok", "gsc_site_url": "sc-domain:ex.com"},
), patch.object(
conn, "execute",
@@ -390,6 +390,25 @@ def test_list_report_history_and_workflow(conn: MagicMock, ctx: AuditToolContext
hist = dispatch_tool("list_report_history", {"property_id": 1}, conn=fake)
assert hist["count"] == 1
+ # Unresolvable domain ("") must NOT filter on canonical_domain = '' (which
+ # matched no rows); it returns recent history instead.
+ fake_no_domain = FakeConn()
+ fake_no_domain.set_next_cursor(
+ FakeCursor(
+ fetchall_value=[{
+ "id": 11,
+ "site_name": "Ex2",
+ "canonical_domain": "ex2.com",
+ "generated_at": now,
+ }],
+ ),
+ )
+ with patch.object(Ctx, "resolve_property_domain", return_value=""):
+ hist2 = dispatch_tool("list_report_history", {"property_id": 1}, conn=fake_no_domain)
+ assert hist2["count"] == 1
+ executed_sql, _params = fake_no_domain.executed[-1]
+ assert "canonical_domain = %s" not in executed_sql
+
fake2 = FakeConn()
fake2.set_next_cursor(
FakeCursor(
@@ -409,19 +428,44 @@ def test_list_report_history_and_workflow(conn: MagicMock, ctx: AuditToolContext
wf = dispatch_tool("list_issue_workflow", {"property_id": 1}, conn=fake2)
assert wf["count"] == 1
+ # status filter must be pushed into the SQL WHERE (applied before LIMIT),
+ # otherwise the most-recent N rows can be all-other-statuses and the filtered
+ # result is wrongly empty.
+ fake3 = FakeConn()
+ fake3.set_next_cursor(
+ FakeCursor(
+ fetchall_value=[{
+ "issue_key": "k1",
+ "url": "https://ex.com",
+ "category": "Tech",
+ "priority": "High",
+ "message": "msg",
+ "status": "open",
+ "assignee": None,
+ "note": None,
+ "updated_at": now,
+ }],
+ ),
+ )
+ wf2 = dispatch_tool("list_issue_workflow", {"property_id": 1, "status": "open"}, conn=fake3)
+ assert wf2["count"] == 1
+ executed_sql, executed_params = fake3.executed[-1]
+ assert "AND status = %s" in executed_sql
+ assert "open" in executed_params
+
def test_compare_reports(conn: MagicMock, ctx: AuditToolContext) -> None:
current = _full_payload()
baseline = {**current, "summary": {**current["summary"], "total_urls": 8}}
- with patch("website_profiling.tools.audit_tools.compare.read_report_payload", side_effect=[current, baseline]):
+ with patch("website_profiling.tools.audit_tools.compare.compare.read_report_payload", side_effect=[current, baseline]):
result = dispatch_tool("compare_reports", {"baseline_report_id": 1}, context=ctx, conn=conn)
assert "health_score" in result
- with patch("website_profiling.tools.audit_tools.compare_helpers.read_report_payload", side_effect=[current, baseline]):
+ with patch("website_profiling.tools.audit_tools.compare.compare_helpers.read_report_payload", side_effect=[current, baseline]):
diff = dispatch_tool("compare_url_set_diff", {"baseline_report_id": 1}, context=ctx, conn=conn)
assert "new_count" in diff
assert dispatch_tool("compare_reports", {}, context=ctx, conn=conn)["error"]
with patch(
- "website_profiling.tools.audit_tools.compare.read_report_payload",
+ "website_profiling.tools.audit_tools.compare.compare.read_report_payload",
side_effect=[None, baseline],
):
assert "not found" in dispatch_tool("compare_reports", {"baseline_report_id": 1}, context=ctx, conn=conn)["error"]
@@ -496,18 +540,18 @@ def test_new_gap_closure_tools(conn: MagicMock, ctx: AuditToolContext) -> None:
assert dispatch_tool("list_lighthouse_poor_accessibility_pages", {}, context=ctx, conn=conn)["total"] == 1
assert dispatch_tool("list_lighthouse_poor_best_practices_pages", {}, context=ctx, conn=conn)["total"] == 1
assert dispatch_tool("list_lighthouse_cwv_failures", {}, context=ctx, conn=conn)["total"] == 1
- with patch("website_profiling.tools.audit_tools.ops._load_log_analysis", return_value=log_row):
+ with patch("website_profiling.tools.audit_tools.ops.ops._load_log_analysis", return_value=log_row):
assert dispatch_tool("get_log_top_paths", {"property_id": 1}, context=ctx, conn=conn)["total"] == 1
assert dispatch_tool("list_log_only_paths", {"property_id": 1}, context=ctx, conn=conn)["total"] == 1
assert dispatch_tool("list_crawl_only_paths", {"property_id": 1}, context=ctx, conn=conn)["total"] == 1
assert dispatch_tool("get_log_googlebot_stats", {"property_id": 1}, context=ctx, conn=conn)["googlebot_hits"] == 10
- with patch("website_profiling.tools.audit_tools.compare_helpers.read_report_payload", return_value=payload):
+ with patch("website_profiling.tools.audit_tools.compare.compare_helpers.read_report_payload", return_value=payload):
assert "security_deltas" in dispatch_tool("compare_security_deltas", {"baseline_report_id": 1}, context=ctx, conn=conn)
assert "health_score" in dispatch_tool("compare_health_score_delta", {"baseline_report_id": 1}, context=ctx, conn=conn)
- with patch("website_profiling.tools.audit_tools.llm_tools.list_properties_public", return_value=[{"id": 1, "name": "ex.com", "canonical_domain": "ex.com"}]):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.list_properties_public", return_value=[{"id": 1, "name": "ex.com", "canonical_domain": "ex.com"}]):
conn.execute = MagicMock(return_value=MagicMock(fetchone=MagicMock(return_value={"health_score": 80, "generated_at": datetime.now(timezone.utc), "report_id": 1, "issue_counts": "{}"})))
assert dispatch_tool("get_portfolio_summary", {}, conn=conn)["count"] == 1
- with patch("website_profiling.tools.audit_tools.llm_tools.batch_expand", return_value={"widgets": {"web": ["widgets near me"]}}):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.batch_expand", return_value={"widgets": {"web": ["widgets near me"]}}):
assert dispatch_tool("expand_keywords", {"seeds": ["widgets"]}, context=ctx, conn=conn)["seed_count"] == 1
assert dispatch_tool("generate_content_brief", {"keyword": "widgets"}, context=ctx, conn=conn)["brief"]["keyword"] == "widgets"
@@ -516,7 +560,7 @@ def test_export_tools(conn: MagicMock, ctx: AuditToolContext, tmp_path, monkeypa
monkeypatch.setenv("DATA_DIR", str(tmp_path))
payload = _full_payload()
with patch.object(Ctx, "load_payload", return_value=payload), patch(
- "website_profiling.tools.audit_tools.export_tools.export_audit_csv",
+ "website_profiling.tools.audit_tools.export.export_tools.export_audit_csv",
return_value="url,status\nhttps://ex.com,200\n",
):
out = dispatch_tool("export_audit_report", {"format": "csv"}, context=ctx, conn=conn)
@@ -525,7 +569,7 @@ def test_export_tools(conn: MagicMock, ctx: AuditToolContext, tmp_path, monkeypa
formats = dispatch_tool("list_export_formats", {}, context=ctx, conn=conn)
assert formats.get("formats")
with patch.object(Ctx, "load_payload", return_value=payload), patch(
- "website_profiling.tools.audit_tools.export_tools._dispatch",
+ "website_profiling.tools.audit_tools.export.export_tools._dispatch",
return_value={"pages": [{"url": "https://ex.com/broken", "status": "404"}], "total": 1, "truncated": False},
):
csv_out = dispatch_tool(
@@ -536,7 +580,7 @@ def test_export_tools(conn: MagicMock, ctx: AuditToolContext, tmp_path, monkeypa
)
assert csv_out.get("artifact_id")
assert csv_out.get("total") == 1
- with patch("website_profiling.tools.audit_tools.export_tools.load_compare_pair") as mock_pair:
+ with patch("website_profiling.tools.audit_tools.export.export_tools.load_compare_pair") as mock_pair:
mock_pair.return_value = (payload, payload, 2, 1, None)
cmp_out = dispatch_tool("export_compare_csv", {"baseline_report_id": 1}, context=ctx, conn=conn)
assert cmp_out.get("artifact_id")
diff --git a/tests/tools/test_audit_tools_expansion.py b/tests/tools/test_audit_tools_expansion.py
index 70a64773..51dc9700 100644
--- a/tests/tools/test_audit_tools_expansion.py
+++ b/tests/tools/test_audit_tools_expansion.py
@@ -143,7 +143,7 @@ def test_crawl_extras_tools(conn: MagicMock, ctx: AuditToolContext) -> None:
def test_compare_indexation_and_orphan_deltas(conn: MagicMock, ctx: AuditToolContext) -> None:
current = _payload()
baseline = {**_payload(), "indexation_coverage": {"counts": {"crawled": 8}, "lists": {}, "lists_total": {}}, "orphan_urls": []}
- with patch("website_profiling.tools.audit_tools.compare_slices.load_compare_pair", return_value=(current, baseline, 2, 1, None)):
+ with patch("website_profiling.tools.audit_tools.compare.compare_slices.load_compare_pair", return_value=(current, baseline, 2, 1, None)):
idx = dispatch_tool("compare_indexation_deltas", {"baseline_report_id": 1}, context=ctx, conn=conn)
assert idx["count_deltas"]
orphan = dispatch_tool("compare_orphan_deltas", {"baseline_report_id": 1}, context=ctx, conn=conn)
@@ -152,7 +152,7 @@ def test_compare_indexation_and_orphan_deltas(conn: MagicMock, ctx: AuditToolCon
def test_geo_tools_mocked(conn: MagicMock, ctx: AuditToolContext) -> None:
with patch.object(Ctx, "load_payload", return_value=_payload()), patch.object(Ctx, "load_crawl_df", return_value=_crawl_df()), patch(
- "website_profiling.tools.audit_tools.geo_tools._fetch_llms_txt",
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_txt",
return_value={"found": False},
):
geo = dispatch_tool("get_geo_readiness_score", {}, context=ctx, conn=conn)
@@ -176,7 +176,7 @@ def test_prioritize_fix_roadmap(conn: MagicMock, ctx: AuditToolContext) -> None:
def test_integration_tools_missing_config(conn: MagicMock, ctx: AuditToolContext) -> None:
- with patch("website_profiling.tools.audit_tools.integration_tools.get_property_by_id", return_value={"canonical_domain": "ex.com"}):
+ with patch("website_profiling.tools.audit_tools.integrations.integration_tools.get_property_by_id", return_value={"canonical_domain": "ex.com"}):
gsc = dispatch_tool("get_gsc_url_inspection", {"url": "https://ex.com/"}, context=ctx, conn=conn)
assert gsc["missing"] is True
bing = dispatch_tool("get_bing_index_status", {"url": "https://ex.com/"}, context=ctx, conn=conn)
@@ -192,11 +192,11 @@ def test_gsc_index_coverage_from_payload(conn: MagicMock, ctx: AuditToolContext)
def test_gsc_url_inspection_mocked(conn: MagicMock, ctx: AuditToolContext) -> None:
prop = {"google_refresh_token": "tok", "gsc_site_url": "https://ex.com/"}
- with patch("website_profiling.tools.audit_tools.integration_tools.get_property_by_id", return_value=prop), patch(
- "website_profiling.tools.audit_tools.integration_tools.build_credentials",
+ with patch("website_profiling.tools.audit_tools.integrations.integration_tools.get_property_by_id", return_value=prop), patch(
+ "website_profiling.tools.audit_tools.integrations.integration_tools.build_credentials",
return_value=object(),
), patch(
- "website_profiling.tools.audit_tools.integration_tools.inspect_url",
+ "website_profiling.tools.audit_tools.integrations.integration_tools.inspect_url",
return_value={"verdict": "PASS", "provenance": "GSC"},
):
out = dispatch_tool("get_gsc_url_inspection", {"url": "https://ex.com/page"}, context=ctx, conn=conn)
diff --git a/tests/tools/test_audit_tools_expansion_coverage.py b/tests/tools/test_audit_tools_expansion_coverage.py
index 10bf15fc..cc368743 100644
--- a/tests/tools/test_audit_tools_expansion_coverage.py
+++ b/tests/tools/test_audit_tools_expansion_coverage.py
@@ -10,16 +10,16 @@
from website_profiling.tools.audit_tools import dispatch_tool
from website_profiling.tools.audit_tools.context import AuditToolContext as Ctx
-from website_profiling.tools.audit_tools import crawl_lists as cl_mod
-from website_profiling.tools.audit_tools import crawl_metrics as cm_mod
-from website_profiling.tools.audit_tools import geo_tools as geo_mod
-from website_profiling.tools.audit_tools import google as google_mod
-from website_profiling.tools.audit_tools import integration_tools as int_mod
-from website_profiling.tools.audit_tools import keywords as kw_mod
-from website_profiling.tools.audit_tools import llm_tools as llm_mod
-from website_profiling.tools.audit_tools import payload_extras as pe_mod
-from website_profiling.tools.audit_tools import compare_slices as cmp_mod
-from website_profiling.tools.audit_tools import report as report_mod
+from website_profiling.tools.audit_tools.crawl import crawl_lists as cl_mod
+from website_profiling.tools.audit_tools.crawl import crawl_metrics as cm_mod
+from website_profiling.tools.audit_tools.geo import geo_tools as geo_mod
+from website_profiling.tools.audit_tools.google import google as google_mod
+from website_profiling.tools.audit_tools.integrations import integration_tools as int_mod
+from website_profiling.tools.audit_tools.keywords import keywords as kw_mod
+from website_profiling.tools.audit_tools.integrations import llm_tools as llm_mod
+from website_profiling.tools.audit_tools.core import payload_extras as pe_mod
+from website_profiling.tools.audit_tools.compare import compare_slices as cmp_mod
+from website_profiling.tools.audit_tools.report import report as report_mod
@pytest.fixture
@@ -218,11 +218,11 @@ def test_geo_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
assert geo_mod._fetch_llms_txt("")["found"] is False
mock_resp = MagicMock(status_code=200, text="llms content", content=b"llms content")
- with patch("website_profiling.tools.audit_tools.geo_tools.requests.get", return_value=mock_resp):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools.requests.get", return_value=mock_resp):
found = geo_mod._fetch_llms_txt("ex.com")
assert found["found"] is True
- with patch("website_profiling.tools.audit_tools.geo_tools.requests.get", side_effect=requests.RequestException("fail")):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools.requests.get", side_effect=requests.RequestException("fail")):
assert geo_mod._fetch_llms_txt("ex.com")["found"] is False
with patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch.object(
@@ -237,7 +237,7 @@ def test_geo_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
with patch.object(Ctx, "load_payload", return_value=payload), patch.object(
Ctx, "load_crawl_df", return_value=_crawl_df(),
), patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch(
- "website_profiling.tools.audit_tools.geo_tools._fetch_llms_txt",
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_txt",
return_value={"found": True},
):
geo = geo_mod.get_geo_readiness_score(conn, ctx, {})
@@ -257,7 +257,7 @@ def test_geo_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
assert geo_mod.get_internal_link_suggestions(conn, ctx, {"url": "https://ex.com/nope"})["error"]
with patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch(
- "website_profiling.tools.audit_tools.geo_tools._fetch_llms_txt", return_value={"found": False},
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_txt", return_value={"found": False},
):
assert geo_mod.get_llms_txt_status(conn, ctx, {})["domain"] == "ex.com"
@@ -282,11 +282,27 @@ def test_geo_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
empty_geo = pd.DataFrame([{"url": "https://ex.com/e", "status": "404", "page_analysis": "{}"}])
with patch.object(Ctx, "load_payload", return_value={}), patch.object(Ctx, "load_crawl_df", return_value=empty_geo), patch.object(
Ctx, "resolve_property_domain", return_value="ex.com",
- ), patch("website_profiling.tools.audit_tools.geo_tools._fetch_llms_txt", return_value={"found": False}):
+ ), patch("website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_txt", return_value={"found": False}):
geo_empty = geo_mod.get_geo_readiness_score(conn, ctx, {})
assert geo_empty["components"]["schema_coverage"] == 0
+def test_geo_readiness_survives_http_task_exception(conn: MagicMock, ctx: Ctx) -> None:
+ # A live-HTTP task raising (beyond the RequestException it guards internally)
+ # must degrade to a 0 sub-score, not crash the whole composite score.
+ with patch.object(Ctx, "load_payload", return_value={}), patch.object(
+ Ctx, "load_crawl_df", return_value=_crawl_df(),
+ ), patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch(
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_txt", return_value={"found": False},
+ ), patch(
+ "website_profiling.tools.audit_tools.geo.geo_tools._score_robots_ai_access",
+ side_effect=RuntimeError("boom"),
+ ):
+ result = geo_mod.get_geo_readiness_score(conn, ctx, {})
+ assert 0 <= result["geo_readiness_score"] <= 100
+ assert result["categories"]["robots_ai_access"]["score"] == 0
+
+
def test_google_ctr_and_keywords(conn: MagicMock, ctx: Ctx) -> None:
with patch.object(Ctx, "load_google", return_value=None):
assert google_mod.get_gsc_ctr_opportunity_pages(conn, ctx, {})["error"]
@@ -321,10 +337,10 @@ def test_google_ctr_and_keywords(conn: MagicMock, ctx: Ctx) -> None:
def test_integration_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
assert int_mod.get_gsc_url_inspection(conn, Ctx(property_id=None), {"url": "https://ex.com"})["missing"]
assert int_mod.get_gsc_url_inspection(conn, ctx, {})["missing"]
- with patch("website_profiling.tools.audit_tools.integration_tools.get_property_by_id", return_value=None):
+ with patch("website_profiling.tools.audit_tools.integrations.integration_tools.get_property_by_id", return_value=None):
assert int_mod.get_gsc_url_inspection(conn, ctx, {"url": "https://ex.com"})["missing"]
- with patch("website_profiling.tools.audit_tools.integration_tools.get_property_by_id", return_value={"canonical_domain": "ex.com"}):
+ with patch("website_profiling.tools.audit_tools.integrations.integration_tools.get_property_by_id", return_value={"canonical_domain": "ex.com"}):
assert int_mod.get_bing_index_status(conn, ctx, {})["missing"]
with patch("website_profiling.db.config_store.read_pipeline_config", return_value=({"bing_webmaster_api_key": "key"}, {})):
with patch("website_profiling.integrations.bing.webmaster._bing_json_get", return_value={"d": {"indexed": True}}):
@@ -350,8 +366,8 @@ def test_integration_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
assert cite["entity_in_ner_summary"] is True
prop = {"google_refresh_token": "t", "gsc_site_url": "https://ex.com/"}
- with patch("website_profiling.tools.audit_tools.integration_tools.get_property_by_id", return_value=prop), patch(
- "website_profiling.tools.audit_tools.integration_tools.build_credentials", side_effect=RuntimeError("creds"),
+ with patch("website_profiling.tools.audit_tools.integrations.integration_tools.get_property_by_id", return_value=prop), patch(
+ "website_profiling.tools.audit_tools.integrations.integration_tools.build_credentials", side_effect=RuntimeError("creds"),
):
out = int_mod.get_gsc_url_inspection(conn, ctx, {"url": "https://ex.com"})
assert "credentials error" in out["error"]
@@ -369,10 +385,10 @@ def test_llm_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
assert llm_mod._llm_disabled_response()["missing"] is True
assert llm_mod.generate_issue_fix(conn, ctx, {})["missing"] is True
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={}):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={}):
assert llm_mod.generate_issue_fix(conn, ctx, {"message": ""})["error"]
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={}), patch(
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={}), patch(
"website_profiling.llm.issue_fixes.generate_issue_fix_suggestion",
return_value={"fix": "x"},
), patch("website_profiling.llm_config.load_llm_config_from_db", return_value={}):
@@ -382,17 +398,17 @@ def test_llm_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
cat_data = {"name": "Tech", "score": 80, "issues": [{"priority": "High", "message": "Slow", "url": "https://ex.com"}]}
assert llm_mod.summarize_category_for_client(conn, ctx, {})["error"] == "category_id is required"
- with patch("website_profiling.tools.audit_tools.issues.get_category_issues", return_value=cat_data), patch(
- "website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={"missing": True},
+ with patch("website_profiling.tools.audit_tools.issues.issues.get_category_issues", return_value=cat_data), patch(
+ "website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={"missing": True},
):
summary = llm_mod.summarize_category_for_client(conn, ctx, {"category_id": "tech"})
assert summary["provenance"] == "Crawl"
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={}), patch(
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={}), patch(
"website_profiling.llm.base.get_llm_client",
return_value=MagicMock(complete_json=MagicMock(return_value={"summary": "Client text"})),
), patch("website_profiling.llm_config.load_llm_config_from_db", return_value={}), patch(
- "website_profiling.tools.audit_tools.issues.get_category_issues", return_value=cat_data,
+ "website_profiling.tools.audit_tools.issues.issues.get_category_issues", return_value=cat_data,
):
narrative = llm_mod.summarize_category_for_client(conn, ctx, {"category_id": "tech"})
assert narrative["narrative"] == "Client text"
@@ -400,13 +416,13 @@ def test_llm_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
with patch.object(Ctx, "load_payload", return_value=None):
assert llm_mod.prioritize_fix_roadmap(conn, ctx, {})["error"]
with patch.object(Ctx, "load_payload", return_value={"categories": []}), patch(
- "website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={"error": "off"},
+ "website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={"error": "off"},
), patch.object(Ctx, "load_google", return_value=None), patch.object(Ctx, "load_crawl_df", return_value=None):
snippet = llm_mod.analyze_serp_snippet_for_url(conn, ctx, {"url": "https://ex.com"})
assert snippet["provenance"] == "Crawl"
with patch.object(Ctx, "load_payload", return_value={"site_name": "Ex", "top_pages": [{"url": "https://ex.com"}]}), patch(
- "website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={},
+ "website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={},
), patch(
"website_profiling.llm.base.get_llm_client",
return_value=MagicMock(complete_json=MagicMock(return_value={"content": "# Ex\n\nPolished"})),
@@ -414,11 +430,11 @@ def test_llm_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
draft = llm_mod.draft_llms_txt(conn, ctx, {})
assert "Polished" in draft["llms_txt_draft"]
- with patch("website_profiling.tools.audit_tools.issues.get_category_issues", return_value={"error": "no cat"}):
+ with patch("website_profiling.tools.audit_tools.issues.issues.get_category_issues", return_value={"error": "no cat"}):
assert llm_mod.summarize_category_for_client(conn, ctx, {"category_id": "x"})["error"] == "no cat"
with patch.object(Ctx, "load_payload", return_value={"categories": []}), patch(
- "website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={},
+ "website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={},
), patch("website_profiling.llm.base.get_llm_client", return_value=None), patch.object(
Ctx, "load_google", return_value={"gsc": {}},
), patch.object(Ctx, "load_crawl_df", return_value=pd.DataFrame([{"url": "https://ex.com", "title": "T", "meta_description": "D"}])):
@@ -430,7 +446,7 @@ def test_llm_tools_paths(conn: MagicMock, ctx: Ctx) -> None:
client = MagicMock(complete_json=MagicMock(side_effect=RuntimeError("llm fail")))
with patch.object(Ctx, "load_payload", return_value={"site_name": "Ex"}), patch(
- "website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={},
+ "website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={},
), patch("website_profiling.llm.base.get_llm_client", return_value=client), patch(
"website_profiling.llm_config.load_llm_config_from_db", return_value={},
):
@@ -499,16 +515,16 @@ def test_expansion_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
with patch.object(Ctx, "load_google", return_value=low_ctr):
assert google_mod.get_gsc_ctr_opportunity_pages(conn, ctx, {})["total"] == 1
- with patch("website_profiling.tools.audit_tools.integration_tools.get_property_by_id", return_value={"google_refresh_token": "t"}), patch(
- "website_profiling.tools.audit_tools.integration_tools.build_credentials", return_value=None,
+ with patch("website_profiling.tools.audit_tools.integrations.integration_tools.get_property_by_id", return_value={"google_refresh_token": "t"}), patch(
+ "website_profiling.tools.audit_tools.integrations.integration_tools.build_credentials", return_value=None,
):
assert int_mod.get_gsc_url_inspection(conn, ctx, {"url": "https://ex.com"})["missing"]
- with patch("website_profiling.tools.audit_tools.integration_tools.get_property_by_id", return_value={"google_refresh_token": "t", "canonical_domain": ""}), patch(
- "website_profiling.tools.audit_tools.integration_tools.build_credentials", return_value=object(),
+ with patch("website_profiling.tools.audit_tools.integrations.integration_tools.get_property_by_id", return_value={"google_refresh_token": "t", "canonical_domain": ""}), patch(
+ "website_profiling.tools.audit_tools.integrations.integration_tools.build_credentials", return_value=object(),
):
assert "GSC site URL" in int_mod.get_gsc_url_inspection(conn, ctx, {"url": "https://ex.com"})["error"]
assert int_mod.get_bing_index_status(conn, Ctx(property_id=None), {"url": "https://ex.com"})["missing"]
- with patch("website_profiling.tools.audit_tools.integration_tools.get_property_by_id", return_value=None):
+ with patch("website_profiling.tools.audit_tools.integrations.integration_tools.get_property_by_id", return_value=None):
assert int_mod.get_bing_index_status(conn, ctx, {"url": "https://ex.com"})["missing"]
with patch.object(Ctx, "load_keywords", return_value={"rows": []}):
@@ -543,20 +559,20 @@ def test_expansion_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
assert links["suggestions"]
with patch.object(Ctx, "load_payload", return_value={"categories": []}), patch(
- "website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={},
+ "website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={},
):
assert llm_mod.prioritize_fix_roadmap(conn, ctx, {"limit": "bad"})["roadmap"] == []
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={}), patch(
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={}), patch(
"website_profiling.llm.base.get_llm_client", return_value=MagicMock(complete_json=MagicMock(return_value={})),
), patch("website_profiling.llm_config.load_llm_config_from_db", return_value={}), patch(
- "website_profiling.tools.audit_tools.issues.get_category_issues", return_value={"name": "T", "score": 1, "issues": []},
+ "website_profiling.tools.audit_tools.issues.issues.get_category_issues", return_value={"name": "T", "score": 1, "issues": []},
), patch("website_profiling.llm.base.parse_json_response", return_value={"summary": "parsed"}):
summary = llm_mod.summarize_category_for_client(conn, ctx, {"category_id": "t"})
assert summary.get("narrative") == "parsed"
with patch.object(Ctx, "load_payload", return_value={"site_name": "Ex"}), patch(
- "website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={},
+ "website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={},
), patch("website_profiling.llm.base.get_llm_client", return_value=MagicMock(complete_json=MagicMock(side_effect=RuntimeError("x")))):
draft = llm_mod.draft_llms_txt(conn, ctx, {})
assert "Ex" in draft["llms_txt_draft"]
@@ -608,10 +624,10 @@ def test_expansion_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
):
assert llm_mod._llm_disabled_response() == {}
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={}), patch(
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={}), patch(
"website_profiling.llm.base.get_llm_client", return_value=MagicMock(complete_json=MagicMock(side_effect=RuntimeError("boom"))),
), patch("website_profiling.llm_config.load_llm_config_from_db", return_value={}), patch(
- "website_profiling.tools.audit_tools.issues.get_category_issues", return_value={"name": "T", "score": 1, "issues": []},
+ "website_profiling.tools.audit_tools.issues.issues.get_category_issues", return_value={"name": "T", "score": 1, "issues": []},
):
err_summary = llm_mod.summarize_category_for_client(conn, ctx, {"category_id": "t"})
assert "narrative_error" in err_summary
@@ -619,7 +635,7 @@ def test_expansion_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
client_ok = MagicMock(complete_json=MagicMock(return_value={"title": "New", "meta_description": "Meta"}))
with patch.object(Ctx, "load_google", return_value={"gsc": {}}), patch.object(
Ctx, "load_crawl_df", return_value=pd.DataFrame([{"url": "https://ex.com", "title": "Old", "meta_description": "Old meta"}]),
- ), patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={}), patch(
+ ), patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={}), patch(
"website_profiling.llm.base.get_llm_client", return_value=client_ok,
), patch("website_profiling.llm_config.load_llm_config_from_db", return_value={}):
serp = llm_mod.analyze_serp_snippet_for_url(conn, ctx, {"url": "https://ex.com"})
@@ -628,6 +644,6 @@ def test_expansion_coverage_gaps(conn: MagicMock, ctx: Ctx) -> None:
def test_compare_slices_error_paths(conn: MagicMock, ctx: Ctx) -> None:
err = {"error": "bad baseline"}
- with patch("website_profiling.tools.audit_tools.compare_slices.load_compare_pair", return_value=(None, None, None, None, err)):
+ with patch("website_profiling.tools.audit_tools.compare.compare_slices.load_compare_pair", return_value=(None, None, None, None, err)):
assert cmp_mod.compare_indexation_deltas(conn, ctx, {}) == err
assert cmp_mod.compare_orphan_deltas(conn, ctx, {}) == err
diff --git a/tests/tools/test_audit_tools_links_extras.py b/tests/tools/test_audit_tools_links_extras.py
index 2d9ee34b..6a6903b6 100644
--- a/tests/tools/test_audit_tools_links_extras.py
+++ b/tests/tools/test_audit_tools_links_extras.py
@@ -6,8 +6,8 @@
import pytest
from website_profiling.tools.audit_tools.context import AuditToolContext
-from website_profiling.tools.audit_tools.export_extras import export_sitemap_xml, validate_rich_results
-from website_profiling.tools.audit_tools.links import (
+from website_profiling.tools.audit_tools.export.export_extras import export_sitemap_xml, validate_rich_results
+from website_profiling.tools.audit_tools.links.links import (
get_inlink_anchors,
get_link_rel_summary,
list_broken_link_sources,
@@ -81,7 +81,7 @@ def test_export_sitemap_xml_tool(ctx, monkeypatch):
scoped.load_payload.return_value = {"links": [{"url": "https://example.com/", "status": "200"}]}
ctx.load_payload = scoped.load_payload
monkeypatch.setattr(
- "website_profiling.tools.audit_tools.export_extras.save_artifact",
+ "website_profiling.tools.audit_tools.export.export_extras.save_artifact",
lambda content, **kwargs: {"path": "/tmp/sitemap.xml", "filename": kwargs.get("filename")},
)
out = export_sitemap_xml(conn, ctx, {})
@@ -98,3 +98,17 @@ def test_validate_rich_results_tool(ctx):
out = validate_rich_results(conn, ctx, {"limit": 5})
assert out["count"] == 1
assert out["rows"][0]["provenance"] == "Crawl analysis"
+
+
+def test_validate_rich_results_ignores_credential_errors(ctx, monkeypatch):
+ conn = MagicMock()
+ ctx.load_payload = MagicMock(return_value={
+ "links": [{"url": "https://example.com/", "status": "200", "has_schema": True, "page_analysis": {}}],
+ })
+ monkeypatch.setattr(
+ "website_profiling.integrations.google.auth.build_credentials",
+ MagicMock(side_effect=RuntimeError("no creds")),
+ )
+ out = validate_rich_results(conn, ctx, {"limit": 5})
+ assert out["count"] == 1
+ assert out["provenance"] == "Crawl analysis"
diff --git a/tests/tools/test_crawl_actions.py b/tests/tools/test_crawl_actions.py
index 2b9d7a05..23cb492e 100644
--- a/tests/tools/test_crawl_actions.py
+++ b/tests/tools/test_crawl_actions.py
@@ -4,7 +4,7 @@
from unittest.mock import MagicMock, patch
from website_profiling.tools.audit_tools.context import AuditToolContext
-from website_profiling.tools.audit_tools.crawl_actions import (
+from website_profiling.tools.audit_tools.crawl.crawl_actions import (
CHAT_CRAWL_TOOL,
prepare_audit_run,
)
@@ -32,7 +32,7 @@ def test_prepare_audit_run_disabled_when_setting_off() -> None:
conn = _FakeConn()
ctx = AuditToolContext(property_id=1)
with patch(
- "website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl",
return_value=False,
):
out = prepare_audit_run(conn, ctx, {"start_url": "https://example.com"})
@@ -45,10 +45,10 @@ def test_prepare_audit_run_ready_default() -> None:
ctx = AuditToolContext(property_id=1)
saved = {"site_name": "Test", "run_crawl": "false"}
with patch(
- "website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl",
return_value=True,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=(saved, []),
):
out = prepare_audit_run(
@@ -73,10 +73,10 @@ def test_prepare_audit_run_custom_overrides() -> None:
conn = _FakeConn()
ctx = AuditToolContext(property_id=2)
with patch(
- "website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl",
return_value=True,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({}, []),
):
out = prepare_audit_run(
@@ -104,10 +104,10 @@ def test_prepare_audit_run_new_property_payload() -> None:
conn = _FakeConn()
ctx = AuditToolContext(property_id=None)
with patch(
- "website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl",
return_value=True,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({}, []),
):
out = prepare_audit_run(
@@ -132,7 +132,7 @@ def test_prepare_audit_run_job_running() -> None:
conn = _FakeConn(job_running=True)
ctx = AuditToolContext(property_id=1)
with patch(
- "website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl",
return_value=True,
):
out = prepare_audit_run(conn, ctx, {"start_url": "https://example.com"})
@@ -149,13 +149,13 @@ def test_prepare_audit_run_uses_property_default_preset() -> None:
"default_crawl_preset": "spa",
}
with patch(
- "website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl",
return_value=True,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({}, []),
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.get_property_by_id",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.get_property_by_id",
return_value=prop,
):
out = prepare_audit_run(
diff --git a/tests/tools/test_export_tools_coverage.py b/tests/tools/test_export_tools_coverage.py
index 6544607d..4afabc78 100644
--- a/tests/tools/test_export_tools_coverage.py
+++ b/tests/tools/test_export_tools_coverage.py
@@ -37,27 +37,27 @@ def test_export_tools_formats(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch)
assert dispatch_tool("export_list_as_csv", {"tool_name": "nope"}, context=ctx, conn=conn)["error"]
with patch.object(Ctx, "load_payload", return_value=payload), patch(
- "website_profiling.tools.audit_tools.export_tools.export_audit_html",
+ "website_profiling.tools.audit_tools.export.export_tools.export_audit_html",
return_value="",
):
out = dispatch_tool("export_audit_report", {"format": "html"}, context=ctx, conn=conn)
assert out.get("artifact_id")
with patch.object(Ctx, "load_payload", return_value=payload), patch(
- "website_profiling.tools.audit_tools.export_tools.export_audit_json",
+ "website_profiling.tools.audit_tools.export.export_tools.export_audit_json",
return_value="{}",
):
out = dispatch_tool("export_audit_report", {"format": "json"}, context=ctx, conn=conn)
assert out.get("format") == "json"
with patch.object(Ctx, "load_payload", return_value=payload), patch(
- "website_profiling.tools.audit_tools.export_tools.export_audit_pdf",
+ "website_profiling.tools.audit_tools.export.export_tools.export_audit_pdf",
side_effect=FileNotFoundError,
):
assert dispatch_tool("export_audit_report", {"format": "pdf"}, context=ctx, conn=conn)["error"]
with patch.object(Ctx, "load_payload", return_value=payload), patch(
- "website_profiling.tools.audit_tools.export_tools._dispatch",
+ "website_profiling.tools.audit_tools.export.export_tools._dispatch",
return_value={"meta": "only"},
):
assert dispatch_tool(
@@ -68,7 +68,7 @@ def test_export_tools_formats(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch)
)["error"]
with patch.object(Ctx, "load_payload", return_value=payload), patch(
- "website_profiling.tools.audit_tools.export_tools.load_compare_pair",
+ "website_profiling.tools.audit_tools.export.export_tools.load_compare_pair",
return_value=(None, None, None, None, {"error": "bad"}),
):
assert dispatch_tool("export_compare_csv", {"baseline_report_id": 1}, context=ctx, conn=conn)["error"]
@@ -77,14 +77,14 @@ def test_export_tools_formats(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch)
def test_export_audit_report_paths(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch) -> None:
monkeypatch.setenv("DATA_DIR", str(tmp_path))
with patch.object(Ctx, "load_payload", return_value=_payload()), patch(
- "website_profiling.tools.audit_tools.export_tools.export_audit_pdf",
+ "website_profiling.tools.audit_tools.export.export_tools.export_audit_pdf",
return_value=b"%PDF",
):
pdf_out = dispatch_tool("export_audit_report", {"format": "pdf"}, context=ctx, conn=conn)
assert pdf_out.get("format") == "pdf"
with patch.object(Ctx, "load_payload", return_value=_payload()), patch(
- "website_profiling.tools.audit_tools.export_tools.export_audit_csv",
+ "website_profiling.tools.audit_tools.export.export_tools.export_audit_csv",
side_effect=RuntimeError("export failed"),
):
assert "export failed" in dispatch_tool("export_audit_report", {"format": "csv"}, context=ctx, conn=conn)["error"]
diff --git a/tests/tools/test_geo_parity.py b/tests/tools/test_geo_parity.py
index 111b097e..35cd7144 100644
--- a/tests/tools/test_geo_parity.py
+++ b/tests/tools/test_geo_parity.py
@@ -10,7 +10,7 @@
# Phase 1 helpers: llms.txt depth scoring
# ---------------------------------------------------------------------------
-from website_profiling.tools.audit_tools.geo_tools import (
+from website_profiling.tools.audit_tools.geo.geo_tools import (
_band,
_fetch_ai_discovery,
_fetch_llms_txt,
@@ -148,7 +148,7 @@ def side_effect(url, **kwargs):
# Phase 1: robots AI-bot tier parsing
# ---------------------------------------------------------------------------
-from website_profiling.tools.audit_tools.geo_list_tools import (
+from website_profiling.tools.audit_tools.geo.geo_list_tools import (
_AI_BOT_TIERS,
_AI_CRAWLER_AGENTS,
_agent_blocked,
@@ -210,7 +210,7 @@ def test_agent_blocked_empty_robots() -> None:
# Phase 2: citability scoring
# ---------------------------------------------------------------------------
-from website_profiling.tools.audit_tools.geo_citability import _citability_signals
+from website_profiling.tools.audit_tools.geo.geo_citability import _citability_signals
def _make_rec(**kwargs) -> dict:
@@ -292,7 +292,7 @@ def test_citability_full_page() -> None:
# Phase 3: generative fix tools
# ---------------------------------------------------------------------------
-from website_profiling.tools.audit_tools.llm_tools import (
+from website_profiling.tools.audit_tools.integrations.llm_tools import (
generate_geo_fix_bundle,
generate_meta_tags,
generate_robots_txt,
@@ -312,9 +312,9 @@ def _make_conn_ctx():
def test_generate_robots_txt_has_all_bots() -> None:
- from website_profiling.tools.audit_tools.geo_list_tools import _AI_BOT_TIERS
+ from website_profiling.tools.audit_tools.geo.geo_list_tools import _AI_BOT_TIERS
conn, ctx = _make_conn_ctx()
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
result = generate_robots_txt(conn, ctx, {})
robots = result["robots_txt"]
for agent in list(_AI_BOT_TIERS.keys())[:5]:
@@ -325,7 +325,7 @@ def test_generate_robots_txt_has_all_bots() -> None:
def test_generate_schema_website() -> None:
conn, ctx = _make_conn_ctx()
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
result = generate_schema(conn, ctx, {"schema_type": "WebSite"})
assert result["schema_type"] == "WebSite"
schema = result["schema_json"]
@@ -336,14 +336,14 @@ def test_generate_schema_website() -> None:
def test_generate_schema_organization() -> None:
conn, ctx = _make_conn_ctx()
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
result = generate_schema(conn, ctx, {"schema_type": "Organization"})
assert result["schema_json"]["@type"] == "Organization"
def test_generate_schema_unknown_type_defaults_to_website() -> None:
conn, ctx = _make_conn_ctx()
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
result = generate_schema(conn, ctx, {"schema_type": "NonExistent"})
assert result["schema_type"] == "WebSite"
@@ -367,7 +367,7 @@ def test_generate_geo_fix_bundle_returns_structure() -> None:
not_found_resp.text = ""
not_found_resp.content = b""
with patch("requests.get", return_value=not_found_resp):
- with patch("website_profiling.tools.audit_tools.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response", return_value={"error": "disabled"}):
result = generate_geo_fix_bundle(conn, ctx, {})
assert "domain" in result
assert "llms_txt" in result
@@ -431,7 +431,7 @@ def test_resolve_api_key_missing(monkeypatch) -> None:
def test_check_ai_citations_live_requires_opt_in() -> None:
- from website_profiling.tools.audit_tools.integration_tools import check_ai_citations_live
+ from website_profiling.tools.audit_tools.integrations.integration_tools import check_ai_citations_live
conn, ctx = _make_conn_ctx()
result = check_ai_citations_live(conn, ctx, {"brand": "Example", "provider": "perplexity"})
assert "error" in result
@@ -439,7 +439,7 @@ def test_check_ai_citations_live_requires_opt_in() -> None:
def test_check_ai_citations_live_missing_key() -> None:
- from website_profiling.tools.audit_tools.integration_tools import check_ai_citations_live
+ from website_profiling.tools.audit_tools.integrations.integration_tools import check_ai_citations_live
import os
conn, ctx = _make_conn_ctx()
env_key = "PERPLEXITY_API_KEY"
@@ -457,7 +457,7 @@ def test_check_ai_citations_live_missing_key() -> None:
# Phase 5: advanced detectors
# ---------------------------------------------------------------------------
-from website_profiling.tools.audit_tools.geo_detectors import (
+from website_profiling.tools.audit_tools.geo.geo_detectors import (
_check_negative_signals_for_page,
_INJECTION_PATTERNS,
)
@@ -525,19 +525,19 @@ def test_injection_pattern_llm_instruction() -> None:
def test_content_decay_temporal_pattern() -> None:
- from website_profiling.tools.audit_tools.geo_detectors import _TEMPORAL_DECAY
+ from website_profiling.tools.audit_tools.geo.geo_detectors import _TEMPORAL_DECAY
text = "As of 2024, the platform has grown significantly."
assert _TEMPORAL_DECAY.search(text)
def test_content_decay_version_pattern() -> None:
- from website_profiling.tools.audit_tools.geo_detectors import _VERSION_DECAY
+ from website_profiling.tools.audit_tools.geo.geo_detectors import _VERSION_DECAY
text = "The app requires version v2.3 or higher."
assert _VERSION_DECAY.search(text)
def test_rag_chunk_readiness_anchor_sentence() -> None:
- from website_profiling.tools.audit_tools.geo_detectors import _ANCHOR_SENTENCE_PATTERN
+ from website_profiling.tools.audit_tools.geo.geo_detectors import _ANCHOR_SENTENCE_PATTERN
text = "Python is a high-level programming language that enables rapid development."
assert _ANCHOR_SENTENCE_PATTERN.search(text)
@@ -546,13 +546,13 @@ def test_rag_chunk_readiness_anchor_sentence() -> None:
# Phase 6: GEO drift compare
# ---------------------------------------------------------------------------
-from website_profiling.tools.audit_tools.compare_slices import compare_geo_score_deltas
+from website_profiling.tools.audit_tools.compare.compare_slices import compare_geo_score_deltas
def test_compare_geo_score_deltas_missing_baseline() -> None:
conn = MagicMock()
ctx = MagicMock()
- with patch("website_profiling.tools.audit_tools.compare_slices.load_compare_pair",
+ with patch("website_profiling.tools.audit_tools.compare.compare_slices.load_compare_pair",
return_value=(None, None, None, None, {"error": "no baseline"})):
result = compare_geo_score_deltas(conn, ctx, {})
assert "error" in result
@@ -571,13 +571,13 @@ def test_compare_geo_score_deltas_structure() -> None:
zero_fresh = {"freshness_score": 4, "checked": True}
zero_disc = {"found_count": 1, "discovery_score": 2}
- with patch("website_profiling.tools.audit_tools.compare_slices.load_compare_pair",
+ with patch("website_profiling.tools.audit_tools.compare.compare_slices.load_compare_pair",
return_value=(current, baseline, 2, 1, None)):
- with patch("website_profiling.tools.audit_tools.geo_tools._score_robots_ai_access", return_value=zero_robots):
- with patch("website_profiling.tools.audit_tools.geo_tools._fetch_llms_txt", return_value=zero_llms):
- with patch("website_profiling.tools.audit_tools.geo_tools._score_meta_signals", return_value=zero_meta):
- with patch("website_profiling.tools.audit_tools.geo_tools._score_freshness_signals", return_value=zero_fresh):
- with patch("website_profiling.tools.audit_tools.geo_tools._fetch_ai_discovery", return_value=zero_disc):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools._score_robots_ai_access", return_value=zero_robots):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_txt", return_value=zero_llms):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools._score_meta_signals", return_value=zero_meta):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools._score_freshness_signals", return_value=zero_fresh):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools._fetch_ai_discovery", return_value=zero_disc):
result = compare_geo_score_deltas(conn, ctx, {})
assert "geo_deltas" in result
assert "regression_detected" in result
diff --git a/tests/tools/test_image_tools.py b/tests/tools/test_image_tools.py
index 72611529..5404e34c 100644
--- a/tests/tools/test_image_tools.py
+++ b/tests/tools/test_image_tools.py
@@ -8,7 +8,7 @@
from website_profiling.tools.audit_tools import dispatch_tool
from website_profiling.tools.audit_tools.context import AuditToolContext as Ctx
-from website_profiling.tools.audit_tools.image_tools import IMAGE_LIGHTHOUSE_AUDIT_IDS
+from website_profiling.tools.audit_tools.images.image_tools import IMAGE_LIGHTHOUSE_AUDIT_IDS
@pytest.fixture
@@ -72,15 +72,22 @@ def test_get_image_audit_summary(conn: MagicMock, ctx: Ctx) -> None:
])
with patch.object(Ctx, "load_payload", return_value=_payload()), patch.object(Ctx, "load_crawl_df", return_value=df):
summary = dispatch_tool("get_image_audit_summary", {}, context=ctx, conn=conn)
- assert summary["pages_missing_alt"] >= 1
- assert summary["pages_without_lazy_images"] >= 1
- assert summary["pages_missing_image_dimensions"] >= 1
+ # The crawl DataFrame is authoritative: it reports 1 page missing alt and 0
+ # for lazy/dimensions, so a real 0 must NOT be replaced by a content_urls count.
+ assert summary["pages_missing_alt"] == 1
+ assert summary["pages_without_lazy_images"] == 0
+ assert summary["pages_missing_image_dimensions"] == 0
assert summary["images_total_crawled"] == 5
assert summary["lighthouse_image_diagnostics"] == 2
assert summary["image_inventory_available"] is True
assert "page_previews" in summary
assert summary["page_previews"]["missing_lazy"]["total"] >= 1
+ # When the DataFrame is absent the count falls back to content_urls.
+ with patch.object(Ctx, "load_payload", return_value=_payload()), patch.object(Ctx, "load_crawl_df", return_value=None):
+ fb = dispatch_tool("get_image_audit_summary", {}, context=ctx, conn=conn)
+ assert fb["pages_without_lazy_images"] == len(_payload().get("content_urls", {}).get("missing_lazy") or [])
+
def test_list_site_image_urls(conn: MagicMock, ctx: Ctx) -> None:
with patch.object(Ctx, "load_payload", return_value=_payload()):
diff --git a/tests/tools/test_mcp_registry.py b/tests/tools/test_mcp_registry.py
index 480b51f2..29a4c06f 100644
--- a/tests/tools/test_mcp_registry.py
+++ b/tests/tools/test_mcp_registry.py
@@ -42,7 +42,7 @@ def test_dispatch_list_properties_roundtrip() -> None:
conn = MagicMock()
props = [{"id": 1, "name": "ex.com", "canonical_domain": "ex.com"}]
with patch(
- "website_profiling.tools.audit_tools.properties.list_properties_public",
+ "website_profiling.tools.audit_tools.portfolio.properties.list_properties_public",
return_value=props,
):
result = dispatch_tool("list_properties", {}, conn=conn)
diff --git a/tests/tools/test_router_tools.py b/tests/tools/test_router_tools.py
index 08129cfb..59969620 100644
--- a/tests/tools/test_router_tools.py
+++ b/tests/tools/test_router_tools.py
@@ -4,8 +4,8 @@
from unittest.mock import MagicMock, patch
from website_profiling.tools.audit_tools import AuditToolContext
-from website_profiling.tools.audit_tools import router_tools as router_mod
-from website_profiling.tools.audit_tools.router_tools import run_domain_agent
+from website_profiling.tools.audit_tools.core import router_tools as router_mod
+from website_profiling.tools.audit_tools.core.router_tools import run_domain_agent
def test_run_domain_agent_falls_back_to_global_search() -> None:
diff --git a/tests/tools/test_schedule_runner.py b/tests/tools/test_schedule_runner.py
index 34797ecc..92609cd2 100644
--- a/tests/tools/test_schedule_runner.py
+++ b/tests/tools/test_schedule_runner.py
@@ -33,6 +33,39 @@ def test_cron_matches_invalid_minute_or_hour() -> None:
assert _cron_matches("abc 14 * * *", now) is False
+def test_cron_matches_day_of_month() -> None:
+ # Regression: "0 9 1 * *" previously fired EVERY day (DOM ignored); it must
+ # only fire on the 1st.
+ first = datetime(2026, 6, 1, 9, 0, tzinfo=timezone.utc)
+ second = datetime(2026, 6, 2, 9, 0, tzinfo=timezone.utc)
+ assert _cron_matches("0 9 1 * *", first) is True
+ assert _cron_matches("0 9 1 * *", second) is False
+
+
+def test_cron_matches_month_field() -> None:
+ jan1 = datetime(2026, 1, 1, 9, 0, tzinfo=timezone.utc)
+ jun1 = datetime(2026, 6, 1, 9, 0, tzinfo=timezone.utc)
+ assert _cron_matches("0 9 1 1 *", jan1) is True
+ assert _cron_matches("0 9 1 1 *", jun1) is False # wrong month
+
+
+def test_cron_dom_dow_or_semantics() -> None:
+ # "0 9 1 * 0" = 1st of month OR Sunday (standard cron OR-semantics).
+ mon_first = datetime(2026, 6, 1, 9, 0, tzinfo=timezone.utc) # Monday, day 1
+ sunday = datetime(2026, 6, 7, 9, 0, tzinfo=timezone.utc) # Sunday, day 7
+ mon_eighth = datetime(2026, 6, 8, 9, 0, tzinfo=timezone.utc) # Monday, day 8
+ assert _cron_matches("0 9 1 * 0", mon_first) is True # DOM matches
+ assert _cron_matches("0 9 1 * 0", sunday) is True # DOW matches
+ assert _cron_matches("0 9 1 * 0", mon_eighth) is False # neither
+
+
+def test_cron_dom_list_and_invalid_field() -> None:
+ fifteenth = datetime(2026, 6, 15, 9, 0, tzinfo=timezone.utc)
+ assert _cron_matches("0 9 1,15 * *", fifteenth) is True
+ # A malformed day-of-month token fails closed (no spurious run).
+ assert _cron_matches("0 9 x * *", fifteenth) is False
+
+
def test_cron_invalid_expression() -> None:
now = datetime(2026, 6, 7, 10, 0, tzinfo=timezone.utc)
assert _cron_matches("bad cron", now) is False
diff --git a/tests/tools/test_sql_query_tool.py b/tests/tools/test_sql_query_tool.py
index ea143c9c..057a12f7 100644
--- a/tests/tools/test_sql_query_tool.py
+++ b/tests/tools/test_sql_query_tool.py
@@ -8,7 +8,7 @@
import pytest
-from website_profiling.tools.audit_tools.sql_query import (
+from website_profiling.tools.audit_tools.core.sql_query import (
ReadOnlyViolation,
_ALLOWED_TABLES,
_MAX_SQL_BYTES,
@@ -321,6 +321,17 @@ def test_schema_qualified_secret_table_rejected(self) -> None:
with pytest.raises(ReadOnlyViolation):
assert_read_only("SELECT * FROM public.llm_config")
+ def test_schema_qualified_allowed_table_rejected(self) -> None:
+ # public.google_data resolves to the real base table and would bypass the
+ # injected tenant-scope CTE — must be rejected even though google_data
+ # is allowlisted unqualified.
+ with pytest.raises(ReadOnlyViolation, match="Schema-qualified"):
+ assert_read_only("SELECT * FROM public.google_data")
+
+ def test_catalog_qualified_table_rejected(self) -> None:
+ with pytest.raises(ReadOnlyViolation, match="Schema-qualified"):
+ assert_read_only("SELECT * FROM cat.public.crawl_results")
+
# ---------------------------------------------------------------------------
# assert_read_only — rejected: dangerous functions
@@ -445,6 +456,18 @@ def test_conflict_raises(self) -> None:
with pytest.raises(ReadOnlyViolation, match="conflict"):
_inject_scope_ctes(sql, self._stmt(sql), property_id=1)
+ def test_recursive_query_is_subquery_wrapped(self) -> None:
+ # Scope CTEs self-shadow their base tables, which is invalid under
+ # WITH RECURSIVE; the query must instead be wrapped in a subquery so the
+ # outer scope CTEs apply without breaking the recursive form.
+ sql = "WITH RECURSIVE sub AS (SELECT crawl_run_id FROM crawl_results) SELECT * FROM sub"
+ result = _inject_scope_ctes(sql, self._stmt(sql), property_id=3)
+ assert "WHERE property_id = 3" in result
+ assert "_scoped" in result
+ assert "WITH RECURSIVE sub" in result
+ # The injected CTEs must NOT be spliced in front of RECURSIVE.
+ assert ",\nRECURSIVE" not in result
+
# ---------------------------------------------------------------------------
# run_sql_query handler
@@ -492,7 +515,7 @@ def _fake_ro_session() -> Iterator[_FakeConn]:
yield _FakeConn()
return patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_fake_ro_session,
)
@@ -532,7 +555,7 @@ def _never_called() -> Iterator[None]:
yield None
with patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_never_called,
):
result = run_sql_query(
@@ -553,7 +576,7 @@ def _never_called() -> Iterator[None]:
yield None
with patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_never_called,
):
result = run_sql_query(
@@ -573,7 +596,7 @@ def _never_called() -> Iterator[None]:
yield None
with patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_never_called,
):
result = run_sql_query(
@@ -609,7 +632,7 @@ def _broken_session():
yield _BrokenConn()
with patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_broken_session,
):
result = run_sql_query(
@@ -697,7 +720,7 @@ def _fake_ro():
yield _FakeConn()
with patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_fake_ro,
):
run_sql_query(
@@ -748,7 +771,7 @@ def _fake_ro():
yield _FakeConn()
with patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_fake_ro,
):
run_sql_query(
@@ -826,7 +849,7 @@ def _fake_ro():
yield _FakeConn()
with patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_fake_ro,
):
result = get_sql_schema(self._conn(), self._ctx(), {})
@@ -880,7 +903,7 @@ def _fake_ro():
yield _FakeConn()
with patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_fake_ro,
):
result = get_sql_schema(self._conn(), self._ctx(), {})
@@ -908,7 +931,7 @@ def _broken_session():
yield _BrokenConn()
with patch(
- "website_profiling.tools.audit_tools.sql_query.readonly_session",
+ "website_profiling.tools.audit_tools.core.sql_query.readonly_session",
_broken_session,
):
result = get_sql_schema(self._conn(), self._ctx(), {})
@@ -964,7 +987,7 @@ def test_sql_tools_excluded_when_disabled(self) -> None:
class TestSqlQueryRemainingBranches:
def test_anonymous_forbidden_function_with_regex_bypass(self) -> None:
- with patch("website_profiling.tools.audit_tools.sql_query.assert_read_only_regex"):
+ with patch("website_profiling.tools.audit_tools.core.sql_query.assert_read_only_regex"):
with pytest.raises(ReadOnlyViolation, match="not permitted"):
assert_read_only("SELECT pg_sleep(1)")
@@ -974,8 +997,8 @@ def test_select_for_update_locks_rejected(self) -> None:
stmt = sqlglot.parse_one("SELECT 1")
stmt.set("locks", [object()])
- with patch("website_profiling.tools.audit_tools.sql_query.assert_read_only_regex"), patch(
- "website_profiling.tools.audit_tools.sql_query.sqlglot.parse",
+ with patch("website_profiling.tools.audit_tools.core.sql_query.assert_read_only_regex"), patch(
+ "website_profiling.tools.audit_tools.core.sql_query.sqlglot.parse",
return_value=[stmt],
):
with pytest.raises(ReadOnlyViolation, match="FOR UPDATE"):
@@ -983,7 +1006,7 @@ def test_select_for_update_locks_rejected(self) -> None:
def test_check_table_refs_skips_empty_table_name(self) -> None:
from sqlglot import exp
- from website_profiling.tools.audit_tools.sql_query import _check_table_refs
+ from website_profiling.tools.audit_tools.core.sql_query import _check_table_refs
table = exp.Table(this=exp.to_identifier(""))
select = exp.Select().from_(table)
@@ -1036,7 +1059,7 @@ def _fake_ro():
_FakeCursor._call_count = 0
yield _FakeConn()
- with patch("website_profiling.tools.audit_tools.sql_query.readonly_session", _fake_ro):
+ with patch("website_profiling.tools.audit_tools.core.sql_query.readonly_session", _fake_ro):
result = get_sql_schema(MagicMock(), AuditToolContext(), {})
assert result["tables"][0]["foreign_keys"] == []
@@ -1052,7 +1075,7 @@ def _ro():
conn.cursor.return_value.__enter__.return_value = cur
yield conn
- with patch("website_profiling.tools.audit_tools.sql_query.readonly_session", _ro):
+ with patch("website_profiling.tools.audit_tools.core.sql_query.readonly_session", _ro):
result = run_sql_query(MagicMock(), AuditToolContext(), {"sql": "SELECT 1", "row_cap": "bad"})
assert result["row_count"] == 1
@@ -1068,10 +1091,10 @@ def _ro():
conn.cursor.return_value.__enter__.return_value = cur
yield conn
- with patch("website_profiling.tools.audit_tools.sql_query.readonly_session", _ro), patch(
- "website_profiling.tools.audit_tools.sql_query.assert_read_only",
+ with patch("website_profiling.tools.audit_tools.core.sql_query.readonly_session", _ro), patch(
+ "website_profiling.tools.audit_tools.core.sql_query.assert_read_only",
), patch(
- "website_profiling.tools.audit_tools.sql_query.sqlglot.parse",
+ "website_profiling.tools.audit_tools.core.sql_query.sqlglot.parse",
side_effect=RuntimeError("parse fail"),
):
result = run_sql_query(MagicMock(), AuditToolContext(property_id=1), {"sql": "SELECT 1"})
@@ -1080,11 +1103,11 @@ def _ro():
def test_run_sql_query_scope_injection_rejected(self) -> None:
import sqlglot
- with patch("website_profiling.tools.audit_tools.sql_query.assert_read_only"), patch(
- "website_profiling.tools.audit_tools.sql_query.sqlglot.parse",
+ with patch("website_profiling.tools.audit_tools.core.sql_query.assert_read_only"), patch(
+ "website_profiling.tools.audit_tools.core.sql_query.sqlglot.parse",
return_value=[sqlglot.parse_one("SELECT 1")],
), patch(
- "website_profiling.tools.audit_tools.sql_query._inject_scope_ctes",
+ "website_profiling.tools.audit_tools.core.sql_query._inject_scope_ctes",
side_effect=ReadOnlyViolation("scope fail"),
):
scoped = run_sql_query(MagicMock(), AuditToolContext(property_id=1), {"sql": "SELECT 1"})
@@ -1130,6 +1153,6 @@ def _fake_ro():
_FakeCursor._call_count = 0
yield _FakeConn()
- with patch("website_profiling.tools.audit_tools.sql_query.readonly_session", _fake_ro):
+ with patch("website_profiling.tools.audit_tools.core.sql_query.readonly_session", _fake_ro):
result = get_sql_schema(MagicMock(), AuditToolContext(), {})
assert result["tables"][0]["foreign_keys"] == []
diff --git a/tests/tools/test_tools_branch_coverage.py b/tests/tools/test_tools_branch_coverage.py
index 5f4c5b23..78ec9049 100644
--- a/tests/tools/test_tools_branch_coverage.py
+++ b/tests/tools/test_tools_branch_coverage.py
@@ -39,7 +39,7 @@ def test_slice_and_context_remaining_branches() -> None:
def test_crawl_remaining_branches(conn: MagicMock, ctx: Ctx) -> None:
- from website_profiling.tools.audit_tools import crawl as crawl_mod
+ from website_profiling.tools.audit_tools.crawl import crawl as crawl_mod
with patch.object(Ctx, "load_payload", return_value={"crawl_run_id": "bad"}), patch(
"website_profiling.db.crawl_store.read_edges",
@@ -108,7 +108,7 @@ def test_crawl_remaining_branches(conn: MagicMock, ctx: Ctx) -> None:
def test_crawl_lists_remaining_branches(conn: MagicMock, ctx: Ctx) -> None:
- from website_profiling.tools.audit_tools import crawl_lists as cl_mod
+ from website_profiling.tools.audit_tools.crawl import crawl_lists as cl_mod
assert cl_mod._is_2xx("") is False
df_no_status = pd.DataFrame([{"url": "https://ex.com"}])
@@ -153,10 +153,10 @@ def test_crawl_lists_remaining_branches(conn: MagicMock, ctx: Ctx) -> None:
def test_backlinks_charts_compare_and_content(conn: MagicMock, ctx: Ctx) -> None:
- from website_profiling.tools.audit_tools import backlinks as bl_mod
- from website_profiling.tools.audit_tools import charts as charts_mod
- from website_profiling.tools.audit_tools import compare_slices as cmp_mod
- from website_profiling.tools.audit_tools import content as content_mod
+ from website_profiling.tools.audit_tools.backlinks import backlinks as bl_mod
+ from website_profiling.tools.audit_tools.portfolio import charts as charts_mod
+ from website_profiling.tools.audit_tools.compare import compare_slices as cmp_mod
+ from website_profiling.tools.audit_tools.content import content as content_mod
assert bl_mod.get_gsc_links_import_status(conn, Ctx(property_id=None), {})["error"]
assert bl_mod.get_gsc_sample_links(conn, Ctx(property_id=None), {})["error"]
@@ -167,16 +167,16 @@ def test_backlinks_charts_compare_and_content(conn: MagicMock, ctx: Ctx) -> None
with patch.object(Ctx, "load_payload", return_value={}):
assert charts_mod.get_outlink_distribution(conn, ctx, {})["error"]
- with patch("website_profiling.tools.audit_tools.report.get_report_summary", return_value={"error": "x"}):
+ with patch("website_profiling.tools.audit_tools.report.report.get_report_summary", return_value={"error": "x"}):
assert charts_mod.get_issue_priority_breakdown(conn, ctx, {})["error"]
with patch(
- "website_profiling.tools.audit_tools.report.get_report_summary",
+ "website_profiling.tools.audit_tools.report.report.get_report_summary",
return_value={"issue_counts": "bad", "total_issues": 0},
):
assert charts_mod.get_issue_priority_breakdown(conn, ctx, {})["items"] == []
err = {"error": "baseline missing"}
- with patch("website_profiling.tools.audit_tools.compare_slices.load_compare_pair", return_value=(None, None, None, None, err)):
+ with patch("website_profiling.tools.audit_tools.compare.compare_slices.load_compare_pair", return_value=(None, None, None, None, err)):
for fn in (
cmp_mod.compare_security_deltas,
cmp_mod.compare_content_metrics,
@@ -192,10 +192,10 @@ def test_backlinks_charts_compare_and_content(conn: MagicMock, ctx: Ctx) -> None
def test_report_report_extras_keywords_ops(conn: MagicMock, ctx: Ctx) -> None:
- from website_profiling.tools.audit_tools import report as report_mod
- from website_profiling.tools.audit_tools import report_extras as rex_mod
- from website_profiling.tools.audit_tools import keywords as kw_mod
- from website_profiling.tools.audit_tools import ops as ops_mod
+ from website_profiling.tools.audit_tools.report import report as report_mod
+ from website_profiling.tools.audit_tools.report import report_extras as rex_mod
+ from website_profiling.tools.audit_tools.keywords import keywords as kw_mod
+ from website_profiling.tools.audit_tools.ops import ops as ops_mod
assert report_mod._normalize_priority("weird") == "weird"
with patch.object(Ctx, "load_payload", return_value={}):
@@ -251,7 +251,7 @@ def test_report_report_extras_keywords_ops(conn: MagicMock, ctx: Ctx) -> None:
"analysis": {},
"line_count": 1,
}
- with patch("website_profiling.tools.audit_tools.ops._load_log_analysis", return_value=log_row):
+ with patch("website_profiling.tools.audit_tools.ops.ops._load_log_analysis", return_value=log_row):
assert ops_mod.get_log_top_paths(conn, ctx, {})["total"] == 0
assert ops_mod.list_log_only_paths(conn, ctx, {})["total"] == 0
assert ops_mod.list_crawl_only_paths(conn, ctx, {})["total"] == 0
@@ -259,15 +259,15 @@ def test_report_report_extras_keywords_ops(conn: MagicMock, ctx: Ctx) -> None:
def test_lighthouse_links_google_health_security(conn: MagicMock, ctx: Ctx) -> None:
- from website_profiling.tools.audit_tools import lighthouse as lh_mod
- from website_profiling.tools.audit_tools import links as links_mod
- from website_profiling.tools.audit_tools import google as google_mod
- from website_profiling.tools.audit_tools import health as health_mod
- from website_profiling.tools.audit_tools import security as sec_mod
- from website_profiling.tools.audit_tools import indexation_tools as idx_mod
+ from website_profiling.tools.audit_tools.performance import lighthouse as lh_mod
+ from website_profiling.tools.audit_tools.links import links as links_mod
+ from website_profiling.tools.audit_tools.google import google as google_mod
+ from website_profiling.tools.audit_tools.portfolio import health as health_mod
+ from website_profiling.tools.audit_tools.security import security as sec_mod
+ from website_profiling.tools.audit_tools.indexation import indexation_tools as idx_mod
with patch.object(Ctx, "load_payload", return_value={"lighthouse_by_url": "bad"}), patch(
- "website_profiling.tools.audit_tools.lighthouse.read_lighthouse_page_summaries",
+ "website_profiling.tools.audit_tools.performance.lighthouse.read_lighthouse_page_summaries",
return_value={"https://ex.com": {"performance": 10}},
):
assert lh_mod.get_lighthouse_for_url(conn, ctx, {"url": "https://ex.com"})["lighthouse"]
@@ -317,10 +317,10 @@ def test_lighthouse_links_google_health_security(conn: MagicMock, ctx: Ctx) -> N
def test_image_tools_and_misc_audit_modules(conn: MagicMock, ctx: Ctx) -> None:
- from website_profiling.tools.audit_tools import image_tools as img_mod
- from website_profiling.tools.audit_tools import onpage as onpage_mod
- from website_profiling.tools.audit_tools import llm_tools as llm_mod
- from website_profiling.tools.audit_tools import workflow as wf_mod
+ from website_profiling.tools.audit_tools.images import image_tools as img_mod
+ from website_profiling.tools.audit_tools.onpage import onpage as onpage_mod
+ from website_profiling.tools.audit_tools.integrations import llm_tools as llm_mod
+ from website_profiling.tools.audit_tools.ops import workflow as wf_mod
payload = {
"lighthouse_diagnostics": ["bad", {"lighthouse_audit_id": "uses-optimized-images", "title": "Images"}] * 10,
@@ -369,7 +369,7 @@ def test_image_tools_and_misc_audit_modules(conn: MagicMock, ctx: Ctx) -> None:
with patch.object(Ctx, "load_crawl_df", return_value=pd.DataFrame([{"url": "https://ex.com", "status": "200", "noindex": "true"}])):
assert onpage_mod.list_pages_noindex(conn, ctx, {})["total"] == 1
- with patch("website_profiling.tools.audit_tools.llm_tools.run_page_coach", return_value={"coach": "ok"}):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.run_page_coach", return_value={"coach": "ok"}):
assert llm_mod.get_page_coach(conn, ctx, {"url": "https://ex.com"})["coach"] == "ok"
assert wf_mod.list_issue_workflow(conn, Ctx(property_id=None), {})["error"]
@@ -402,25 +402,25 @@ def test_export_artifacts_workbook_and_custom(tmp_path, monkeypatch, conn: Magic
def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, monkeypatch) -> None:
- from website_profiling.tools.audit_tools import backlinks as bl_mod
- from website_profiling.tools.audit_tools import charts as charts_mod
- from website_profiling.tools.audit_tools import content as content_mod
- from website_profiling.tools.audit_tools import crawl as crawl_mod
- from website_profiling.tools.audit_tools import crawl_lists as cl_mod
- from website_profiling.tools.audit_tools import export_tools as et_mod
- from website_profiling.tools.audit_tools import google as google_mod
- from website_profiling.tools.audit_tools import health as health_mod
- from website_profiling.tools.audit_tools import image_tools as img_mod
- from website_profiling.tools.audit_tools import issues as issues_mod
- from website_profiling.tools.audit_tools import keywords as kw_mod
- from website_profiling.tools.audit_tools import lighthouse as lh_mod
- from website_profiling.tools.audit_tools import links as links_mod
- from website_profiling.tools.audit_tools import llm_tools as llm_mod
- from website_profiling.tools.audit_tools import onpage as onpage_mod
- from website_profiling.tools.audit_tools import ops as ops_mod
- from website_profiling.tools.audit_tools import report as report_mod
- from website_profiling.tools.audit_tools import report_extras as rex_mod
- from website_profiling.tools.audit_tools import security as sec_mod
+ from website_profiling.tools.audit_tools.backlinks import backlinks as bl_mod
+ from website_profiling.tools.audit_tools.portfolio import charts as charts_mod
+ from website_profiling.tools.audit_tools.content import content as content_mod
+ from website_profiling.tools.audit_tools.crawl import crawl as crawl_mod
+ from website_profiling.tools.audit_tools.crawl import crawl_lists as cl_mod
+ from website_profiling.tools.audit_tools.export import export_tools as et_mod
+ from website_profiling.tools.audit_tools.google import google as google_mod
+ from website_profiling.tools.audit_tools.portfolio import health as health_mod
+ from website_profiling.tools.audit_tools.images import image_tools as img_mod
+ from website_profiling.tools.audit_tools.issues import issues as issues_mod
+ from website_profiling.tools.audit_tools.keywords import keywords as kw_mod
+ from website_profiling.tools.audit_tools.performance import lighthouse as lh_mod
+ from website_profiling.tools.audit_tools.links import links as links_mod
+ from website_profiling.tools.audit_tools.integrations import llm_tools as llm_mod
+ from website_profiling.tools.audit_tools.onpage import onpage as onpage_mod
+ from website_profiling.tools.audit_tools.ops import ops as ops_mod
+ from website_profiling.tools.audit_tools.report import report as report_mod
+ from website_profiling.tools.audit_tools.report import report_extras as rex_mod
+ from website_profiling.tools.audit_tools.security import security as sec_mod
from website_profiling.tools import export_crawl_workbook as wb_mod
monkeypatch.setenv("DATA_DIR", str(tmp_path))
@@ -598,7 +598,7 @@ def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, mo
"analysis": {"top_paths": "bad", "parsed_lines": 10, "googlebot_hits": 2, "crawl_compare": {"log_only_paths": ["/a"], "crawl_only_paths": ["/b"]}},
"line_count": 1,
}
- with patch("website_profiling.tools.audit_tools.ops._load_log_analysis", return_value=log_row):
+ with patch("website_profiling.tools.audit_tools.ops.ops._load_log_analysis", return_value=log_row):
assert ops_mod.get_log_top_paths(conn, ctx, {})["total"] == 0
assert ops_mod.list_log_only_paths(conn, ctx, {})["total"] == 1
assert ops_mod.list_crawl_only_paths(conn, ctx, {})["total"] == 1
@@ -608,7 +608,7 @@ def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, mo
snap = MagicMock()
snap.__getitem__ = lambda self, i: [80, datetime.now(timezone.utc), 1, "not-json"][i]
conn.execute = MagicMock(return_value=MagicMock(fetchone=MagicMock(return_value=snap)))
- with patch("website_profiling.tools.audit_tools.llm_tools.list_properties_public", return_value=props):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.list_properties_public", return_value=props):
portfolio = llm_mod.get_portfolio_summary(conn, ctx, {})
assert portfolio["properties"][0]["issue_counts"] == {}
@@ -622,12 +622,12 @@ def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, mo
export_artifacts.delete_artifact(aid)
with patch.object(Ctx, "load_payload", return_value={"site_name": "Ex"}), patch(
- "website_profiling.tools.audit_tools.export_tools._dispatch",
+ "website_profiling.tools.audit_tools.export.export_tools._dispatch",
return_value={"error": "tool failed"},
):
assert et_mod.export_list_as_csv(conn, ctx, {"tool_name": "list_broken_links"})["error"] == "tool failed"
with patch.object(Ctx, "load_payload", return_value={"site_name": "Ex"}), patch(
- "website_profiling.tools.audit_tools.export_tools._dispatch",
+ "website_profiling.tools.audit_tools.export.export_tools._dispatch",
return_value={"pages": [{"url": "https://ex.com"}]},
):
out = et_mod.export_list_as_csv(conn, ctx, {"tool_name": "list_broken_links"})
@@ -673,7 +673,7 @@ def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, mo
snap_dict = MagicMock()
snap_dict.__getitem__ = lambda self, i: [80, datetime.now(timezone.utc), 1, {"High": 1}][i]
conn.execute = MagicMock(return_value=MagicMock(fetchone=MagicMock(return_value=snap_dict)))
- with patch("website_profiling.tools.audit_tools.llm_tools.list_properties_public", return_value=[{"id": 1}]):
+ with patch("website_profiling.tools.audit_tools.integrations.llm_tools.list_properties_public", return_value=[{"id": 1}]):
assert llm_mod.get_portfolio_summary(conn, ctx, {})["properties"][0]["issue_counts"] == {"High": 1}
with patch.object(Ctx, "load_payload", return_value={"categories": ["bad", {"id": "c", "issues": ["bad", {"message": "m", "llm_recommendation": "fix"}]}]}):
@@ -696,7 +696,7 @@ def test_tools_remaining_branch_coverage(conn: MagicMock, ctx: Ctx, tmp_path, mo
assert ops_mod.get_log_googlebot_stats(conn, Ctx(property_id=None), {})["error"]
assert ops_mod._parse_analysis_field({"k": 1}) == {"k": 1}
assert ops_mod._parse_analysis_field([1, 2]) == {}
- with patch("website_profiling.tools.audit_tools.ops._load_log_analysis", return_value=None):
+ with patch("website_profiling.tools.audit_tools.ops.ops._load_log_analysis", return_value=None):
assert ops_mod.list_log_only_paths(conn, ctx, {})["missing"]
assert ops_mod.list_crawl_only_paths(conn, ctx, {})["missing"]
assert ops_mod.get_log_googlebot_stats(conn, ctx, {})["missing"]
diff --git a/tests/tools/test_tools_gate100_coverage.py b/tests/tools/test_tools_gate100_coverage.py
index 711ff4f3..43bd0afe 100644
--- a/tests/tools/test_tools_gate100_coverage.py
+++ b/tests/tools/test_tools_gate100_coverage.py
@@ -7,17 +7,15 @@
import pandas as pd
import pytest
-from website_profiling.tools.audit_tools import insight_helpers as ih
+from website_profiling.tools.audit_tools.insight import insight_helpers as ih
from website_profiling.tools.audit_tools.context import AuditToolContext as Ctx
-from website_profiling.tools.audit_tools import (
- crawl as crawl_mod,
- data_coverage as dc_mod,
- google as google_mod,
- insight_tools as insight_mod,
- keywords as kw_mod,
- registry,
- router_tools as router_mod,
-)
+from website_profiling.tools.audit_tools import (registry)
+from website_profiling.tools.audit_tools.crawl import crawl as crawl_mod
+from website_profiling.tools.audit_tools.core import data_coverage as dc_mod
+from website_profiling.tools.audit_tools.google import google as google_mod
+from website_profiling.tools.audit_tools.insight import insight_tools as insight_mod
+from website_profiling.tools.audit_tools.keywords import keywords as kw_mod
+from website_profiling.tools.audit_tools.core import router_tools as router_mod
from website_profiling.tools.audit_tools.tool_domains import (
CANONICAL_DOMAINS,
CHAT_ONLY_TOOLS,
@@ -83,7 +81,7 @@ def test_context_load_google_full_and_pair_fallbacks(conn: MagicMock, ctx: Ctx)
def test_data_coverage_report_all_branches(conn: MagicMock, ctx: Ctx) -> None:
assert dc_mod.get_data_coverage_report(conn, Ctx(property_id=None), {})["error"]
- with patch("website_profiling.tools.audit_tools.data_coverage.get_property_by_id", return_value=None):
+ with patch("website_profiling.tools.audit_tools.core.data_coverage.get_property_by_id", return_value=None):
assert dc_mod.get_data_coverage_report(conn, ctx, {})["error"] == "property not found"
prop = {"google_refresh_token": "tok"}
@@ -106,7 +104,7 @@ def test_data_coverage_report_all_branches(conn: MagicMock, ctx: Ctx) -> None:
}
google_full = {"gsc_full": {"summary": {}}, "ga4_full": {"summary": {}}}
- with patch("website_profiling.tools.audit_tools.data_coverage.get_property_by_id", return_value=prop), patch.object(
+ with patch("website_profiling.tools.audit_tools.core.data_coverage.get_property_by_id", return_value=prop), patch.object(
Ctx, "load_payload", return_value=payload,
), patch.object(Ctx, "load_google", return_value=google), patch.object(
Ctx, "load_keywords", return_value=keywords,
@@ -121,7 +119,7 @@ def test_data_coverage_report_all_branches(conn: MagicMock, ctx: Ctx) -> None:
assert len(result["checks"]) >= 10
sparse_prop = {"id": 1}
- with patch("website_profiling.tools.audit_tools.data_coverage.get_property_by_id", return_value=sparse_prop), patch.object(
+ with patch("website_profiling.tools.audit_tools.core.data_coverage.get_property_by_id", return_value=sparse_prop), patch.object(
Ctx, "load_payload", return_value={},
), patch.object(Ctx, "load_google", return_value=None), patch.object(
Ctx, "load_keywords", return_value=None,
@@ -325,10 +323,10 @@ def test_insight_tools_dispatch(conn: MagicMock, ctx: Ctx) -> None:
slash_diag = insight_mod.get_landing_page_full_diagnosis(conn, ctx, {"url": "https://ex.com/"})
assert slash_diag["lighthouse"]["performance"] == 70
- with patch("website_profiling.tools.audit_tools.insight_tools.list_issues", return_value={"error": "boom"}):
+ with patch("website_profiling.tools.audit_tools.insight.insight_tools.list_issues", return_value={"error": "boom"}):
assert insight_mod.get_issue_to_traffic_map(conn, ctx, {})["error"] == "boom"
- with patch("website_profiling.tools.audit_tools.insight_tools.list_issues", return_value={
+ with patch("website_profiling.tools.audit_tools.insight.insight_tools.list_issues", return_value={
"issues": ["bad", {
"url": "https://ex.com/x",
"priority": "High",
diff --git a/tests/tools/test_tools_gate_remaining_coverage.py b/tests/tools/test_tools_gate_remaining_coverage.py
index d7769a70..4e8f30fa 100644
--- a/tests/tools/test_tools_gate_remaining_coverage.py
+++ b/tests/tools/test_tools_gate_remaining_coverage.py
@@ -9,17 +9,15 @@
import requests
from website_profiling.tools.audit_tools.context import AuditToolContext as Ctx
-from website_profiling.tools.audit_tools import (
- crawl_actions as ca_mod,
- geo_citability as cit_mod,
- geo_detectors as det_mod,
- geo_list_tools as geo_list_mod,
- geo_tools as geo_mod,
- integration_tools as int_mod,
- llm_tools as llm_mod,
- sql_query as sql_mod,
-)
-from website_profiling.tools.audit_tools.sql_query import ReadOnlyViolation, assert_read_only, get_sql_schema, run_sql_query
+from website_profiling.tools.audit_tools.crawl import crawl_actions as ca_mod
+from website_profiling.tools.audit_tools.geo import geo_citability as cit_mod
+from website_profiling.tools.audit_tools.geo import geo_detectors as det_mod
+from website_profiling.tools.audit_tools.geo import geo_list_tools as geo_list_mod
+from website_profiling.tools.audit_tools.geo import geo_tools as geo_mod
+from website_profiling.tools.audit_tools.integrations import integration_tools as int_mod
+from website_profiling.tools.audit_tools.integrations import llm_tools as llm_mod
+from website_profiling.tools.audit_tools.core import sql_query as sql_mod
+from website_profiling.tools.audit_tools.core.sql_query import ReadOnlyViolation, assert_read_only, get_sql_schema, run_sql_query
@pytest.fixture
@@ -99,32 +97,32 @@ def test_crawl_action_helpers_and_validation_paths(conn: MagicMock) -> None:
assert ca_mod._normalize_url("") == ""
assert ca_mod._normalize_url("example.com/path") == "https://example.com/path"
assert ca_mod._is_valid_url("") is False
- with patch("website_profiling.tools.audit_tools.crawl_actions.urlparse", side_effect=ValueError("bad")):
+ with patch("website_profiling.tools.audit_tools.crawl.crawl_actions.urlparse", side_effect=ValueError("bad")):
assert ca_mod._is_valid_url("https://example.com") is False
broken = MagicMock()
broken.execute.side_effect = RuntimeError("db down")
assert ca_mod._pipeline_job_running(broken) is False
- with patch("website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl", return_value=True), patch(
- "website_profiling.tools.audit_tools.crawl_actions._pipeline_job_running",
+ with patch("website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl", return_value=True), patch(
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._pipeline_job_running",
return_value=False,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({"crawl_discovery_mode": "list", "crawl_url_list": ""}, []),
):
out = ca_mod.prepare_audit_run(conn, Ctx(property_id=1), {"mode": "default", "start_url": "https://ex.com"})
assert out.get("ready") is False
assert "URL list is required" in out["errors"][0]
- with patch("website_profiling.tools.audit_tools.crawl_actions.load_llm_config_from_db", return_value={"llm_chat_allow_crawl": "true"}):
+ with patch("website_profiling.tools.audit_tools.crawl.crawl_actions.load_llm_config_from_db", return_value={"llm_chat_allow_crawl": "true"}):
assert ca_mod._chat_allow_crawl() is True
- with patch("website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl", return_value=True), patch(
- "website_profiling.tools.audit_tools.crawl_actions._pipeline_job_running",
+ with patch("website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl", return_value=True), patch(
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._pipeline_job_running",
return_value=False,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({}, []),
):
assert ca_mod.prepare_audit_run(conn, Ctx(), {"mode": "bogus", "start_url": "https://ex.com"})["errors"]
@@ -135,7 +133,7 @@ def test_crawl_action_helpers_and_validation_paths(conn: MagicMock) -> None:
)
assert create_bad["ready"] is False
with patch(
- "website_profiling.tools.audit_tools.crawl_actions.canonical_domain_from_start_url",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.canonical_domain_from_start_url",
return_value="",
):
no_domain = ca_mod.prepare_audit_run(
@@ -148,25 +146,25 @@ def test_crawl_action_helpers_and_validation_paths(conn: MagicMock) -> None:
assert no_url["ready"] is False
prop = {"id": 9, "site_url": "https://ex.com", "default_crawl_preset": "starter"}
- with patch("website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl", return_value=True), patch(
- "website_profiling.tools.audit_tools.crawl_actions._pipeline_job_running",
+ with patch("website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl", return_value=True), patch(
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._pipeline_job_running",
return_value=False,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.get_property_by_id",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.get_property_by_id",
return_value=prop,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({}, []),
):
out = ca_mod.prepare_audit_run(conn, Ctx(property_id=9), {"mode": "default"})
assert out["ready"] is True
assert out["run_spec"]["state"]["active_property_id"] == "9"
- with patch("website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl", return_value=True), patch(
- "website_profiling.tools.audit_tools.crawl_actions._pipeline_job_running",
+ with patch("website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl", return_value=True), patch(
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._pipeline_job_running",
return_value=False,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({}, []),
):
custom = ca_mod.prepare_audit_run(
@@ -185,25 +183,25 @@ def test_crawl_action_helpers_and_validation_paths(conn: MagicMock) -> None:
)
assert custom["ready"] is True
assert any("Concurrency" in h for h in custom["summary"]["highlights"])
- with patch("website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl", return_value=True), patch(
- "website_profiling.tools.audit_tools.crawl_actions._pipeline_job_running",
+ with patch("website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl", return_value=True), patch(
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._pipeline_job_running",
return_value=False,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({}, []),
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.get_property_by_id",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.get_property_by_id",
return_value={"id": 4, "site_url": "https://prop.example.com"},
):
from_url = ca_mod.prepare_audit_run(conn, Ctx(property_id=4), {"mode": "default"})
assert from_url["ready"] is True
assert from_url["summary"]["start_url"] == "https://prop.example.com"
- with patch("website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl", return_value=True), patch(
- "website_profiling.tools.audit_tools.crawl_actions._pipeline_job_running",
+ with patch("website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl", return_value=True), patch(
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._pipeline_job_running",
return_value=False,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({}, []),
):
lh = ca_mod.prepare_audit_run(
@@ -217,14 +215,14 @@ def test_crawl_action_helpers_and_validation_paths(conn: MagicMock) -> None:
)
assert lh["ready"] is True
assert any("Lighthouse on pages: no" in h for h in lh["summary"]["highlights"])
- with patch("website_profiling.tools.audit_tools.crawl_actions._chat_allow_crawl", return_value=True), patch(
- "website_profiling.tools.audit_tools.crawl_actions._pipeline_job_running",
+ with patch("website_profiling.tools.audit_tools.crawl.crawl_actions._chat_allow_crawl", return_value=True), patch(
+ "website_profiling.tools.audit_tools.crawl.crawl_actions._pipeline_job_running",
return_value=False,
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.get_property_by_id",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.get_property_by_id",
return_value={"id": 5, "site_url": ""},
), patch(
- "website_profiling.tools.audit_tools.crawl_actions.read_pipeline_config",
+ "website_profiling.tools.audit_tools.crawl.crawl_actions.read_pipeline_config",
return_value=({}, []),
):
no_site = ca_mod.prepare_audit_run(conn, Ctx(property_id=5), {"mode": "default"})
@@ -297,7 +295,7 @@ def test_citability_tool_handlers(conn: MagicMock, ctx: Ctx) -> None:
"word_count": 50,
"heading_sequence": "",
}
- with patch("website_profiling.tools.audit_tools.geo_citability.flesch_kincaid_grade", return_value=6.5):
+ with patch("website_profiling.tools.audit_tools.geo.geo_citability.flesch_kincaid_grade", return_value=6.5):
assert cit_mod._citability_signals(mid_fluency)["signals"]["fluency"] == 6
@@ -384,7 +382,7 @@ def test_robots_ai_access_score(conn: MagicMock, ctx: Ctx) -> None:
assert geo_list_mod.get_robots_ai_access_score(conn, ctx, {})["error"] == "domain unknown"
with patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch(
- "website_profiling.tools.audit_tools.geo_list_tools._parse_robots_txt",
+ "website_profiling.tools.audit_tools.geo.geo_list_tools._parse_robots_txt",
return_value="",
):
missing = geo_list_mod.get_robots_ai_access_score(conn, ctx, {})
@@ -392,7 +390,7 @@ def test_robots_ai_access_score(conn: MagicMock, ctx: Ctx) -> None:
robots = "User-agent: GPTBot\nDisallow: /private/\nUser-agent: *\nAllow: /\n"
with patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch(
- "website_profiling.tools.audit_tools.geo_list_tools._parse_robots_txt",
+ "website_profiling.tools.audit_tools.geo.geo_list_tools._parse_robots_txt",
return_value=robots,
):
scored = geo_list_mod.get_robots_ai_access_score(conn, ctx, {})
@@ -411,22 +409,22 @@ def test_geo_tools_depth_and_fetch_helpers() -> None:
assert geo_mod._score_llms_txt_depth(many_links)["depth_score"] >= 10
mock_resp = MagicMock(status_code=200, text="# llms\n")
- with patch("website_profiling.tools.audit_tools.geo_tools.requests.get", return_value=mock_resp):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools.requests.get", return_value=mock_resp):
assert geo_mod._fetch_llms_full_txt("https://ex.com") is True
- with patch("website_profiling.tools.audit_tools.geo_tools._fetch_llms_txt", return_value={"found": True, "depth": {}}), patch(
- "website_profiling.tools.audit_tools.geo_tools._fetch_llms_full_txt",
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_txt", return_value={"found": True, "depth": {}}), patch(
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_full_txt",
return_value=True,
), patch.object(Ctx, "resolve_property_domain", return_value="ex.com"):
status = geo_mod.get_llms_txt_status(MagicMock(), Ctx(), {})
assert status["llms_full_txt_found"] is True
miss = MagicMock(status_code=404, text="")
- with patch("website_profiling.tools.audit_tools.geo_tools.requests.get", return_value=miss):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools.requests.get", return_value=miss):
disc = geo_mod._fetch_ai_discovery("ex.com")
assert disc["found_count"] == 0
- with patch("website_profiling.tools.audit_tools.geo_tools.requests.get", side_effect=requests.RequestException("fail")):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools.requests.get", side_effect=requests.RequestException("fail")):
disc_err = geo_mod._fetch_ai_discovery("ex.com")
assert disc_err["endpoints"]
@@ -490,7 +488,7 @@ def test_llm_generator_tools(conn: MagicMock, ctx: Ctx) -> None:
payload = {"site_name": "Ex", "categories": []}
with patch.object(Ctx, "load_payload", return_value=payload), patch.object(Ctx, "load_crawl_df", return_value=df), patch(
- "website_profiling.tools.audit_tools.llm_tools._llm_disabled_response",
+ "website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response",
return_value={},
), patch(
"website_profiling.llm.base.get_llm_client",
@@ -514,28 +512,28 @@ def test_llm_generator_tools(conn: MagicMock, ctx: Ctx) -> None:
with patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch.object(
Ctx, "load_payload", return_value=payload,
), patch.object(Ctx, "load_crawl_df", return_value=df), patch(
- "website_profiling.tools.audit_tools.llm_tools.draft_llms_txt",
+ "website_profiling.tools.audit_tools.integrations.llm_tools.draft_llms_txt",
return_value={"llms_txt_draft": "# Ex"},
), patch(
- "website_profiling.tools.audit_tools.llm_tools.generate_robots_txt",
+ "website_profiling.tools.audit_tools.integrations.llm_tools.generate_robots_txt",
return_value={"robots_txt": "Allow: /"},
), patch(
- "website_profiling.tools.audit_tools.llm_tools.generate_schema",
+ "website_profiling.tools.audit_tools.integrations.llm_tools.generate_schema",
side_effect=[{"schema_json": {}}, {"schema_json": {}}],
), patch(
- "website_profiling.tools.audit_tools.geo_tools._fetch_llms_txt",
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_txt",
return_value={"found": False},
), patch(
- "website_profiling.tools.audit_tools.geo_tools._fetch_ai_discovery",
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_ai_discovery",
return_value={"endpoints": {"ai_txt": {"found": False}}},
), patch(
- "website_profiling.tools.audit_tools.geo_tools._score_meta_signals",
+ "website_profiling.tools.audit_tools.geo.geo_tools._score_meta_signals",
return_value={"has_meta_description": False},
), patch(
- "website_profiling.tools.audit_tools.geo_list_tools._parse_robots_txt",
+ "website_profiling.tools.audit_tools.geo.geo_list_tools._parse_robots_txt",
return_value="User-agent: GPTBot\nDisallow: /\n",
), patch(
- "website_profiling.tools.audit_tools.geo_list_tools._parse_robots_access",
+ "website_profiling.tools.audit_tools.geo.geo_list_tools._parse_robots_access",
return_value={"gptbot": "blocked"},
):
bundle = llm_mod.generate_geo_fix_bundle(conn, ctx, {})
@@ -551,11 +549,11 @@ def test_sql_query_remaining_branches() -> None:
assert_read_only("SELECT * FROM")
with pytest.raises(ReadOnlyViolation, match="empty after parsing"):
- with patch("website_profiling.tools.audit_tools.sql_query.sqlglot.parse", return_value=[None]):
+ with patch("website_profiling.tools.audit_tools.core.sql_query.sqlglot.parse", return_value=[None]):
assert_read_only("SELECT 1")
with pytest.raises(ReadOnlyViolation, match="not permitted"):
- with patch("website_profiling.tools.audit_tools.sql_query.assert_read_only_regex"):
+ with patch("website_profiling.tools.audit_tools.core.sql_query.assert_read_only_regex"):
assert_read_only("SELECT pg_sleep(1)")
@@ -602,7 +600,7 @@ def _fake_ro():
_FakeCursor._call_count = 0
yield _FakeConn()
- with patch("website_profiling.tools.audit_tools.sql_query.readonly_session", _fake_ro):
+ with patch("website_profiling.tools.audit_tools.core.sql_query.readonly_session", _fake_ro):
result = get_sql_schema(MagicMock(), Ctx(), {})
tables = {t["table"]: t for t in result["tables"]}
assert "crawl_runs" in tables
@@ -665,18 +663,18 @@ def test_remaining_geo_and_llm_gaps(conn: MagicMock, ctx: Ctx) -> None:
capped = det_mod.get_topic_authority(conn, ctx, {"limit": 5})
assert capped["total_pages"] == 200
- with patch("website_profiling.tools.audit_tools.geo_tools.requests.get", side_effect=requests.RequestException("fail")):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools.requests.get", side_effect=requests.RequestException("fail")):
assert geo_mod._fetch_llms_full_txt("https://ex.com") is False
with patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch(
- "website_profiling.tools.audit_tools.geo_tools._fetch_ai_discovery",
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_ai_discovery",
return_value={"found_count": 1, "endpoints": {}, "discovery_score": 2},
):
assert geo_mod.get_ai_discovery_status(conn, ctx, {})["found_count"] == 1
ok = MagicMock(status_code=200, text='https://ex.com2024-01-01')
feed = MagicMock(status_code=200, text='')
- with patch("website_profiling.tools.audit_tools.geo_tools.requests.get", side_effect=[ok, feed, feed, feed]):
+ with patch("website_profiling.tools.audit_tools.geo.geo_tools.requests.get", side_effect=[ok, feed, feed, feed]):
fresh = geo_mod._score_freshness_signals("ex.com")
assert fresh["freshness_score"] > 0
@@ -685,7 +683,7 @@ def test_remaining_geo_and_llm_gaps(conn: MagicMock, ctx: Ctx) -> None:
for i in range(12)
])
with patch.object(Ctx, "load_payload", return_value={"site_name": "Ex"}), patch.object(Ctx, "load_crawl_df", return_value=faq_rows), patch(
- "website_profiling.tools.audit_tools.llm_tools._llm_disabled_response",
+ "website_profiling.tools.audit_tools.integrations.llm_tools._llm_disabled_response",
return_value={},
), patch(
"website_profiling.llm.base.get_llm_client",
@@ -734,7 +732,7 @@ def _fake_ro():
_FakeCursor._call_count = 0
yield _FakeConn()
- with patch("website_profiling.tools.audit_tools.sql_query.readonly_session", _fake_ro):
+ with patch("website_profiling.tools.audit_tools.core.sql_query.readonly_session", _fake_ro):
schema = get_sql_schema(MagicMock(), Ctx(), {})
assert schema["tables"][0]["table"] == "crawl_runs"
assert schema["tables"][0]["foreign_keys"] == []
@@ -791,22 +789,22 @@ def _fake_ro():
with patch.object(Ctx, "load_payload", return_value={"ner_site_summary": {"entities": ["Ex"]}}), patch.object(
Ctx, "load_crawl_df", return_value=readiness_df,
), patch.object(Ctx, "resolve_property_domain", return_value="ex.com"), patch(
- "website_profiling.tools.audit_tools.geo_tools._fetch_llms_txt",
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_llms_txt",
return_value={"found": False},
), patch(
- "website_profiling.tools.audit_tools.geo_tools._score_robots_ai_access",
+ "website_profiling.tools.audit_tools.geo.geo_tools._score_robots_ai_access",
return_value={"robots_score": 5},
), patch(
- "website_profiling.tools.audit_tools.geo_tools._score_meta_signals",
+ "website_profiling.tools.audit_tools.geo.geo_tools._score_meta_signals",
return_value={"meta_score": 5},
), patch(
- "website_profiling.tools.audit_tools.geo_tools._score_freshness_signals",
+ "website_profiling.tools.audit_tools.geo.geo_tools._score_freshness_signals",
return_value={"freshness_score": 4},
), patch(
- "website_profiling.tools.audit_tools.geo_tools._fetch_ai_discovery",
+ "website_profiling.tools.audit_tools.geo.geo_tools._fetch_ai_discovery",
return_value={"discovery_score": 2},
), patch(
- "website_profiling.tools.audit_tools.geo_tools.get_faq_schema_coverage",
+ "website_profiling.tools.audit_tools.geo.geo_tools.get_faq_schema_coverage",
return_value={"coverage_pct": 50},
):
score = geo_mod.get_geo_readiness_score(conn, ctx, {})
diff --git a/web/app/api/app-settings/route.ts b/web/app/api/app-settings/route.ts
new file mode 100644
index 00000000..1b28bd8d
--- /dev/null
+++ b/web/app/api/app-settings/route.ts
@@ -0,0 +1,76 @@
+import { NextResponse, type NextRequest } from 'next/server';
+import { forbiddenIfNotLocal } from '@/server/localOnly';
+import { loadAppSetting, saveAppSetting } from '@/server/appSettings';
+import type { ApiRouteHandler } from '@/types/api';
+
+export const runtime = 'nodejs';
+
+/**
+ * GET /api/app-settings?key= — Returns { key, value } or { key, value: null }.
+ *
+ * The key is trimmed before lookup and echoed back trimmed. Responds 400 when the
+ * `key` query parameter is missing or blank, and 500 with the error message when
+ * `loadAppSetting` throws.
+ */
+export const GET: ApiRouteHandler = async (request: NextRequest): Promise<NextResponse> => {
+  const denied = forbiddenIfNotLocal(request);
+  if (denied) return denied;
+
+  // searchParams.get() returns string | null; optional chaining + trim folds the
+  // "missing" and "whitespace only" cases into one falsy check.
+  const key = request.nextUrl.searchParams.get('key')?.trim();
+  if (!key) {
+    return NextResponse.json({ error: 'Missing key query parameter' }, { status: 400 });
+  }
+
+  try {
+    const value = await loadAppSetting(key);
+    return NextResponse.json({ key, value });
+  } catch (e) {
+    const msg = e instanceof Error ? e.message : String(e);
+    return NextResponse.json({ error: msg }, { status: 500 });
+  }
+};
+
+/**
+ * PUT /api/app-settings — Body: { key: string; value: string }
+ *
+ * Stores `value` under the trimmed key via `saveAppSetting` and responds `{ ok: true }`.
+ * Responds 400 for unparseable JSON, a body that is not `{ key: string; value: string }`,
+ * or a blank key, and 500 with the error message when `saveAppSetting` throws.
+ */
+export const PUT: ApiRouteHandler = async (request: NextRequest): Promise<NextResponse> => {
+  const denied = forbiddenIfNotLocal(request);
+  if (denied) return denied;
+
+  let body: unknown;
+  try {
+    body = await request.json();
+  } catch {
+    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
+  }
+
+  // Narrow the untyped JSON by hand before trusting its shape.
+  if (
+    typeof body !== 'object' ||
+    body === null ||
+    typeof (body as Record<string, unknown>).key !== 'string' ||
+    typeof (body as Record<string, unknown>).value !== 'string'
+  ) {
+    return NextResponse.json({ error: 'Body must be { key: string; value: string }' }, { status: 400 });
+  }
+
+  const { key, value } = body as { key: string; value: string };
+  const trimmedKey = key.trim();
+  if (trimmedKey === '') {
+    return NextResponse.json({ error: 'key must not be empty' }, { status: 400 });
+  }
+
+  try {
+    await saveAppSetting(trimmedKey, value);
+    return NextResponse.json({ ok: true });
+  } catch (e) {
+    const msg = e instanceof Error ? e.message : String(e);
+    return NextResponse.json({ error: msg }, { status: 500 });
+  }
+};
diff --git a/web/app/api/mcp-tools/route.ts b/web/app/api/mcp-tools/route.ts
new file mode 100644
index 00000000..4b800122
--- /dev/null
+++ b/web/app/api/mcp-tools/route.ts
@@ -0,0 +1,85 @@
+import { type NextRequest, NextResponse } from 'next/server';
+import { execFile } from 'child_process';
+import { promisify } from 'util';
+import { forbiddenIfNotLocal } from '@/server/localOnly';
+
+const execFileAsync = promisify(execFile);
+
+// Inline Python that prints the tool catalog as a single JSON document on stdout.
+// Any exception is reported as JSON with an "error" field rather than a traceback.
+const PYTHON_SCRIPT = `
+import json, sys
+try:
+    from website_profiling.tools.audit_tools.registry import (
+        TOOL_DEFINITIONS, get_tool_meta, mcp_tool_names
+    )
+    from website_profiling.tools.audit_tools.tool_domains import (
+        MCP_DOMAIN_BUNDLES, CANONICAL_DOMAINS, classify_tool_domain
+    )
+    bundle_sets = {b: mcp_tool_names(b) for b in MCP_DOMAIN_BUNDLES.keys()}
+    tools = []
+    for spec in TOOL_DEFINITIONS:
+        name = spec.get("name", "")
+        if not name:
+            continue
+        meta = (get_tool_meta(name) or {})
+        domain = meta.get("domain") or classify_tool_domain(name)
+        in_bundles = [b for b, names in bundle_sets.items() if name in names]
+        tools.append({
+            "name": name,
+            "description": spec.get("description", ""),
+            "domain": domain,
+            "bundles": in_bundles,
+        })
+    print(json.dumps({
+        "tools": tools,
+        "bundles": {k: sorted(v) for k, v in bundle_sets.items()},
+        "domains": list(CANONICAL_DOMAINS),
+    }))
+except Exception as e:
+    print(json.dumps({"error": str(e), "tools": [], "bundles": {}, "domains": []}))
+`;
+
+/**
+ * GET /api/mcp-tools
+ *
+ * Runs PYTHON_SCRIPT in a child process (PYTHON_BIN or `python3`, 15 s timeout) and relays
+ * the JSON it prints: `{ tools, bundles, domains }`. Responds 500 with empty collections when
+ * the script reports an error, the process fails, or stdout is not valid JSON.
+ */
+export async function GET(request: NextRequest): Promise<NextResponse> {
+  const guard = forbiddenIfNotLocal(request);
+  if (guard) return guard;
+
+  try {
+    const pythonBin = process.env.PYTHON_BIN || 'python3';
+    const { stdout } = await execFileAsync(
+      pythonBin,
+      ['-c', PYTHON_SCRIPT],
+      {
+        timeout: 15_000,
+        env: {
+          ...process.env,
+          // Relative default — resolved against the server process's working directory.
+          PYTHONPATH: process.env.PYTHONPATH || 'src',
+        },
+      },
+    );
+    const data = JSON.parse(stdout.trim()) as {
+      tools: { name: string; description: string; domain: string; bundles: string[] }[];
+      bundles: Record<string, string[]>;
+      domains: string[];
+      error?: string;
+    };
+    if (data.error) {
+      return NextResponse.json({ error: data.error, tools: [], bundles: {}, domains: [] }, { status: 500 });
+    }
+    return NextResponse.json(data);
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    return NextResponse.json(
+      { error: `Failed to load tool catalog: ${message}`, tools: [], bundles: {}, domains: [] },
+      { status: 500 },
+    );
+  }
+}
diff --git a/web/app/client-providers.tsx b/web/app/client-providers.tsx
index 46d3ec0a..3586437d 100644
--- a/web/app/client-providers.tsx
+++ b/web/app/client-providers.tsx
@@ -5,6 +5,8 @@ import '@/patchConsole';
import { ThemeProvider } from '@/context/ThemeProvider';
import { PipelineProvider } from '@/context/PipelineContext';
import { SessionProvider } from '@/context/SessionContext';
+import BrandingProvider from '@/context/BrandingProvider';
+import { RiskFeaturesProvider } from '@/context/RiskFeaturesContext';
import ChatFab from '@/components/chat/ChatFab';
import PipelineRunnerFab from '@/components/pipeline/PipelineRunnerFab';
import AppLoadingScreen from '@/components/AppLoadingScreen';
@@ -16,15 +18,19 @@ function LoadingFallback() {
export default function ClientProviders({ children }: { children: ReactNode }): ReactNode {
return (
+
- }>
-
- {children}
-
-
-
-
+
+ }>
+
+ {children}
+
+
+
+
+
+
);
}
diff --git a/web/app/docs/integrations/[slug]/page.tsx b/web/app/docs/integrations/[slug]/page.tsx
new file mode 100644
index 00000000..10efaa77
--- /dev/null
+++ b/web/app/docs/integrations/[slug]/page.tsx
@@ -0,0 +1,19 @@
+import { notFound } from 'next/navigation';
+import { isIntegrationGuideSlug } from '@/lib/docs/integrationGuides';
+import DocsIntegrationGuide from '@/views/DocsIntegrationGuide';
+
+export const dynamic = 'force-dynamic';
+
+/** Route page for /docs/integrations/[slug]; calls notFound() for slugs that isIntegrationGuideSlug rejects. */
+export default async function DocsIntegrationRoutePage({
+  params,
+}: {
+  params: Promise<{ slug: string }>;
+}) {
+  const { slug } = await params;
+  if (!isIntegrationGuideSlug(slug)) {
+    notFound();
+  }
+  // NOTE(review): assumes DocsIntegrationGuide takes the validated slug as a `slug` prop — confirm against the view.
+  return <DocsIntegrationGuide slug={slug} />;
+}
diff --git a/web/app/docs/page.tsx b/web/app/docs/page.tsx
new file mode 100644
index 00000000..1cebb50a
--- /dev/null
+++ b/web/app/docs/page.tsx
@@ -0,0 +1,8 @@
+import DocsHome from '@/views/DocsHome';
+
+export const dynamic = 'force-dynamic';
+
+/** Route page for /docs; renders the DocsHome view. */
+export default function DocsRoutePage() {
+  return <DocsHome />;
+}
diff --git a/web/app/globals.css b/web/app/globals.css
index d3dd3c26..e5840bbd 100644
--- a/web/app/globals.css
+++ b/web/app/globals.css
@@ -14,20 +14,21 @@
--app-bg-muted: #f1f5f9;
/* Image placeholders / sunken panels (maps to Tailwind brand-950) */
--app-bg-sunken: #e2e8f0;
- --app-border: rgba(15, 23, 42, 0.14);
- --app-border-muted: rgba(15, 23, 42, 0.09);
+ /* Derived border/track/overlay from editable heading token so custom palette auto-follows */
+ --app-border: color-mix(in srgb, var(--app-text-heading) 14%, transparent);
+ --app-border-muted: color-mix(in srgb, var(--app-text-heading) 9%, transparent);
--app-text: #334155;
--app-text-heading: #0f172a;
--app-text-subtle: #526077;
- --app-input: #ffffff;
- --app-track: rgba(15, 23, 42, 0.08);
- --app-overlay: rgba(15, 23, 42, 0.45);
- --scrollbar-track: #f1f5f9;
+ --app-input: var(--app-bg-elevated);
+ --app-track: color-mix(in srgb, var(--app-text-heading) 8%, transparent);
+ --app-overlay: color-mix(in srgb, var(--app-text-heading) 45%, transparent);
+ --scrollbar-track: var(--app-bg-muted);
--scrollbar-thumb: #cbd5e1;
--chart-grid: rgba(148, 163, 184, 0.4);
--chart-legend: #64748b;
--chart-title: #64748b;
- --code-bg: #f1f5f9;
+ --code-bg: var(--app-bg-muted);
--shadow-elevated:
rgba(15, 23, 42, 0.06) 0 10px 15px -3px, rgba(15, 23, 42, 0.04) 0 4px 6px -2px;
@@ -36,8 +37,9 @@
--bg: var(--app-bg);
--border: var(--app-border);
--accent: #2563eb;
- --accent-bg: rgba(37, 99, 235, 0.1);
- --accent-border: rgba(37, 99, 235, 0.45);
+ /* Derived accent alpha tokens — auto-follow when --accent is customized */
+ --accent-bg: color-mix(in srgb, var(--accent) 10%, transparent);
+ --accent-border: color-mix(in srgb, var(--accent) 45%, transparent);
--social-bg: rgba(241, 245, 249, 0.8);
--shadow: var(--shadow-elevated);
/* Links / interactive blue (readable on light surfaces) */
@@ -54,13 +56,37 @@
--accent-warm: #f97316;
--accent-warm-soft: #fb923c;
--accent-2: #8b5cf6;
- --surface-warm: rgba(249, 115, 22, 0.06);
+ --surface-warm: color-mix(in srgb, var(--accent-warm) 6%, transparent);
/* Elevation scale (depth) */
--elevation-1: 0 1px 2px rgba(15, 23, 42, 0.06);
--elevation-2: 0 6px 16px -4px rgba(15, 23, 42, 0.1);
--elevation-3: 0 16px 36px -8px rgba(15, 23, 42, 0.18);
+ /* Semantic status / severity colors */
+ --color-danger: #ef4444;
+ --color-danger-bg: color-mix(in srgb, #ef4444 10%, transparent);
+ --color-danger-border: color-mix(in srgb, #ef4444 35%, transparent);
+ --color-warning: #f59e0b;
+ --color-warning-bg: color-mix(in srgb, #f59e0b 10%, transparent);
+ --color-warning-border: color-mix(in srgb, #f59e0b 35%, transparent);
+ --color-success: #22c55e;
+ --color-success-bg: color-mix(in srgb, #22c55e 10%, transparent);
+ --color-success-border: color-mix(in srgb, #22c55e 35%, transparent);
+ --color-info: var(--accent);
+ --color-info-bg: var(--accent-bg);
+ --color-info-border: var(--accent-border);
+
+ /* Chat surface tokens (referenced by components, overridable by user) */
+ --chat-header-bg: #030712;
+ --chat-header-fg: rgba(255, 255, 255, 0.9);
+ --chat-header-fg-muted: rgba(255, 255, 255, 0.5);
+ --chat-user-bubble: var(--accent);
+ --chat-user-bubble-fg: #ffffff;
+ --chat-assistant-bubble: var(--app-bg-sunken);
+ --status-online: #22c55e;
+ --status-online-ping: rgba(34, 197, 94, 0.6);
+
/* Motion tokens (theme-independent) */
--ease-out: cubic-bezier(0.16, 1, 0.3, 1);
--ease-spring: cubic-bezier(0.34, 1.56, 0.64, 1);
@@ -75,15 +101,16 @@ html.dark {
--app-bg-elevated: #111827;
--app-bg-muted: #1f2937;
--app-bg-sunken: #06080f;
- --app-border: rgba(255, 255, 255, 0.07);
- --app-border-muted: rgba(255, 255, 255, 0.04);
+ /* Derived border/track — auto-follow custom --app-text-heading on dark; the overlay is mixed from black instead */
+ --app-border: color-mix(in srgb, var(--app-text-heading) 7%, transparent);
+ --app-border-muted: color-mix(in srgb, var(--app-text-heading) 4%, transparent);
--app-text: #cbd5e1;
--app-text-heading: #e2e8f0;
--app-text-subtle: #64748b;
- --app-input: #0b0f19;
- --app-track: rgba(255, 255, 255, 0.06);
- --app-overlay: rgba(0, 0, 0, 0.55);
- --scrollbar-track: #0b0f19;
+ --app-input: var(--app-bg);
+ --app-track: color-mix(in srgb, var(--app-text-heading) 6%, transparent);
+ --app-overlay: color-mix(in srgb, black 55%, transparent);
+ --scrollbar-track: var(--app-bg);
--scrollbar-thumb: #374151;
--chart-grid: rgba(100, 116, 139, 0.45);
--chart-legend: #94a3b8;
@@ -97,8 +124,9 @@ html.dark {
--bg: var(--app-bg);
--border: var(--app-border);
--accent: #60a5fa;
- --accent-bg: rgba(96, 165, 250, 0.12);
- --accent-border: rgba(96, 165, 250, 0.45);
+ /* Derived accent alpha tokens — auto-follow when --accent is customized */
+ --accent-bg: color-mix(in srgb, var(--accent) 12%, transparent);
+ --accent-border: color-mix(in srgb, var(--accent) 45%, transparent);
--social-bg: rgba(47, 48, 58, 0.5);
--shadow: var(--shadow-elevated);
--app-link: #60a5fa;
@@ -114,12 +142,33 @@ html.dark {
--accent-warm: #fb923c;
--accent-warm-soft: #fdba74;
--accent-2: #a78bfa;
- --surface-warm: rgba(251, 146, 60, 0.08);
+ --surface-warm: color-mix(in srgb, var(--accent-warm) 8%, transparent);
/* Elevation scale — deeper shadows on dark */
--elevation-1: 0 1px 2px rgba(0, 0, 0, 0.4);
--elevation-2: 0 6px 16px -4px rgba(0, 0, 0, 0.45);
--elevation-3: 0 16px 36px -8px rgba(0, 0, 0, 0.55);
+
+ /* Semantic status / severity colors (dark) */
+ --color-danger: #f87171;
+ --color-danger-bg: color-mix(in srgb, #f87171 12%, transparent);
+ --color-danger-border: color-mix(in srgb, #f87171 35%, transparent);
+ --color-warning: #fbbf24;
+ --color-warning-bg: color-mix(in srgb, #fbbf24 12%, transparent);
+ --color-warning-border: color-mix(in srgb, #fbbf24 35%, transparent);
+ --color-success: #4ade80;
+ --color-success-bg: color-mix(in srgb, #4ade80 12%, transparent);
+ --color-success-border: color-mix(in srgb, #4ade80 35%, transparent);
+
+ /* Chat surface tokens (dark) */
+ --chat-header-bg: #030712;
+ --chat-header-fg: rgba(255, 255, 255, 0.9);
+ --chat-header-fg-muted: rgba(255, 255, 255, 0.5);
+ --chat-user-bubble: var(--accent);
+ --chat-user-bubble-fg: #ffffff;
+ --chat-assistant-bubble: var(--app-bg-sunken);
+ --status-online: #4ade80;
+ --status-online-ping: rgba(74, 222, 128, 0.6);
}
@theme {
@@ -146,6 +195,15 @@ html.dark {
--color-accent-warm-soft: var(--accent-warm-soft);
--color-accent-2: var(--accent-2);
+ --color-danger: var(--color-danger);
+ --color-warning: var(--color-warning);
+ --color-success: var(--color-success);
+ --color-status-online: var(--status-online);
+
+ --shadow-elevation-1: var(--elevation-1);
+ --shadow-elevation-2: var(--elevation-2);
+ --shadow-elevation-3: var(--elevation-3);
+
--radius-sm: 0.5rem;
--radius-card: 1rem;
--radius-lg: 1.25rem;
@@ -157,7 +215,8 @@ html.dark {
}
:root {
- font: 18px/145% var(--font-dm-sans), var(--sans, system-ui, "Segoe UI", Roboto, sans-serif);
+ --font-size-base: 18px;
+ font: var(--font-size-base)/145% var(--font-dm-sans), var(--sans, system-ui, "Segoe UI", Roboto, sans-serif);
letter-spacing: 0.18px;
color: var(--text);
background: var(--bg);
@@ -241,15 +300,9 @@ html:has(.landing-grid-bg) {
}
.tab-active {
- background-color: rgba(37, 99, 235, 0.12);
- border-color: rgba(37, 99, 235, 0.35);
- color: #2563eb;
-}
-
-html.dark .tab-active {
- background-color: rgba(59, 130, 246, 0.1);
- border-color: rgba(59, 130, 246, 0.25);
- color: #60a5fa;
+ background-color: var(--accent-bg);
+ border-color: var(--accent-border);
+ color: var(--accent);
}
h1,
@@ -784,6 +837,222 @@ select:focus-visible {
min-height: 0;
}
+/* Hero lifecycle loop — pentagon diagram with animated flow arrows */
+.landing-lifecycle {
+ width: min(100%, 560px);
+ max-height: 100%;
+ margin: 0 auto;
+ aspect-ratio: 560 / 448;
+}
+
+.landing-lifecycle-backdrop {
+ background:
+ radial-gradient(ellipse 72% 68% at 50% 44%, rgba(37, 99, 235, 0.22), transparent 68%),
+ radial-gradient(ellipse 48% 42% at 18% 78%, rgba(139, 92, 246, 0.14), transparent 70%),
+ radial-gradient(ellipse 42% 38% at 88% 22%, rgba(34, 197, 94, 0.12), transparent 72%);
+}
+
+.landing-lifecycle-orbit {
+ stroke: rgba(96, 165, 250, 0.14);
+ stroke-width: 1.5;
+ stroke-dasharray: 4 8;
+}
+
+.landing-lifecycle-hub-ring {
+ background: linear-gradient(135deg, rgba(59, 130, 246, 0.55), rgba(139, 92, 246, 0.45), rgba(34, 197, 94, 0.4));
+ box-shadow:
+ 0 0 28px rgba(59, 130, 246, 0.35),
+ 0 0 48px rgba(139, 92, 246, 0.15);
+}
+
+.landing-lifecycle-hub-label {
+ background: linear-gradient(90deg, #93c5fd, #c4b5fd, #86efac);
+ -webkit-background-clip: text;
+ background-clip: text;
+ color: transparent;
+}
+
+.landing-lifecycle-node {
+ border: 1px solid transparent;
+ background-clip: padding-box;
+ box-shadow: 0 8px 24px rgba(0, 0, 0, 0.28);
+ transition:
+ transform 0.2s var(--ease-out),
+ box-shadow 0.2s var(--ease-out);
+}
+
+.landing-lifecycle-node:hover {
+ transform: translateY(-2px);
+}
+
+.landing-lifecycle-node[data-accent='audit'] {
+ background: linear-gradient(160deg, rgba(37, 99, 235, 0.28), rgba(15, 23, 42, 0.92)) padding-box; /* explicit padding-box: the shorthand would otherwise reset the base rule's background-clip, painting the gradient under the translucent border (same for every accent below) */
+ border-color: rgba(96, 165, 250, 0.55);
+ box-shadow:
+ 0 8px 28px rgba(37, 99, 235, 0.22),
+ inset 0 1px 0 rgba(147, 197, 253, 0.12);
+}
+
+.landing-lifecycle-node[data-accent='report'] {
+ background: linear-gradient(160deg, rgba(124, 58, 237, 0.26), rgba(15, 23, 42, 0.92)) padding-box;
+ border-color: rgba(167, 139, 250, 0.55);
+ box-shadow:
+ 0 8px 28px rgba(124, 58, 237, 0.2),
+ inset 0 1px 0 rgba(196, 181, 253, 0.12);
+}
+
+.landing-lifecycle-node[data-accent='mcp'] {
+ background: linear-gradient(160deg, rgba(6, 182, 212, 0.26), rgba(15, 23, 42, 0.92)) padding-box;
+ border-color: rgba(34, 211, 238, 0.55);
+ box-shadow:
+ 0 8px 28px rgba(6, 182, 212, 0.2),
+ inset 0 1px 0 rgba(165, 243, 252, 0.12);
+}
+
+.landing-lifecycle-node[data-accent='fix'] {
+ background: linear-gradient(160deg, rgba(249, 115, 22, 0.24), rgba(15, 23, 42, 0.92)) padding-box;
+ border-color: rgba(251, 146, 60, 0.55);
+ box-shadow:
+ 0 8px 28px rgba(249, 115, 22, 0.18),
+ inset 0 1px 0 rgba(253, 186, 116, 0.12);
+}
+
+.landing-lifecycle-node[data-accent='review'] {
+ background: linear-gradient(160deg, rgba(34, 197, 94, 0.26), rgba(15, 23, 42, 0.92)) padding-box;
+ border-color: rgba(74, 222, 128, 0.55);
+ box-shadow:
+ 0 8px 28px rgba(34, 197, 94, 0.2),
+ inset 0 1px 0 rgba(134, 239, 172, 0.12);
+}
+
+.landing-lifecycle-node[data-accent='audit'] .landing-lifecycle-node-icon {
+ color: #93c5fd;
+ background: rgba(59, 130, 246, 0.25);
+ border: 1px solid rgba(96, 165, 250, 0.45);
+}
+
+.landing-lifecycle-node[data-accent='report'] .landing-lifecycle-node-icon {
+ color: #c4b5fd;
+ background: rgba(124, 58, 237, 0.25);
+ border: 1px solid rgba(167, 139, 250, 0.45);
+}
+
+.landing-lifecycle-node[data-accent='mcp'] .landing-lifecycle-node-icon {
+ color: #67e8f9;
+ background: rgba(6, 182, 212, 0.25);
+ border: 1px solid rgba(34, 211, 238, 0.45);
+}
+
+.landing-lifecycle-node[data-accent='fix'] .landing-lifecycle-node-icon {
+ color: #fdba74;
+ background: rgba(249, 115, 22, 0.25);
+ border: 1px solid rgba(251, 146, 60, 0.45);
+}
+
+.landing-lifecycle-node[data-accent='review'] .landing-lifecycle-node-icon {
+ color: #86efac;
+ background: rgba(34, 197, 94, 0.25);
+ border: 1px solid rgba(74, 222, 128, 0.45);
+}
+
+.landing-lifecycle-node[data-accent='audit'] .landing-lifecycle-step {
+ color: #bfdbfe;
+ background: rgba(59, 130, 246, 0.35);
+ border: 1px solid rgba(96, 165, 250, 0.4);
+}
+
+.landing-lifecycle-node[data-accent='report'] .landing-lifecycle-step {
+ color: #ddd6fe;
+ background: rgba(124, 58, 237, 0.35);
+ border: 1px solid rgba(167, 139, 250, 0.4);
+}
+
+.landing-lifecycle-node[data-accent='mcp'] .landing-lifecycle-step {
+ color: #a5f3fc;
+ background: rgba(6, 182, 212, 0.35);
+ border: 1px solid rgba(34, 211, 238, 0.4);
+}
+
+.landing-lifecycle-node[data-accent='fix'] .landing-lifecycle-step {
+ color: #fed7aa;
+ background: rgba(249, 115, 22, 0.35);
+ border: 1px solid rgba(251, 146, 60, 0.4);
+}
+
+.landing-lifecycle-node[data-accent='review'] .landing-lifecycle-step {
+ color: #bbf7d0;
+ background: rgba(34, 197, 94, 0.35);
+ border: 1px solid rgba(74, 222, 128, 0.4);
+}
+
+.landing-lifecycle-arrow {
+ stroke-width: 2.5;
+ stroke-dasharray: 7 6;
+ animation: lifecycleFlow 2s linear infinite;
+}
+
+.landing-lifecycle-arrow--audit {
+ stroke: #60a5fa;
+}
+
+.landing-lifecycle-arrow--report {
+ stroke: #a78bfa;
+}
+
+.landing-lifecycle-arrow--mcp {
+ stroke: #22d3ee;
+}
+
+.landing-lifecycle-arrow--fix {
+ stroke: #fb923c;
+}
+
+.landing-lifecycle-arrow--review {
+ stroke: #4ade80;
+}
+
+.landing-lifecycle-arrow--closing {
+ stroke-width: 3;
+ stroke: #4ade80;
+ filter: url(#lifecycle-glow);
+}
+
+.landing-lifecycle-marker--audit {
+ fill: #60a5fa;
+}
+
+.landing-lifecycle-marker--report {
+ fill: #a78bfa;
+}
+
+.landing-lifecycle-marker--mcp {
+ fill: #22d3ee;
+}
+
+.landing-lifecycle-marker--fix {
+ fill: #fb923c;
+}
+
+.landing-lifecycle-marker--review {
+ fill: #4ade80;
+}
+
+@keyframes lifecycleFlow {
+ from {
+ stroke-dashoffset: 0;
+ }
+ to {
+ stroke-dashoffset: -26; /* two full periods of the 7 + 6 dash pattern on .landing-lifecycle-arrow, so each iteration ends in phase with its start */
+ }
+}
+
+@media (prefers-reduced-motion: reduce) {
+ .landing-lifecycle-arrow {
+ animation: none; /* stop the lifecycleFlow dash animation */
+ stroke-dasharray: none; /* draw the arrows as solid strokes instead of static dashes */
+ }
+}
+
.landing-footer-snap {
flex: 0 0 100%;
width: 100%;
diff --git a/web/app/layout.tsx b/web/app/layout.tsx
index 9c4b8ad7..e66a9f2e 100644
--- a/web/app/layout.tsx
+++ b/web/app/layout.tsx
@@ -20,7 +20,39 @@ export const metadata = {
},
};
-const themeInit = `(function(){try{var v=localStorage.getItem('wp-theme');var d=window.matchMedia('(prefers-color-scheme: dark)').matches;var dark=v==='dark'?true:v==='light'?false:d;if(dark)document.documentElement.classList.add('dark');else document.documentElement.classList.remove('dark');document.documentElement.style.colorScheme=dark?'dark':'light';}catch(e){}})()`;
+const themeInit = `(function(){try{
+var v=localStorage.getItem('wp-theme');
+var d=window.matchMedia('(prefers-color-scheme: dark)').matches;
+var dark=v==='dark'?true:v==='light'?false:d;
+if(dark)document.documentElement.classList.add('dark');
+else document.documentElement.classList.remove('dark');
+document.documentElement.style.colorScheme=dark?'dark':'light';
+try{var raw=localStorage.getItem('wp-theme-custom:v1');
+if(raw){
+ var ct=JSON.parse(raw);
+ var map=ct&&(dark?ct.dark:ct.light);
+ if(map&&typeof map==='object'){
+ var el=document.documentElement;
+ Object.keys(map).forEach(function(k){if(map[k])el.style.setProperty(k,map[k]);});
+ }
+}}catch(e){}
+var rp=localStorage.getItem('wp-ui-prefs:v1');
+if(rp){
+ var up=JSON.parse(rp);
+ var rv=up.radius;
+ var dv=up.density;
+ var av=up.animations;
+ var rl=document.documentElement;
+ var RVARS={'sharp':{'--radius-sm':'0.125rem','--radius-card':'0.25rem','--radius-lg':'0.375rem','--radius-xl':'0.5rem'},'rounded':{'--radius-sm':'0.75rem','--radius-card':'1.25rem','--radius-lg':'1.75rem','--radius-xl':'2rem'},'pill':{'--radius-sm':'999px','--radius-card':'1.75rem','--radius-lg':'2.5rem','--radius-xl':'3rem'}};
+ var DVARS={'compact':{'--spacing-page-x':'0.75rem','--spacing-page-y':'0.75rem','--spacing-card':'0.75rem'},'spacious':{'--spacing-page-x':'2.5rem','--spacing-page-y':'2.5rem','--spacing-card':'2rem'}};
+ if(RVARS[rv]){Object.keys(RVARS[rv]).forEach(function(k){rl.style.setProperty(k,RVARS[rv][k]);});}
+ if(DVARS[dv]){Object.keys(DVARS[dv]).forEach(function(k){rl.style.setProperty(k,DVARS[dv][k]);});}
+ if(av===false){rl.style.setProperty('--dur-fast','0ms');rl.style.setProperty('--dur-base','1ms');rl.style.setProperty('--dur-slow','1ms');}
+ var fv=up.fontSize;
+ if(fv==='small')rl.style.setProperty('--font-size-base','15px');
+ else if(fv==='large')rl.style.setProperty('--font-size-base','20px');
+}
+}catch(e){}})()`.replace(/\n/g, ''); // Newlines are stripped, so every statement inside must end with ';' or '}' and must never use '//' comments. The custom-theme section has its own try/catch so a malformed or null 'wp-theme-custom:v1' value cannot skip the 'wp-ui-prefs:v1' section.
export default function RootLayout({ children }: { children: ReactNode }): ReactNode {
return (
@@ -29,7 +61,7 @@ export default function RootLayout({ children }: { children: ReactNode }): React
{children}
diff --git a/web/app/risk-settings/page.tsx b/web/app/risk-settings/page.tsx
new file mode 100644
index 00000000..c35c066f
--- /dev/null
+++ b/web/app/risk-settings/page.tsx
@@ -0,0 +1,9 @@
+import RiskSettingsPage from '@/views/RiskSettings';
+
+export const metadata = {
+ title: 'Risk Settings',
+};
+
+export default function Page() { // renders the imported RiskSettings view (the JSX element had been stripped, leaving a bare `return ;`)
+ return <RiskSettingsPage />;
+}
diff --git a/web/app/settings/page.tsx b/web/app/settings/page.tsx
new file mode 100644
index 00000000..18c9700a
--- /dev/null
+++ b/web/app/settings/page.tsx
@@ -0,0 +1,7 @@
+import SettingsPage from '@/views/Settings';
+
+export const dynamic = 'force-dynamic';
+
+export default function SettingsRoutePage() { // renders the imported Settings view (the JSX element had been stripped, leaving a bare `return ;`)
+ return <SettingsPage />;
+}
diff --git a/web/package-lock.json b/web/package-lock.json
index f3f27939..04b7eef9 100644
--- a/web/package-lock.json
+++ b/web/package-lock.json
@@ -26,6 +26,7 @@
"@tiptap/starter-kit": "^3.26.1",
"3d-force-graph": "^1.79.1",
"chart.js": "^4.5.1",
+ "d3": "^7.9.0",
"lucide-react": "^0.577.0",
"next": "15.5.14",
"pg": "^8.21.0",
@@ -41,6 +42,7 @@
"devDependencies": {
"@eslint/eslintrc": "^3",
"@tailwindcss/postcss": "^4",
+ "@types/d3": "^7.4.3",
"@types/node": "^25.9.1",
"@types/pg": "^8.20.0",
"@types/react": "^19.2.16",
@@ -2776,6 +2778,290 @@
"assertion-error": "^2.0.1"
}
},
+ "node_modules/@types/d3": {
+ "version": "7.4.3",
+ "resolved": "https://registry.npmjs.org/@types/d3/-/d3-7.4.3.tgz",
+ "integrity": "sha512-lZXZ9ckh5R8uiFVt8ogUNf+pIrK4EsWrx2Np75WvF/eTpJ0FMHNhjXk8CKEx/+gpHbNQyJWehbFaTvqmHWB3ww==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-array": "*",
+ "@types/d3-axis": "*",
+ "@types/d3-brush": "*",
+ "@types/d3-chord": "*",
+ "@types/d3-color": "*",
+ "@types/d3-contour": "*",
+ "@types/d3-delaunay": "*",
+ "@types/d3-dispatch": "*",
+ "@types/d3-drag": "*",
+ "@types/d3-dsv": "*",
+ "@types/d3-ease": "*",
+ "@types/d3-fetch": "*",
+ "@types/d3-force": "*",
+ "@types/d3-format": "*",
+ "@types/d3-geo": "*",
+ "@types/d3-hierarchy": "*",
+ "@types/d3-interpolate": "*",
+ "@types/d3-path": "*",
+ "@types/d3-polygon": "*",
+ "@types/d3-quadtree": "*",
+ "@types/d3-random": "*",
+ "@types/d3-scale": "*",
+ "@types/d3-scale-chromatic": "*",
+ "@types/d3-selection": "*",
+ "@types/d3-shape": "*",
+ "@types/d3-time": "*",
+ "@types/d3-time-format": "*",
+ "@types/d3-timer": "*",
+ "@types/d3-transition": "*",
+ "@types/d3-zoom": "*"
+ }
+ },
+ "node_modules/@types/d3-array": {
+ "version": "3.2.2",
+ "resolved": "https://registry.npmjs.org/@types/d3-array/-/d3-array-3.2.2.tgz",
+ "integrity": "sha512-hOLWVbm7uRza0BYXpIIW5pxfrKe0W+D5lrFiAEYR+pb6w3N2SwSMaJbXdUfSEv+dT4MfHBLtn5js0LAWaO6otw==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-axis": {
+ "version": "3.0.6",
+ "resolved": "https://registry.npmjs.org/@types/d3-axis/-/d3-axis-3.0.6.tgz",
+ "integrity": "sha512-pYeijfZuBd87T0hGn0FO1vQ/cgLk6E1ALJjfkC0oJ8cbwkZl3TpgS8bVBLZN+2jjGgg38epgxb2zmoGtSfvgMw==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-selection": "*"
+ }
+ },
+ "node_modules/@types/d3-brush": {
+ "version": "3.0.6",
+ "resolved": "https://registry.npmjs.org/@types/d3-brush/-/d3-brush-3.0.6.tgz",
+ "integrity": "sha512-nH60IZNNxEcrh6L1ZSMNA28rj27ut/2ZmI3r96Zd+1jrZD++zD3LsMIjWlvg4AYrHn/Pqz4CF3veCxGjtbqt7A==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-selection": "*"
+ }
+ },
+ "node_modules/@types/d3-chord": {
+ "version": "3.0.6",
+ "resolved": "https://registry.npmjs.org/@types/d3-chord/-/d3-chord-3.0.6.tgz",
+ "integrity": "sha512-LFYWWd8nwfwEmTZG9PfQxd17HbNPksHBiJHaKuY1XeqscXacsS2tyoo6OdRsjf+NQYeB6XrNL3a25E3gH69lcg==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-color": {
+ "version": "3.1.3",
+ "resolved": "https://registry.npmjs.org/@types/d3-color/-/d3-color-3.1.3.tgz",
+ "integrity": "sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-contour": {
+ "version": "3.0.6",
+ "resolved": "https://registry.npmjs.org/@types/d3-contour/-/d3-contour-3.0.6.tgz",
+ "integrity": "sha512-BjzLgXGnCWjUSYGfH1cpdo41/hgdWETu4YxpezoztawmqsvCeep+8QGfiY6YbDvfgHz/DkjeIkkZVJavB4a3rg==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-array": "*",
+ "@types/geojson": "*"
+ }
+ },
+ "node_modules/@types/d3-delaunay": {
+ "version": "6.0.4",
+ "resolved": "https://registry.npmjs.org/@types/d3-delaunay/-/d3-delaunay-6.0.4.tgz",
+ "integrity": "sha512-ZMaSKu4THYCU6sV64Lhg6qjf1orxBthaC161plr5KuPHo3CNm8DTHiLw/5Eq2b6TsNP0W0iJrUOFscY6Q450Hw==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-dispatch": {
+ "version": "3.0.7",
+ "resolved": "https://registry.npmjs.org/@types/d3-dispatch/-/d3-dispatch-3.0.7.tgz",
+ "integrity": "sha512-5o9OIAdKkhN1QItV2oqaE5KMIiXAvDWBDPrD85e58Qlz1c1kI/J0NcqbEG88CoTwJrYe7ntUCVfeUl2UJKbWgA==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-drag": {
+ "version": "3.0.7",
+ "resolved": "https://registry.npmjs.org/@types/d3-drag/-/d3-drag-3.0.7.tgz",
+ "integrity": "sha512-HE3jVKlzU9AaMazNufooRJ5ZpWmLIoc90A37WU2JMmeq28w1FQqCZswHZ3xR+SuxYftzHq6WU6KJHvqxKzTxxQ==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-selection": "*"
+ }
+ },
+ "node_modules/@types/d3-dsv": {
+ "version": "3.0.7",
+ "resolved": "https://registry.npmjs.org/@types/d3-dsv/-/d3-dsv-3.0.7.tgz",
+ "integrity": "sha512-n6QBF9/+XASqcKK6waudgL0pf/S5XHPPI8APyMLLUHd8NqouBGLsU8MgtO7NINGtPBtk9Kko/W4ea0oAspwh9g==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-ease": {
+ "version": "3.0.2",
+ "resolved": "https://registry.npmjs.org/@types/d3-ease/-/d3-ease-3.0.2.tgz",
+ "integrity": "sha512-NcV1JjO5oDzoK26oMzbILE6HW7uVXOHLQvHshBUW4UMdZGfiY6v5BeQwh9a9tCzv+CeefZQHJt5SRgK154RtiA==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-fetch": {
+ "version": "3.0.7",
+ "resolved": "https://registry.npmjs.org/@types/d3-fetch/-/d3-fetch-3.0.7.tgz",
+ "integrity": "sha512-fTAfNmxSb9SOWNB9IoG5c8Hg6R+AzUHDRlsXsDZsNp6sxAEOP0tkP3gKkNSO/qmHPoBFTxNrjDprVHDQDvo5aA==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-dsv": "*"
+ }
+ },
+ "node_modules/@types/d3-force": {
+ "version": "3.0.10",
+ "resolved": "https://registry.npmjs.org/@types/d3-force/-/d3-force-3.0.10.tgz",
+ "integrity": "sha512-ZYeSaCF3p73RdOKcjj+swRlZfnYpK1EbaDiYICEEp5Q6sUiqFaFQ9qgoshp5CzIyyb/yD09kD9o2zEltCexlgw==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-format": {
+ "version": "3.0.4",
+ "resolved": "https://registry.npmjs.org/@types/d3-format/-/d3-format-3.0.4.tgz",
+ "integrity": "sha512-fALi2aI6shfg7vM5KiR1wNJnZ7r6UuggVqtDA+xiEdPZQwy/trcQaHnwShLuLdta2rTymCNpxYTiMZX/e09F4g==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-geo": {
+ "version": "3.1.0",
+ "resolved": "https://registry.npmjs.org/@types/d3-geo/-/d3-geo-3.1.0.tgz",
+ "integrity": "sha512-856sckF0oP/diXtS4jNsiQw/UuK5fQG8l/a9VVLeSouf1/PPbBE1i1W852zVwKwYCBkFJJB7nCFTbk6UMEXBOQ==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/geojson": "*"
+ }
+ },
+ "node_modules/@types/d3-hierarchy": {
+ "version": "3.1.7",
+ "resolved": "https://registry.npmjs.org/@types/d3-hierarchy/-/d3-hierarchy-3.1.7.tgz",
+ "integrity": "sha512-tJFtNoYBtRtkNysX1Xq4sxtjK8YgoWUNpIiUee0/jHGRwqvzYxkq0hGVbbOGSz+JgFxxRu4K8nb3YpG3CMARtg==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-interpolate": {
+ "version": "3.0.4",
+ "resolved": "https://registry.npmjs.org/@types/d3-interpolate/-/d3-interpolate-3.0.4.tgz",
+ "integrity": "sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-color": "*"
+ }
+ },
+ "node_modules/@types/d3-path": {
+ "version": "3.1.1",
+ "resolved": "https://registry.npmjs.org/@types/d3-path/-/d3-path-3.1.1.tgz",
+ "integrity": "sha512-VMZBYyQvbGmWyWVea0EHs/BwLgxc+MKi1zLDCONksozI4YJMcTt8ZEuIR4Sb1MMTE8MMW49v0IwI5+b7RmfWlg==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-polygon": {
+ "version": "3.0.2",
+ "resolved": "https://registry.npmjs.org/@types/d3-polygon/-/d3-polygon-3.0.2.tgz",
+ "integrity": "sha512-ZuWOtMaHCkN9xoeEMr1ubW2nGWsp4nIql+OPQRstu4ypeZ+zk3YKqQT0CXVe/PYqrKpZAi+J9mTs05TKwjXSRA==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-quadtree": {
+ "version": "3.0.6",
+ "resolved": "https://registry.npmjs.org/@types/d3-quadtree/-/d3-quadtree-3.0.6.tgz",
+ "integrity": "sha512-oUzyO1/Zm6rsxKRHA1vH0NEDG58HrT5icx/azi9MF1TWdtttWl0UIUsjEQBBh+SIkrpd21ZjEv7ptxWys1ncsg==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-random": {
+ "version": "3.0.3",
+ "resolved": "https://registry.npmjs.org/@types/d3-random/-/d3-random-3.0.3.tgz",
+ "integrity": "sha512-Imagg1vJ3y76Y2ea0871wpabqp613+8/r0mCLEBfdtqC7xMSfj9idOnmBYyMoULfHePJyxMAw3nWhJxzc+LFwQ==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-scale": {
+ "version": "4.0.9",
+ "resolved": "https://registry.npmjs.org/@types/d3-scale/-/d3-scale-4.0.9.tgz",
+ "integrity": "sha512-dLmtwB8zkAeO/juAMfnV+sItKjlsw2lKdZVVy6LRr0cBmegxSABiLEpGVmSJJ8O08i4+sGR6qQtb6WtuwJdvVw==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-time": "*"
+ }
+ },
+ "node_modules/@types/d3-scale-chromatic": {
+ "version": "3.1.0",
+ "resolved": "https://registry.npmjs.org/@types/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz",
+ "integrity": "sha512-iWMJgwkK7yTRmWqRB5plb1kadXyQ5Sj8V/zYlFGMUBbIPKQScw+Dku9cAAMgJG+z5GYDoMjWGLVOvjghDEFnKQ==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-selection": {
+ "version": "3.0.11",
+ "resolved": "https://registry.npmjs.org/@types/d3-selection/-/d3-selection-3.0.11.tgz",
+ "integrity": "sha512-bhAXu23DJWsrI45xafYpkQ4NtcKMwWnAC/vKrd2l+nxMFuvOT3XMYTIj2opv8vq8AO5Yh7Qac/nSeP/3zjTK0w==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-shape": {
+ "version": "3.1.8",
+ "resolved": "https://registry.npmjs.org/@types/d3-shape/-/d3-shape-3.1.8.tgz",
+ "integrity": "sha512-lae0iWfcDeR7qt7rA88BNiqdvPS5pFVPpo5OfjElwNaT2yyekbM0C9vK+yqBqEmHr6lDkRnYNoTBYlAgJa7a4w==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-path": "*"
+ }
+ },
+ "node_modules/@types/d3-time": {
+ "version": "3.0.4",
+ "resolved": "https://registry.npmjs.org/@types/d3-time/-/d3-time-3.0.4.tgz",
+ "integrity": "sha512-yuzZug1nkAAaBlBBikKZTgzCeA+k1uy4ZFwWANOfKw5z5LRhV0gNA7gNkKm7HoK+HRN0wX3EkxGk0fpbWhmB7g==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-time-format": {
+ "version": "4.0.3",
+ "resolved": "https://registry.npmjs.org/@types/d3-time-format/-/d3-time-format-4.0.3.tgz",
+ "integrity": "sha512-5xg9rC+wWL8kdDj153qZcsJ0FWiFt0J5RB6LYUNZjwSnesfblqrI/bJ1wBdJ8OQfncgbJG5+2F+qfqnqyzYxyg==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-timer": {
+ "version": "3.0.2",
+ "resolved": "https://registry.npmjs.org/@types/d3-timer/-/d3-timer-3.0.2.tgz",
+ "integrity": "sha512-Ps3T8E8dZDam6fUyNiMkekK3XUsaUEik+idO9/YjPtfj2qruF8tFBXS7XhtE4iIXBLxhmLjP3SXpLhVf21I9Lw==",
+ "dev": true,
+ "license": "MIT"
+ },
+ "node_modules/@types/d3-transition": {
+ "version": "3.0.9",
+ "resolved": "https://registry.npmjs.org/@types/d3-transition/-/d3-transition-3.0.9.tgz",
+ "integrity": "sha512-uZS5shfxzO3rGlu0cC3bjmMFKsXv+SmZZcgp0KD22ts4uGXp5EVYGzu/0YdwZeKmddhcAccYtREJKkPfXkZuCg==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-selection": "*"
+ }
+ },
+ "node_modules/@types/d3-zoom": {
+ "version": "3.0.8",
+ "resolved": "https://registry.npmjs.org/@types/d3-zoom/-/d3-zoom-3.0.8.tgz",
+ "integrity": "sha512-iqMC4/YlFCSlO8+2Ii1GGGliCAY4XdeG748w5vQUbevlbDu0zSjH/+jojorQVBK/se0j6DUFNPBGSqD3YWYnDw==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "@types/d3-interpolate": "*",
+ "@types/d3-selection": "*"
+ }
+ },
"node_modules/@types/debug": {
"version": "4.1.13",
"resolved": "https://registry.npmjs.org/@types/debug/-/debug-4.1.13.tgz",
@@ -2807,6 +3093,13 @@
"@types/estree": "*"
}
},
+ "node_modules/@types/geojson": {
+ "version": "7946.0.16",
+ "resolved": "https://registry.npmjs.org/@types/geojson/-/geojson-7946.0.16.tgz",
+ "integrity": "sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==",
+ "dev": true,
+ "license": "MIT"
+ },
"node_modules/@types/hast": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
@@ -4178,6 +4471,15 @@
"url": "https://github.com/sponsors/wooorm"
}
},
+ "node_modules/commander": {
+ "version": "7.2.0",
+ "resolved": "https://registry.npmjs.org/commander/-/commander-7.2.0.tgz",
+ "integrity": "sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 10"
+ }
+ },
"node_modules/concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
@@ -4206,6 +4508,47 @@
"integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==",
"license": "MIT"
},
+ "node_modules/d3": {
+ "version": "7.9.0",
+ "resolved": "https://registry.npmjs.org/d3/-/d3-7.9.0.tgz",
+ "integrity": "sha512-e1U46jVP+w7Iut8Jt8ri1YsPOvFpg46k+K8TpCb0P+zjCkjkPnV7WzfDJzMHy1LnA+wj5pLT1wjO901gLXeEhA==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-array": "3",
+ "d3-axis": "3",
+ "d3-brush": "3",
+ "d3-chord": "3",
+ "d3-color": "3",
+ "d3-contour": "4",
+ "d3-delaunay": "6",
+ "d3-dispatch": "3",
+ "d3-drag": "3",
+ "d3-dsv": "3",
+ "d3-ease": "3",
+ "d3-fetch": "3",
+ "d3-force": "3",
+ "d3-format": "3",
+ "d3-geo": "3",
+ "d3-hierarchy": "3",
+ "d3-interpolate": "3",
+ "d3-path": "3",
+ "d3-polygon": "3",
+ "d3-quadtree": "3",
+ "d3-random": "3",
+ "d3-scale": "4",
+ "d3-scale-chromatic": "3",
+ "d3-selection": "3",
+ "d3-shape": "3",
+ "d3-time": "3",
+ "d3-time-format": "4",
+ "d3-timer": "3",
+ "d3-transition": "3",
+ "d3-zoom": "3"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/d3-array": {
"version": "3.2.4",
"resolved": "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz",
@@ -4218,12 +4561,49 @@
"node": ">=12"
}
},
+ "node_modules/d3-axis": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/d3-axis/-/d3-axis-3.0.0.tgz",
+ "integrity": "sha512-IH5tgjV4jE/GhHkRV0HiVYPDtvfjHQlQfJHs0usq7M30XcSBvOotpmH1IgkcXsO/5gEQZD43B//fc7SRT5S+xw==",
+ "license": "ISC",
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/d3-binarytree": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/d3-binarytree/-/d3-binarytree-1.0.2.tgz",
"integrity": "sha512-cElUNH+sHu95L04m92pG73t2MEJXKu+GeKUN1TJkFsu93E5W8E9Sc3kHEGJKgenGvj19m6upSn2EunvMgMD2Yw==",
"license": "MIT"
},
+ "node_modules/d3-brush": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/d3-brush/-/d3-brush-3.0.0.tgz",
+ "integrity": "sha512-ALnjWlVYkXsVIGlOsuWH1+3udkYFI48Ljihfnh8FZPF2QS9o+PzGLBslO0PjzVoHLZ2KCVgAM8NVkXPJB2aNnQ==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-dispatch": "1 - 3",
+ "d3-drag": "2 - 3",
+ "d3-interpolate": "1 - 3",
+ "d3-selection": "3",
+ "d3-transition": "3"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
+ "node_modules/d3-chord": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/d3-chord/-/d3-chord-3.0.1.tgz",
+ "integrity": "sha512-VE5S6TNa+j8msksl7HwjxMHDM2yNK3XCkusIlpX5kwauBfXuyLAtNg9jCp/iHH61tgI4sb6R/EIMWCqEIdjT/g==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-path": "1 - 3"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/d3-color": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz",
@@ -4233,6 +4613,30 @@
"node": ">=12"
}
},
+ "node_modules/d3-contour": {
+ "version": "4.0.2",
+ "resolved": "https://registry.npmjs.org/d3-contour/-/d3-contour-4.0.2.tgz",
+ "integrity": "sha512-4EzFTRIikzs47RGmdxbeUvLWtGedDUNkTcmzoeyg4sP/dvCexO47AaQL7VKy/gul85TOxw+IBgA8US2xwbToNA==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-array": "^3.2.0"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
+ "node_modules/d3-delaunay": {
+ "version": "6.0.4",
+ "resolved": "https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.4.tgz",
+ "integrity": "sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A==",
+ "license": "ISC",
+ "dependencies": {
+ "delaunator": "5"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/d3-dispatch": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/d3-dispatch/-/d3-dispatch-3.0.1.tgz",
@@ -4242,6 +4646,79 @@
"node": ">=12"
}
},
+ "node_modules/d3-drag": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/d3-drag/-/d3-drag-3.0.0.tgz",
+ "integrity": "sha512-pWbUJLdETVA8lQNJecMxoXfH6x+mO2UQo8rSmZ+QqxcbyA3hfeprFgIT//HW2nlHChWeIIMwS2Fq+gEARkhTkg==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-dispatch": "1 - 3",
+ "d3-selection": "3"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
+ "node_modules/d3-dsv": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/d3-dsv/-/d3-dsv-3.0.1.tgz",
+ "integrity": "sha512-UG6OvdI5afDIFP9w4G0mNq50dSOsXHJaRE8arAS5o9ApWnIElp8GZw1Dun8vP8OyHOZ/QJUKUJwxiiCCnUwm+Q==",
+ "license": "ISC",
+ "dependencies": {
+ "commander": "7",
+ "iconv-lite": "0.6",
+ "rw": "1"
+ },
+ "bin": {
+ "csv2json": "bin/dsv2json.js",
+ "csv2tsv": "bin/dsv2dsv.js",
+ "dsv2dsv": "bin/dsv2dsv.js",
+ "dsv2json": "bin/dsv2json.js",
+ "json2csv": "bin/json2dsv.js",
+ "json2dsv": "bin/json2dsv.js",
+ "json2tsv": "bin/json2dsv.js",
+ "tsv2csv": "bin/dsv2dsv.js",
+ "tsv2json": "bin/dsv2json.js"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
+ "node_modules/d3-ease": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/d3-ease/-/d3-ease-3.0.1.tgz",
+ "integrity": "sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w==",
+ "license": "BSD-3-Clause",
+ "engines": {
+ "node": ">=12"
+ }
+ },
+ "node_modules/d3-fetch": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/d3-fetch/-/d3-fetch-3.0.1.tgz",
+ "integrity": "sha512-kpkQIM20n3oLVBKGg6oHrUchHM3xODkTzjMoj7aWQFq5QEM+R6E4WkzT5+tojDY7yjez8KgCBRoj4aEr99Fdqw==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-dsv": "1 - 3"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
+ "node_modules/d3-force": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/d3-force/-/d3-force-3.0.0.tgz",
+ "integrity": "sha512-zxV/SsA+U4yte8051P4ECydjD/S+qeYtnaIyAs9tgHCqfguma/aAQDjo85A9Z6EKhBirHRJHXIgJUlffT4wdLg==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-dispatch": "1 - 3",
+ "d3-quadtree": "1 - 3",
+ "d3-timer": "1 - 3"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/d3-force-3d": {
"version": "3.0.6",
"resolved": "https://registry.npmjs.org/d3-force-3d/-/d3-force-3d-3.0.6.tgz",
@@ -4267,6 +4744,27 @@
"node": ">=12"
}
},
+ "node_modules/d3-geo": {
+ "version": "3.1.1",
+ "resolved": "https://registry.npmjs.org/d3-geo/-/d3-geo-3.1.1.tgz",
+ "integrity": "sha512-637ln3gXKXOwhalDzinUgY83KzNWZRKbYubaG+fGVuc/dxO64RRljtCTnf5ecMyE1RIdtqpkVcq0IbtU2S8j2Q==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-array": "2.5.0 - 3"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
+ "node_modules/d3-hierarchy": {
+ "version": "3.1.2",
+ "resolved": "https://registry.npmjs.org/d3-hierarchy/-/d3-hierarchy-3.1.2.tgz",
+ "integrity": "sha512-FX/9frcub54beBdugHjDCdikxThEqjnR93Qt7PvQTOHxyiNCAlvMrHhclk3cD5VeAaq9fxmfRp+CnWw9rEMBuA==",
+ "license": "ISC",
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/d3-interpolate": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz",
@@ -4285,6 +4783,24 @@
"integrity": "sha512-F8gPlqpP+HwRPMO/8uOu5wjH110+6q4cgJvgJT6vlpy3BEaDIKlTZrgHKZSp/i1InRpVfh4puY/kvL6MxK930A==",
"license": "MIT"
},
+ "node_modules/d3-path": {
+ "version": "3.1.0",
+ "resolved": "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz",
+ "integrity": "sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==",
+ "license": "ISC",
+ "engines": {
+ "node": ">=12"
+ }
+ },
+ "node_modules/d3-polygon": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/d3-polygon/-/d3-polygon-3.0.1.tgz",
+ "integrity": "sha512-3vbA7vXYwfe1SYhED++fPUQlWSYTTGmFmQiany/gdbiWgU/iEyQzyymwL9SkJjFFuCS4902BSzewVGsHHmHtXg==",
+ "license": "ISC",
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/d3-quadtree": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/d3-quadtree/-/d3-quadtree-3.0.1.tgz",
@@ -4294,6 +4810,15 @@
"node": ">=12"
}
},
+ "node_modules/d3-random": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/d3-random/-/d3-random-3.0.1.tgz",
+ "integrity": "sha512-FXMe9GfxTxqd5D6jFsQ+DJ8BJS4E/fT5mqqdjovykEB2oFbTMDVdg1MGFxfQW+FBOGoB++k8swBrgwSHT1cUXQ==",
+ "license": "ISC",
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/d3-scale": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz",
@@ -4332,6 +4857,18 @@
"node": ">=12"
}
},
+ "node_modules/d3-shape": {
+ "version": "3.2.0",
+ "resolved": "https://registry.npmjs.org/d3-shape/-/d3-shape-3.2.0.tgz",
+ "integrity": "sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-path": "^3.1.0"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/d3-time": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz",
@@ -4365,6 +4902,41 @@
"node": ">=12"
}
},
+ "node_modules/d3-transition": {
+ "version": "3.0.1",
+ "resolved": "https://registry.npmjs.org/d3-transition/-/d3-transition-3.0.1.tgz",
+ "integrity": "sha512-ApKvfjsSR6tg06xrL434C0WydLr7JewBB3V+/39RMHsaXTOG0zmt/OAXeng5M5LBm0ojmxJrpomQVZ1aPvBL4w==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-color": "1 - 3",
+ "d3-dispatch": "1 - 3",
+ "d3-ease": "1 - 3",
+ "d3-interpolate": "1 - 3",
+ "d3-timer": "1 - 3"
+ },
+ "engines": {
+ "node": ">=12"
+ },
+ "peerDependencies": {
+ "d3-selection": "2 - 3"
+ }
+ },
+ "node_modules/d3-zoom": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/d3-zoom/-/d3-zoom-3.0.0.tgz",
+ "integrity": "sha512-b8AmV3kfQaqWAuacbPuNbL6vahnOJflOhexLzMMNLga62+/nh0JzvJ0aO/5a5MVgUFGS7Hu1P9P03o3fJkDCyw==",
+ "license": "ISC",
+ "dependencies": {
+ "d3-dispatch": "1 - 3",
+ "d3-drag": "2 - 3",
+ "d3-interpolate": "1 - 3",
+ "d3-selection": "2 - 3",
+ "d3-transition": "2 - 3"
+ },
+ "engines": {
+ "node": ">=12"
+ }
+ },
"node_modules/damerau-levenshtein": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz",
@@ -4521,6 +5093,15 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/delaunator": {
+ "version": "5.1.0",
+ "resolved": "https://registry.npmjs.org/delaunator/-/delaunator-5.1.0.tgz",
+ "integrity": "sha512-AGrQ4QSgssa1NGmWmLPqN5NY2KajF5MqxetNEO+o0n3ZwZZeTmt7bBnvzHWrmkZFxGgr4HdyFgelzgi06otLuQ==",
+ "license": "ISC",
+ "dependencies": {
+ "robust-predicates": "^3.0.2"
+ }
+ },
"node_modules/dequal": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz",
@@ -5882,6 +6463,18 @@
"url": "https://opencollective.com/unified"
}
},
+ "node_modules/iconv-lite": {
+ "version": "0.6.3",
+ "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz",
+ "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
+ "license": "MIT",
+ "dependencies": {
+ "safer-buffer": ">= 2.1.2 < 3.0.0"
+ },
+ "engines": {
+ "node": ">=0.10.0"
+ }
+ },
"node_modules/ignore": {
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz",
@@ -9095,6 +9688,12 @@
"node": ">=0.10.0"
}
},
+ "node_modules/robust-predicates": {
+ "version": "3.0.3",
+ "resolved": "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.3.tgz",
+ "integrity": "sha512-NS3levdsRIUOmiJ8FZWCP7LG3QpJyrs/TE0Zpf1yvZu8cAJJ6QMW92H1c7kWpdIHo8RvmLxN/o2JXTKHp74lUA==",
+ "license": "Unlicense"
+ },
"node_modules/rollup": {
"version": "4.61.0",
"resolved": "https://registry.npmjs.org/rollup/-/rollup-4.61.0.tgz",
@@ -9170,6 +9769,12 @@
"queue-microtask": "^1.2.2"
}
},
+ "node_modules/rw": {
+ "version": "1.3.3",
+ "resolved": "https://registry.npmjs.org/rw/-/rw-1.3.3.tgz",
+ "integrity": "sha512-PdhdWy89SiZogBLaw42zdeqtRJ//zFd2PgQavcICDUgJT5oW10QCRKbJ6bg4r0/UY2M6BWd5tkxuGFRvCkgfHQ==",
+ "license": "BSD-3-Clause"
+ },
"node_modules/safe-array-concat": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.1.3.tgz",
@@ -9225,6 +9830,12 @@
"url": "https://github.com/sponsors/ljharb"
}
},
+ "node_modules/safer-buffer": {
+ "version": "2.1.2",
+ "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
+ "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
+ "license": "MIT"
+ },
"node_modules/scheduler": {
"version": "0.26.0",
"resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz",
diff --git a/web/package.json b/web/package.json
index 8846b2a4..7cc274fe 100644
--- a/web/package.json
+++ b/web/package.json
@@ -30,6 +30,7 @@
"@tiptap/starter-kit": "^3.26.1",
"3d-force-graph": "^1.79.1",
"chart.js": "^4.5.1",
+ "d3": "^7.9.0",
"lucide-react": "^0.577.0",
"next": "15.5.14",
"pg": "^8.21.0",
@@ -45,6 +46,7 @@
"devDependencies": {
"@eslint/eslintrc": "^3",
"@tailwindcss/postcss": "^4",
+ "@types/d3": "^7.4.3",
"@types/node": "^25.9.1",
"@types/pg": "^8.20.0",
"@types/react": "^19.2.16",
diff --git a/web/src/components/AppLogo.tsx b/web/src/components/AppLogo.tsx
index ced9344a..8966a843 100644
--- a/web/src/components/AppLogo.tsx
+++ b/web/src/components/AppLogo.tsx
@@ -1,17 +1,27 @@
+'use client';
+
+import { useBranding } from '@/context/useBranding';
+
interface AppLogoProps {
className?: string;
size?: number;
}
-/** Codefrydev mark — inverted for dark sidebar surfaces. */
+/** App mark. Uses custom logo from branding settings when set; falls back to /logo.svg. */
export default function AppLogo({ className = '', size = 24 }: AppLogoProps) {
+ const { logoUrl } = useBranding();
+ const src = logoUrl || '/logo.svg';
+ // The default /logo.svg is a black mark — invert it for the dark sidebar.
+ // Custom logos are shown as-is (user chose them).
+ const filter = !logoUrl ? 'shrink-0 brightness-0 invert' : 'shrink-0 object-contain';
+
return (
);
diff --git a/web/src/components/AppShell.tsx b/web/src/components/AppShell.tsx
index df54240d..ad0b6bfd 100644
--- a/web/src/components/AppShell.tsx
+++ b/web/src/components/AppShell.tsx
@@ -16,6 +16,7 @@ import IntegrationsModal from '@/components/IntegrationsModal';
import { Badge, Breadcrumb, ReportSelector } from '@/components';
import { useReport } from '@/context/useReport';
import { useSession } from '@/context/SessionContext';
+import { useBranding } from '@/context/useBranding';
import { strings, format } from '@/lib/strings';
import { canonicalDomainFromPayload } from '@/lib/domainSlug';
import { OPEN_INTEGRATIONS } from '@/lib/pipelineJobEvents';
@@ -26,6 +27,7 @@ import {
navHref,
type NavItemId,
} from '@/lib/appNav';
+import { useRiskFeatures } from '@/context/RiskFeaturesContext';
import type { ReportPayload } from '@/types';
import {
getBrowserDiagnosticsScope,
@@ -81,6 +83,8 @@ export default function AppShell({
const [integrationsToast, setIntegrationsToast] = useState(null);
const { data, startUrlByRunId } = useReport();
const { readonly: sessionReadonly } = useSession();
+ const { productName, productSubtitle } = useBranding();
+ const { featureEnabled } = useRiskFeatures();
const trailing = searchParams.toString() ? `?${searchParams.toString()}` : '';
const closeSidebar = () => setSidebarOpen(false);
@@ -193,14 +197,14 @@ export default function AppShell({
href="/home"
className={`flex items-center min-w-0 ${sidebarCollapsed ? 'md:justify-center' : ''}`}
onClick={closeSidebar}
- title={sidebarCollapsed ? strings.app.productName : undefined}
+ title={sidebarCollapsed ? productName : undefined}
>