Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/website_profiling/reporting/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,14 @@ def run_simple_report(
report_data.get("categories") or [],
google_data,
)
# Add a category scored from real Search Console performance.
# Returns None when there is no GSC search data, so the headline
# health average stays internal-only when Google isn't connected.
from .categories import category_search_performance

sp = category_search_performance(google_data.get("gsc"))
if sp is not None:
report_data.setdefault("categories", []).append(sp)
except Exception:
pass
try:
Expand Down
2 changes: 2 additions & 0 deletions src/website_profiling/reporting/categories/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
category_core_web_vitals_from_lighthouse,
category_performance,
)
from .search_performance import category_search_performance
from .security import category_security
from .technical_seo import category_technical_seo
from ._helpers import (
Expand Down Expand Up @@ -53,6 +54,7 @@
"category_mobile",
"category_security",
"category_intelligence",
"category_search_performance",
"_issue",
"_sort_issues",
"_page_analysis_dict",
Expand Down
164 changes: 164 additions & 0 deletions src/website_profiling/reporting/categories/search_performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
"""Report category: search_performance.

Scored from real Google Search Console data (average position, CTR, query
distribution, click/impression trend) — unlike the other categories, this one
reflects how the site actually performs in Google, not internal audit heuristics.

Returns ``None`` when GSC data is unavailable (Google not connected, or the
property has no search impressions in the window) so the builder can skip it and
the headline Site-health average stays internal-only.
"""
from __future__ import annotations

from typing import Any, Optional

from ._helpers import (
_issue,
_score_deductions,
_sort_issues,
)
from ..terminology import CATEGORY_SEARCH_PERFORMANCE

# Minimum impressions before a CTR / zero-click signal is meaningful (low-volume
# queries have noisy CTR and shouldn't drive deductions).
_MIN_IMPRESSIONS_FOR_CTR = 100
# Minimum impressions for a query at positions 11–20 to count as "striking
# distance"; filters out page-2 queries that are barely ever shown.
_STRIKING_MIN_IMPRESSIONS = 10
# Need at least this many daily points to split the window into halves for a trend.
_TREND_MIN_DAYS = 6
# A second-half total below this fraction of the first-half total counts as a
# decline (0.8 = a drop of more than 20%).
_DECLINE_RATIO = 0.8


def _expected_ctr(position: float) -> float:
    """Return the rough organic CTR (percent) typical for an average position.

    The curve is a step function: the further down the results a page ranks,
    the smaller the share of impressions that turn into clicks.
    """
    # (inclusive upper bound of the position band, typical CTR in percent),
    # ordered from the best rank downwards so the first match wins.
    bands = (
        (1.5, 28.0),
        (2.5, 15.0),
        (3.5, 11.0),
        (5.0, 7.0),
        (10.0, 3.0),
    )
    for upper_bound, typical_ctr in bands:
        if position <= upper_bound:
            return typical_ctr
    # Anything past position 10 falls through to the floor value.
    return 1.0


def _dict_rows(value: Any) -> list[dict]:
    """Return the dict entries of a list-like GSC section, dropping anything else."""
    if not isinstance(value, (list, tuple)):
        return []
    return [row for row in value if isinstance(row, dict)]


def category_search_performance(gsc: Optional[dict[str, Any]]) -> Optional[dict]:
    """Score real Google Search Console performance, or ``None`` if no GSC data.

    Starts from 100 and records a deduction for each weak signal: average
    position, CTR against the typical CTR for that position, page-2
    ("striking distance") queries, high-impression queries with no clicks, and
    a half-over-half decline in clicks or impressions.

    Args:
        gsc: Search Console payload. Keys read here are ``summary`` (with
            ``impressions``, ``position`` and ``ctr`` as a percentage),
            ``top_queries`` (rows with ``query``, ``position``, ``impressions``
            and ``clicks``) and ``daily`` (rows with ``clicks`` and
            ``impressions``).

    Returns:
        A category dict with ``id``, ``name``, ``score``, ``issues`` and
        ``recommendations``, or ``None`` when ``gsc`` is missing, malformed, or
        reports no impressions.
    """
    if not gsc or not isinstance(gsc, dict):
        return None
    summary = gsc.get("summary") or {}
    if not isinstance(summary, dict):
        # A malformed summary is "no data", not an error for the caller to swallow.
        return None
    impressions = float(summary.get("impressions") or 0)
    if impressions <= 0:
        return None

    position = float(summary.get("position") or 0)
    ctr = float(summary.get("ctr") or 0)  # percent (0–100)
    # Non-dict rows are ignored in both series so one bad row cannot abort scoring.
    top_queries = _dict_rows(gsc.get("top_queries"))
    daily = _dict_rows(gsc.get("daily"))

    issues: list[dict] = []
    deductions: list[tuple[int, bool]] = []

    # --- Average position: the headline ranking signal (1 = best) -------------
    if position > 0:
        if position > 20:
            issues.append(_issue(
                f"Average Google position is {position:.1f} — most queries rank beyond page 2.",
                priority="High",
                recommendation="Strengthen on-page relevance, internal linking, and content depth for target queries.",
            ))
            deductions.append((35, True))
        elif position > 10:
            issues.append(_issue(
                f"Average Google position is {position:.1f} — ranking on page 2 for many queries.",
                priority="High",
                recommendation="Improve on-page optimisation and internal links to push key queries onto page 1.",
            ))
            deductions.append((20, True))
        elif position > 3:
            issues.append(_issue(
                f"Average Google position is {position:.1f} — room to reach the top 3.",
                priority="Medium",
                recommendation="Refine titles, content, and internal links for queries ranking 4–10.",
            ))
            deductions.append((8, True))

    # --- CTR vs. expected for the average position ----------------------------
    if impressions >= _MIN_IMPRESSIONS_FOR_CTR and position > 0:
        expected = _expected_ctr(position)
        # Only flag a CTR that is well below (under 60% of) the typical value.
        if ctr < expected * 0.6:
            issues.append(_issue(
                f"Click-through rate ({ctr:.1f}%) is below the ~{expected:.0f}% typical for "
                f"average position {position:.1f}.",
                priority="Medium",
                recommendation="Improve titles and meta descriptions, and add structured data for richer SERP snippets.",
            ))
            deductions.append((10, True))

    # --- Striking-distance queries (page 2: positions 11–20) ------------------
    striking = [
        q for q in top_queries
        if 10 < float(q.get("position") or 0) <= 20
        and float(q.get("impressions") or 0) >= _STRIKING_MIN_IMPRESSIONS
    ]
    if striking:
        sample = ", ".join(str(q.get("query") or "") for q in striking[:3] if q.get("query"))
        more = f" (+{len(striking) - 3} more)" if len(striking) > 3 else ""
        issues.append(_issue(
            f"{len(striking)} quer(y/ies) rank on page 2 (positions 11–20): {sample}{more}.",
            priority="Medium",
            recommendation="These are close to page 1 — add internal links and refresh content to push them up.",
        ))
        # One point per query, capped so a long tail cannot dominate the score.
        deductions.append((min(10, len(striking)), True))

    # --- Zero-click, high-impression queries ---------------------------------
    zero_click = [
        q for q in top_queries
        if float(q.get("impressions") or 0) >= _MIN_IMPRESSIONS_FOR_CTR
        and float(q.get("clicks") or 0) == 0
    ]
    if zero_click:
        sample = ", ".join(str(q.get("query") or "") for q in zero_click[:3] if q.get("query"))
        more = f" (+{len(zero_click) - 3} more)" if len(zero_click) > 3 else ""
        issues.append(_issue(
            f"{len(zero_click)} quer(y/ies) get impressions but no clicks: {sample}{more}.",
            priority="Medium",
            recommendation="Review search intent match and rewrite titles/descriptions to earn the click.",
        ))
        deductions.append((min(8, len(zero_click)), True))

    # --- Click / impression trend (first vs. second half of the window) ------
    # NOTE(review): assumes ``daily`` is in chronological order — confirm against
    # the GSC fetcher.
    if len(daily) >= _TREND_MIN_DAYS:
        half = len(daily) // 2
        # Compare equal-length halves. With an odd number of days the middle day
        # is left out; otherwise the second half would hold one extra day and
        # its larger total could hide a real decline.
        first, second = daily[:half], daily[len(daily) - half:]
        first_clicks = sum(float(d.get("clicks") or 0) for d in first)
        second_clicks = sum(float(d.get("clicks") or 0) for d in second)
        first_impr = sum(float(d.get("impressions") or 0) for d in first)
        second_impr = sum(float(d.get("impressions") or 0) for d in second)
        if first_clicks > 0 and second_clicks < first_clicks * _DECLINE_RATIO:
            issues.append(_issue(
                "Search clicks are declining over the reporting window.",
                priority="High",
                recommendation="Investigate ranking losses or seasonality; refresh affected pages.",
            ))
            deductions.append((12, True))
        elif first_impr > 0 and second_impr < first_impr * _DECLINE_RATIO:
            # Only reported when clicks held up, so a single drop is not counted twice.
            issues.append(_issue(
                "Search impressions are declining over the reporting window.",
                priority="Medium",
                recommendation="Check for indexing or visibility losses; expand and refresh content.",
            ))
            deductions.append((8, True))

    score = _score_deductions(100, deductions)
    # dict.fromkeys de-duplicates while keeping first-seen order; a set would
    # reorder the recommendations from run to run under string hash randomisation.
    recommendations = list(
        dict.fromkeys(i["recommendation"] for i in issues if i["recommendation"])
    )
    return {
        "id": "search_performance",
        "name": CATEGORY_SEARCH_PERFORMANCE,
        "score": int(score),
        "issues": _sort_issues(issues),
        "recommendations": recommendations,
    }
1 change: 1 addition & 0 deletions src/website_profiling/reporting/terminology.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
CATEGORY_MOBILE = "Mobile SEO"
CATEGORY_SECURITY = "Security"
CATEGORY_CONTENT_QUALITY = "Content quality"
CATEGORY_SEARCH_PERFORMANCE = "Search performance"

# Older audits may still use legacy names — map for exports and UI fallbacks
LEGACY_CATEGORY_DISPLAY: dict[str, str] = {
Expand Down
154 changes: 154 additions & 0 deletions tests/reporting/test_category_search_performance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""Unit tests for the Search performance category (real GSC-driven scoring)."""
from __future__ import annotations

from website_profiling.reporting.categories import category_search_performance


def _daily(values: list[tuple[int, int]]) -> list[dict]:
    """Turn (clicks, impressions) pairs into consecutive daily GSC rows."""
    rows: list[dict] = []
    for day, (clicks, impressions) in enumerate(values, start=1):
        # Guard the division: a day without impressions has a CTR of 0.0.
        ctr_pct = round(clicks / impressions * 100, 2) if impressions else 0.0
        rows.append(
            {
                "date": f"2024-01-{day:02d}",
                "clicks": clicks,
                "impressions": impressions,
                "ctr": ctr_pct,
                "position": 5.0,
            }
        )
    return rows


# --- no data --------------------------------------------------------------


def test_none_when_no_gsc() -> None:
    """A missing or empty GSC payload yields no category at all."""
    for payload in (None, {}):
        assert category_search_performance(payload) is None


def test_none_when_zero_impressions() -> None:
    """A summary that reports zero impressions is treated as having no data."""
    empty_summary = {"clicks": 0, "impressions": 0, "ctr": 0.0, "position": 0.0}
    assert category_search_performance({"summary": empty_summary}) is None


# --- strong performance: full score, no issues ----------------------------


def test_strong_rankings_scores_100_no_issues() -> None:
    """Top rankings, a high CTR and a rising trend produce a clean 100."""
    brand_query = {"query": "brand", "clicks": 200, "impressions": 300, "ctr": 66.6, "position": 1.4}
    # Three quiet days followed by three busier ones: clicks and impressions rise.
    rising_trend = _daily([(5, 100)] * 3 + [(20, 200)] * 3)
    result = category_search_performance(
        {
            "summary": {"clicks": 500, "impressions": 1000, "ctr": 50.0, "position": 2.0},
            "top_queries": [brand_query],
            "top_pages": [],
            "daily": rising_trend,
        }
    )
    assert result is not None
    assert result["id"] == "search_performance"
    assert result["name"] == "Search performance"
    assert result["score"] == 100
    assert result["issues"] == []


# --- poor performance: deductions + issues --------------------------------


def test_poor_rankings_declining_trend_and_striking_distance() -> None:
    """Weak rankings with a falling trend lose points and surface every signal."""
    page_two_queries = [
        {"query": "q1", "clicks": 0, "impressions": 50, "ctr": 0.0, "position": 15.0},
        {"query": "q2", "clicks": 0, "impressions": 60, "ctr": 0.0, "position": 18.0},
        {"query": "q3", "clicks": 0, "impressions": 30, "ctr": 0.0, "position": 12.0},
    ]
    deep_query = {"query": "q4", "clicks": 0, "impressions": 200, "ctr": 0.0, "position": 30.0}
    payload = {
        "summary": {"clicks": 5, "impressions": 1000, "ctr": 0.5, "position": 25.0},
        "top_queries": [*page_two_queries, deep_query],
        "top_pages": [],
        "daily": _daily([(20, 400)] * 3 + [(2, 100)] * 3),
    }

    result = category_search_performance(payload)

    assert result is not None
    assert result["score"] < 60
    found = result["issues"]
    # avg position > 20 and/or declining clicks
    assert "High" in [issue["priority"] for issue in found]
    text = " ".join(issue["message"] for issue in found).lower()
    assert "page 2" in text  # striking-distance queries surfaced
    assert "declining" in text  # trend signal surfaced
    assert result["recommendations"]  # recommendations derived from issues


def test_top_position_uses_high_expected_ctr() -> None:
    """Average position <= 1.5 expects ~28% CTR; a healthy CTR costs nothing."""
    summary = {"clicks": 600, "impressions": 1000, "ctr": 60.0, "position": 1.2}
    result = category_search_performance(
        {"summary": summary, "top_queries": [], "top_pages": [], "daily": []}
    )
    assert result is not None
    assert result["score"] == 100
    assert result["issues"] == []


def test_position_three_band_low_ctr_flagged() -> None:
    """Average position in (2.5, 3.5] expects ~11% CTR; a low CTR is flagged."""
    summary = {"clicks": 20, "impressions": 1000, "ctr": 2.0, "position": 3.0}
    result = category_search_performance(
        {"summary": summary, "top_queries": [], "top_pages": [], "daily": []}
    )
    assert result is not None
    text = " ".join(issue["message"] for issue in result["issues"]).lower()
    assert "click-through rate" in text
    # Position 3.0 is not > 3, so there is no average-position issue.
    assert "average google position" not in text


def test_mid_position_band_expected_ctr() -> None:
    """Average position in (5, 10] expects ~3% CTR; a healthy CTR is not flagged."""
    summary = {"clicks": 80, "impressions": 1000, "ctr": 8.0, "position": 8.0}
    result = category_search_performance(
        {"summary": summary, "top_queries": [], "top_pages": [], "daily": []}
    )
    assert result is not None
    text = " ".join(issue["message"] for issue in result["issues"]).lower()
    assert "click-through rate" not in text
    # The 4–10 band still raises the average-position issue.
    assert "average google position is 8.0" in text


def test_page2_average_position_and_declining_impressions() -> None:
    """Position in (10, 20] hits the page-2 branch; flat clicks, falling impressions."""
    # Clicks stay at 10 per day while impressions drop from 400 to 100.
    falling_impressions = _daily([(10, 400)] * 3 + [(10, 100)] * 3)
    result = category_search_performance(
        {
            "summary": {"clicks": 60, "impressions": 1000, "ctr": 6.0, "position": 15.0},
            "top_queries": [],
            "top_pages": [],
            "daily": falling_impressions,
        }
    )
    assert result is not None
    text = " ".join(issue["message"] for issue in result["issues"]).lower()
    assert "page 2 for many queries" in text
    assert "impressions are declining" in text
    assert "clicks are declining" not in text


def test_striking_distance_ignores_low_impression_queries() -> None:
    """A page-2 query with only a handful of impressions is not striking distance."""
    # Position 11-20 but just 3 impressions.
    noisy_query = {"query": "noise", "clicks": 0, "impressions": 3, "ctr": 0.0, "position": 14.0}
    result = category_search_performance(
        {
            "summary": {"clicks": 50, "impressions": 500, "ctr": 10.0, "position": 4.0},
            "top_queries": [noisy_query],
            "top_pages": [],
            "daily": [],
        }
    )
    assert result is not None
    text = " ".join(issue["message"] for issue in result["issues"]).lower()
    assert "page 2" not in text
1 change: 1 addition & 0 deletions web/src/lib/categoryDisplayNames.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const CATEGORY_DISPLAY: Record<string, string> = {
'Mobile SEO': 'Mobile SEO',
Security: 'Security',
'Content quality': 'Content quality',
'Search performance': 'Search performance',
// Legacy payloads
'HTML/Accessibility': 'Accessibility & markup',
'HTML & Accessibility': 'Accessibility & markup',
Expand Down
2 changes: 1 addition & 1 deletion web/src/strings.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
},
"healthScore": {
"title": "Site health",
"body": "Score from 0100 based on issue category weights in this audit. Higher is better. Reflects technical SEO health, not Google rankings."
"body": "Score from 0-100 averaged across this audit's category scores. Higher is better. Mainly reflects technical SEO health; also includes real Search Console performance (rankings, CTR, trends) when Google is connected."
},
"impactScore": {
"title": "Impact score",
Expand Down
Loading