From 6fc15b7c338e48a02095d63453acac665d4e5354 Mon Sep 17 00:00:00 2001 From: PrashantUnity Date: Thu, 25 Jun 2026 09:58:27 +0530 Subject: [PATCH] Gsc Ranking --- src/website_profiling/reporting/builder.py | 8 + .../reporting/categories/__init__.py | 2 + .../categories/search_performance.py | 164 ++++++++++++++++++ .../reporting/terminology.py | 1 + .../test_category_search_performance.py | 154 ++++++++++++++++ web/src/lib/categoryDisplayNames.ts | 1 + web/src/strings.json | 2 +- 7 files changed, 331 insertions(+), 1 deletion(-) create mode 100644 src/website_profiling/reporting/categories/search_performance.py create mode 100644 tests/reporting/test_category_search_performance.py diff --git a/src/website_profiling/reporting/builder.py b/src/website_profiling/reporting/builder.py index 8a547baf..2e6c1d16 100644 --- a/src/website_profiling/reporting/builder.py +++ b/src/website_profiling/reporting/builder.py @@ -589,6 +589,14 @@ def run_simple_report( report_data.get("categories") or [], google_data, ) + # Add a category scored from real Search Console performance. + # Returns None when there is no GSC search data, so the headline + # health average stays internal-only when Google isn't connected. 
+ from .categories import category_search_performance + + sp = category_search_performance(google_data.get("gsc")) + if sp is not None: + report_data.setdefault("categories", []).append(sp) except Exception: pass try: diff --git a/src/website_profiling/reporting/categories/__init__.py b/src/website_profiling/reporting/categories/__init__.py index e9921746..70f36f69 100644 --- a/src/website_profiling/reporting/categories/__init__.py +++ b/src/website_profiling/reporting/categories/__init__.py @@ -14,6 +14,7 @@ category_core_web_vitals_from_lighthouse, category_performance, ) +from .search_performance import category_search_performance from .security import category_security from .technical_seo import category_technical_seo from ._helpers import ( @@ -53,6 +54,7 @@ "category_mobile", "category_security", "category_intelligence", + "category_search_performance", "_issue", "_sort_issues", "_page_analysis_dict", diff --git a/src/website_profiling/reporting/categories/search_performance.py b/src/website_profiling/reporting/categories/search_performance.py new file mode 100644 index 00000000..44dc4220 --- /dev/null +++ b/src/website_profiling/reporting/categories/search_performance.py @@ -0,0 +1,164 @@ +"""Report category: search_performance. + +Scored from real Google Search Console data (average position, CTR, query +distribution, click/impression trend) — unlike the other categories, this one +reflects how the site actually performs in Google, not internal audit heuristics. + +Returns ``None`` when GSC data is unavailable (Google not connected, or the +property has no search impressions in the window) so the builder can skip it and +the headline Site-health average stays internal-only. 
+""" +from __future__ import annotations + +from typing import Any, Optional + +from ._helpers import ( + _issue, + _score_deductions, + _sort_issues, +) +from ..terminology import CATEGORY_SEARCH_PERFORMANCE + +# Minimum impressions before a CTR / zero-click signal is meaningful (low-volume +# queries have noisy CTR and shouldn't drive deductions). +_MIN_IMPRESSIONS_FOR_CTR = 100 +_STRIKING_MIN_IMPRESSIONS = 10 +# Need at least this many daily points to split the window into halves for a trend. +_TREND_MIN_DAYS = 6 +# A half-over-half drop below this ratio counts as a decline. +_DECLINE_RATIO = 0.8 + + +def _expected_ctr(position: float) -> float: + """Rough organic CTR (percent) for an average position. Lower rank → less CTR.""" + if position <= 1.5: + return 28.0 + if position <= 2.5: + return 15.0 + if position <= 3.5: + return 11.0 + if position <= 5.0: + return 7.0 + if position <= 10.0: + return 3.0 + return 1.0 + + +def category_search_performance(gsc: Optional[dict[str, Any]]) -> Optional[dict]: + """Score real Google Search Console performance, or ``None`` if no GSC data.""" + if not gsc or not isinstance(gsc, dict): + return None + summary = gsc.get("summary") or {} + impressions = float(summary.get("impressions") or 0) + if impressions <= 0: + return None + + position = float(summary.get("position") or 0) + ctr = float(summary.get("ctr") or 0) # percent (0–100) + top_queries = gsc.get("top_queries") or [] + daily = gsc.get("daily") or [] + + issues: list[dict] = [] + deductions: list[tuple[int, bool]] = [] + + # --- Average position: the headline ranking signal (1 = best) ------------- + if position > 0: + if position > 20: + issues.append(_issue( + f"Average Google position is {position:.1f} — most queries rank beyond page 2.", + priority="High", + recommendation="Strengthen on-page relevance, internal linking, and content depth for target queries.", + )) + deductions.append((35, True)) + elif position > 10: + issues.append(_issue( + f"Average Google 
position is {position:.1f} — ranking on page 2 for many queries.", + priority="High", + recommendation="Improve on-page optimisation and internal links to push key queries onto page 1.", + )) + deductions.append((20, True)) + elif position > 3: + issues.append(_issue( + f"Average Google position is {position:.1f} — room to reach the top 3.", + priority="Medium", + recommendation="Refine titles, content, and internal links for queries ranking 4–10.", + )) + deductions.append((8, True)) + + # --- CTR vs. expected for the average position ---------------------------- + if impressions >= _MIN_IMPRESSIONS_FOR_CTR and position > 0: + expected = _expected_ctr(position) + if ctr < expected * 0.6: + issues.append(_issue( + f"Click-through rate ({ctr:.1f}%) is below the ~{expected:.0f}% typical for " + f"average position {position:.1f}.", + priority="Medium", + recommendation="Improve titles and meta descriptions, and add structured data for richer SERP snippets.", + )) + deductions.append((10, True)) + + # --- Striking-distance queries (page 2: positions 11–20) ------------------ + striking = [ + q for q in top_queries + if isinstance(q, dict) + and 10 < float(q.get("position") or 0) <= 20 + and float(q.get("impressions") or 0) >= _STRIKING_MIN_IMPRESSIONS + ] + if striking: + sample = ", ".join(str(q.get("query") or "") for q in striking[:3] if q.get("query")) + more = f" (+{len(striking) - 3} more)" if len(striking) > 3 else "" + issues.append(_issue( + f"{len(striking)} quer(y/ies) rank on page 2 (positions 11–20): {sample}{more}.", + priority="Medium", + recommendation="These are close to page 1 — add internal links and refresh content to push them up.", + )) + deductions.append((min(10, len(striking)), True)) + + # --- Zero-click, high-impression queries --------------------------------- + zero_click = [ + q for q in top_queries + if isinstance(q, dict) + and float(q.get("impressions") or 0) >= _MIN_IMPRESSIONS_FOR_CTR + and float(q.get("clicks") or 0) == 0 + ] + if 
zero_click: + sample = ", ".join(str(q.get("query") or "") for q in zero_click[:3] if q.get("query")) + more = f" (+{len(zero_click) - 3} more)" if len(zero_click) > 3 else "" + issues.append(_issue( + f"{len(zero_click)} quer(y/ies) get impressions but no clicks: {sample}{more}.", + priority="Medium", + recommendation="Review search intent match and rewrite titles/descriptions to earn the click.", + )) + deductions.append((min(8, len(zero_click)), True)) + + # --- Click / impression trend (first vs. second half of the window) ------ + if len(daily) >= _TREND_MIN_DAYS: + mid = len(daily) // 2 + first, second = daily[:mid], daily[mid:] + first_clicks = sum(float(d.get("clicks") or 0) for d in first) + second_clicks = sum(float(d.get("clicks") or 0) for d in second) + first_impr = sum(float(d.get("impressions") or 0) for d in first) + second_impr = sum(float(d.get("impressions") or 0) for d in second) + if first_clicks > 0 and second_clicks < first_clicks * _DECLINE_RATIO: + issues.append(_issue( + "Search clicks are declining over the reporting window.", + priority="High", + recommendation="Investigate ranking losses or seasonality; refresh affected pages.", + )) + deductions.append((12, True)) + elif first_impr > 0 and second_impr < first_impr * _DECLINE_RATIO: + issues.append(_issue( + "Search impressions are declining over the reporting window.", + priority="Medium", + recommendation="Check for indexing or visibility losses; expand and refresh content.", + )) + deductions.append((8, True)) + + score = _score_deductions(100, deductions) + return { + "id": "search_performance", + "name": CATEGORY_SEARCH_PERFORMANCE, + "score": int(score), + "issues": _sort_issues(issues), + "recommendations": list({i["recommendation"] for i in issues if i["recommendation"]}), + } diff --git a/src/website_profiling/reporting/terminology.py b/src/website_profiling/reporting/terminology.py index bd892106..2480696e 100644 --- a/src/website_profiling/reporting/terminology.py +++ 
"""Unit tests for the Search performance category (real GSC-driven scoring)."""
from __future__ import annotations

from website_profiling.reporting.categories import category_search_performance


def _daily(values: list[tuple[int, int]]) -> list[dict]:
    """Turn (clicks, impressions) pairs into consecutive daily rows starting 2024-01-01."""
    rows: list[dict] = []
    for day, (clicks, impressions) in enumerate(values, start=1):
        ctr = round(clicks / impressions * 100, 2) if impressions else 0.0
        rows.append(
            {
                "date": f"2024-01-{day:02d}",
                "clicks": clicks,
                "impressions": impressions,
                "ctr": ctr,
                "position": 5.0,
            }
        )
    return rows


def _summary(clicks: int, impressions: int, ctr: float, position: float) -> dict:
    """Build the ``summary`` section of a GSC payload."""
    return {"clicks": clicks, "impressions": impressions, "ctr": ctr, "position": position}


def _query(name: str, clicks: int, impressions: int, ctr: float, position: float) -> dict:
    """Build one ``top_queries`` row."""
    return {
        "query": name,
        "clicks": clicks,
        "impressions": impressions,
        "ctr": ctr,
        "position": position,
    }


def _gsc(summary: dict, top_queries: list[dict] | None = None, daily: list[dict] | None = None) -> dict:
    """Assemble a full GSC payload; omitted sections default to empty lists."""
    return {
        "summary": summary,
        "top_queries": top_queries if top_queries is not None else [],
        "top_pages": [],
        "daily": daily if daily is not None else [],
    }


def _run(gsc: dict) -> dict:
    """Score *gsc* and assert that a category was produced."""
    cat = category_search_performance(gsc)
    assert cat is not None
    return cat


def _messages(cat: dict) -> str:
    """All issue messages of *cat*, space-joined and lower-cased for substring checks."""
    return " ".join(issue["message"] for issue in cat["issues"]).lower()


# --- no data --------------------------------------------------------------


def test_none_when_no_gsc() -> None:
    assert category_search_performance(None) is None
    assert category_search_performance({}) is None


def test_none_when_zero_impressions() -> None:
    gsc = {"summary": _summary(0, 0, 0.0, 0.0)}
    assert category_search_performance(gsc) is None


# --- strong performance: full score, no issues ----------------------------


def test_strong_rankings_scores_100_no_issues() -> None:
    rising = [(5, 100)] * 3 + [(20, 200)] * 3
    cat = _run(
        _gsc(
            _summary(500, 1000, 50.0, 2.0),
            top_queries=[_query("brand", 200, 300, 66.6, 1.4)],
            daily=_daily(rising),
        )
    )
    assert cat["id"] == "search_performance"
    assert cat["name"] == "Search performance"
    assert cat["score"] == 100
    assert cat["issues"] == []


# --- poor performance: deductions + issues --------------------------------


def test_poor_rankings_declining_trend_and_striking_distance() -> None:
    falling = [(20, 400)] * 3 + [(2, 100)] * 3
    cat = _run(
        _gsc(
            _summary(5, 1000, 0.5, 25.0),
            top_queries=[
                _query("q1", 0, 50, 0.0, 15.0),
                _query("q2", 0, 60, 0.0, 18.0),
                _query("q3", 0, 30, 0.0, 12.0),
                _query("q4", 0, 200, 0.0, 30.0),
            ],
            daily=_daily(falling),
        )
    )
    assert cat["score"] < 60
    priorities = {issue["priority"] for issue in cat["issues"]}
    assert "High" in priorities  # avg position > 20 and/or declining clicks
    text = _messages(cat)
    assert "page 2" in text  # striking-distance queries surfaced
    assert "declining" in text  # trend signal surfaced
    assert cat["recommendations"]  # recommendations derived from issues


def test_top_position_uses_high_expected_ctr() -> None:
    # Average position <= 1.5 -> ~28% expected CTR; a healthy CTR earns no deduction.
    cat = _run(_gsc(_summary(600, 1000, 60.0, 1.2)))
    assert cat["score"] == 100
    assert cat["issues"] == []


def test_position_three_band_low_ctr_flagged() -> None:
    # Average position in (2.5, 3.5] -> ~11% expected CTR; a low CTR is flagged.
    text = _messages(_run(_gsc(_summary(20, 1000, 2.0, 3.0))))
    assert "click-through rate" in text
    # position 3.0 is not > 3, so no average-position issue
    assert "average google position" not in text


def test_mid_position_band_expected_ctr() -> None:
    # Average position in (5, 10] -> ~3% expected CTR band; healthy CTR, no CTR deduction.
    text = _messages(_run(_gsc(_summary(80, 1000, 8.0, 8.0))))
    assert "click-through rate" not in text
    assert "average google position is 8.0" in text  # 4–10 band issue


def test_page2_average_position_and_declining_impressions() -> None:
    # Average position in (10, 20] -> page-2 branch; clicks flat but impressions falling.
    shrinking_reach = [(10, 400)] * 3 + [(10, 100)] * 3
    text = _messages(
        _run(_gsc(_summary(60, 1000, 6.0, 15.0), daily=_daily(shrinking_reach)))
    )
    assert "page 2 for many queries" in text
    assert "impressions are declining" in text
    assert "clicks are declining" not in text


def test_striking_distance_ignores_low_impression_queries() -> None:
    # position 11-20 but only a handful of impressions -> not striking
    noise = _query("noise", 0, 3, 0.0, 14.0)
    text = _messages(_run(_gsc(_summary(50, 500, 10.0, 4.0), top_queries=[noise])))
    assert "page 2" not in text
Mainly reflects technical SEO health; also includes real Search Console performance (rankings, CTR, trends) when Google is connected." }, "impactScore": { "title": "Impact score",