Report index / reports-and-code
run.py

Source: /Users/borker/dev/hybrid-blog-writer-26-voice-pipeline/experiments/same_author_lift/run.py
"""Iterate article transformations to lift Pete same-author pass rate.

This is an experiment harness, not production pipeline code. It reads the
existing simple_writer Pete outputs, transforms failed articles, and accepts a
candidate only when it improves the same-author judge without worsening
deterministic voice/fingerprint gates.
"""

from __future__ import annotations

import argparse
import hashlib
import json
import re
import shutil
import sys
import time
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

ROOT = Path(__file__).resolve().parents[2]
sys.path.insert(0, str(ROOT))

from simple_writer.pipeline import Seed, character_check, editor_pass
from voice_pipeline.drift import detect_drift
from voice_pipeline.llm import DEFAULT_EDITOR_MODEL, call_llm, call_llm_json
from voice_pipeline.metrics import _words, analyze_text
from voice_pipeline.slop import audit_text


# Presumably the directory of existing simple_writer outputs that the module
# docstring refers to -- confirm against the code that reads it.
SOURCE_DIR = ROOT / "outputs/simple/pete-nicholas"
# NOTE(review): machine-specific absolute path; make_seed() passes it to
# Seed.from_corpus(), so it must exist on the machine running the experiment.
SEED_PATH = Path("/Users/borker/Downloads/seed-pete-n.md")
# Experiment working directory and the artifact paths derived from it.
EXP_DIR = ROOT / "experiments/same_author_lift"
AFTER_DIR = EXP_DIR / "after"
CACHE_PATH = EXP_DIR / "cache.json"
RESULTS_PATH = EXP_DIR / "results.json"
REPORT_PATH = EXP_DIR / "REPORT.md"

# Leading component of the cache keys built in this module; after a change,
# entries stored under the previous value are no longer looked up.
PROMPT_VERSION = "same-author-lift-v8"

# Named regexes for recurring scaffold phrases. scaffold_hits() counts the
# matches of each pattern case-insensitively.
SCAFFOLD_PATTERNS: dict[str, str] = {
    "i_want_to_be_careful": r"\bI want to be careful\b",
    "the_question_is": r"\bthe question is\b",
    "this_is_not": r"\bthis is not\b",
    "not_x_but": r"\bnot\b[^.!?]{0,90}\bbut\b",
    "first_second_third": r"\bFirst,|\bSecond,|\bThird,",
    "it_is_worth": r"\bit is worth\b",
    "in_the_end": r"\bin the end\b",
}


# Matches one HTML comment at the very start of the text, plus any whitespace
# after it. Group 1 is the comment body without surrounding whitespace.
META_RE = re.compile(r"^<!--\s*(.*?)\s*-->\s*", re.S)


@dataclass
class EvalResult:
    """Judge verdict plus deterministic measurements for one article text.

    Instances are built by evaluate() and stored in the cache as plain dicts
    via dataclasses.asdict().
    """

    # Same-author verdict from the LLM judge and the judge's short explanation.
    llm_yes_no: bool
    llm_reasoning: str
    # Distance and overlap scores against the seed; evaluate() either computes
    # them itself or copies them from character_check(), depending on judge mode.
    stylometric_distance: float
    foibles_overlap: float
    # From detect_drift(): the rounded drift score and the number of entries
    # in its out_of_bounds collection.
    drift_score: float
    oob_count: int
    # From audit_text(): the rounded slop rate and the hard-fail flag.
    slop_rate: float
    slop_hard_fail: bool
    # Length of the list returned by _words() for the text.
    word_count: int
    # Per-pattern match counts as returned by scaffold_hits().
    scaffold_hits: dict[str, int]


@dataclass
class ArticleRun:
    """Outcome record for one article handled by the experiment.

    NOTE(review): the code that fills this record is outside the section
    reviewed here, so the field notes below are read from names and types
    only and should be confirmed against that code.
    """

    # Article identifier.
    slug: str
    # presumably the same-author verdict already attached to the source -- confirm
    source_same_author: bool
    # Evaluation of the original text, and of a candidate when one exists.
    before: EvalResult
    after: EvalResult | None
    # presumably: whether the candidate was kept / whether a transform was tried
    accepted: bool
    attempted: bool
    # Free-text explanation of the outcome.
    reason: str
    # Optional extra guard data; schema not visible here -- TODO confirm.
    quality_guard: dict[str, Any] | None
    # Path of the written "after" article as a string, when there is one.
    after_path: str | None


def utcnow() -> str:
    """Return the current UTC time formatted as an ISO-8601 string."""
    moment = datetime.now(tz=timezone.utc)
    return moment.isoformat()


def sha(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``text`` encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()


def safe_label(value: str) -> str:
    """Turn ``value`` into a filename-friendly label.

    Every run of characters other than ASCII letters, digits, ``.``, ``_``
    and ``-`` becomes a single ``-``; hyphens at either end are then removed.
    """
    hyphenated = re.sub(r"[^a-zA-Z0-9._-]+", "-", value)
    return hyphenated.strip("-")


def write_raw_rewrite(transform: str, writer_model: str, slug: str, text: str) -> None:
    """Save a writer model's rewrite under ``EXP_DIR / "raw_rewrites"``.

    The file name is ``<transform>__<writer_model>__<slug>``, with the first
    two parts passed through safe_label(). An existing file of the same name
    is overwritten.

    Args:
        transform: Name of the transformation that produced the text.
        writer_model: Model identifier used for the rewrite.
        slug: Article identifier, used verbatim in the file name.
        text: Content to write.
    """
    raw_dir = EXP_DIR / "raw_rewrites"
    # parents=True matches save_cache() and lets this run on a fresh checkout.
    raw_dir.mkdir(parents=True, exist_ok=True)
    filename = f"{safe_label(transform)}__{safe_label(writer_model)}__{slug}"
    # Article prose contains non-ASCII punctuation (e.g. em-dashes), so pin the
    # encoding instead of relying on the platform default.
    (raw_dir / filename).write_text(text, encoding="utf-8")


def strip_code_fence(text: str) -> str:
    """Remove a wrapping Markdown code fence from ``text``.

    After trimming, an opening fence (three backticks, optionally followed by
    ``markdown`` in any letter case) at the very start and a closing fence at
    the very end are removed together with adjacent whitespace. The result is
    trimmed again.
    """
    fence = re.compile(r"^```(?:markdown)?\s*|\s*```$", re.I | re.S)
    unfenced = fence.sub("", text.strip())
    return unfenced.strip()


def load_cache() -> dict[str, Any]:
    """Load the experiment cache from ``CACHE_PATH``.

    Returns:
        The decoded JSON object, guaranteed to contain the ``"eval"``,
        ``"quality"`` and ``"repair"`` sections. When the file does not
        exist, all three sections are empty.
    """
    cache: dict[str, Any] = {}
    if CACHE_PATH.exists():
        cache = json.loads(CACHE_PATH.read_text(encoding="utf-8"))
    # A cache file written before a section existed would otherwise cause a
    # KeyError on first access, which only happens after LLM calls were made.
    for section in ("eval", "quality", "repair"):
        cache.setdefault(section, {})
    return cache


def save_cache(cache: dict[str, Any]) -> None:
    """Write ``cache`` to ``CACHE_PATH`` as indented, key-sorted JSON.

    The JSON is written to a sibling temporary file and then moved over the
    real path. This function runs after every LLM call, so writing in place
    would leave a truncated, unparseable cache if the process were
    interrupted mid-write.

    Args:
        cache: JSON-serialisable cache mapping.
    """
    CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(cache, indent=2, sort_keys=True)
    tmp_path = CACHE_PATH.with_name(CACHE_PATH.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    # Path.replace overwrites the destination in one rename step.
    tmp_path.replace(CACHE_PATH)


def clean_article(raw: str) -> str:
    """Drop the leading metadata comment and return the trimmed text plus one newline."""
    without_meta = META_RE.sub("", raw)
    return f"{without_meta.strip()}\n"


def parse_meta(raw: str) -> dict[str, str]:
    """Parse the leading metadata comment of ``raw`` into a dict.

    The comment body is split on ``|``. Each piece containing a ``:`` gives
    one entry, split at the first colon, with key and value trimmed. Pieces
    without a colon are ignored. Returns an empty dict when ``raw`` does not
    start with a metadata comment.
    """
    header = META_RE.match(raw)
    if header is None:
        return {}
    fields: dict[str, str] = {}
    for chunk in header.group(1).split("|"):
        name, colon, rest = chunk.partition(":")
        if not colon:
            continue
        fields[name.strip()] = rest.strip()
    return fields


def scaffold_hits(text: str) -> dict[str, int]:
    """Count case-insensitive matches of every scaffold pattern in ``text``.

    Returns a dict keyed like ``SCAFFOLD_PATTERNS`` with the number of
    non-overlapping matches for each pattern.
    """
    return {
        label: sum(1 for _ in re.finditer(regex, text, flags=re.IGNORECASE))
        for label, regex in SCAFFOLD_PATTERNS.items()
    }


def make_seed() -> Seed:
    """Build the pete-nicholas seed, applying saved overrides when present.

    The seed is created from the corpus at ``SEED_PATH``. If
    ``ROOT / "seeds/pete-nicholas.json"`` exists, its ``soul_md`` and
    ``style_notes`` values replace the seed's (defaulting to empty strings),
    and a non-empty ``representative_excerpts`` value replaces the seed's
    excerpts.
    """
    seed = Seed.from_corpus("pete-nicholas", SEED_PATH)
    overrides_path = ROOT / "seeds/pete-nicholas.json"
    if not overrides_path.exists():
        return seed

    overrides = json.loads(overrides_path.read_text())
    seed.soul_md = overrides.get("soul_md", "")
    seed.style_notes = overrides.get("style_notes", "")
    stored_excerpts = overrides.get("representative_excerpts")
    seed.representative_excerpts = stored_excerpts or seed.representative_excerpts
    return seed


# System prompt for the judge that sees three seed excerpts at once; sent by
# multi_excerpt_same_author(). It asks for a JSON object with the keys
# same_author, confidence and reasoning.
MULTI_CHARACTER_SYSTEM = """You are doing authorship-style analysis.

You are given three seed excerpts from the same writer. They show range:
city/political observation, devotional-pastoral application, and technology
commentary. Then you are given an article excerpt.

Question: is the article excerpt plausibly by the same writer, allowing for
topic variation and ordinary editing, or does it sound like a different author?

Do not require the article to match only one seed excerpt. Look for cadence,
register shifts, pastoral/intellectual habits, sentence architecture,
qualification style, and concrete-to-theological movement.

Return JSON:
{
  "same_author": true|false,
  "confidence": "high"|"medium"|"low",
  "reasoning": "1-2 concise sentences"
}
"""


def multi_excerpt_same_author(seed: Seed, text: str) -> tuple[bool, str]:
    """Ask the judge once, showing three seed excerpts alongside the article.

    Up to three representative excerpts are used, padded to three with the
    first 1200 characters of the corpus. Each excerpt is cut to 1400
    characters and the article to its first 450 whitespace-separated words.

    Returns:
        ``(same_author, reasoning)`` taken from the judge's JSON reply.
    """
    samples = seed.representative_excerpts[:3]
    shortfall = 3 - len(samples)
    if shortfall > 0:
        samples.extend([seed.corpus_text[:1200]] * shortfall)

    article_excerpt = " ".join(text.split()[:450])
    prompt_parts = [
        f"SEED EXCERPT {number}:\n{sample[:1400]}"
        for number, sample in enumerate(samples, 1)
    ]
    prompt_parts.append(f"ARTICLE EXCERPT:\n{article_excerpt}")

    verdict = call_llm_json(
        model=DEFAULT_EDITOR_MODEL,
        system=MULTI_CHARACTER_SYSTEM,
        user="\n\n".join(prompt_parts),
        temperature=0.2,
        max_tokens=450,
        timeout=120,
        fallback_models=["x-ai/grok-4.3"],
    )
    same_author = bool(verdict.get("same_author"))
    reasoning = verdict.get("reasoning", "")
    return same_author, reasoning


# System prompt for the judge that compares the article against one seed
# excerpt at a time; sent by any_excerpt_same_author(). It asks for a JSON
# object with the keys same_author, confidence and reasoning.
PER_EXCERPT_CHARACTER_SYSTEM = """You are doing authorship-style analysis.

You are given one seed excerpt and one article excerpt. Decide if the article
excerpt is plausibly by the same author, allowing for topic variation and
ordinary editing. Return JSON:
{
  "same_author": true|false,
  "confidence": "high"|"medium"|"low",
  "reasoning": "1 concise sentence"
}

Judge cadence, register, qualification style, concrete-to-theological movement,
sentence architecture, and pastoral/intellectual habits. Do not require topic
or genre to match exactly.
"""


def any_excerpt_same_author(seed: Seed, text: str) -> tuple[bool, str]:
    """Judge the article against each seed excerpt; pass if any comparison passes.

    Three excerpts are used (padded with the first 1200 characters of the
    corpus when fewer are available), one judge call per excerpt.

    Returns:
        ``(True, reasons)`` listing only the accepting verdicts when at least
        one excerpt matched, otherwise ``(False, reasons)`` listing them all.
    """
    samples = seed.representative_excerpts[:3]
    shortfall = 3 - len(samples)
    if shortfall > 0:
        samples.extend([seed.corpus_text[:1200]] * shortfall)
    article_excerpt = " ".join(text.split()[:450])

    verdicts: list[dict[str, Any]] = []
    for number, sample in enumerate(samples, 1):
        reply = call_llm_json(
            model=DEFAULT_EDITOR_MODEL,
            system=PER_EXCERPT_CHARACTER_SYSTEM,
            user=f"SEED EXCERPT {number}:\n{sample[:1400]}\n\nARTICLE EXCERPT:\n{article_excerpt}",
            temperature=0.2,
            max_tokens=350,
            timeout=120,
            fallback_models=["x-ai/grok-4.3"],
        )
        verdicts.append({
            "excerpt": number,
            "same_author": bool(reply.get("same_author")),
            "confidence": reply.get("confidence", ""),
            "reasoning": reply.get("reasoning", ""),
        })

    accepting = [entry for entry in verdicts if entry["same_author"]]
    # Report the accepting verdicts when there are any, otherwise every verdict.
    cited = accepting or verdicts
    summary = "; ".join(f"excerpt {entry['excerpt']}: {entry['reasoning']}" for entry in cited)
    return bool(accepting), summary


def evaluate(
    seed: Seed,
    text: str,
    cache: dict[str, Any],
    *,
    judge_mode: str = "single",
    force: bool = False,
) -> EvalResult:
    """Score ``text`` against the seed, using the cache when allowed.

    Args:
        seed: Seed writer profile.
        text: Article text to score.
        cache: Cache mapping; results live in its ``"eval"`` section, keyed by
            prompt version, judge mode and the text's hash.
        judge_mode: ``"multi"`` or ``"any"`` selects the matching excerpt
            judge in this module; any other value uses ``character_check``.
        force: When true, skip the cache lookup and recompute.

    Returns:
        The evaluation. A freshly computed one is stored in the cache and the
        cache is saved before returning.
    """
    key = f"{PROMPT_VERSION}:character:{judge_mode}:{sha(text)}"
    if not force and key in cache["eval"]:
        return EvalResult(**cache["eval"][key])

    if judge_mode in {"multi", "any"}:
        judge = multi_excerpt_same_author if judge_mode == "multi" else any_excerpt_same_author
        verdict, rationale = judge(seed, text)
        text_metrics = analyze_text(text)
        # The excerpt judges return no distance scores, so compute the same
        # fields here to keep them available for the acceptance gates.
        from simple_writer.pipeline import _cosine_distance, _foibles_overlap

        distance = round(_cosine_distance(text_metrics, seed.profile.metrics), 4)
        overlap = round(_foibles_overlap(text, seed.foibles), 3)
    else:
        check = character_check(seed, text)
        verdict, rationale = check.llm_yes_no, check.llm_reasoning
        distance, overlap = check.stylometric_distance, check.foibles_overlap

    drift_report = detect_drift(text, seed.profile)
    slop_report = audit_text(text)
    outcome = EvalResult(
        llm_yes_no=verdict,
        llm_reasoning=rationale,
        stylometric_distance=distance,
        foibles_overlap=overlap,
        drift_score=round(drift_report.drift_score, 4),
        oob_count=len(drift_report.out_of_bounds),
        slop_rate=round(slop_report.slop_rate, 4),
        slop_hard_fail=slop_report.hard_fail,
        word_count=len(_words(text)),
        scaffold_hits=scaffold_hits(text),
    )
    cache["eval"][key] = asdict(outcome)
    save_cache(cache)
    return outcome


def split_prefix(text: str, max_words: int = 650) -> tuple[str, str]:
    """Split ``text`` at a paragraph boundary into an opening prefix and the rest.

    Whole paragraphs (separated by blank lines) are taken from the start until
    their combined word count reaches ``max_words``.

    Returns:
        ``(prefix, suffix)``. The prefix ends with one newline. The suffix is
        empty when nothing remains; otherwise it starts with a blank line and
        ends with one newline.
    """
    body = text.strip()
    # The capturing group keeps separators: paragraphs land on even indices
    # and separators on odd ones.
    pieces = re.split(r"(\n\s*\n)", body)
    paragraphs = pieces[0::2]
    gaps = pieces[1::2] + [""]  # the last paragraph has no separator after it

    kept: list[str] = []
    words_so_far = 0
    offset = 0
    for paragraph, gap in zip(paragraphs, gaps):
        offset += len(paragraph) + len(gap)
        if not paragraph.strip():
            continue
        kept += [paragraph, gap]
        words_so_far += len(_words(paragraph))
        if words_so_far >= max_words:
            break

    prefix = "".join(kept).rstrip() + "\n"
    suffix = body[offset:].lstrip()
    if suffix:
        suffix = "\n\n" + suffix.rstrip() + "\n"
    return prefix, suffix


def heading_lines(text: str) -> list[str]:
    """Return, in order and trimmed, every line of ``text`` whose first character is ``#``."""
    hash_led = (line for line in text.splitlines() if line[:1] == "#")
    return [line.strip() for line in hash_led]


def repair_prefix(
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    rejection_reason: str = "",
    *,
    writer_model: str = DEFAULT_EDITOR_MODEL,
) -> str:
    """Rewrite only the opening of ``text`` toward the seed voice.

    The opening (see split_prefix) is sent to the writer model. The reply is
    discarded in favour of the original prefix when it does not start with an
    H1, changes the heading lines, or falls outside 85-120% of the prefix's
    word count. The untouched remainder is re-attached and the whole article
    goes through ``editor_pass``.

    Args:
        seed: Seed writer profile supplying the excerpts.
        slug: Article identifier, part of the cache key.
        text: Full article text.
        cache: Cache mapping; results are stored in its ``"repair"`` section.
        rejection_reason: Judge explanation to show the writer model.
        writer_model: Model used for the rewrite.

    Returns:
        The candidate article. It is cached only when the LLM call succeeded,
        so a failed call (timeout, provider error) is retried on the next run
        instead of being stored as "no change".
    """
    prefix, suffix = split_prefix(text)
    repair_key = f"{PROMPT_VERSION}:repair:{writer_model}:{slug}:{sha(prefix)}"
    if repair_key in cache["repair"]:
        return cache["repair"][repair_key]

    excerpts = seed.representative_excerpts[:3]
    while len(excerpts) < 3:
        excerpts.append(seed.corpus_text[:1400])
    seed_excerpt = "\n\n---\n\n".join(excerpts)
    metrics = analyze_text(text)
    forbidden = ", ".join([
        "I want to be careful",
        "I should be clear",
        "to be clear",
        "the question is",
        "this is not",
        "not just ... but",
        "not merely ... but",
        "it is worth",
        "in the end",
        "First/Second/Third",
    ])
    system = f"""You are not copy-editing. You are writing a new opening prefix
from source material.

Goal: make this opening more plausibly by the seed writer while preserving its
meaning, title, facts, and pastoral/theological argument.

Seed excerpts, the standard for cadence and authorial feel:
---
{seed_excerpt[:3600]}
---

The same-author judge rejected the current opening for this reason:
{rejection_reason or "(no reason supplied)"}

Current author-mismatch tells:
- The generated openings are often too rhetorically polished, sardonic, or
  literary-essayistic compared with the seed.
- The seed voice is plainer, warmer, more pastoral-practical, and more willing
  to explain the concrete stakes without making every sentence perform.
- The seed moves from ordinary social/pastoral reality to theological claim,
  then toward direct application for "we" and "you".
- Em-dash use is often too high; use commas, parentheses, or semicolons where natural.
- Parenthetical asides and direct quotations are underused, but do not fabricate a quote.
- Repeated batch scaffolds must be avoided: "I want to be careful", "the question is",
  "this is not", "not just/not merely ... but ...", and mechanical First/Second/Third.

Full-article metrics for context:
- sentence_length_mean: {metrics.get("sentence_length_mean", 0):.2f}
- sentence_length_p50: {metrics.get("sentence_length_p50", 0):.2f}
- short_sentence_ratio: {metrics.get("short_sentence_ratio", 0):.3f}
- dash_rate: {metrics.get("dash_rate", 0):.4f}
- parenthesis_rate: {metrics.get("parenthesis_rate", 0):.4f}

Rules:
- Rewrite ONLY the provided prefix.
- This is not a punctuation cleanup and not a line edit. Rebuild the opening
  from the source facts and claims.
- Do not preserve the existing sentence architecture. If a sentence is ornate,
  polemical, aphoristic, or self-consciously clever, recast it plainly.
- At least half of the non-heading sentences should be structurally different
  from the source.
- Preserve the H1 title exactly.
- Preserve any markdown headings already present in the prefix.
- Preserve facts, named people, citations, dates, and claims.
- Keep the prefix within 85-120% of its current word count.
- Aim for roughly 15-35% lexical change. If the result is basically the same
  paragraph with punctuation changes, it fails.
- Do not add fake anecdotes, fake quotes, fake citations, or new factual claims.
- Do not make the prose grander, more aphoristic, or more dramatic.
- Prefer plain pastoral clarity over cleverness.
- Prefer measured qualification, ordinary explanation, and direct pastoral
  application over literary scene-setting.
- Keep dry self-awareness only where it already belongs; do not add stand-up wit.
- These exact recurring scaffold phrases are forbidden: {forbidden}.
- Return only the repaired prefix. No commentary."""

    response = call_llm(
        model=writer_model,
        system=system,
        user=f"PREFIX TO REPAIR:\n\n{prefix}",
        temperature=0.7,
        max_tokens=2600,
        timeout=240,
        fallback_models=["anthropic/claude-opus-4.7", "x-ai/grok-4.3"],
    )
    succeeded = response.status == "success"
    repaired = response.content.strip() if succeeded else prefix
    repaired = strip_code_fence(repaired) + "\n"

    # Structural guards: any violation falls back to the original prefix.
    if not repaired.startswith("# "):
        repaired = prefix
    if heading_lines(repaired) != heading_lines(prefix):
        repaired = prefix
    prefix_words = len(_words(prefix))
    repaired_words = len(_words(repaired))
    if repaired_words < prefix_words * 0.85 or repaired_words > prefix_words * 1.20:
        repaired = prefix

    candidate = (repaired.rstrip() + suffix).rstrip() + "\n"
    candidate = editor_pass(candidate, em_dash_target_rate=0.003)
    # A guard rejection is still cached (same reply, same outcome), but a
    # failed call is not, otherwise one outage would pin "no change" for good.
    if succeeded:
        cache["repair"][repair_key] = candidate
        save_cache(cache)
    return candidate


def full_article_rewrite(
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    rejection_reason: str = "",
    *,
    writer_model: str = DEFAULT_EDITOR_MODEL,
) -> str:
    """Rewrite the full article toward the seed voice while preserving structure and facts.

    The reply is discarded in favour of ``text`` when it falls outside
    92-108% of the source word count or changes the heading lines. The result
    always goes through ``editor_pass``.

    Args:
        seed: Seed writer profile supplying the excerpts.
        slug: Article identifier, part of the cache key.
        text: Full article text.
        cache: Cache mapping; results are stored in its ``"repair"`` section.
        rejection_reason: Judge explanation to show the writer model.
        writer_model: Model used for the rewrite.

    Returns:
        The candidate article. It is cached only when the LLM call succeeded,
        so a failed call is retried on the next run instead of being stored
        as "no change".
    """
    repair_key = f"{PROMPT_VERSION}:full-rewrite:{writer_model}:{slug}:{sha(text)}"
    if repair_key in cache["repair"]:
        return cache["repair"][repair_key]

    excerpts = seed.representative_excerpts[:3]
    while len(excerpts) < 3:
        excerpts.append(seed.corpus_text[:1400])
    seed_excerpt = "\n\n---\n\n".join(excerpts)
    metrics = analyze_text(text)
    headings = "\n".join(heading_lines(text))
    forbidden = "\n".join(
        f"- {phrase}" for phrase in [
            "I want to be careful",
            "I should be clear",
            "to be clear",
            "the question is",
            "this is not",
            "not just ... but",
            "not merely ... but",
            "it is worth",
            "in the end",
            "First, Second, Third",
        ]
    )

    system = f"""You are rewriting an already-drafted article toward a real seed
writer's voice. This is not a summary and not a new article.

Seed excerpts, the standard for authorial feel:
---
{seed_excerpt[:3600]}
---

The same-author judge rejected the current article opening for this reason:
{rejection_reason or "(no reason supplied)"}

The existing generated corpus is systematically over-amplified: too literary,
too rhetorically polished, too sardonic, too fond of punchline sentences and
em-dashes. The real seed voice is plainer, warmer, more pastoral-practical,
more measured, and less self-consciously clever.

Full-article metrics for context:
- sentence_length_mean: {metrics.get("sentence_length_mean", 0):.2f}
- sentence_length_p50: {metrics.get("sentence_length_p50", 0):.2f}
- short_sentence_ratio: {metrics.get("short_sentence_ratio", 0):.3f}
- dash_rate: {metrics.get("dash_rate", 0):.4f}
- parenthesis_rate: {metrics.get("parenthesis_rate", 0):.4f}

Required headings, exact order and exact text:
{headings}

Forbidden recurring scaffolds:
{forbidden}

Rules:
- Return the FULL article, not excerpts.
- Preserve every heading exactly, including the H1 and all H2s, in the same order.
- Preserve the article's facts, named people, citations, dates, scripture
  references, doctrinal claims, and argument sequence.
- Do not invent anecdotes, quotes, citations, examples, institutions, or claims.
- Rewrite sentence architecture across the article; do not merely swap punctuation.
- Prefer direct pastoral explanation, measured qualification, and ordinary
  application over literary scene-setting.
- Keep dry wit only where it is already tied to the facts. Do not add more.
- Reduce mechanical em-dash rhythm where it is doing synthetic emphasis.
- Keep 92-108% of the source word count.
- Use markdown only. No commentary, no preamble, no code fence."""

    source_words = len(_words(text))
    response = call_llm(
        model=writer_model,
        system=system,
        user=f"SOURCE ARTICLE:\n\n{text}",
        temperature=0.55,
        max_tokens=min(int(source_words * 2.3) + 1200, 9000),
        timeout=420,
        fallback_models=["anthropic/claude-opus-4.7", "x-ai/grok-4.3"],
    )
    succeeded = response.status == "success"
    rewritten = response.content.strip() if succeeded else text
    rewritten = strip_code_fence(rewritten) + "\n"

    # Length and heading guards: any violation falls back to the source text.
    rewritten_words = len(_words(rewritten))
    if rewritten_words < source_words * 0.92 or rewritten_words > source_words * 1.08:
        rewritten = text
    if heading_lines(rewritten) != heading_lines(text):
        rewritten = text

    candidate = editor_pass(rewritten, em_dash_target_rate=0.003)
    # Do not cache the fallback from a failed call; it should be retried.
    if succeeded:
        cache["repair"][repair_key] = candidate
        save_cache(cache)
    return candidate


# System prompt for turning prose into a neutral factual brief. The brief
# extraction helpers in this module send it with a whole article, an article
# opening, or a single section as the user message.
BRIEF_SYSTEM = """Extract a neutral factual brief from an article.

Return markdown, not JSON. Preserve:
- exact title and headings
- named people, dates, places, citations, Scripture references
- core claims and argument sequence
- concrete examples and anecdotes
- cautions or qualifications

Do not preserve style, phrasing, cadence, jokes, or sentence structure.
Do not add facts.
"""


def extract_brief(slug: str, text: str, cache: dict[str, Any]) -> str:
    """Return a neutral factual brief of the whole article.

    Args:
        slug: Article identifier, part of the cache key.
        text: Full article text.
        cache: Cache mapping; briefs are stored in its ``"repair"`` section.

    Returns:
        The model's brief. When the call fails or returns nothing, the
        article's heading lines are returned instead. That fallback is not
        cached, so the next run asks the model again instead of reusing a
        headings-only brief.
    """
    key = f"{PROMPT_VERSION}:brief:{slug}:{sha(text)}"
    if key in cache["repair"]:
        return cache["repair"][key]
    response = call_llm(
        model=DEFAULT_EDITOR_MODEL,
        system=BRIEF_SYSTEM,
        user=f"ARTICLE:\n\n{text}",
        temperature=0.1,
        max_tokens=4500,
        timeout=240,
        fallback_models=["x-ai/grok-4.3"],
    )
    brief = response.content.strip() if response.status == "success" else ""
    if not brief:
        # Degraded result for this run only; deliberately left out of the cache.
        return "\n".join(heading_lines(text))
    cache["repair"][key] = brief
    save_cache(cache)
    return brief


def brief_rewrite(
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    rejection_reason: str = "",
    *,
    writer_model: str = DEFAULT_EDITOR_MODEL,
) -> str:
    """Generate a fresh article from a neutral brief so original prose cannot anchor the model.

    The reply is discarded in favour of ``text`` when it falls outside
    80-112% of the source word count or changes the heading lines. The result
    always goes through ``editor_pass``.

    Args:
        seed: Seed writer profile supplying the excerpts.
        slug: Article identifier, part of the cache key.
        text: Full article text.
        cache: Cache mapping; results are stored in its ``"repair"`` section.
        rejection_reason: Judge explanation to show the writer model.
        writer_model: Model used for the rewrite.

    Returns:
        The candidate article. The raw model output is saved and the
        candidate cached only when the LLM call succeeded, so a failed call
        is retried on the next run and never overwrites an earlier raw
        rewrite with the unchanged source.
    """
    repair_key = f"{PROMPT_VERSION}:brief-rewrite:{writer_model}:{slug}:{sha(text)}"
    if repair_key in cache["repair"]:
        return cache["repair"][repair_key]

    brief = extract_brief(slug, text, cache)
    excerpts = seed.representative_excerpts[:3]
    while len(excerpts) < 3:
        excerpts.append(seed.corpus_text[:1400])
    seed_excerpt = "\n\n---\n\n".join(excerpts)
    source_words = len(_words(text))
    headings = "\n".join(heading_lines(text))

    system = f"""Write a full article from a neutral brief in the seed writer's
voice. You are not revising prose; you are composing fresh prose from facts.

Seed excerpts:
---
{seed_excerpt[:3600]}
---

The current generated article failed same-author review for this reason:
{rejection_reason or "(no reason supplied)"}

Voice target:
- Plain, warm, pastoral-practical.
- Observes concrete reality, then explains theological stakes.
- Measured qualification rather than polemical flourish.
- Less literary, less sardonic, less self-consciously clever than the current
  generated corpus.
- British register is welcome, but do not perform Britishness.

Forbidden recurring scaffolds:
- I want to be careful
- I should be clear
- to be clear
- the question is
- this is not
- not just ... but
- not merely ... but
- it is worth
- in the end
- First, Second, Third

Required headings, exact order and exact text:
{headings}

Rules:
- Return the FULL article.
- Use every heading exactly as provided, in the same order.
- Use only the facts, examples, citations, and claims in the brief.
- Do not invent new anecdotes, quotes, references, statistics, or claims.
- Target {source_words} words, acceptable range {int(source_words * 0.90)}-{int(source_words * 1.08)}.
- Write fresh sentences; do not mimic the previous article's sentence structure.
- Markdown only. No commentary."""

    response = call_llm(
        model=writer_model,
        system=system,
        user=f"NEUTRAL BRIEF:\n\n{brief}",
        temperature=0.75,
        max_tokens=min(int(source_words * 2.4) + 1400, 10000),
        timeout=480,
        fallback_models=["anthropic/claude-opus-4.7", "x-ai/grok-4.3"],
    )
    succeeded = response.status == "success"
    rewritten = response.content.strip() if succeeded else text
    rewritten = strip_code_fence(rewritten) + "\n"
    if succeeded:
        # Keep only genuine model output in raw_rewrites.
        write_raw_rewrite("brief", writer_model, slug, rewritten)

    # Length and heading guards: any violation falls back to the source text.
    rewritten_words = len(_words(rewritten))
    if rewritten_words < source_words * 0.80 or rewritten_words > source_words * 1.12:
        rewritten = text
    if heading_lines(rewritten) != heading_lines(text):
        rewritten = text

    candidate = editor_pass(rewritten, em_dash_target_rate=0.003)
    # Do not cache the fallback from a failed call; it should be retried.
    if succeeded:
        cache["repair"][repair_key] = candidate
        save_cache(cache)
    return candidate


def plain_brief_rewrite(
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    rejection_reason: str = "",
    *,
    writer_model: str = DEFAULT_EDITOR_MODEL,
) -> str:
    """Generate from a brief in the plainer register of the actual seed corpus.

    Seed examples are corpus paragraphs of at least 45 words picked at fixed
    indices, falling back to the representative excerpts when none of those
    indices exist. The reply is discarded in favour of ``text`` when it falls
    outside 80-112% of the source word count or changes the heading lines.
    The result always goes through ``editor_pass``.

    Args:
        seed: Seed writer profile supplying corpus text and excerpts.
        slug: Article identifier, part of the cache key.
        text: Full article text.
        cache: Cache mapping; results are stored in its ``"repair"`` section.
        rejection_reason: Judge explanation to show the writer model.
        writer_model: Model used for the rewrite.

    Returns:
        The candidate article. The raw model output is saved and the
        candidate cached only when the LLM call succeeded, so a failed call
        is retried on the next run.
    """
    repair_key = f"{PROMPT_VERSION}:plain-brief-rewrite:{writer_model}:{slug}:{sha(text)}"
    if repair_key in cache["repair"]:
        return cache["repair"][repair_key]

    brief = extract_brief(slug, text, cache)
    source_words = len(_words(text))
    headings = "\n".join(heading_lines(text))
    seed_paragraphs = [p.strip() for p in seed.corpus_text.split("\n\n") if len(p.split()) >= 45]
    # Use longer essay-like excerpts from the corpus, not only the short middle
    # devotional paragraph that currently drives the production same-author gate.
    picked = [
        seed_paragraphs[index]
        for index in (13, 18, 28, 31, 40, 47, 58, 67)
        if index < len(seed_paragraphs)
    ]
    if not picked:
        picked = seed.representative_excerpts
    seed_examples = "\n\n---\n\n".join(picked)[:5200]

    system = f"""Write a full article from a neutral factual brief in the actual
seed writer's plainer register.

Seed examples:
---
{seed_examples}
---

The current generated article failed same-author review for this reason:
{rejection_reason or "(no reason supplied)"}

Important correction: the existing generated corpus is over-amplified. It is
too literary, too scene-driven, too fond of punchline sentences, and too eager
to dramatize ordinary claims. Write away from that. The target is plain,
expository, pastoral-practical prose: direct explanation, concrete consequence,
moderate qualification, and Scripture/theological application.

Positive target:
- Use the seed's plain explanatory mode: "The key...", "In contrast...",
  "At the same time...", "At least two principles...", only where natural.
- Prefer one clear paragraph of explanation over a dramatic miniature scene.
- Prefer mid-length sentences over short aphoristic punches.
- Let claims be modest and cumulative.
- Use "we" for shared Christian experience and "I" only if the brief contains
  a real first-person source detail.

Avoid:
- invented scene-setting, imagined dialogue, or novelistic reconstruction
- phrases such as "loaded gun", "trap snapped shut", "quietly erased",
  "the strange thing", "the room we already share", "walls and cupboards"
- wry public-intellectual flourish when a plain sentence will do
- listicle scaffolds: "First/Second/Third", "not just/not merely...but",
  "the question is", "this is not", "in the end", "it is worth"
- any repeated batch-level opening formula

Required headings, exact order and exact text:
{headings}

Rules:
- Return the FULL article.
- Use every heading exactly as provided, in the same order.
- Use only facts, examples, citations, and claims present in the brief.
- Do not invent anecdotes, quotations, references, dates, statistics,
  institutions, or claims.
- If a source anecdote appears synthetic or over-dramatic, preserve its factual
  content but tell it plainly.
- Target {source_words} words, acceptable range {int(source_words * 0.84)}-{int(source_words * 1.08)}.
- Markdown only. No commentary."""

    response = call_llm(
        model=writer_model,
        system=system,
        user=f"NEUTRAL FACTUAL BRIEF:\n\n{brief}",
        temperature=0.68,
        max_tokens=min(int(source_words * 2.4) + 1400, 10000),
        timeout=480,
        fallback_models=["anthropic/claude-opus-4.7", "x-ai/grok-4.3"],
    )
    succeeded = response.status == "success"
    rewritten = response.content.strip() if succeeded else text
    rewritten = strip_code_fence(rewritten) + "\n"
    if succeeded:
        # Keep only genuine model output in raw_rewrites.
        write_raw_rewrite("plain-brief", writer_model, slug, rewritten)

    # Length and heading guards: any violation falls back to the source text.
    rewritten_words = len(_words(rewritten))
    if rewritten_words < source_words * 0.80 or rewritten_words > source_words * 1.12:
        rewritten = text
    if heading_lines(rewritten) != heading_lines(text):
        rewritten = text

    candidate = editor_pass(rewritten, em_dash_target_rate=0.003)
    # Do not cache the fallback from a failed call; it should be retried.
    if succeeded:
        cache["repair"][repair_key] = candidate
        save_cache(cache)
    return candidate


def extract_intro_brief(slug: str, prefix: str, cache: dict[str, Any]) -> str:
    """Return a neutral factual brief of an article opening.

    Args:
        slug: Article identifier, part of the cache key.
        prefix: Opening portion of the article.
        cache: Cache mapping; briefs are stored in its ``"repair"`` section.

    Returns:
        The model's brief. When the call fails or returns nothing, the
        prefix's heading lines are returned instead. That fallback is not
        cached, so the next run asks the model again.
    """
    key = f"{PROMPT_VERSION}:intro-brief:{slug}:{sha(prefix)}"
    if key in cache["repair"]:
        return cache["repair"][key]
    response = call_llm(
        model=DEFAULT_EDITOR_MODEL,
        system=BRIEF_SYSTEM,
        user=f"ARTICLE OPENING:\n\n{prefix}",
        temperature=0.1,
        max_tokens=1800,
        timeout=180,
        fallback_models=["x-ai/grok-4.3"],
    )
    brief = response.content.strip() if response.status == "success" else ""
    if not brief:
        # Degraded result for this run only; deliberately left out of the cache.
        return "\n".join(heading_lines(prefix))
    cache["repair"][key] = brief
    save_cache(cache)
    return brief


def intro_brief_rewrite(
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    rejection_reason: str = "",
    *,
    writer_model: str = DEFAULT_EDITOR_MODEL,
) -> str:
    """Recompose the first article movement from a neutral brief, leaving the body untouched.

    The opening (split_prefix with ``max_words=620``) is summarised into a
    brief and rewritten from it. The reply is discarded in favour of the
    original prefix when it falls outside 84-120% of the prefix's word count
    or changes the heading lines. The untouched remainder is re-attached and
    the whole article goes through ``editor_pass``.

    Args:
        seed: Seed writer profile supplying the excerpts.
        slug: Article identifier; part of the cache key and the input to the
            deterministic choice of intro archetype.
        text: Full article text.
        cache: Cache mapping; results are stored in its ``"repair"`` section.
        rejection_reason: Judge explanation to show the writer model.
        writer_model: Model used for the rewrite.

    Returns:
        The candidate article. The raw model output is saved and the
        candidate cached only when the LLM call succeeded, so a failed call
        is retried on the next run.
    """
    prefix, suffix = split_prefix(text, max_words=620)
    repair_key = f"{PROMPT_VERSION}:intro-brief-rewrite:{writer_model}:{slug}:{sha(prefix)}"
    if repair_key in cache["repair"]:
        return cache["repair"][repair_key]

    brief = extract_intro_brief(slug, prefix, cache)
    excerpts = seed.representative_excerpts[:3]
    while len(excerpts) < 3:
        excerpts.append(seed.corpus_text[:1400])
    seed_gate_excerpt = excerpts[1]
    broader_seed = "\n\n---\n\n".join(excerpts)
    prefix_words = len(_words(prefix))
    headings = "\n".join(heading_lines(prefix))
    # Hash of the slug, so the same article gets the same archetype on every run.
    archetype_index = int(hashlib.sha256(f"intro-brief:{slug}".encode("utf-8")).hexdigest(), 16) % len(INTRO_ARCHETYPES)
    archetype = INTRO_ARCHETYPES[archetype_index]

    system = f"""Write the opening movement of an article from a neutral brief.
You are composing fresh prose from facts, not revising the previous prose.

Primary seed excerpt for cadence:
---
{seed_gate_excerpt[:1200]}
---

Broader seed range:
---
{broader_seed[:3400]}
---

The current generated opening failed same-author review for this reason:
{rejection_reason or "(no reason supplied)"}

Diversity instruction for this article:
{archetype}

Voice target:
- Plain, warm, pastoral-practical, concrete.
- Start near ordinary life or pastoral consequence, not with a clever essay hook.
- Move from the concrete situation to the theological or biblical stakes.
- Use "we" naturally for shared Christian experience and occasional direct "you".
- Make the prose less sardonic, less literary, and less architecturally clever.
- Do not imitate the seed excerpt's exact sentence pattern.

Forbidden recurring scaffolds:
- I want to be careful
- I should be clear
- to be clear
- the question is
- this is not
- not just ... but
- not merely ... but
- it is worth
- in the end
- First, Second, Third
- There is a kind of

Required headings, exact order and exact text:
{headings}

Rules:
- Return ONLY the opening movement.
- Preserve every required heading exactly, in the same order.
- Use only the facts, examples, citations, and claims in the brief.
- Do not invent anecdotes, quotations, references, dates, statistics, institutions, or claims.
- Target {prefix_words} words, acceptable range {int(prefix_words * 0.88)}-{int(prefix_words * 1.15)}.
- Vary sentence openings naturally. Do not write a list, numbered sequence, or repeated anaphora.
- Markdown only. No commentary."""

    response = call_llm(
        model=writer_model,
        system=system,
        user=f"NEUTRAL OPENING BRIEF:\n\n{brief}",
        temperature=0.82,
        max_tokens=3600,
        timeout=360,
        fallback_models=["anthropic/claude-opus-4.7", "x-ai/grok-4.3"],
    )
    succeeded = response.status == "success"
    rewritten = response.content.strip() if succeeded else prefix
    rewritten = strip_code_fence(rewritten) + "\n"

    if succeeded:
        # Keep only genuine model output in raw_rewrites.
        write_raw_rewrite("intro-brief", writer_model, slug, rewritten)

    # Length and heading guards: any violation falls back to the original prefix.
    rewritten_words = len(_words(rewritten))
    if rewritten_words < prefix_words * 0.84 or rewritten_words > prefix_words * 1.20:
        rewritten = prefix
    if heading_lines(rewritten) != heading_lines(prefix):
        rewritten = prefix

    candidate = (rewritten.rstrip() + suffix).rstrip() + "\n"
    candidate = editor_pass(candidate, em_dash_target_rate=0.003)
    # Do not cache the fallback from a failed call; it should be retried.
    if succeeded:
        cache["repair"][repair_key] = candidate
        save_cache(cache)
    return candidate


HEADING_RE = re.compile(r"(?m)^#{1,6}\s+.+$")


def split_heading_units(text: str) -> list[str]:
    """Cut markdown into consecutive units that each begin at a heading.

    Non-blank text before the first heading becomes its own leading unit.
    Every unit is trimmed and ends with one newline. Text without any heading
    comes back as a single unit.
    """
    starts = [found.start() for found in HEADING_RE.finditer(text)]
    if not starts:
        return [text.strip() + "\n"]

    # Each heading unit runs up to the next heading, or to the end of the text.
    bounds = starts + [len(text)]
    chunks = [text[:starts[0]]]
    chunks.extend(text[begin:end] for begin, end in zip(bounds, bounds[1:]))
    return [chunk.strip() + "\n" for chunk in chunks if chunk.strip()]


def extract_section_brief(slug: str, index: int, section: str, cache: dict[str, Any]) -> str:
    """Return the brief for one article section, reusing a cached one if present.

    The editor model is called with ``BRIEF_SYSTEM`` and the section text. If
    the call does not succeed, or yields empty text, the section's heading
    lines are used as the brief instead. Whatever is returned (including that
    fallback) is stored under ``cache["repair"]`` and the cache is saved.
    """
    cache_key = f"{PROMPT_VERSION}:section-brief:{slug}:{index}:{sha(section)}"
    if cache_key in cache["repair"]:
        return cache["repair"][cache_key]

    # Token budget grows with the section's word count, clamped to 900..3000.
    token_budget = len(_words(section)) * 3
    token_budget = min(max(token_budget, 900), 3000)
    response = call_llm(
        model=DEFAULT_EDITOR_MODEL,
        system=BRIEF_SYSTEM,
        user=f"ARTICLE SECTION:\n\n{section}",
        temperature=0.1,
        max_tokens=token_budget,
        timeout=180,
        fallback_models=["x-ai/grok-4.3"],
    )
    if response.status == "success":
        brief = response.content.strip()
    else:
        brief = ""
    brief = brief or "\n".join(heading_lines(section))

    cache["repair"][cache_key] = brief
    save_cache(cache)
    return brief


def section_brief_rewrite(
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    rejection_reason: str = "",
    *,
    writer_model: str = DEFAULT_EDITOR_MODEL,
) -> str:
    """Recompose each heading-led section from a neutral brief.

    Full-article brief rewrites often summarize. Section-sized rewrites are a
    middle path: enough fresh composition to break synthetic sentence
    architecture, but small enough to preserve length and factual coverage.

    Args:
        seed: Seed whose first three representative excerpts (padded from the
            corpus text when fewer exist) are shown to the writer model.
        slug: Article identifier used in cache keys and passed to
            ``write_raw_rewrite``.
        text: Full markdown article to rewrite.
        cache: Experiment cache; ``cache["repair"]`` is read and written, and
            the cache is saved after each new entry.
        rejection_reason: Same-author review reasoning quoted in the prompt.
        writer_model: Model used for the per-section rewrites.

    Returns:
        The recomposed article after ``editor_pass``. A section falls back to
        its original text when its rewrite misses the length or heading
        guards; the whole candidate falls back to ``text`` when the assembled
        article misses them.
    """
    repair_key = f"{PROMPT_VERSION}:section-brief-rewrite:{writer_model}:{slug}:{sha(text)}"
    if repair_key in cache["repair"]:
        return cache["repair"][repair_key]

    # Slicing copies the list, so padding it does not mutate the seed.
    excerpts = seed.representative_excerpts[:3]
    while len(excerpts) < 3:
        excerpts.append(seed.corpus_text[:1400])
    seed_excerpt = "\n\n---\n\n".join(excerpts)
    units = split_heading_units(text)
    rewritten_units: list[str] = []

    forbidden = "\n".join(
        f"- {phrase}" for phrase in [
            "I want to be careful",
            "I should be clear",
            "to be clear",
            "the question is",
            "this is not",
            "not just ... but",
            "not merely ... but",
            "it is worth",
            "in the end",
            "First, Second, Third",
            "There is a kind of",
            "we need to talk about",
        ]
    )

    for index, section in enumerate(units):
        section_words = len(_words(section))
        section_key = f"{PROMPT_VERSION}:section-rewrite:{writer_model}:{slug}:{index}:{sha(section)}"
        if section_key in cache["repair"]:
            rewritten_units.append(cache["repair"][section_key].strip())
            continue

        # Short sections are passed through verbatim, so skip them before
        # paying for a brief-extraction call whose result would be discarded.
        if section_words < 80:
            rewritten_units.append(section.strip())
            continue

        brief = extract_section_brief(slug, index, section, cache)
        headings = "\n".join(heading_lines(section))

        system = f"""Rewrite one markdown section from a neutral factual brief
in the seed writer's voice.

Seed excerpts:
---
{seed_excerpt[:3600]}
---

The full article failed same-author review for this reason:
{rejection_reason or "(no reason supplied)"}

Voice target:
- Plain, warm, pastoral-practical.
- Concrete before abstract.
- Measured, explanatory, and humane rather than literary or performative.
- Use "we" naturally where the brief warrants shared Christian application.
- Keep the writer's dry intelligence, but do not make the prose clever for its
  own sake.
- Vary sentence openings; do not make sections begin in the same pattern.

Forbidden recurring scaffolds:
{forbidden}

Required heading(s), exact text and order:
{headings}

Rules:
- Return ONLY this rewritten section.
- Preserve every required heading exactly.
- Use only facts, claims, examples, quotations, citations, dates, names, and
  Scripture references present in the brief.
- Do not invent anecdotes, quotes, statistics, institutions, or claims.
- Target {section_words} words, acceptable range {int(section_words * 0.88)}-{int(section_words * 1.16)}.
- Compose fresh sentences from the brief. Do not line-edit the old prose.
- Markdown only. No commentary."""

        response = call_llm(
            model=writer_model,
            system=system,
            user=f"NEUTRAL SECTION BRIEF:\n\n{brief}",
            temperature=0.78,
            max_tokens=min(max(int(section_words * 2.7) + 900, 1800), 5200),
            timeout=300,
            fallback_models=["anthropic/claude-opus-4.7", "x-ai/grok-4.3"],
        )
        rewritten = response.content.strip() if response.status == "success" else section
        # Drop a wrapping markdown code fence if the model added one.
        rewritten = re.sub(r"^```(?:markdown)?\s*|\s*```$", "", rewritten.strip(), flags=re.I | re.S).strip()
        rewritten_words = len(_words(rewritten))
        # The accepted band is slightly wider than the range stated in the prompt.
        if rewritten_words < section_words * 0.84 or rewritten_words > section_words * 1.20:
            rewritten = section.strip()
        if heading_lines(rewritten) != heading_lines(section):
            rewritten = section.strip()
        cache["repair"][section_key] = rewritten
        save_cache(cache)
        rewritten_units.append(rewritten)

    candidate = "\n\n".join(unit.strip() for unit in rewritten_units).strip() + "\n"
    write_raw_rewrite("section-brief", writer_model, slug, candidate)

    # Whole-article guards: revert to the source if length or headings drifted.
    source_words = len(_words(text))
    candidate_words = len(_words(candidate))
    if candidate_words < source_words * 0.86 or candidate_words > source_words * 1.18:
        candidate = text
    if heading_lines(candidate) != heading_lines(text):
        candidate = text

    candidate = editor_pass(candidate, em_dash_target_rate=0.003)
    cache["repair"][repair_key] = candidate
    save_cache(cache)
    return candidate


def hybrid_brief_section_rewrite(
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    rejection_reason: str = "",
    *,
    writer_model: str = DEFAULT_EDITOR_MODEL,
) -> str:
    """Splice a full-brief opening onto a section-rewritten body.

    The full-brief rewrite is the only transform that has flipped the
    single-excerpt same-author gate, but it can flatten the whole essay, while
    the section rewrite keeps length and facts better. This hybrid checks
    whether the successful opening shape can sit on top of the less lossy body.

    The first two heading units come from the full-brief rewrite and the rest
    from the section rewrite. If either rewrite has fewer than two units, or
    their heading lines differ, the full-brief rewrite is used whole. The
    result reverts to ``text`` when its word count or headings drift, and is
    cached after ``editor_pass``.
    """
    hybrid_key = f"{PROMPT_VERSION}:hybrid-brief-section-v2:{writer_model}:{slug}:{sha(text)}"
    if hybrid_key in cache["repair"]:
        return cache["repair"][hybrid_key]

    # A full-brief rewrite stored under the key that omits the writer model is
    # reused only when the default editor model is the writer.
    legacy_key = f"{PROMPT_VERSION}:brief-rewrite:{slug}:{sha(text)}"
    reuse_legacy = writer_model == DEFAULT_EDITOR_MODEL and legacy_key in cache["repair"]
    if reuse_legacy:
        from_full = cache["repair"][legacy_key]
    else:
        from_full = brief_rewrite(seed, slug, text, cache, rejection_reason, writer_model=writer_model)
    from_sections = section_brief_rewrite(seed, slug, text, cache, rejection_reason, writer_model=writer_model)

    units_full = split_heading_units(from_full)
    units_sections = split_heading_units(from_sections)
    can_splice = (
        len(units_full) >= 2
        and len(units_sections) >= 2
        and heading_lines(from_full) == heading_lines(from_sections)
    )
    if can_splice:
        # The successful full-brief opening often consists of H1 plus the first
        # H2 unit; taking only H1 would discard the actual opening prose.
        opening = [unit.strip() for unit in units_full[:2]]
        body = [unit.strip() for unit in units_sections[2:]]
        candidate = "\n\n".join(opening + body).strip() + "\n"
    else:
        candidate = from_full

    write_raw_rewrite("hybrid-brief-section", writer_model, slug, candidate)

    words_in_source = len(_words(text))
    words_in_candidate = len(_words(candidate))
    length_ok = words_in_source * 0.82 <= words_in_candidate <= words_in_source * 1.18
    if not length_ok or heading_lines(candidate) != heading_lines(text):
        candidate = text

    candidate = editor_pass(candidate, em_dash_target_rate=0.003)
    cache["repair"][hybrid_key] = candidate
    save_cache(cache)
    return candidate


# Opening strategies offered to the writer model by seed_intro_rewrite as the
# "Diversity instruction". One entry is picked per article from a SHA-256 hash
# of the slug, so a given slug always receives the same instruction.
INTRO_ARCHETYPES = [
    "Open from a concrete pastoral pressure ordinary Christians recognise, then move to the doctrinal issue.",
    "Open from a shared moral or social instinct, then show why the article's question matters for discipleship.",
    "Open from a plain mistake believers can make in practice, then widen toward the biblical argument.",
    "Open from an everyday institutional or church-life scene, then name the theological tension beneath it.",
    "Open from a concise concrete example already present in the article, then turn toward direct application.",
    "Open from the human consequence of the doctrine, then explain why the text must be read carefully.",
]


def seed_intro_rewrite(
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    rejection_reason: str = "",
    *,
    writer_model: str = DEFAULT_EDITOR_MODEL,
) -> str:
    """Rewrite the first article movement toward the seed excerpt used by the gate.

    The production same-author judge only inspects the first 300 words. This
    transform is intentionally narrow: it changes only the opening prefix
    (split off with ``max_words=520``) while leaving the body untouched, then
    relies on full-article drift/slop/repetition gates to prevent cheap
    evaluator hacking.

    Args:
        seed: Seed supplying the excerpts shown to the writer model; the
            second of the (padded) first three excerpts is the primary
            cadence example.
        slug: Article identifier; used in the cache key and to pick the
            opening archetype deterministically.
        text: Full markdown article.
        cache: Experiment cache; ``cache["repair"]`` is read and written.
        rejection_reason: Same-author review reasoning quoted in the prompt.
        writer_model: Model used for the rewrite.

    Returns:
        The article with its opening rewritten, after ``editor_pass``. The
        original opening is kept when the call fails or when the rewrite
        misses the length or heading guards.
    """
    prefix, suffix = split_prefix(text, max_words=520)
    # The cached value is the whole article (rewritten opening plus untouched
    # body), so the key covers the whole text. Keying on the prefix alone
    # would return a stale body whenever the body changed but the opening
    # did not.
    repair_key = f"{PROMPT_VERSION}:seed-intro:{writer_model}:{slug}:{sha(text)}"
    if repair_key in cache["repair"]:
        return cache["repair"][repair_key]

    # Slicing copies the list, so padding it does not mutate the seed.
    excerpts = seed.representative_excerpts[:3]
    while len(excerpts) < 3:
        excerpts.append(seed.corpus_text[:1400])
    seed_gate_excerpt = excerpts[1]
    broader_seed = "\n\n---\n\n".join(excerpts)
    source_words = len(_words(prefix))
    # Hash the slug so each article gets a stable archetype across runs.
    archetype_index = int(hashlib.sha256(slug.encode("utf-8")).hexdigest(), 16) % len(INTRO_ARCHETYPES)
    archetype = INTRO_ARCHETYPES[archetype_index]
    metrics = analyze_text(text)

    forbidden = "\n".join(
        f"- {phrase}" for phrase in [
            "I want to be careful",
            "I should be clear",
            "to be clear",
            "the question is",
            "this is not",
            "not just ... but",
            "not merely ... but",
            "it is worth",
            "in the end",
            "First, Second, Third",
            "There is a kind of",
        ]
    )

    system = f"""Rewrite only the opening movement of an article so it sounds
more plausibly like the seed writer.

Primary seed excerpt for cadence:
---
{seed_gate_excerpt[:1200]}
---

Broader seed range:
---
{broader_seed[:3400]}
---

The current opening was rejected for this reason:
{rejection_reason or "(no reason supplied)"}

Diversity instruction for this article:
{archetype}

Current full-article metrics:
- sentence_length_mean: {metrics.get("sentence_length_mean", 0):.2f}
- sentence_length_p50: {metrics.get("sentence_length_p50", 0):.2f}
- short_sentence_ratio: {metrics.get("short_sentence_ratio", 0):.3f}
- dash_rate: {metrics.get("dash_rate", 0):.4f}
- parenthesis_rate: {metrics.get("parenthesis_rate", 0):.4f}

Voice target:
- Plain, warm, pastoral, and concrete.
- More like a pastor explaining how grace, obedience, fear, conscience, and
  ordinary church life actually feel.
- Less like an essayist staging a clever argument.
- Use direct "we" and occasional "you" where the source warrants it.
- Prefer explanation and application over scene-setting and aphorism.
- Keep the theological argument and source facts intact.

Forbidden recurring scaffolds:
{forbidden}

Rules:
- Return ONLY the rewritten opening movement.
- Preserve the H1 title exactly.
- Preserve every markdown heading present in the opening, in the same order.
- Preserve all facts, named people, Scripture references, dates, citations,
  claims, and examples already present in the opening.
- Do not invent anecdotes, quotes, statistics, citations, institutions, or claims.
- Keep {int(source_words * 0.92)}-{int(source_words * 1.15)} words.
- Do not start with the same syntactic pattern as the seed excerpt. You may use
  one "When..." sentence if natural, but do not write a sequence of "When..."
  sentences.
- Avoid list rhythm and mechanical signposting.
- Markdown only. No commentary."""

    response = call_llm(
        model=writer_model,
        system=system,
        user=f"OPENING TO REWRITE:\n\n{prefix}",
        temperature=0.78,
        max_tokens=3200,
        timeout=300,
        fallback_models=["anthropic/claude-opus-4.7", "x-ai/grok-4.3"],
    )
    rewritten = response.content.strip() if response.status == "success" else prefix
    # Drop a wrapping markdown code fence if the model added one.
    rewritten = re.sub(r"^```(?:markdown)?\s*|\s*```$", "", rewritten.strip(), flags=re.I | re.S).strip() + "\n"

    write_raw_rewrite("seed-intro", writer_model, slug, rewritten)

    # The accepted band is slightly wider than the range stated in the prompt.
    rewritten_words = len(_words(rewritten))
    if rewritten_words < source_words * 0.90 or rewritten_words > source_words * 1.18:
        rewritten = prefix
    if heading_lines(rewritten) != heading_lines(prefix):
        rewritten = prefix

    candidate = (rewritten.rstrip() + suffix).rstrip() + "\n"
    candidate = editor_pass(candidate, em_dash_target_rate=0.003)
    cache["repair"][repair_key] = candidate
    save_cache(cache)
    return candidate


# System prompt for quality_guard. The model compares the BEFORE and AFTER
# opening prefixes and answers with a JSON verdict; accept_candidate reads the
# meaning_preserved, voice_delta and new_repetitive_tell fields from it.
QUALITY_GUARD_SYSTEM = """You are a hostile senior editor checking whether a
surgical opener repair degraded an article.

Compare BEFORE and AFTER. Ignore the fact that the after is a rewrite; judge
only whether it preserved meaning and voice. Return JSON:
{
  "meaning_preserved": true|false,
  "voice_delta": -3|-2|-1|0|1|2|3,
  "new_repetitive_tell": true|false,
  "winner": "before"|"after"|"tied",
  "reasoning": "one concise sentence"
}

Definitions:
- voice_delta: after voice fidelity minus before voice fidelity.
- new_repetitive_tell: true if AFTER introduces a reusable LLM-ish scaffold,
  listicle rhythm, fake anecdote, or generic smoothing.
"""


def quality_guard(slug: str, before: str, after: str, cache: dict[str, Any], *, force: bool = False) -> dict[str, Any]:
    """Ask the editor model whether an opening repair degraded the article.

    Only the prefixes returned by ``split_prefix`` are compared. The verdict
    is cached under ``cache["quality"]``, keyed by slug and both prefix
    hashes; ``force=True`` bypasses the cached verdict and overwrites it. An
    empty dict is stored when the model call yields nothing.
    """
    opening_before, _rest_before = split_prefix(before)
    opening_after, _rest_after = split_prefix(after)
    cache_key = f"{PROMPT_VERSION}:quality:{slug}:{sha(opening_before)}:{sha(opening_after)}"

    if force or cache_key not in cache["quality"]:
        verdict = call_llm_json(
            model=DEFAULT_EDITOR_MODEL,
            system=QUALITY_GUARD_SYSTEM,
            user=f"BEFORE PREFIX:\n{opening_before}\n\n---\n\nAFTER PREFIX:\n{opening_after}",
            temperature=0.2,
            max_tokens=500,
            timeout=120,
            fallback_models=["x-ai/grok-4.3"],
        ) or {}
        cache["quality"][cache_key] = verdict
        save_cache(cache)
        return verdict
    return cache["quality"][cache_key]


# System prompt for full_quality_guard. The model compares the whole BEFORE
# and AFTER articles; its JSON verdict adds a facts_preserved field to the
# fields requested by the prefix-only guard prompt.
FULL_QUALITY_GUARD_SYSTEM = """You are a hostile senior editor checking whether
a full-article rewrite degraded an article.

Compare BEFORE and AFTER. Judge whether AFTER preserves the same meaning,
facts, argument sequence, and authorial voice while reducing synthetic
LLM-ish tells. Return JSON:
{
  "meaning_preserved": true|false,
  "facts_preserved": true|false,
  "voice_delta": -3|-2|-1|0|1|2|3,
  "new_repetitive_tell": true|false,
  "winner": "before"|"after"|"tied",
  "reasoning": "one concise sentence"
}

Definitions:
- voice_delta: after voice fidelity minus before voice fidelity.
- new_repetitive_tell: true if AFTER introduces a reusable LLM-ish scaffold,
  listicle rhythm, fake anecdote, generic smoothing, or batch watermark.
"""


def full_quality_guard(slug: str, before: str, after: str, cache: dict[str, Any], *, force: bool = False) -> dict[str, Any]:
    """Ask the editor model whether a whole-article rewrite degraded the article.

    Both full texts are sent to the model. The verdict is cached under
    ``cache["quality"]``, keyed by slug and both text hashes; ``force=True``
    bypasses the cached verdict and overwrites it. An empty dict is stored
    when the model call yields nothing.
    """
    cache_key = f"{PROMPT_VERSION}:full-quality:{slug}:{sha(before)}:{sha(after)}"

    if force or cache_key not in cache["quality"]:
        verdict = call_llm_json(
            model=DEFAULT_EDITOR_MODEL,
            system=FULL_QUALITY_GUARD_SYSTEM,
            user=f"BEFORE:\n{before}\n\n---\n\nAFTER:\n{after}",
            temperature=0.2,
            max_tokens=650,
            timeout=180,
            fallback_models=["x-ai/grok-4.3"],
        ) or {}
        cache["quality"][cache_key] = verdict
        save_cache(cache)
        return verdict
    return cache["quality"][cache_key]


def source_same_author(meta: dict[str, str]) -> bool:
    """Report whether the metadata's ``same_author_llm`` value is "true".

    The comparison ignores case; a missing key counts as not passed.
    """
    recorded = meta.get("same_author_llm", "")
    return recorded.lower() == "true"


def total_hits(hits: dict[str, int]) -> int:
    """Add up the counts stored as values of ``hits``."""
    return sum(count for count in hits.values())


def slop_phrases(text: str) -> list[str]:
    """List the distinct audit phrases found in ``text``, sorted case-insensitively.

    Hits in the "Remnant" category and hits whose phrase starts with
    ``regex:`` are left out.
    """
    distinct = {
        hit.phrase
        for hit in audit_text(text).hits
        if hit.category != "Remnant" and not hit.phrase.startswith("regex:")
    }
    return sorted(distinct, key=str.lower)


def paragraph_slop_hits(paragraph: str, phrases: list[str]) -> list[str]:
    """Return the phrases that occur in ``paragraph``, in the order given.

    Matching ignores case. A phrase containing a space, hyphen, or apostrophe
    is matched as a plain substring; any other phrase must match as a whole
    word.
    """

    def _occurs(phrase: str) -> bool:
        literal = re.escape(phrase)
        is_compound = any(mark in phrase for mark in (" ", "-", "'"))
        source = literal if is_compound else rf"\b{literal}\b"
        return re.search(source, paragraph, re.IGNORECASE) is not None

    return [phrase for phrase in phrases if _occurs(phrase)]


# System prompt for slop_cleanup_rewrite. It is sent once per flagged block,
# alongside seed voice examples and the audit phrases found in that block.
SLOP_CLEANUP_SYSTEM = """You are a careful paragraph-level prose editor.

Edit one markdown paragraph/block to remove specific LLM-ish audit phrases
while preserving the author's meaning and voice.

Rules:
- Return ONLY the edited block.
- Preserve all facts, names, dates, Scripture references, quotations, citations,
  doctrinal claims, and the local argument.
- If the block contains a markdown heading, preserve the heading text exactly.
- Replace the listed audit phrases with natural alternatives, not synonyms that
  sound more polished.
- Do not add a new anecdote, quotation, example, claim, or rhetorical flourish.
- Do not introduce repeated scaffolds such as "the question is", "this is not",
  "not just ... but", "it is worth", "in the end", or numbered list rhythm.
- Keep the result within about 85-115% of the original block length.
"""


def slop_cleanup_rewrite(
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    *,
    writer_model: str = DEFAULT_EDITOR_MODEL,
) -> str:
    """Edit only the blocks that contain slop-auditor phrases.

    Deliberately narrower than the full editor retrofit: it targets candidates
    that already pass same-author but are blocked by deterministic slop gates.

    ``text`` is returned untouched when the audit reports no phrases. A block
    edit is kept only if it stays within 75-130% of the block's word count,
    keeps the block's headings, and lowers the block's audit hit total. The
    assembled article reverts to ``text`` when nothing changed, when headings
    differ, when it falls under 95% of the original word count, or when the
    audit hit total after ``editor_pass`` is not lower than the original's.
    """
    flagged = slop_phrases(text)
    if not flagged:
        return text
    article_key = f"{PROMPT_VERSION}:slop-cleanup:{writer_model}:{slug}:{sha(text)}"
    if article_key in cache["repair"]:
        return cache["repair"][article_key]

    voice_examples = "\n\n---\n\n".join(seed.representative_excerpts[:3])[:3200]
    blocks = re.split(r"\n\s*\n", text.strip())
    cleaned_blocks: list[str] = []

    for position, block in enumerate(blocks):
        block_hits = paragraph_slop_hits(block, flagged)
        # Blocks with no hits, or with fewer than eight words, pass through.
        if not block_hits or len(_words(block)) < 8:
            cleaned_blocks.append(block)
            continue

        block_key = f"{PROMPT_VERSION}:slop-block:{writer_model}:{slug}:{position}:{sha(block)}"
        if block_key in cache["repair"]:
            cleaned_blocks.append(cache["repair"][block_key])
            continue

        block_words = len(_words(block))
        phrase_bullets = "\n".join(f"- {phrase}" for phrase in block_hits)
        response = call_llm(
            model=writer_model,
            system=SLOP_CLEANUP_SYSTEM,
            user=(
                f"SEED VOICE EXAMPLES:\n{voice_examples}\n\n"
                f"AUDIT PHRASES TO REMOVE FROM THIS BLOCK:\n"
                f"{phrase_bullets}"
                f"\n\nBLOCK:\n{block}"
            ),
            temperature=0.35,
            max_tokens=min(max(int(block_words * 2.4) + 600, 900), 2200),
            timeout=180,
            fallback_models=["anthropic/claude-opus-4.7", "x-ai/grok-4.3"],
        )
        edited = strip_code_fence(response.content) if response.status == "success" else block
        edited_words = len(_words(edited))
        # Any failed guard discards the edit and keeps the original block.
        rejected = (
            edited_words < block_words * 0.75
            or edited_words > block_words * 1.30
            or heading_lines(edited) != heading_lines(block)
            or audit_text(edited).total_hits >= audit_text(block).total_hits
        )
        if rejected:
            edited = block
        cache["repair"][block_key] = edited
        save_cache(cache)
        cleaned_blocks.append(edited)

    anything_changed = any(new != old for old, new in zip(blocks, cleaned_blocks))
    candidate = "\n\n".join(cleaned_blocks).strip() + "\n"
    if (
        not anything_changed
        or heading_lines(candidate) != heading_lines(text)
        or len(_words(candidate)) < len(_words(text)) * 0.95
    ):
        candidate = text

    candidate = editor_pass(candidate, em_dash_target_rate=0.003)
    if audit_text(candidate).total_hits >= audit_text(text).total_hits:
        candidate = text

    write_raw_rewrite("slop-cleanup", writer_model, slug, candidate)
    cache["repair"][article_key] = candidate
    save_cache(cache)
    return candidate


# Transforms whose candidates guard_for_transform checks with
# full_quality_guard (whole-article comparison); every other transform is
# checked with the prefix-only quality_guard.
FULL_ARTICLE_TRANSFORMS = {
    "full",
    "brief",
    "plain-brief",
    "section-brief",
    "hybrid-brief-section",
    "slop-cleanup",
}


def min_word_ratio_for_transform(transform: str) -> float:
    """Look up the minimum after/before word-count ratio for a transform.

    Transforms not listed here get the strictest floor, 0.97.
    """
    floors = {
        "brief": 0.80,
        "plain-brief": 0.80,
        "hybrid-brief-section": 0.82,
        "section-brief": 0.88,
        "slop-cleanup": 0.95,
        "full": 0.90,
        "seed-intro": 0.95,
        "intro-brief": 0.95,
    }
    return floors.get(transform, 0.97)


def carry_forward_same_author(item: ArticleRun) -> bool:
    """Return the same-author verdict that stands for this article.

    The post-repair verdict counts only when the repair was accepted and an
    ``after`` evaluation exists; otherwise the ``before`` verdict is carried
    forward.
    """
    if item.accepted and item.after:
        return item.after.llm_yes_no
    return item.before.llm_yes_no


def accept_candidate(
    before: EvalResult,
    after: EvalResult,
    guard: dict[str, Any],
    *,
    min_word_ratio: float = 0.97,
) -> tuple[bool, str]:
    """Decide whether a candidate may replace the original article.

    Checks run in order and the first failure wins: same-author verdict, word
    count floor, slop rate, scaffold hits, then the quality-guard verdict (if
    one was supplied). A rise in drift score or out-of-band metric count is
    tolerated only when compensated by a non-negative voice delta, at least
    three fewer scaffold hits, an essentially flat slop rate, and no hard
    slop failure.

    Returns:
        ``(accepted, reason)``; the reason is ``"accepted"`` on success.
    """
    if not after.llm_yes_no:
        return False, "same_author still false"
    if after.word_count < before.word_count * min_word_ratio:
        return False, "word count dropped"
    if after.slop_rate > before.slop_rate + 0.35:
        return False, "slop rose"

    scaffolds_before = total_hits(before.scaffold_hits)
    scaffolds_after = total_hits(after.scaffold_hits)
    if scaffolds_after > scaffolds_before:
        return False, "scaffold hits rose"

    # An empty or missing guard verdict is treated as a neutral voice delta.
    voice_delta = 0
    if guard:
        if guard.get("meaning_preserved") is False:
            return False, "quality guard: meaning not preserved"
        if guard.get("facts_preserved") is False:
            return False, "quality guard: facts not preserved"
        voice_delta = int(guard.get("voice_delta", 0) or 0)
        if voice_delta < -1:
            return False, "quality guard: voice degraded"
        if guard.get("new_repetitive_tell") is True:
            return False, "quality guard: new repetitive tell"

    drift_rose = after.drift_score > before.drift_score + 0.04
    oob_rose = after.oob_count > before.oob_count + 1
    if not (drift_rose or oob_rose):
        return True, "accepted"

    compensated = (
        voice_delta >= 0
        and scaffolds_before - scaffolds_after >= 3
        and after.slop_rate - before.slop_rate <= 0.05
        and not after.slop_hard_fail
    )
    if compensated:
        return True, "accepted"
    # Drift takes precedence in the message when both regressions occurred.
    return False, ("drift rose" if drift_rose else "OOB metric count rose")


def article_paths(limit: int | None = None) -> list[Path]:
    """Return the sorted ``*.md`` paths in ``SOURCE_DIR``.

    When ``limit`` is truthy only the first ``limit`` paths are returned;
    ``None`` or ``0`` returns all of them.
    """
    found = [entry for entry in SOURCE_DIR.glob("*.md") if entry.name != "batch_summary.json"]
    found.sort()
    if limit:
        return found[:limit]
    return found


def build_candidate(
    transform: str,
    seed: Seed,
    slug: str,
    text: str,
    cache: dict[str, Any],
    rejection_reason: str,
    writer_model: str,
) -> str:
    """Produce a candidate article with the rewrite selected by ``transform``.

    Unrecognised transform names fall through to ``repair_prefix``. The
    slop-cleanup rewrite is the only one that does not receive the rejection
    reason.
    """
    if transform == "slop-cleanup":
        return slop_cleanup_rewrite(seed, slug, text, cache, writer_model=writer_model)

    rewriters = {
        "hybrid-brief-section": hybrid_brief_section_rewrite,
        "section-brief": section_brief_rewrite,
        "plain-brief": plain_brief_rewrite,
        "intro-brief": intro_brief_rewrite,
        "seed-intro": seed_intro_rewrite,
        "brief": brief_rewrite,
        "full": full_article_rewrite,
    }
    rewrite = rewriters.get(transform, repair_prefix)
    return rewrite(seed, slug, text, cache, rejection_reason, writer_model=writer_model)


def guard_for_transform(
    transform: str,
    slug: str,
    before_text: str,
    candidate: str,
    cache: dict[str, Any],
    *,
    force: bool = False,
) -> dict[str, Any]:
    """Run the quality guard that matches the scope of ``transform``.

    Transforms in ``FULL_ARTICLE_TRANSFORMS`` are checked on the whole
    article; all others are checked on the opening prefix only.
    """
    checker = full_quality_guard if transform in FULL_ARTICLE_TRANSFORMS else quality_guard
    return checker(slug, before_text, candidate, cache, force=force)


def candidate_score(before: EvalResult, after: EvalResult, guard: dict[str, Any]) -> float:
    """Score a candidate against the original; higher is better.

    Voice delta, scaffold reduction, and slop reduction add to the score.
    Increases in drift or out-of-band count, and any change in length,
    subtract from it.
    """
    voice_delta = int(guard.get("voice_delta", 0) or 0) if guard else 0
    word_ratio = after.word_count / before.word_count if before.word_count else 1.0

    voice_term = voice_delta * 200
    scaffold_term = (total_hits(before.scaffold_hits) - total_hits(after.scaffold_hits)) * 20
    slop_term = (before.slop_rate - after.slop_rate) * 10
    # Only regressions are penalised; improvements in drift/OOB earn nothing.
    drift_penalty = max(after.drift_score - before.drift_score, 0) * 80
    oob_penalty = max(after.oob_count - before.oob_count, 0) * 5
    length_penalty = abs(1.0 - word_ratio) * 40

    return voice_term + scaffold_term + slop_term - drift_penalty - oob_penalty - length_penalty


def run(args: argparse.Namespace) -> dict[str, Any]:
    """Evaluate every source article, attempt repairs, and write the results.

    For each article the original is evaluated, and if it is selected for
    repair one transform (or, in ``candidate-pool`` mode, a fixed pool of
    transforms) builds candidates that are re-evaluated, guarded, and run
    through ``accept_candidate``. Accepted candidates are written to
    ``AFTER_DIR``; otherwise the cleaned original is written there.

    Args:
        args: Parsed CLI options. This function reads ``limit``,
            ``judge_mode``, ``force_eval``, ``force_quality``, ``all``,
            ``use_metadata_failures``, ``repair_limit``, ``write_all``,
            ``write_rejected``, ``transform``, ``writer_model``, ``sleep``
            and ``target``.

    Returns:
        The result dict that is also written to ``RESULTS_PATH`` and passed
        to ``write_report``.
    """
    EXP_DIR.mkdir(parents=True, exist_ok=True)
    AFTER_DIR.mkdir(parents=True, exist_ok=True)
    seed = make_seed()
    cache = load_cache()
    runs: list[ArticleRun] = []

    for path in article_paths(args.limit):
        raw = path.read_text()
        text = clean_article(raw)
        meta = parse_meta(raw)
        before = evaluate(seed, text, cache, judge_mode=args.judge_mode, force=args.force_eval)
        source_yes = source_same_author(meta)

        # Selection: by default repair articles the fresh evaluation fails;
        # with use_metadata_failures, use the verdict recorded in the file's
        # metadata instead. args.all selects everything either way.
        should_attempt = args.all or not before.llm_yes_no
        if args.use_metadata_failures:
            should_attempt = args.all or not source_yes
        if args.repair_limit is not None:
            # Cap the number of attempted repairs across the whole run.
            attempted_so_far = sum(1 for item in runs if item.attempted)
            if should_attempt and attempted_so_far >= args.repair_limit:
                should_attempt = False

        if not should_attempt:
            out_path = AFTER_DIR / path.name
            if args.write_all:
                out_path.write_text(text)
            runs.append(ArticleRun(
                slug=path.name,
                source_same_author=source_yes,
                before=before,
                after=None,
                accepted=False,
                attempted=False,
                reason="already passed or not selected",
                quality_guard=None,
                after_path=str(out_path) if args.write_all else None,
            ))
            continue

        if args.transform == "candidate-pool":
            # Try every transform in the pool and record each attempt, so the
            # best one can be chosen afterwards.
            pool = ["hybrid-brief-section", "brief", "plain-brief", "section-brief", "intro-brief", "seed-intro", "prefix"]
            attempts: list[dict[str, Any]] = []
            for transform in pool:
                candidate_i = build_candidate(
                    transform, seed, path.name, text, cache, before.llm_reasoning, args.writer_model
                )
                after_i = evaluate(seed, candidate_i, cache, judge_mode=args.judge_mode, force=args.force_eval)
                guard_i = guard_for_transform(
                    transform, path.name, text, candidate_i, cache, force=args.force_quality
                )
                accepted_i, reason_i = accept_candidate(
                    before,
                    after_i,
                    guard_i,
                    min_word_ratio=min_word_ratio_for_transform(transform),
                )
                attempts.append({
                    "transform": transform,
                    "candidate": candidate_i,
                    "after": after_i,
                    "guard": guard_i,
                    "accepted": accepted_i,
                    "reason": reason_i,
                    "score": candidate_score(before, after_i, guard_i),
                })
                # A candidate that passes same-author but was rejected only
                # for slop gets one paragraph-level cleanup pass, recorded as
                # an extra attempt.
                if after_i.llm_yes_no and reason_i == "slop rose":
                    cleaned_i = slop_cleanup_rewrite(
                        seed, path.name, candidate_i, cache, writer_model=args.writer_model
                    )
                    cleaned_after_i = evaluate(
                        seed, cleaned_i, cache, judge_mode=args.judge_mode, force=args.force_eval
                    )
                    # The composite slug keeps this guard verdict's cache key
                    # distinct from the base transform's.
                    cleaned_guard_i = full_quality_guard(
                        f"{path.name}:slop-cleanup:{transform}",
                        text,
                        cleaned_i,
                        cache,
                        force=args.force_quality,
                    )
                    # NOTE(review): the cleaned candidate is compared against
                    # the original article with the slop-cleanup floor (0.95),
                    # even when the base transform allows a lower word ratio
                    # (e.g. 0.80 for "brief") — confirm this is intended.
                    cleaned_accepted_i, cleaned_reason_i = accept_candidate(
                        before,
                        cleaned_after_i,
                        cleaned_guard_i,
                        min_word_ratio=min_word_ratio_for_transform("slop-cleanup"),
                    )
                    attempts.append({
                        "transform": f"{transform}+slop-cleanup",
                        "candidate": cleaned_i,
                        "after": cleaned_after_i,
                        "guard": cleaned_guard_i,
                        "accepted": cleaned_accepted_i,
                        "reason": cleaned_reason_i,
                        "score": candidate_score(before, cleaned_after_i, cleaned_guard_i),
                    })
            # Prefer the highest-scoring accepted attempt. If none was
            # accepted, report the attempt that passes same-author (if any)
            # with the highest score.
            accepted_attempts = [item for item in attempts if item["accepted"]]
            if accepted_attempts:
                best = max(accepted_attempts, key=lambda item: item["score"])
            else:
                best = max(attempts, key=lambda item: (item["after"].llm_yes_no, item["score"]))
            candidate = best["candidate"]
            after = best["after"]
            # The stored guard is the winner's verdict plus a summary of every
            # attempt in the pool.
            guard = {
                **best["guard"],
                "candidate_pool": [
                    {
                        "transform": item["transform"],
                        "accepted": item["accepted"],
                        "reason": item["reason"],
                        "same_author": item["after"].llm_yes_no,
                        "voice_delta": item["guard"].get("voice_delta"),
                        "score": round(item["score"], 3),
                        "slop_rate": item["after"].slop_rate,
                        "drift_score": item["after"].drift_score,
                        "scaffold_hits": total_hits(item["after"].scaffold_hits),
                    }
                    for item in attempts
                ],
            }
            accepted, reason = best["accepted"], f"{best['transform']}: {best['reason']}"
        else:
            # Single-transform mode: one candidate, one evaluation, one guard.
            candidate = build_candidate(
                args.transform, seed, path.name, text, cache, before.llm_reasoning, args.writer_model
            )
            after = evaluate(seed, candidate, cache, judge_mode=args.judge_mode, force=args.force_eval)
            guard = guard_for_transform(args.transform, path.name, text, candidate, cache, force=args.force_quality)
            accepted, reason = accept_candidate(
                before, after, guard, min_word_ratio=min_word_ratio_for_transform(args.transform)
            )

        # AFTER_DIR always receives a file for an attempted article: the
        # candidate when accepted, the cleaned original otherwise.
        out_path = AFTER_DIR / path.name
        if accepted:
            out_path.write_text(candidate)
        elif args.write_rejected:
            rejected_dir = EXP_DIR / "rejected"
            rejected_dir.mkdir(exist_ok=True)
            (rejected_dir / path.name).write_text(candidate)
            out_path.write_text(text)
        else:
            out_path.write_text(text)

        runs.append(ArticleRun(
            slug=path.name,
            source_same_author=source_yes,
            before=before,
            after=after,
            accepted=accepted,
            attempted=True,
            reason=reason,
            quality_guard=guard,
            after_path=str(out_path),
        ))
        # Pause between attempted articles.
        time.sleep(args.sleep)

    result = {
        "created_at": utcnow(),
        "prompt_version": PROMPT_VERSION,
        "judge_mode": args.judge_mode,
        "source_dir": str(SOURCE_DIR),
        "after_dir": str(AFTER_DIR),
        "target_same_author": args.target,
        "transform": args.transform,
        "writer_model": args.writer_model,
        "runs": [asdict(item) for item in runs],
    }
    RESULTS_PATH.write_text(json.dumps(result, indent=2))
    write_report(result)
    return result


def write_report(result: dict[str, Any]) -> None:
    """Render the experiment result as a Markdown report at ``REPORT_PATH``.

    Args:
        result: The result mapping assembled by ``run``. Reads ``created_at``,
            ``target_same_author`` and ``runs``, plus the optional ``transform``
            and ``writer_model`` keys. Each entry of ``runs`` is a dict of
            ``ArticleRun`` fields whose ``before``/``after`` values are dicts of
            ``EvalResult`` fields (``after`` may be missing or empty).
    """

    def table_cell(value: object) -> str:
        # Free text goes into a pipe-delimited table row: a raw "|" would open
        # an extra column and a line break would terminate the row, so escape
        # the former and flatten the latter.
        return " ".join(str(value).splitlines()).replace("|", "\\|")

    # The runs arrive as plain dicts (dataclasses.asdict output); rebuild the
    # dataclasses so the helpers below can use attribute access.
    runs = [
        ArticleRun(**{
            **item,
            "before": EvalResult(**item["before"]),
            "after": EvalResult(**item["after"]) if item.get("after") else None,
        })
        for item in result["runs"]
    ]
    before_pass = sum(1 for item in runs if item.before.llm_yes_no)
    after_pass = sum(1 for item in runs if carry_forward_same_author(item))
    accepted = sum(1 for item in runs if item.accepted)
    attempted = sum(1 for item in runs if item.attempted)

    lines = [
        "# Same-author lift experiment",
        "",
        f"Created: {result['created_at']}",
        "",
        "## Summary",
        "",
        f"- Articles evaluated: {len(runs)}",
        f"- Same-author before: {before_pass}/{len(runs)}",
        f"- Same-author after/carry-forward: {after_pass}/{len(runs)}",
        f"- Attempted repairs: {attempted}",
        f"- Accepted repairs: {accepted}",
        f"- Target: {result['target_same_author']}/{len(runs)}",
        "",
        "## Method",
        "",
        f"- Transform mode: `{result.get('transform', 'prefix')}`.",
        f"- Writer model: `{result.get('writer_model', DEFAULT_EDITOR_MODEL)}`.",
        "- Prefix mode edits only the first paragraph-boundary prefix, usually around 650 words.",
        "- Seed-intro mode rewrites the first movement against the seed excerpt used by the same-author gate.",
        "- Intro-brief mode recomposes the first movement from a neutral factual brief.",
        "- Section-brief mode recomposes each heading-led section from a neutral factual brief.",
        "- Hybrid-brief-section mode uses a full-brief opening with a section-brief body.",
        "- Candidate-pool mode tries multiple transforms and selects the best accepted same-author candidate.",
        "- Candidate-pool can run paragraph-level slop cleanup on candidates that already pass same-author but fail the slop gate.",
        "- Full mode rewrites the full article while preserving headings, facts, citations, and argument sequence.",
        "- Brief mode extracts a neutral fact brief, then writes fresh prose from that brief.",
        "- Plain-brief mode writes from the neutral brief toward the actual seed corpus's plainer expository register.",
        "- Reject if same-author stays false, drift/slop/repetition gets worse, or quality guard flags degradation.",
        "",
        "## Per Article",
        "",
        "| Slug | Attempted | Accepted | Before | After | Reason |",
        "|---|---:|---:|---:|---:|---|",
    ]
    for item in runs:
        after = carry_forward_same_author(item)
        lines.append(
            f"| `{item.slug}` | {str(item.attempted).lower()} | {str(item.accepted).lower()} | "
            f"{str(item.before.llm_yes_no).lower()} | {str(after).lower()} | {table_cell(item.reason)} |"
        )
    # Reasons are free text and may contain non-ASCII characters; pin the
    # encoding so the write does not depend on the platform's locale default.
    REPORT_PATH.write_text("\n".join(lines) + "\n", encoding="utf-8")


def main() -> int:
    """Command-line entry point: parse flags, run the experiment, print a summary.

    Returns:
        Exit status for the process: 2 when at least 61 articles were run and
        the carry-forward same-author count is below ``--target``; 0 otherwise.
    """
    transform_choices = [
        "prefix",
        "seed-intro",
        "intro-brief",
        "section-brief",
        "hybrid-brief-section",
        "slop-cleanup",
        "candidate-pool",
        "full",
        "brief",
        "plain-brief",
    ]

    cli = argparse.ArgumentParser()
    cli.add_argument("--limit", type=int)
    cli.add_argument("--repair-limit", type=int)
    cli.add_argument("--target", type=int, default=52)
    cli.add_argument("--all", action="store_true", help="Attempt repairs for all articles")
    cli.add_argument(
        "--use-metadata-failures",
        action="store_true",
        help="Select failures from metadata instead of fresh eval",
    )
    cli.add_argument("--write-all", action="store_true")
    cli.add_argument("--write-rejected", action="store_true")
    cli.add_argument("--force-eval", action="store_true")
    cli.add_argument("--force-quality", action="store_true")
    cli.add_argument("--sleep", type=float, default=0.0)
    cli.add_argument("--judge-mode", choices=["single", "multi", "any"], default="single")
    cli.add_argument("--transform", choices=transform_choices, default="prefix")
    cli.add_argument("--writer-model", default=DEFAULT_EDITOR_MODEL)
    args = cli.parse_args()

    result = run(args)
    runs = result["runs"]

    # Count articles that pass same-author after the run: an accepted repair
    # contributes its "after" verdict, everything else keeps its "before" one.
    after_pass = 0
    for entry in runs:
        use_after = entry.get("accepted") and entry.get("after")
        verdict = entry["after"] if use_after else entry["before"]
        if verdict["llm_yes_no"]:
            after_pass += 1

    print(f"same_author after/carry-forward: {after_pass}/{len(runs)}")
    print(f"report: {REPORT_PATH}")

    # Only runs of 61 or more articles can fail on the target.
    # NOTE(review): 61 is presumably the full corpus size - confirm.
    if after_pass < args.target and len(runs) >= 61:
        return 2
    return 0


# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())