"""
SiteHub LinkedIn ICP Tracker — PostgreSQL Storage Backend (SQLAlchemy)

Shared by CLI (icp_process.py) and future web app (Phase 2).
All query functions return dicts with the same keys as the Excel-based functions
for drop-in compatibility.
"""

import os
from datetime import date, datetime, timedelta, timezone

from sqlalchemy import (
    create_engine, Column, Integer, String, Text, Date, Boolean,
    DateTime, ForeignKey, UniqueConstraint, Index, func, Computed,
    case, extract, literal_column, desc, asc, or_,
)
from sqlalchemy.orm import (
    declarative_base, sessionmaker, Session, relationship,
)

# ---------------------------------------------------------------------------
# Database connection
# ---------------------------------------------------------------------------
# Connection string; the default targets the local dev database and is
# overridden in real deployments via the DATABASE_URL environment variable.
DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://icp_user:icp_local_dev_2024@localhost:5432/icp_tracker")

# Lazily created singletons — see get_engine() / get_session().
_engine = None
_SessionFactory = None

# Declarative base shared by every ORM model in this module.
Base = declarative_base()


def get_engine():
    """Return the process-wide SQLAlchemy engine, creating it on first use.

    pool_pre_ping guards against stale pooled connections.
    """
    global _engine
    if _engine is not None:
        return _engine
    _engine = create_engine(DATABASE_URL, pool_pre_ping=True)
    return _engine


def get_session() -> Session:
    """Open and return a fresh Session bound to the shared engine.

    The sessionmaker itself is created lazily and cached module-wide.
    """
    global _SessionFactory
    if _SessionFactory is not None:
        return _SessionFactory()
    _SessionFactory = sessionmaker(bind=get_engine())
    return _SessionFactory()


# ---------------------------------------------------------------------------
# SQLAlchemy Models
# ---------------------------------------------------------------------------

class SourceProfile(Base):
    """LinkedIn profile that interactions are collected under.

    Looked up either by `code` (compared against raw.upper() in
    resolve_source_profile) or case-insensitively by `full_name`.
    """
    __tablename__ = "source_profiles"
    id = Column(Integer, primary_key=True)
    code = Column(String(5), unique=True, nullable=False)  # short unique code (<=5 chars)
    full_name = Column(Text, nullable=False)
    created_at = Column(DateTime(timezone=True), server_default=func.now())


class Contact(Base):
    """Deduplicated person record aggregated from interactions.

    `status` holds one of the classifications used throughout this module:
    "ICP", "POI", "Not ICP", or "Review".
    """
    __tablename__ = "contacts"
    id = Column(Integer, primary_key=True)
    name = Column(Text, nullable=False)  # CITEXT in DB
    # Persisted generated column for case-insensitive lookups.
    name_lower = Column(Text, Computed("lower(name::text)", persisted=True))
    title = Column(Text, default="")
    company = Column(Text, default="")
    status = Column(String(20), default="Review")
    company_tier = Column(String(20), default="")
    total_interactions = Column(Integer, default=0)
    first_interaction = Column(Date)
    latest_interaction = Column(Date)
    notes = Column(Text, default="")
    match_reason = Column(Text, default="")
    # Zoho CRM record id + last successful sync time (set by log_zoho_sync).
    zoho_id = Column(Text)
    zoho_synced_at = Column(DateTime(timezone=True))
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    # NOTE(review): no onupdate=func.now() here, so updated_at only changes if
    # set explicitly, yet get_unsynced_icp_contacts compares it against
    # zoho_synced_at — confirm a DB trigger (or callers) keep it current.
    updated_at = Column(DateTime(timezone=True), server_default=func.now())


class Interaction(Base):
    """One raw engagement row imported from LinkedIn tracking.

    `source_profile_raw` preserves the original source-profile text even when
    it could not be resolved to a source_profiles FK (see append_interactions).
    `contact_id` is back-filled later by link_interactions_to_contacts().
    """
    __tablename__ = "interactions"
    id = Column(Integer, primary_key=True)
    date = Column(Date)
    post_hook = Column(Text, default="")
    type = Column(String(50), default="")  # interaction kind (free-form, <=50 chars)
    name = Column(Text, nullable=False)  # CITEXT in DB
    title = Column(Text, default="")
    company = Column(Text, default="")
    profile_url = Column(Text, default="")
    source_profile_id = Column(Integer, ForeignKey("source_profiles.id"))
    source_profile_raw = Column(String(50), default="")
    contact_id = Column(Integer, ForeignKey("contacts.id"))
    created_at = Column(DateTime(timezone=True), server_default=func.now())


class RefCompany(Base):
    """Reference list of known/target companies used for classification.

    `qualifying_titles` is a comma-separated string; it is split and
    lowercased in load_reference_lists().
    """
    __tablename__ = "ref_companies"
    id = Column(Integer, primary_key=True)
    name = Column(Text, unique=True, nullable=False)  # CITEXT in DB
    tier = Column(String(20))
    qualifying_titles = Column(Text, default="")
    notes = Column(Text, default="")
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    # Deleting a company removes its aliases (DB-level ON DELETE CASCADE too).
    aliases = relationship("RefCompanyAlias", back_populates="ref_company", cascade="all, delete-orphan")


class RefCompanyAlias(Base):
    """Alternate spelling/name for a ref_companies row (alias -> canonical)."""
    __tablename__ = "ref_company_aliases"
    id = Column(Integer, primary_key=True)
    ref_company_id = Column(Integer, ForeignKey("ref_companies.id", ondelete="CASCADE"), nullable=False)
    alias = Column(Text, unique=True, nullable=False)  # CITEXT in DB
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    ref_company = relationship("RefCompany", back_populates="aliases")


class RefJobTitle(Base):
    """Job-title match pattern with a classification status.

    Rows are consumed in ascending `priority` order by load_reference_lists().
    """
    __tablename__ = "ref_job_titles"
    id = Column(Integer, primary_key=True)
    pattern = Column(Text, nullable=False)
    status = Column(String(20), nullable=False)
    priority = Column(Integer, default=0)
    created_at = Column(DateTime(timezone=True), server_default=func.now())


class RefPerson(Base):
    """Individually pinned person with a fixed classification status."""
    __tablename__ = "ref_people"
    id = Column(Integer, primary_key=True)
    name = Column(Text, unique=True, nullable=False)  # CITEXT in DB
    status = Column(String(20), nullable=False)
    company = Column(Text, default="")
    created_at = Column(DateTime(timezone=True), server_default=func.now())


class SitehubEmployee(Base):
    """Known SiteHub employee name (stored lowercased by add_sitehub_employee).

    Presumably used to exclude internal staff from matching — confirm in
    icp_process, which consumes load_sitehub_employees().
    """
    __tablename__ = "sitehub_employees"
    id = Column(Integer, primary_key=True)
    name = Column(Text, unique=True, nullable=False)  # CITEXT in DB
    created_at = Column(DateTime(timezone=True), server_default=func.now())


class CompanyBlocklist(Base):
    """Company names excluded from processing (stored lowercased)."""
    __tablename__ = "company_blocklist"
    id = Column(Integer, primary_key=True)
    name = Column(Text, unique=True, nullable=False)  # CITEXT in DB
    created_at = Column(DateTime(timezone=True), server_default=func.now())


class LearnedPatternTitle(Base):
    """Learned per-keyword classification counts for title-based scoring."""
    __tablename__ = "learned_patterns_titles"
    id = Column(Integer, primary_key=True)
    keyword = Column(Text, unique=True, nullable=False)  # CITEXT in DB
    icp_count = Column(Integer, default=0)
    poi_count = Column(Integer, default=0)
    not_icp_count = Column(Integer, default=0)
    updated_at = Column(DateTime(timezone=True), server_default=func.now())


class LearnedPatternCompany(Base):
    """Learned company -> status association with an observation count."""
    __tablename__ = "learned_patterns_companies"
    id = Column(Integer, primary_key=True)
    company = Column(Text, unique=True, nullable=False)  # CITEXT in DB
    status = Column(String(20), nullable=False)
    count = Column(Integer, default=0)
    updated_at = Column(DateTime(timezone=True), server_default=func.now())


class ApiUsage(Base):
    """Per-service, per-month API call counter.

    `month` is formatted "YYYY-MM" (see get_search_usage). One row per
    (service, month), enforced by the unique constraint.
    """
    __tablename__ = "api_usage"
    id = Column(Integer, primary_key=True)
    service = Column(String(50), nullable=False)
    month = Column(String(7), nullable=False)
    count = Column(Integer, default=0)
    limit_notified = Column(Boolean, default=False)  # set once the monthly cap is hit
    updated_at = Column(DateTime(timezone=True), server_default=func.now())
    __table_args__ = (
        UniqueConstraint("service", "month", name="uq_api_usage_service_month"),
    )


class User(Base):
    """Web-app login account (Phase 2); `role` defaults to "viewer"."""
    __tablename__ = "users"
    id = Column(Integer, primary_key=True)
    username = Column(String(100), unique=True, nullable=False)
    password_hash = Column(Text, nullable=False)
    role = Column(String(20), default="viewer")
    created_at = Column(DateTime(timezone=True), server_default=func.now())


class ZohoSyncLog(Base):
    """Audit row for each Zoho CRM sync event (written by log_zoho_sync).

    `action` values seen in this module: "create", "update", "error".
    """
    __tablename__ = "zoho_sync_log"
    id = Column(Integer, primary_key=True)
    contact_id = Column(Integer, ForeignKey("contacts.id"))
    action = Column(String(50), nullable=False)
    zoho_id = Column(Text)
    details = Column(Text, default="")
    synced_at = Column(DateTime(timezone=True), server_default=func.now())


# ---------------------------------------------------------------------------
# Zoho CRM sync helpers
# ---------------------------------------------------------------------------

def log_zoho_sync(
    session: Session,
    contact_id: int,
    action: str,
    zoho_id: str | None = None,
    details: str = "",
) -> None:
    """Record a Zoho sync event and update the contact's sync bookkeeping.

    Always inserts a ZohoSyncLog row.  For a successful "create" or "update"
    with a zoho_id, also stamps the contact's zoho_id and zoho_synced_at
    (UTC).  Commits the session.

    Args:
        session: active DB session (committed by this function).
        contact_id: id of the contact the event refers to.
        action: event kind, e.g. "create", "update", "error".
        zoho_id: Zoho CRM record id, if known (stored as "" when absent).
        details: free-form diagnostic text.
    """
    session.add(ZohoSyncLog(
        contact_id=contact_id,
        action=action,
        zoho_id=zoho_id or "",
        details=details,
    ))

    if action in ("create", "update") and zoho_id:
        # PK lookup; contact may legitimately be missing (log row kept anyway).
        contact = session.get(Contact, contact_id)
        if contact:
            contact.zoho_id = zoho_id
            contact.zoho_synced_at = datetime.now(timezone.utc)

    session.commit()


def get_zoho_sync_log(session: Session, limit: int = 100) -> list[dict]:
    """Most recent Zoho sync log entries (newest first) for the admin UI."""
    query = (
        session.query(ZohoSyncLog, Contact.name)
        .outerjoin(Contact, ZohoSyncLog.contact_id == Contact.id)
        .order_by(desc(ZohoSyncLog.synced_at))
        .limit(limit)
    )
    entries = []
    for entry, contact_name in query.all():
        entries.append({
            "id": entry.id,
            "contact_id": entry.contact_id,
            "contact_name": contact_name or "",
            "action": entry.action,
            "zoho_id": entry.zoho_id or "",
            "details": entry.details or "",
            "synced_at": entry.synced_at,
        })
    return entries


def get_unsynced_icp_contacts(session: Session) -> list[dict]:
    """ICP contacts needing sync: no zoho_id, or updated_at > zoho_synced_at."""
    needs_sync = or_(
        Contact.zoho_id.is_(None),
        Contact.zoho_id == "",
        Contact.updated_at > Contact.zoho_synced_at,
    )
    pending = session.query(Contact).filter(Contact.status == "ICP", needs_sync)

    def as_dict(contact: Contact) -> dict:
        """Serialize one contact for the sync pipeline."""
        return {
            "id": contact.id,
            "name": contact.name,
            "title": contact.title or "",
            "company": contact.company or "",
            "status": contact.status,
            "company_tier": contact.company_tier or "",
            "total_interactions": contact.total_interactions or 0,
            "first_interaction": contact.first_interaction,
            "latest_interaction": contact.latest_interaction,
            "notes": contact.notes or "",
            "match_reason": contact.match_reason or "",
            "zoho_id": contact.zoho_id or "",
            "zoho_synced_at": contact.zoho_synced_at,
        }

    return [as_dict(contact) for contact in pending.all()]


def get_zoho_sync_stats(session: Session) -> dict:
    """Summary stats for the Zoho admin page.

    Returns a dict with total/synced/unsynced ICP contact counts, the
    timestamp of the most recent sync-log entry (None if the log is empty),
    and the number of "error" log entries in the last 24 hours.
    """
    total_icp = session.query(Contact).filter(Contact.status == "ICP").count()
    synced = session.query(Contact).filter(
        Contact.status == "ICP",
        Contact.zoho_id.isnot(None),
        Contact.zoho_id != "",
    ).count()

    last_sync = session.query(func.max(ZohoSyncLog.synced_at)).scalar()

    errors_24h = session.query(ZohoSyncLog).filter(
        ZohoSyncLog.action == "error",
        ZohoSyncLog.synced_at >= datetime.now(timezone.utc) - timedelta(hours=24),
    ).count()

    return {
        "total_icp": total_icp,
        "synced": synced,
        "unsynced": total_icp - synced,
        "last_sync": last_sync,
        "errors_24h": errors_24h,
    }


# ---------------------------------------------------------------------------
# READ functions — return dicts matching Excel-based function signatures
# ---------------------------------------------------------------------------

def read_contacts(session: Session) -> list[dict]:
    """Read all contacts. Replaces _read_contacts_sheet().

    Keys mirror the Excel sheet column headers for drop-in compatibility.
    """
    records = []
    for contact in session.query(Contact).all():
        records.append({
            "Name": contact.name,
            "Title": contact.title or "",
            "Company": contact.company or "",
            "Status": contact.status or "Review",
            "Company Tier": contact.company_tier or "",
            "Total Interactions": contact.total_interactions or 0,
            "First Interaction": contact.first_interaction,
            "Latest Interaction": contact.latest_interaction,
            "Notes": contact.notes or "",
            "Match Reason": contact.match_reason or "",
        })
    return records


def read_interactions(session: Session) -> list[dict]:
    """Read all interactions. Replaces _read_interactions_sheet().

    Each row's "Source Profile" shows the profile code when the FK resolves,
    otherwise the raw text captured at import time.
    """
    code_by_id = {sp.id: sp.code for sp in session.query(SourceProfile).all()}
    records = []
    for row in session.query(Interaction).all():
        records.append({
            "Date": row.date,
            "Post Hook": row.post_hook or "",
            "Type": row.type or "",
            "Name": row.name,
            "Title": row.title or "",
            "Company": row.company or "",
            "Profile URL": row.profile_url or "",
            "Source Profile": code_by_id.get(row.source_profile_id, row.source_profile_raw or ""),
        })
    return records


def load_reference_lists(session: Session) -> dict:
    """Load reference lists from DB. Replaces load_reference_lists(path).

    Returns dict with same structure:
        - companies: dict[lowercase_name] -> {name, tier, qualifying_titles}
        - companies_normalized: dict[normalized_name] -> {name, tier, ...}
        - aliases: dict[alias_lowercase] -> canonical_name
        - job_titles: list of {pattern, status, priority}
        - people: dict[lowercase_name] -> {status, company}
    """
    # Deferred import: icp_process imports this module, so a top-level
    # import would be circular.
    from icp_process import normalize_company_name

    companies: dict = {}
    companies_normalized: dict = {}
    aliases: dict = {}

    for company_row in session.query(RefCompany).all():
        canonical = company_row.name.strip()
        normalized = normalize_company_name(canonical)

        if company_row.qualifying_titles:
            qualifying = [t.strip().lower() for t in company_row.qualifying_titles.split(",")]
        else:
            qualifying = []

        entry = {
            "name": canonical,
            "tier": company_row.tier,
            "qualifying_titles": qualifying,
        }
        # Same dict object is shared between both lookup maps.
        companies[canonical.lower()] = entry
        if normalized:
            companies_normalized[normalized] = entry

        # Each alias maps back to the canonical name, both lowered and normalized.
        for alias_row in company_row.aliases:
            alias_text = alias_row.alias.strip()
            aliases[alias_text.lower()] = canonical
            alias_normalized = normalize_company_name(alias_text)
            if alias_normalized:
                aliases[alias_normalized] = canonical

    job_titles = []
    for jt in session.query(RefJobTitle).order_by(RefJobTitle.priority).all():
        job_titles.append({
            "pattern": jt.pattern.strip().lower(),
            "status": jt.status.strip() if jt.status else "Review",
            "priority": jt.priority if jt.priority is not None else 50,
        })

    people = {}
    for person in session.query(RefPerson).all():
        people[person.name.strip().lower()] = {
            "status": person.status.strip() if person.status else "Review",
            "company": person.company or "",
        }

    return {
        "companies": companies,
        "companies_normalized": companies_normalized,
        "aliases": aliases,
        "job_titles": job_titles,
        "people": people,
    }


def load_tracker_companies_cache(session: Session) -> set:
    """Load company names from contacts table. Replaces _load_tracker_companies_cache().

    Returns a set containing both lowercased and normalized forms of every
    non-trivial company name (length > 1 after stripping).
    """
    from icp_process import normalize_company_name

    names: set = set()
    company_rows = session.query(Contact.company).filter(
        Contact.company.isnot(None),
        Contact.company != "",
    )
    for (raw,) in company_rows.all():
        if not raw:
            continue
        cleaned = raw.strip()
        if len(cleaned) <= 1:
            continue
        names.add(cleaned.lower())
        normalized = normalize_company_name(cleaned)
        if normalized:
            names.add(normalized)
    return names


def load_sitehub_employees(session: Session) -> set:
    """Load SiteHub employee names. Replaces _load_additional_sitehub_employees()."""
    employees: set = set()
    for (employee_name,) in session.query(SitehubEmployee.name).all():
        if employee_name:
            employees.add(employee_name.strip().lower())
    return employees


def load_company_blocklist(session: Session) -> set:
    """Load company blocklist. Replaces _load_company_blocklist()."""
    blocked: set = set()
    for (company_name,) in session.query(CompanyBlocklist.name).all():
        if company_name:
            blocked.add(company_name.strip().lower())
    return blocked


def load_learned_patterns(session: Session) -> dict:
    """Load learned patterns. Replaces _load_learned_patterns().

    Returns {"title_keywords": {...}, "companies": {...}} with counts
    defaulting to 0 when NULL in the DB.
    """
    title_keywords = {
        row.keyword.lower(): {
            "ICP": row.icp_count or 0,
            "POI": row.poi_count or 0,
            "Not ICP": row.not_icp_count or 0,
        }
        for row in session.query(LearnedPatternTitle).all()
    }
    companies = {
        row.company.lower(): {"status": row.status, "count": row.count or 0}
        for row in session.query(LearnedPatternCompany).all()
    }
    return {"title_keywords": title_keywords, "companies": companies}


def get_search_usage(session: Session, service: str = "brave_search") -> dict:
    """Get API usage for current month. Replaces _get_search_usage().

    Returns a zeroed record when no row exists yet for this month.
    """
    month_key = datetime.now().strftime("%Y-%m")
    usage = session.query(ApiUsage).filter(
        ApiUsage.service == service,
        ApiUsage.month == month_key,
    ).first()

    if usage is None:
        return {"month": month_key, "count": 0, "limit_notified": False}
    return {
        "month": usage.month,
        "count": usage.count,
        "limit_notified": usage.limit_notified,
    }


def get_review_contacts(session: Session) -> list[dict]:
    """Get contacts with status 'Review'. New convenience function."""
    pending = []
    for contact in session.query(Contact).filter(Contact.status == "Review").all():
        pending.append({
            "id": contact.id,
            "Name": contact.name,
            "Title": contact.title or "",
            "Company": contact.company or "",
            "Status": contact.status,
            "Total Interactions": contact.total_interactions or 0,
            "Notes": contact.notes or "",
        })
    return pending


# ---------------------------------------------------------------------------
# WRITE functions
# ---------------------------------------------------------------------------

def upsert_contacts(session: Session, contacts_list: list[dict]) -> int:
    """Upsert contacts from a list of dicts. Replaces Contacts sheet rewrite.

    Each dict has keys: Name, Title, Company, Status, Company Tier,
    Total Interactions, First Interaction, Latest Interaction, Notes, Match Reason
    """
    upserted = 0
    for c in contacts_list:
        name = (c.get("Name") or "").strip()
        if not name:
            continue

        name_lower = name.lower()
        existing = session.query(Contact).filter(
            func.lower(Contact.name) == name_lower
        ).first()

        if existing:
            existing.title = c.get("Title", existing.title) or ""
            existing.company = c.get("Company", existing.company) or ""
            existing.status = c.get("Status", existing.status) or "Review"
            existing.company_tier = c.get("Company Tier", existing.company_tier) or ""
            existing.total_interactions = c.get("Total Interactions", existing.total_interactions) or 0
            existing.first_interaction = c.get("First Interaction", existing.first_interaction)
            existing.latest_interaction = c.get("Latest Interaction", existing.latest_interaction)
            existing.notes = c.get("Notes", existing.notes) or ""
            existing.match_reason = c.get("Match Reason", existing.match_reason) or ""
        else:
            first_int = c.get("First Interaction")
            if isinstance(first_int, datetime):
                first_int = first_int.date()
            latest_int = c.get("Latest Interaction")
            if isinstance(latest_int, datetime):
                latest_int = latest_int.date()

            new_contact = Contact(
                name=name,
                title=c.get("Title", "") or "",
                company=c.get("Company", "") or "",
                status=c.get("Status", "Review") or "Review",
                company_tier=c.get("Company Tier", "") or "",
                total_interactions=c.get("Total Interactions", 0) or 0,
                first_interaction=first_int,
                latest_interaction=latest_int,
                notes=c.get("Notes", "") or "",
                match_reason=c.get("Match Reason", "") or "",
            )
            session.add(new_contact)
        upserted += 1

    session.commit()
    return upserted


def append_interactions(session: Session, interactions: list[dict]) -> int:
    """Append new interactions. Replaces interaction append to sheet.

    "Date" values given as "YYYY-MM-DD" strings or datetimes are normalized
    to date.  "Source Profile" is resolved to a source_profiles FK by code or
    case-insensitive full name; the raw text is always kept in
    source_profile_raw.  Commits once and returns the number of rows added.
    """
    # One pass over source_profiles builds both the code and full-name
    # lookups (previously two separate queries).
    profiles: dict = {}
    for p in session.query(SourceProfile).all():
        profiles[p.code] = p.id
        profiles[p.full_name.lower()] = p.id

    added = 0
    for inter in interactions:
        d = inter.get("Date")
        if isinstance(d, str) and d:
            try:
                d = datetime.strptime(d, "%Y-%m-%d").date()
            except ValueError:
                pass  # unparseable string is passed through unchanged
        elif isinstance(d, datetime):
            d = d.date()

        source_raw = inter.get("Source Profile", "")
        source_id = profiles.get(source_raw) or profiles.get(source_raw.lower() if source_raw else "")

        session.add(Interaction(
            date=d,
            post_hook=inter.get("Post Hook", "") or "",
            type=inter.get("Type", "") or "",
            name=(inter.get("Name") or "").strip(),
            title=inter.get("Title", "") or "",
            company=inter.get("Company", "") or "",
            profile_url=inter.get("Profile URL", "") or "",
            source_profile_id=source_id,
            source_profile_raw=source_raw or "",
        ))
        added += 1

    session.commit()
    return added


def update_contact_status(
    session: Session,
    name: str,
    status: str,
    company: str | None = None,
    notes: str | None = None,
) -> bool:
    """Update a contact's status (and optionally company/notes).
    Replaces cell updates in review mode.

    Returns False if no contact matches `name` (case-insensitive).
    """
    contact = session.query(Contact).filter(
        func.lower(Contact.name) == name.lower().strip()
    ).first()
    if contact is None:
        return False

    contact.status = status
    if company is not None:
        contact.company = company
    if notes is not None:
        contact.notes = notes
    session.commit()
    return True


def backfill_interaction_companies(session: Session, name_to_company: dict) -> int:
    """Back-propagate enriched companies to interactions. Replaces back-propagation loop.

    Only interactions with a NULL/empty company are touched; returns the
    number of rows updated.
    """
    changed = 0
    for lowered_name, enriched_company in name_to_company.items():
        if not enriched_company:
            continue
        blanks = session.query(Interaction).filter(
            func.lower(Interaction.name) == lowered_name,
            or_(Interaction.company.is_(None), Interaction.company == ""),
        )
        for interaction in blanks.all():
            interaction.company = enriched_company
            changed += 1

    session.commit()
    return changed


def add_sitehub_employee(session: Session, name: str) -> bool:
    """Add a SiteHub employee. Replaces txt append.

    Stores the name lowercased; returns False if already present.
    """
    cleaned = name.lower().strip()
    already_present = session.query(SitehubEmployee).filter(
        func.lower(SitehubEmployee.name) == cleaned
    ).first()
    if already_present is not None:
        return False
    session.add(SitehubEmployee(name=cleaned))
    session.commit()
    return True


def add_to_company_blocklist(session: Session, name: str) -> bool:
    """Add to company blocklist. Replaces txt append.

    Rejects names shorter than 2 characters after cleaning; returns False
    when rejected or already present.
    """
    cleaned = name.lower().strip()
    if len(cleaned) < 2:  # also covers the empty string
        return False
    already_present = session.query(CompanyBlocklist).filter(
        func.lower(CompanyBlocklist.name) == cleaned
    ).first()
    if already_present is not None:
        return False
    session.add(CompanyBlocklist(name=cleaned))
    session.commit()
    return True


def save_learned_pattern_title(
    session: Session,
    keyword: str,
    icp: int,
    poi: int,
    not_icp: int,
) -> None:
    """Save a title keyword pattern. Replaces JSON write.

    Inserts a new row or overwrites the counts on an existing one
    (case-insensitive match on keyword).
    """
    lowered = keyword.lower()
    row = session.query(LearnedPatternTitle).filter(
        func.lower(LearnedPatternTitle.keyword) == lowered
    ).first()
    if row is None:
        session.add(LearnedPatternTitle(
            keyword=lowered,
            icp_count=icp,
            poi_count=poi,
            not_icp_count=not_icp,
        ))
    else:
        row.icp_count = icp
        row.poi_count = poi
        row.not_icp_count = not_icp
        row.updated_at = func.now()
    session.commit()


def save_learned_pattern_company(
    session: Session,
    company: str,
    status: str,
    count: int,
) -> None:
    """Save a company pattern. Replaces JSON write.

    Inserts a new row or overwrites status/count on an existing one
    (case-insensitive match on company).
    """
    lowered = company.lower()
    row = session.query(LearnedPatternCompany).filter(
        func.lower(LearnedPatternCompany.company) == lowered
    ).first()
    if row is None:
        session.add(LearnedPatternCompany(
            company=lowered,
            status=status,
            count=count,
        ))
    else:
        row.status = status
        row.count = count
        row.updated_at = func.now()
    session.commit()


def save_search_usage(
    session: Session,
    service: str,
    month: str,
    count: int,
    notified: bool,
) -> None:
    """Save API usage. Replaces JSON write.

    Upserts the (service, month) row with the given count/notified flag.
    """
    row = session.query(ApiUsage).filter(
        ApiUsage.service == service,
        ApiUsage.month == month,
    ).first()
    if row is None:
        session.add(ApiUsage(
            service=service,
            month=month,
            count=count,
            limit_notified=notified,
        ))
    else:
        row.count = count
        row.limit_notified = notified
        row.updated_at = func.now()
    session.commit()


def increment_search_usage(
    session: Session,
    service: str = "brave_search",
    limit: int = 2000,
) -> tuple[int, bool]:
    """Increment usage counter. Returns (new_count, limit_reached).
    Replaces _increment_search_usage().

    Args:
        session: active DB session.
        service: API service key, defaults to "brave_search".
        limit: monthly call cap (previously the hard-coded
            _MONTHLY_SEARCH_LIMIT of 2000; same default preserved).
    """
    current_month = datetime.now().strftime("%Y-%m")
    usage = get_search_usage(session, service)
    new_count = usage["count"] + 1
    limit_reached = new_count >= limit

    save_search_usage(session, service, current_month, new_count, usage["limit_notified"])
    return new_count, limit_reached


# ---------------------------------------------------------------------------
# UTILITY functions
# ---------------------------------------------------------------------------

def get_contact_count_by_status(session: Session) -> dict:
    """Status summary. Returns {status: count}."""
    grouped = (
        session.query(Contact.status, func.count(Contact.id))
        .group_by(Contact.status)
        .all()
    )
    return dict(grouped)


def resolve_source_profile(session: Session, raw: str) -> int | None:
    """Map a raw source profile code/name to its DB ID.

    A code match (uppercased) takes precedence over a case-insensitive
    full-name match; returns None when neither matches.
    """
    if not raw:
        return None

    by_code = session.query(SourceProfile).filter(
        SourceProfile.code == raw.upper()
    ).first()
    if by_code is not None:
        return by_code.id

    by_name = session.query(SourceProfile).filter(
        func.lower(SourceProfile.full_name) == raw.lower()
    ).first()
    return None if by_name is None else by_name.id


def link_interactions_to_contacts(session: Session) -> int:
    """Set contact_id FKs on interactions based on name matching.

    Only interactions with a NULL contact_id are considered; returns the
    number of rows linked.
    """
    id_by_name = {
        contact.name.lower().strip(): contact.id
        for contact in session.query(Contact.id, Contact.name).all()
    }

    count = 0
    pending = session.query(Interaction).filter(Interaction.contact_id.is_(None))
    for interaction in pending.all():
        matched_id = id_by_name.get(interaction.name.strip().lower())
        if matched_id:
            interaction.contact_id = matched_id
            count += 1

    session.commit()
    return count


def check_db_health(session: Session) -> dict:
    """Table row counts for health check."""
    models = {
        "contacts": Contact,
        "interactions": Interaction,
        "source_profiles": SourceProfile,
        "ref_companies": RefCompany,
        "ref_company_aliases": RefCompanyAlias,
        "ref_job_titles": RefJobTitle,
        "ref_people": RefPerson,
        "sitehub_employees": SitehubEmployee,
        "company_blocklist": CompanyBlocklist,
        "learned_patterns_titles": LearnedPatternTitle,
        "learned_patterns_companies": LearnedPatternCompany,
        "api_usage": ApiUsage,
    }
    # Dict insertion order preserves the original reporting order.
    return {table: session.query(model).count() for table, model in models.items()}


# ---------------------------------------------------------------------------
# PHASE 2: Web App Query Functions
# ---------------------------------------------------------------------------

# --- Dashboard queries ---

def get_kpi_summary(session: Session, date_from: date | None = None, date_to: date | None = None) -> dict:
    """Dashboard KPI summary: total contacts, status counts, coverage %, monthly ICP interactions.

    KPI status cards are always all-time.  The ICP interaction counts use
    date_from/date_to when provided, otherwise fall back to current/previous month.
    """
    by_status = get_contact_count_by_status(session)
    total = sum(by_status.values())
    icp = by_status.get("ICP", 0)
    poi = by_status.get("POI", 0)
    not_icp = by_status.get("Not ICP", 0)
    review = by_status.get("Review", 0)
    classified = icp + poi + not_icp
    coverage_pct = round(classified / total * 100, 1) if total else 0.0

    if date_from and date_to:
        # Explicit range: prior period is the same number of days, ending
        # the day before date_from.
        span = (date_to - date_from).days
        prior_to = date_from - timedelta(days=1)
        prior_from = date_from - timedelta(days=span + 1)
    else:
        # Default: current month-to-date vs. the previous calendar month.
        today = date.today()
        date_to = today
        date_from = today.replace(day=1)
        prior_to = date_from - timedelta(days=1)
        if date_from.month == 1:
            prior_from = date_from.replace(year=date_from.year - 1, month=12)
        else:
            prior_from = date_from.replace(month=date_from.month - 1)

    def _icp_interactions(start: date, end: date) -> int:
        """Count interactions linked to ICP contacts within [start, end]."""
        return session.query(func.count(Interaction.id)).join(
            Contact, Contact.id == Interaction.contact_id
        ).filter(
            Contact.status == "ICP",
            Interaction.date >= start,
            Interaction.date <= end,
        ).scalar() or 0

    icp_this_month = _icp_interactions(date_from, date_to)
    icp_last_month = _icp_interactions(prior_from, prior_to)

    # Interactions per ICP contact in the selected period.
    avg_engagement = round(icp_this_month / icp, 1) if icp else 0.0

    return {
        "total_contacts": total,
        "icp": icp,
        "poi": poi,
        "not_icp": not_icp,
        "review": review,
        "coverage_pct": coverage_pct,
        "icp_interactions_this_month": icp_this_month,
        "icp_interactions_last_month": icp_last_month,
        "avg_engagement": avg_engagement,
    }


def get_executive_leaderboard(session: Session, date_from: date | None = None, date_to: date | None = None) -> list[dict]:
    """Per-executive: ICP interactions this/prior period, trend %, posts, new ICPs.

    Args:
        session: Open SQLAlchemy session.
        date_from / date_to: Optional explicit period. When omitted, the current
            calendar month is used and the prior period is the previous month.

    Returns:
        One dict per source profile, sorted by icp_this_month desc:
        {code, name, icp_this_month, icp_last_month, trend_pct, posts, new_icps}.
    """
    if date_from and date_to:
        # Prior period: same length, ending the day before the range starts.
        range_days = (date_to - date_from).days
        prior_from = date_from - timedelta(days=range_days + 1)
        prior_to = date_from - timedelta(days=1)
    else:
        today = date.today()
        date_from = today.replace(day=1)
        date_to = today
        if date_from.month == 1:
            prior_from = date_from.replace(year=date_from.year - 1, month=12)
        else:
            prior_from = date_from.replace(month=date_from.month - 1)
        prior_to = date_from - timedelta(days=1)

    def _icp_interactions(sp_id: int, start: date, end: date) -> int:
        """Count ICP interactions for one profile within [start, end]."""
        return session.query(func.count(Interaction.id)).join(
            Contact, Contact.id == Interaction.contact_id
        ).filter(
            Contact.status == "ICP",
            Interaction.source_profile_id == sp_id,
            Interaction.date >= start,
            Interaction.date <= end,
        ).scalar() or 0

    profiles = session.query(SourceProfile).all()
    results = []

    for sp in profiles:
        icp_this = _icp_interactions(sp.id, date_from, date_to)
        icp_last = _icp_interactions(sp.id, prior_from, prior_to)

        # Trend %: growth vs prior period; 100% when growing from zero.
        if icp_last > 0:
            trend_pct = round(((icp_this - icp_last) / icp_last) * 100, 1)
        elif icp_this > 0:
            trend_pct = 100.0
        else:
            trend_pct = 0.0

        # Distinct non-empty post hooks in the period.
        posts = session.query(func.count(func.distinct(Interaction.post_hook))).filter(
            Interaction.source_profile_id == sp.id,
            Interaction.date >= date_from,
            Interaction.date <= date_to,
            Interaction.post_hook.isnot(None),
            Interaction.post_hook != "",
        ).scalar() or 0

        # New ICP contacts whose first interaction falls in the period.
        # BUGFIX: the join to Interaction multiplies each contact by its number
        # of interactions from this profile, so a plain count(Contact.id)
        # overcounted — count DISTINCT contact ids instead.
        new_icps = session.query(func.count(func.distinct(Contact.id))).join(
            Interaction, Interaction.contact_id == Contact.id
        ).filter(
            Contact.status == "ICP",
            Contact.first_interaction >= date_from,
            Contact.first_interaction <= date_to,
            Interaction.source_profile_id == sp.id,
        ).scalar() or 0

        results.append({
            "code": sp.code,
            "name": sp.full_name,
            "icp_this_month": icp_this,
            "icp_last_month": icp_last,
            "trend_pct": trend_pct,
            "posts": posts,
            "new_icps": new_icps,
        })

    results.sort(key=lambda x: x["icp_this_month"], reverse=True)
    return results


def get_posting_frequency(session: Session, date_from: date | None = None, date_to: date | None = None) -> list[dict]:
    """Per-executive posting cadence: distinct posts in the period and staleness.

    Defaults to the trailing 30 days when no explicit range is supplied.
    days_since_last_post is always measured against today, regardless of range.
    """
    today = date.today()
    if not (date_from and date_to):
        date_from, date_to = today - timedelta(days=30), today

    out = []
    for profile in session.query(SourceProfile).all():
        post_count = session.query(func.count(func.distinct(Interaction.post_hook))).filter(
            Interaction.source_profile_id == profile.id,
            Interaction.date >= date_from,
            Interaction.date <= date_to,
            Interaction.post_hook.isnot(None),
            Interaction.post_hook != "",
        ).scalar() or 0

        # Most recent dated post for this profile (deliberately not range-scoped).
        latest = session.query(func.max(Interaction.date)).filter(
            Interaction.source_profile_id == profile.id,
            Interaction.post_hook.isnot(None),
            Interaction.post_hook != "",
        ).scalar()

        out.append({
            "code": profile.code,
            "name": profile.full_name,
            "posts_30d": post_count,
            "days_since_last_post": (today - latest).days if latest else None,
        })

    return out


def get_monthly_trends(session: Session, months: int = 12, date_from: date | None = None, date_to: date | None = None) -> list[dict]:
    """Monthly ICP interaction counts for a line chart. Returns [{month, count}].

    Starts at date_from's month when given; otherwise walks back `months`
    calendar months, inclusive of the current one.
    """
    if date_from:
        start_month = date_from.replace(day=1)
    else:
        start_month = date.today().replace(day=1)
        for _ in range(months - 1):
            # Step back one calendar month, wrapping December into the prior year.
            if start_month.month == 1:
                start_month = start_month.replace(year=start_month.year - 1, month=12)
            else:
                start_month = start_month.replace(month=start_month.month - 1)

    # Hoist the month-bucket expression so grouping and ordering share it.
    month_label = func.to_char(Interaction.date, 'YYYY-MM')

    query = session.query(
        month_label.label('month'),
        func.count(Interaction.id).label('count'),
    ).join(
        Contact, Contact.id == Interaction.contact_id
    ).filter(
        Contact.status == "ICP",
        Interaction.date >= start_month,
    )
    if date_to:
        query = query.filter(Interaction.date <= date_to)

    rows = query.group_by(month_label).order_by(month_label).all()
    return [{"month": row.month, "count": row.count} for row in rows]


def get_status_distribution(session: Session, date_from: date | None = None, date_to: date | None = None) -> list[dict]:
    """Counts and percentages per status for the doughnut chart.

    With a date range, only contacts having at least one interaction inside
    the range are counted; otherwise all contacts are included.
    """
    if date_from and date_to:
        # Restrict to contacts active in the window.
        in_range = session.query(func.distinct(Interaction.contact_id)).filter(
            Interaction.date >= date_from,
            Interaction.date <= date_to,
        ).subquery()
        grouped = session.query(
            Contact.status, func.count(Contact.id)
        ).filter(
            Contact.id.in_(in_range)
        ).group_by(Contact.status).all()
        counts = dict(grouped)
    else:
        counts = get_contact_count_by_status(session)

    total = sum(counts.values())
    return [
        {
            "status": status,
            "count": counts.get(status, 0),
            "percentage": round(counts.get(status, 0) / total * 100, 1) if total else 0.0,
        }
        for status in ("ICP", "POI", "Not ICP", "Review")
    ]


def get_recent_activity(session: Session, date_from: date | None = None, date_to: date | None = None) -> dict:
    """Interaction volume and active ICP counts.

    With an explicit range, the 7d and 30d keys both carry the same
    range-scoped numbers; otherwise rolling 7/30-day windows ending today.
    """
    def _interactions(since: date, until: date | None = None) -> int:
        """Interactions dated on/after `since` (and on/before `until` if given)."""
        q = session.query(func.count(Interaction.id)).filter(Interaction.date >= since)
        if until is not None:
            q = q.filter(Interaction.date <= until)
        return q.scalar() or 0

    def _active_icps(since: date, until: date | None = None) -> int:
        """Distinct ICP contacts with an interaction in the window."""
        q = session.query(func.count(func.distinct(Contact.id))).join(
            Interaction, Interaction.contact_id == Contact.id
        ).filter(
            Contact.status == "ICP",
            Interaction.date >= since,
        )
        if until is not None:
            q = q.filter(Interaction.date <= until)
        return q.scalar() or 0

    if date_from and date_to:
        n_interactions = _interactions(date_from, date_to)
        n_active = _active_icps(date_from, date_to)
        return {
            "interactions_7d": n_interactions,
            "interactions_30d": n_interactions,
            "active_icps_7d": n_active,
            "active_icps_30d": n_active,
        }

    today = date.today()
    week_ago = today - timedelta(days=7)
    month_ago = today - timedelta(days=30)
    return {
        "interactions_7d": _interactions(week_ago),
        "interactions_30d": _interactions(month_ago),
        "active_icps_7d": _active_icps(week_ago),
        "active_icps_30d": _active_icps(month_ago),
    }


def get_top_engagers(session: Session, limit: int = 10, date_from: date | None = None, date_to: date | None = None) -> list[dict]:
    """Top N ICP contacts by interaction count (range-scoped when a range is given)."""
    if date_from and date_to:
        # Aggregate interactions inside the window per ICP contact.
        ranked = session.query(
            Contact.id,
            Contact.name,
            Contact.company,
            func.count(Interaction.id).label("range_interactions"),
            func.max(Interaction.date).label("last_active"),
        ).join(
            Interaction, Interaction.contact_id == Contact.id
        ).filter(
            Contact.status == "ICP",
            Interaction.date >= date_from,
            Interaction.date <= date_to,
        ).group_by(
            Contact.id, Contact.name, Contact.company
        ).order_by(
            desc("range_interactions"), desc("last_active")
        ).limit(limit).all()

        return [
            {
                "id": row.id,
                "name": row.name,
                "company": row.company or "",
                "total_interactions": row.range_interactions,
                "latest_interaction": row.last_active,
            }
            for row in ranked
        ]

    # All-time path: lean on the denormalized counters stored on the contact row.
    ranked = session.query(
        Contact.id,
        Contact.name,
        Contact.company,
        Contact.total_interactions,
        Contact.latest_interaction,
    ).filter(
        Contact.status == "ICP"
    ).order_by(
        desc(Contact.total_interactions), desc(Contact.latest_interaction)
    ).limit(limit).all()

    return [
        {
            "id": row.id,
            "name": row.name,
            "company": row.company or "",
            "total_interactions": row.total_interactions or 0,
            "latest_interaction": row.latest_interaction,
        }
        for row in ranked
    ]


def get_top_companies(session: Session, limit: int = 10, date_from: date | None = None, date_to: date | None = None) -> list[dict]:
    """Top N companies ranked by ICP engagement (range-scoped when a range is given)."""
    if date_from and date_to:
        # In-range: count actual interactions, de-duplicating contacts per company.
        query = session.query(
            Contact.company,
            func.count(func.distinct(Contact.id)).label("icp_count"),
            func.count(Interaction.id).label("total_interactions"),
        ).join(
            Interaction, Interaction.contact_id == Contact.id
        ).filter(
            Contact.status == "ICP",
            Contact.company.isnot(None),
            Contact.company != "",
            Interaction.date >= date_from,
            Interaction.date <= date_to,
        )
    else:
        # All-time: lean on the denormalized per-contact totals.
        query = session.query(
            Contact.company,
            func.count(Contact.id).label("icp_count"),
            func.sum(Contact.total_interactions).label("total_interactions"),
        ).filter(
            Contact.status == "ICP",
            Contact.company.isnot(None),
            Contact.company != "",
        )

    ranked = query.group_by(Contact.company).order_by(desc("total_interactions")).limit(limit).all()

    return [
        {
            "company": row.company,
            "icp_count": row.icp_count,
            "total_interactions": row.total_interactions or 0,
        }
        for row in ranked
    ]


def get_top_posts(session: Session, limit: int = 5, date_from: date | None = None, date_to: date | None = None) -> list[dict]:
    """Top N posts by ICP reaction count (range-scoped when a range is given)."""
    base = session.query(
        Interaction.post_hook,
        Interaction.source_profile_id,
        func.count(Interaction.id).label("reaction_count"),
        func.count(func.distinct(Interaction.contact_id)).label("unique_icps"),
    ).join(
        Contact, Contact.id == Interaction.contact_id
    ).filter(
        Contact.status == "ICP",
        Interaction.post_hook.isnot(None),
        Interaction.post_hook != "",
    )
    if date_from and date_to:
        base = base.filter(Interaction.date >= date_from, Interaction.date <= date_to)

    top_rows = base.group_by(
        Interaction.post_hook, Interaction.source_profile_id
    ).order_by(desc("reaction_count")).limit(limit).all()

    # Resolve profile ids to their short display codes.
    code_by_id = {p.id: p.code for p in session.query(SourceProfile).all()}

    return [
        {
            "post_hook": row.post_hook,
            "source_profile": code_by_id.get(row.source_profile_id, ""),
            "reaction_count": row.reaction_count,
            "unique_icps": row.unique_icps,
        }
        for row in top_rows
    ]


# --- Contact queries ---

def list_contacts(
    session: Session,
    status: str | None = None,
    source_profile: str | None = None,
    company: str | None = None,
    search: str | None = None,
    sort_by: str = "latest_interaction",
    sort_dir: str = "desc",
    page: int = 1,
    per_page: int = 50,
    date_from: date | None = None,
    date_to: date | None = None,
) -> dict:
    """Paginated, filtered, sorted contact list.

    When date_from/date_to provided, only contacts with >= 1 interaction in range
    are shown and the last_30d column reflects the in-range count.

    Args:
        status: Comma-separated status names (e.g. "ICP,POI").
        source_profile: Source-profile code; keeps only contacts with at least
            one interaction from that profile.
        company / search: Case-insensitive substring filters.
        sort_by / sort_dir: Column key and "asc"/"desc"; unknown keys fall back
            to latest_interaction.
        page / per_page: 1-based pagination; out-of-range values are clamped.

    Returns {items: [...], total: int, page: int, per_page: int, pages: int}.
    """
    q = session.query(Contact)

    # --- Date range filter: restrict to contacts with interactions in range ---
    if date_from and date_to:
        active_ids = session.query(func.distinct(Interaction.contact_id)).filter(
            Interaction.date >= date_from,
            Interaction.date <= date_to,
        ).subquery()
        q = q.filter(Contact.id.in_(active_ids))

    # --- Filters ---
    if status:
        statuses = [s.strip() for s in status.split(",")]
        q = q.filter(Contact.status.in_(statuses))

    if company:
        q = q.filter(func.lower(Contact.company).contains(company.lower()))

    if search:
        term = f"%{search.lower()}%"
        q = q.filter(
            or_(
                func.lower(Contact.name).like(term),
                func.lower(Contact.title).like(term),
                func.lower(Contact.company).like(term),
            )
        )

    if source_profile:
        # Filter contacts who have interactions from this source profile
        sp = session.query(SourceProfile).filter(SourceProfile.code == source_profile.upper()).first()
        if sp:
            contact_ids_sq = session.query(func.distinct(Interaction.contact_id)).filter(
                Interaction.source_profile_id == sp.id
            ).subquery()
            q = q.filter(Contact.id.in_(contact_ids_sq))

    # --- Sorting ---
    sort_columns = {
        "name": Contact.name,
        "title": Contact.title,
        "company": Contact.company,
        "status": Contact.status,
        "company_tier": Contact.company_tier,
        "total_interactions": Contact.total_interactions,
        "first_interaction": Contact.first_interaction,
        "latest_interaction": Contact.latest_interaction,
    }
    col = sort_columns.get(sort_by, Contact.latest_interaction)
    if sort_dir == "asc":
        q = q.order_by(asc(col))
    else:
        q = q.order_by(desc(col))

    # --- Pagination ---
    # BUGFIX: clamp both ends. per_page <= 0 previously raised
    # ZeroDivisionError in the pages computation, and page <= 0 produced a
    # negative OFFSET (the original only clamped the upper bound).
    per_page = max(1, per_page)
    total = q.count()
    pages = max(1, (total + per_page - 1) // per_page)
    page = max(1, min(page, pages))
    offset = (page - 1) * per_page

    items = q.offset(offset).limit(per_page).all()

    # Window for the last_30d column: the explicit range, else trailing 30 days.
    if date_from and date_to:
        range_start = date_from
        range_end = date_to
    else:
        today = date.today()
        range_start = today - timedelta(days=30)
        range_end = today

    contact_ids_list = [c.id for c in items]
    last30_counts = {}
    if contact_ids_list:
        rows = session.query(
            Interaction.contact_id,
            func.count(Interaction.id).label("cnt"),
        ).filter(
            Interaction.contact_id.in_(contact_ids_list),
            Interaction.date >= range_start,
            Interaction.date <= range_end,
        ).group_by(Interaction.contact_id).all()
        last30_counts = {r.contact_id: r.cnt for r in rows}

    return {
        "items": [
            {
                "id": c.id,
                "name": c.name,
                "title": c.title or "",
                "company": c.company or "",
                "status": c.status or "Review",
                "company_tier": c.company_tier or "",
                "total_interactions": c.total_interactions or 0,
                "last_30d": last30_counts.get(c.id, 0),
                "first_interaction": c.first_interaction,
                "latest_interaction": c.latest_interaction,
                "notes": c.notes or "",
            }
            for c in items
        ],
        "total": total,
        "page": page,
        "per_page": per_page,
        "pages": pages,
    }


def get_contact_by_id(session: Session, contact_id: int) -> dict | None:
    """Return a single contact as a dict, or None when the ID is unknown."""
    contact = session.query(Contact).filter(Contact.id == contact_id).first()
    if contact is None:
        return None
    return {
        "id": contact.id,
        "name": contact.name,
        "title": contact.title or "",
        "company": contact.company or "",
        "status": contact.status or "Review",
        "company_tier": contact.company_tier or "",
        "total_interactions": contact.total_interactions or 0,
        "first_interaction": contact.first_interaction,
        "latest_interaction": contact.latest_interaction,
        "notes": contact.notes or "",
        "match_reason": contact.match_reason or "",
        "zoho_id": contact.zoho_id or "",
        "zoho_synced_at": contact.zoho_synced_at,
    }


def get_contact_interactions(session: Session, contact_id: int) -> list[dict]:
    """All interactions for one contact, newest first."""
    code_by_id = {p.id: p.code for p in session.query(SourceProfile).all()}

    interactions = session.query(Interaction).filter(
        Interaction.contact_id == contact_id
    ).order_by(desc(Interaction.date)).all()

    return [
        {
            "id": it.id,
            "date": it.date,
            "type": it.type or "",
            "post_hook": it.post_hook or "",
            # Prefer the resolved profile code; fall back to the raw string.
            "source_profile": code_by_id.get(it.source_profile_id, it.source_profile_raw or ""),
        }
        for it in interactions
    ]


def update_contact_notes(session: Session, contact_id: int, notes: str) -> bool:
    """Set the notes field on a contact; returns False for an unknown ID."""
    contact = session.query(Contact).filter(Contact.id == contact_id).first()
    if contact is None:
        return False
    contact.notes = notes
    session.commit()
    return True


def update_contact_company(session: Session, contact_id: int, company: str) -> bool:
    """Set the company field on a contact; returns False for an unknown ID."""
    contact = session.query(Contact).filter(Contact.id == contact_id).first()
    if contact is None:
        return False
    contact.company = company
    session.commit()
    return True


def update_contact_status_by_id(
    session: Session,
    contact_id: int,
    status: str,
    notes: str | None = None,
) -> bool:
    """Set status (and optionally notes) on a contact; False for an unknown ID."""
    contact = session.query(Contact).filter(Contact.id == contact_id).first()
    if contact is None:
        return False
    contact.status = status
    if notes is not None:
        contact.notes = notes
    session.commit()
    return True


# --- Review query ---

def get_review_contacts_with_suggestions(session: Session) -> list[dict]:
    """Review-status contacts enriched with learned classification suggestions.

    Suggestion generation is delegated to icp_process.get_learned_suggestion
    (imported locally to avoid a circular import at module load).
    """
    from icp_process import get_learned_suggestion

    pending = session.query(Contact).filter(Contact.status == "Review").order_by(
        desc(Contact.total_interactions), Contact.name
    ).all()

    enriched = []
    for contact in pending:
        suggestion, reason, confidence = get_learned_suggestion(
            contact.title or "", contact.company or ""
        )
        enriched.append({
            "id": contact.id,
            "name": contact.name,
            "title": contact.title or "",
            "company": contact.company or "",
            "total_interactions": contact.total_interactions or 0,
            "notes": contact.notes or "",
            "suggestion": suggestion,
            "suggestion_reason": reason,
            "suggestion_confidence": round(confidence * 100),
        })

    return enriched


# --- Company queries ---

def _get_company_name_groups(session: Session) -> dict:
    """Build a normalization map: normalized_key -> {canonical, raw_names, tier}.

    Uses normalize_company_name from icp_process, ref_companies (curated names),
    ref_company_aliases, and all company names from contacts.

    Returns dict:
        {normalized_key: {
            "canonical": str,       # best display name
            "raw_names": set[str],  # all raw name variants (lowercased)
            "tier": str,            # from ref_companies, or ""
        }}

    NOTE: multiple keys may point at the SAME group dict (alias unification in
    step 3 and truncated-suffix merges in step 7 share dict references), so a
    mutation seen through one key is visible through the others by design.
    """
    import re as _re
    from icp_process import (
        normalize_company_name, _normalize_danish_chars,
        _is_valid_company, _looks_like_company_name,
        fuzzy_match_company,
    )

    def _norm_key(name: str) -> str:
        """Normalize company name AND Danish chars for grouping key."""
        return _normalize_danish_chars(normalize_company_name(name))

    groups = {}  # normalized_key -> {canonical, raw_names, tier}

    # --- 1. Load ref_company_aliases -> canonical name mapping ---
    alias_to_canonical = {}  # alias_lower -> ref_company canonical name
    for alias_row in session.query(RefCompanyAlias).all():
        rc = alias_row.ref_company
        if rc:
            alias_to_canonical[alias_row.alias.strip().lower()] = rc.name.strip()

    # --- 2. Load company blocklist ---
    blocklist = load_company_blocklist(session)

    # --- 3. Seed groups from ref_companies (curated, authoritative) ---
    # Build ref_companies_normalized dict for fuzzy matching in step 5
    ref_companies_normalized = {}

    for rc in session.query(RefCompany).all():
        name_clean = rc.name.strip()
        nk = _norm_key(name_clean)
        if not nk:
            continue

        ref_companies_normalized[nk] = {"name": name_clean, "tier": rc.tier or ""}

        if nk not in groups:
            groups[nk] = {
                "canonical": name_clean,
                "raw_names": set(),
                "tier": rc.tier or "",
            }
        else:
            # ref_companies name wins as canonical
            groups[nk]["canonical"] = name_clean
            if rc.tier:
                groups[nk]["tier"] = rc.tier

        groups[nk]["raw_names"].add(name_clean.lower())

        # Add all aliases for this ref_company into the same group
        for alias_row in rc.aliases:
            alias_clean = alias_row.alias.strip()
            alias_nk = _norm_key(alias_clean)
            groups[nk]["raw_names"].add(alias_clean.lower())
            # Also register the alias's normalized form -> same group
            if alias_nk and alias_nk != nk:
                if alias_nk not in groups:
                    groups[alias_nk] = groups[nk]  # same dict reference
                else:
                    # Merge into existing group — keep ref_company as canonical
                    groups[alias_nk]["raw_names"].update(groups[nk]["raw_names"])
                    groups[alias_nk]["canonical"] = name_clean  # ref_company name wins
                    groups[nk] = groups[alias_nk]  # unify references

    # --- 4. Add all contact company names (with junk filtering) ---
    contact_company_rows = session.query(
        Contact.company, func.count(Contact.id).label("cnt")
    ).filter(
        Contact.company.isnot(None),
        Contact.company != "",
    ).group_by(Contact.company).all()

    # Names that don't match any group yet — candidates for fuzzy matching
    unmatched_contacts = []  # (name_clean, name_lower, name_for_check)

    for company_raw, cnt in contact_company_rows:
        name_clean = company_raw.strip()
        if not name_clean:
            continue
        name_lower = name_clean.lower()

        # Pre-strip pipe/dash descriptions
        name_for_check = name_clean
        for sep in (" | ", " - ", " – ", " — "):
            if sep in name_for_check:
                name_for_check = name_for_check.split(sep)[0].strip()
                break

        # Strip trailing degree/credential text after comma ("Niras, Ph.D" -> "Niras")
        if ", " in name_for_check:
            before_comma = name_for_check.split(", ")[0].strip()
            after_comma = name_for_check.split(", ", 1)[1].strip().lower()
            _degree_words = {"ph.d", "phd", "mba", "msc", "bsc", "cand", "hd",
                             "ma", "ba", "dr", "ing", "meng", "cand.polyt",
                             "cand.arch", "cand.merc", "cand.scient"}
            if any(after_comma.startswith(d) for d in _degree_words):
                name_for_check = before_comma

        # Fix "A7S" typo for "A/S" (LinkedIn rendering artifact)
        name_for_check = _re.sub(r'\bA7S\b', 'A/S', name_for_check)

        # Strip after slash when it looks like a division ("NCC Industry/Råstoffer" -> "NCC Industry")
        # Danish legal-form suffixes (A/S, I/S, K/S, P/S) contain a slash and must survive.
        if "/" in name_for_check and "a/s" not in name_for_check.lower() \
                and "i/s" not in name_for_check.lower() \
                and "k/s" not in name_for_check.lower() \
                and "p/s" not in name_for_check.lower():
            name_for_check = name_for_check.split("/")[0].strip()

        # Strip trailing period ("Grundejernes Investeringsfond." -> "...")
        name_for_check = name_for_check.rstrip(".")

        # Skip if on blocklist
        if name_lower in blocklist or normalize_company_name(name_clean) in blocklist:
            continue

        # Skip junk: fails validation or doesn't look like a company name
        # (but only if it doesn't already match a known ref_company/alias group)
        canonical_via_alias = alias_to_canonical.get(name_lower)
        if canonical_via_alias:
            nk = _norm_key(canonical_via_alias)
        else:
            nk = _norm_key(name_for_check)

        already_known = nk and nk in groups

        # Validation is deliberately skipped for already-known names: curated
        # ref_companies/aliases are trusted even if they look "junky".
        if not already_known:
            if not _is_valid_company(name_for_check):
                continue
            if not _looks_like_company_name(name_for_check):
                continue

        if not nk:
            continue

        if nk not in groups:
            # Not yet in any group — collect for fuzzy matching
            unmatched_contacts.append((name_clean, name_lower, name_for_check))
            continue

        groups[nk]["raw_names"].add(name_lower)

    # --- 5. Fuzzy-match unmatched contacts against ref_companies ---
    for name_clean, name_lower, name_for_check in unmatched_contacts:
        # `score` is unused here; fuzzy_match_company applies the threshold itself.
        matched_canonical, score = fuzzy_match_company(
            name_for_check, ref_companies_normalized, threshold=0.80
        )
        if matched_canonical:
            # Merge into the matched ref_company group
            nk = _norm_key(matched_canonical)
            if nk and nk in groups:
                groups[nk]["raw_names"].add(name_lower)
                continue

        # No fuzzy match — create a new standalone group
        nk = _norm_key(name_for_check)
        if not nk:
            continue
        if nk not in groups:
            groups[nk] = {
                "canonical": name_clean,
                "raw_names": set(),
                "tier": "",
            }
        groups[nk]["raw_names"].add(name_lower)

    # --- 6. Force ref_company names as canonical (final pass) ---
    # After all contacts/aliases are processed, ref_company names must win.
    for rc in session.query(RefCompany).all():
        nk = _norm_key(rc.name.strip())
        if nk and nk in groups:
            groups[nk]["canonical"] = rc.name.strip()

    # --- 7. Merge truncated-suffix groups ---
    # LinkedIn often truncates "A/S" to "A", creating keys like "cowi a" vs "cowi".
    # If a normalized key ends with " a" and the stripped version exists, merge them.
    # The identity check avoids re-merging keys that already share one dict.
    truncated_merges = []
    for nk in list(groups.keys()):
        if nk.endswith(" a"):
            base_key = nk[:-2].strip()
            if base_key and base_key in groups and groups[base_key] is not groups[nk]:
                truncated_merges.append((nk, base_key))

    for trunc_key, base_key in truncated_merges:
        base_grp = groups[base_key]
        trunc_grp = groups[trunc_key]
        # Merge raw_names into the base group
        base_grp["raw_names"].update(trunc_grp["raw_names"])
        # Point the truncated key at the base group
        groups[trunc_key] = base_grp

    return groups


def _get_all_raw_names_for_company(session: Session, company_name: str) -> set:
    """All stored company-name variants (lowercased) sharing one normalized key.

    Given a canonical name, resolves every raw variant recorded for its group;
    falls back to the exact lowercased input when no group matches.
    """
    from icp_process import normalize_company_name, _normalize_danish_chars

    groups = _get_company_name_groups(session)
    key = _normalize_danish_chars(normalize_company_name(company_name))

    group = groups.get(key) if key else None
    if group:
        return group["raw_names"]

    # No normalized group — exact lowercase match only.
    return {company_name.lower()}


def list_companies(
    session: Session,
    tier: str | None = None,
    search: str | None = None,
    sort_by: str = "company",
    sort_dir: str = "asc",
    page: int = 1,
    per_page: int = 50,
    date_from: date | None = None,
    date_to: date | None = None,
) -> dict:
    """Paginated, filtered, sorted list of companies (deduplicated by normalized name).

    Aggregates from two sources:
    (a) Companies with at least one ICP contact (from contacts table).
    (b) Companies in ref_companies reference list.

    When date_from/date_to provided, the last_30d column reflects in-range count,
    and (for non-all-time) only companies with >=1 interaction in range are shown
    from the contacts side.

    Args:
        tier: Filter value; "Not ICP" and "untiered" are special-cased.
        search: Case-insensitive substring match on the canonical company name.
        sort_by: One of company/tier/icp_count/total_interactions/last_30d/
            first_seen/last_active (unknown values fall back to company).
        sort_dir: "asc" or "desc".
        page / per_page: 1-based pagination; out-of-range values are clamped.
        date_from / date_to: Optional inclusive interaction-date range.

    Returns {items: [...], total, page, per_page, pages}.
    """
    # Interaction window for the "last_30d" column: either the explicit
    # range or a rolling 30-day window ending today.
    if date_from and date_to:
        d30 = date_from
        d30_end = date_to
    else:
        today = date.today()
        d30 = today - timedelta(days=30)
        d30_end = today

    # Step 1: Build normalization groups (normalized_key -> canonical + raw_names)
    groups = _get_company_name_groups(session)

    # Step 2: Fetch per-raw-company aggregates from DB
    # ICP count per raw company name
    icp_rows = session.query(
        Contact.company.label("company"),
        func.count(Contact.id).label("icp_count"),
    ).filter(
        Contact.status == "ICP",
        Contact.company.isnot(None),
        Contact.company != "",
    ).group_by(Contact.company).all()

    icp_map = {r.company.lower(): r.icp_count for r in icp_rows}

    # Total interactions, first_seen, last_active per raw company name
    total_rows = session.query(
        Contact.company.label("company"),
        func.sum(Contact.total_interactions).label("total_interactions"),
        func.min(Contact.first_interaction).label("first_seen"),
        func.max(Contact.latest_interaction).label("last_active"),
    ).filter(
        Contact.company.isnot(None),
        Contact.company != "",
    ).group_by(Contact.company).all()

    total_map = {}
    first_seen_map = {}
    last_active_map = {}
    for r in total_rows:
        # Hoist the lowercase key; it was recomputed for each of the three maps.
        key = r.company.lower()
        total_map[key] = r.total_interactions or 0
        first_seen_map[key] = r.first_seen
        last_active_map[key] = r.last_active

    # Last 30d / in-period interactions per raw company name
    last30_rows = session.query(
        Contact.company.label("company"),
        func.count(Interaction.id).label("last_30d"),
    ).join(
        Interaction, Interaction.contact_id == Contact.id
    ).filter(
        Contact.company.isnot(None),
        Contact.company != "",
        Interaction.date >= d30,
        Interaction.date <= d30_end,
    ).group_by(Contact.company).subquery()

    last30_map = {}
    for r in session.query(last30_rows).all():
        last30_map[r.company.lower()] = r.last_30d

    # Step 3: Determine which groups to show (ICP contacts or ref_companies)
    # Collect normalized keys from: ICP contacts + ref_companies
    icp_q = session.query(Contact.company).filter(
        Contact.company.isnot(None),
        Contact.company != "",
        Contact.status == "ICP",
    )
    if date_from and date_to:
        # Restrict the contacts side to contacts active in the date range.
        active_ids = session.query(func.distinct(Interaction.contact_id)).filter(
            Interaction.date >= date_from,
            Interaction.date <= date_to,
        ).subquery()
        icp_q = icp_q.filter(Contact.id.in_(active_ids))
    icp_company_names = icp_q.group_by(Contact.company).all()

    from icp_process import normalize_company_name, _normalize_danish_chars

    def _nk(name: str) -> str:
        return _normalize_danish_chars(normalize_company_name(name))

    visible_norm_keys = set()
    for (name,) in icp_company_names:
        nk = _nk(name.strip())
        if nk:
            visible_norm_keys.add(nk)

    for rc in session.query(RefCompany).all():
        nk = _nk(rc.name.strip())
        if nk:
            visible_norm_keys.add(nk)

    # Step 4: Aggregate per normalized group
    # Deduplicate: ensure each norm_key maps to a single canonical group dict
    seen_group_ids = set()
    items = []
    for norm_key in visible_norm_keys:
        grp = groups.get(norm_key)
        if not grp:
            continue

        # Avoid processing the same merged group twice (aliases can share dict references)
        grp_id = id(grp)
        if grp_id in seen_group_ids:
            continue
        seen_group_ids.add(grp_id)

        raw_names = grp["raw_names"]  # set of lowercased raw names

        agg_icp = sum(icp_map.get(rn, 0) for rn in raw_names)
        agg_total = sum(total_map.get(rn, 0) for rn in raw_names)
        agg_last30 = sum(last30_map.get(rn, 0) for rn in raw_names)

        first_dates = [first_seen_map[rn] for rn in raw_names if rn in first_seen_map and first_seen_map[rn]]
        agg_first = min(first_dates) if first_dates else None

        last_dates = [last_active_map[rn] for rn in raw_names if rn in last_active_map and last_active_map[rn]]
        agg_last = max(last_dates) if last_dates else None

        items.append({
            "company": grp["canonical"],
            "tier": grp["tier"],
            "icp_count": agg_icp,
            "total_interactions": agg_total,
            "last_30d": agg_last30,
            "first_seen": agg_first,
            "last_active": agg_last,
        })

    # Step 5: Filter
    if tier:
        if tier == "Not ICP":
            items = [i for i in items if i["tier"] == "Not ICP"]
        elif tier == "untiered":
            items = [i for i in items if not i["tier"]]
        else:
            items = [i for i in items if i["tier"] == tier]

    if search:
        term = search.lower()
        items = [i for i in items if term in i["company"].lower()]

    # Step 6: Sort
    sort_keys = {
        "company": lambda x: (x["company"] or "").lower(),
        "tier": lambda x: (x["tier"] or "").lower(),
        "icp_count": lambda x: x["icp_count"],
        "total_interactions": lambda x: x["total_interactions"],
        "last_30d": lambda x: x["last_30d"],
        "first_seen": lambda x: x["first_seen"] or date.min,
        "last_active": lambda x: x["last_active"] or date.min,
    }
    key_fn = sort_keys.get(sort_by, sort_keys["company"])
    items.sort(key=key_fn, reverse=(sort_dir == "desc"))

    # Step 7: Paginate. Clamp per_page >= 1 (per_page=0 previously raised
    # ZeroDivisionError) and page into [1, pages] (page<=0 previously produced
    # a negative slice offset and returned the wrong window).
    total = len(items)
    per_page = max(1, per_page)
    pages = max(1, (total + per_page - 1) // per_page)
    page = max(1, min(page, pages))
    offset = (page - 1) * per_page
    items = items[offset:offset + per_page]

    return {
        "items": items,
        "total": total,
        "page": page,
        "per_page": per_page,
        "pages": pages,
    }


def get_company_detail(session: Session, company_name: str) -> dict | None:
    """Company detail: tier, notes, contact counts, list of ICP+POI contacts.

    Matches contacts by normalized company name, so "CG Jensen A/S" detail page
    also shows contacts stored as "CG Jensen", "cg jensen a/s", etc.

    Returns None when neither a ref_companies row nor any contact matches.
    """
    # All lowercased raw variants that normalize to the same key as company_name.
    raw_names = _get_all_raw_names_for_company(session, company_name)

    # Find the ref_companies row (if any) with a single IN query instead of
    # one query per variant (was an N+1 loop). If several ref rows match
    # different variants, an arbitrary one is returned — equivalent to the
    # original iteration over an unordered set.
    ref = session.query(RefCompany).filter(
        func.lower(RefCompany.name).in_(raw_names)
    ).first()

    # All contacts at any variant of this company name
    all_contacts = session.query(Contact).filter(
        func.lower(Contact.company).in_(raw_names)
    ).all()

    if not all_contacts and not ref:
        return None

    icp_count = sum(1 for c in all_contacts if c.status == "ICP")
    poi_count = sum(1 for c in all_contacts if c.status == "POI")
    total_interactions = sum(c.total_interactions or 0 for c in all_contacts)

    # ICP + POI contacts with per-contact stats, most-engaged first.
    contacts_list = [
        {
            "id": c.id,
            "name": c.name,
            "title": c.title or "",
            "status": c.status,
            "total_interactions": c.total_interactions or 0,
            "latest_interaction": c.latest_interaction,
        }
        for c in all_contacts
        if c.status in ("ICP", "POI")
    ]
    contacts_list.sort(key=lambda x: x["total_interactions"], reverse=True)

    # Use the company name from ref if it exists (canonical casing), else from contacts
    display_name = ref.name if ref else (all_contacts[0].company if all_contacts else company_name)

    return {
        "company": display_name,
        "tier": ref.tier if ref else "",
        "notes": ref.notes if ref else "",
        "icp_count": icp_count,
        "poi_count": poi_count,
        "total_interactions": total_interactions,
        "contacts": contacts_list,
    }


def update_company_tier(session: Session, company_name: str, tier: str) -> dict:
    """Update (or insert) company tier in ref_companies.

    Also propagates company_tier onto every contact whose normalized company
    name matches (catches "CG Jensen A/S", "CG Jensen", etc.).
    If tier is 'Not ICP', ICP contacts at this company are reclassified.

    Returns {updated: bool, contacts_reclassified: int}.
    """
    # Upsert the ref_companies row keyed on exact (case-insensitive) name.
    existing = session.query(RefCompany).filter(
        func.lower(RefCompany.name) == company_name.lower()
    ).first()
    if existing is None:
        session.add(RefCompany(name=company_name, tier=tier))
    else:
        existing.tier = tier

    # Every stored lowercase variant of this company name (via normalization).
    variants = _get_all_raw_names_for_company(session, company_name)

    # Push the new tier onto all contacts at any variant.
    matching = session.query(Contact).filter(
        func.lower(Contact.company).in_(variants)
    )
    matching.update(
        {Contact.company_tier: tier},
        synchronize_session="fetch",
    )

    # Demoting the company to 'Not ICP' also demotes its ICP contacts.
    reclassified = 0
    if tier == "Not ICP":
        reclassified = matching.filter(Contact.status == "ICP").update(
            {Contact.status: "Not ICP"},
            synchronize_session="fetch",
        )

    session.commit()
    return {"updated": True, "contacts_reclassified": reclassified}


# --- One-time data fixes ---

def fix_tier_2_to_1(session: Session) -> dict:
    """One-time fix: change all tier '2' to '1' in ref_companies and contacts.

    Returns {ref_companies_updated: int, contacts_updated: int}.
    """
    # Same bulk UPDATE applied to both tables; collect the row counts.
    counts = {}
    for result_key, model, tier_col in (
        ("ref_companies_updated", RefCompany, RefCompany.tier),
        ("contacts_updated", Contact, Contact.company_tier),
    ):
        counts[result_key] = session.query(model).filter(tier_col == "2").update(
            {tier_col: "1"},
            synchronize_session="fetch",
        )

    session.commit()
    return counts


# --- Auth helpers ---

def get_user_by_username(session: Session, username: str):
    """Look up a User by exact username; returns None when no row matches."""
    matches = session.query(User).filter(User.username == username)
    return matches.first()