"""
SiteHub LinkedIn ICP Tracker — One-Time Migration Script

Reads all Excel/JSON/TXT files and inserts into PostgreSQL via SQLAlchemy.
Verifies row counts match after migration.

Usage:
    docker compose up -d
    pip install sqlalchemy psycopg2-binary
    python icp_migrate.py
"""

import json
import sys
from datetime import datetime
from pathlib import Path

import openpyxl

# Ensure project dir is on path
BASE_DIR = Path(__file__).resolve().parent
sys.path.insert(0, str(BASE_DIR))

from icp_db import (
    get_session, Contact, Interaction, SourceProfile,
    RefCompany, RefCompanyAlias, RefJobTitle, RefPerson,
    SitehubEmployee, CompanyBlocklist,
    LearnedPatternTitle, LearnedPatternCompany, ApiUsage,
    check_db_health,
)
from sqlalchemy import func, select

TRACKER_PATH = BASE_DIR / "SiteHub_LinkedIn_ICP_Tracker.xlsx"
REFERENCE_PATH = BASE_DIR / "ICP_Reference_Lists.xlsx"
LEARNED_PATTERNS_FILE = BASE_DIR / "learned_classification_patterns.json"
SITEHUB_EMPLOYEES_FILE = BASE_DIR / "sitehub_employees.txt"
COMPANY_BLOCKLIST_FILE = BASE_DIR / ".company_blocklist.txt"
SEARCH_USAGE_FILE = BASE_DIR / ".brave_search_usage.json"


def migrate_reference_lists(session):
    """Step 1: Migrate reference lists (companies, aliases, job titles, people).

    Reads the "Companies", "Job Titles" and "People" sheets of
    REFERENCE_PATH and inserts RefCompany (+ RefCompanyAlias), RefJobTitle
    and RefPerson rows.  Commits once at the end.
    """
    print("=== Migrating Reference Lists ===")
    wb = openpyxl.load_workbook(REFERENCE_PATH, read_only=True, data_only=True)

    # --- Companies ---
    ws = wb["Companies"]
    company_count = 0
    alias_count = 0
    # Case-insensitive alias dedup.  Seed once from any pre-existing rows,
    # then track in memory — avoids one SELECT (plus flush) per alias.
    seen_aliases = {
        alias.lower() for (alias,) in session.query(RefCompanyAlias.alias)
    }
    for row in ws.iter_rows(min_row=2, values_only=True):
        # Pad so short rows unpack safely.
        row_list = list(row) + [None] * 5
        name, tier, qualifying_titles, aliases_str, notes, *_ = row_list[:6]
        if not name:
            continue

        # str() guards against non-string cells (openpyxl may yield numbers).
        name_clean = str(name).strip()
        rc = RefCompany(
            name=name_clean,
            tier=str(tier).strip() if tier is not None else None,
            qualifying_titles=str(qualifying_titles).strip() if qualifying_titles else "",
            notes=str(notes).strip() if notes else "",
        )
        session.add(rc)
        session.flush()  # Assigns rc.id, needed for the alias FK below.
        company_count += 1

        # Parse comma-separated aliases, skipping duplicates.
        if aliases_str:
            for alias in str(aliases_str).split(","):
                alias_clean = alias.strip()
                if alias_clean and alias_clean.lower() not in seen_aliases:
                    session.add(RefCompanyAlias(
                        ref_company_id=rc.id,
                        alias=alias_clean,
                    ))
                    seen_aliases.add(alias_clean.lower())
                    alias_count += 1

    print(f"  Companies: {company_count}")
    print(f"  Aliases: {alias_count}")

    # --- Job Titles ---
    ws = wb["Job Titles"]
    title_count = 0
    for row in ws.iter_rows(min_row=2, values_only=True):
        row_list = list(row) + [None] * 4
        pattern, status, priority, *_ = row_list[:4]
        if not pattern:
            continue
        session.add(RefJobTitle(
            pattern=str(pattern).strip().lower(),
            status=str(status).strip() if status else "Review",
            priority=int(priority) if priority is not None else 50,
        ))
        title_count += 1
    print(f"  Job Titles: {title_count}")

    # --- People ---
    ws = wb["People"]
    people_count = 0
    for row in ws.iter_rows(min_row=2, values_only=True):
        row_list = list(row) + [None] * 4
        pname, status, company, *_ = row_list[:4]
        # Skip blanks and the placeholder row shipped with the template.
        if not pname or pname == "Add specific people here":
            continue
        session.add(RefPerson(
            name=str(pname).strip(),
            status=str(status).strip() if status else "Review",
            company=str(company).strip() if company else "",
        ))
        people_count += 1
    print(f"  People: {people_count}")

    wb.close()
    session.commit()


def migrate_auxiliary_files(session):
    """Step 2: Migrate sitehub_employees, company_blocklist, learned_patterns, api_usage.

    Each input file is optional; missing files simply contribute zero rows.
    Commits once at the end.
    """
    print("\n=== Migrating Auxiliary Files ===")

    def _lines(path, skip_comments=False):
        # Yield non-empty, lowercased lines from *path*; optionally drop
        # '#' comment lines.  Yields nothing when the file is absent.
        if not path.exists():
            return
        for line in path.read_text(encoding="utf-8").splitlines():
            name = line.strip().lower()
            if not name:
                continue
            if skip_comments and name.startswith("#"):
                continue
            yield name

    # --- SiteHub Employees ---
    # Dedup via an in-memory set seeded from the DB once, instead of
    # issuing one case-insensitive SELECT per input line.
    emp_count = 0
    seen = {n.lower() for (n,) in session.query(SitehubEmployee.name)}
    for name in _lines(SITEHUB_EMPLOYEES_FILE, skip_comments=True):
        if name not in seen:
            session.add(SitehubEmployee(name=name))
            seen.add(name)
            emp_count += 1
    print(f"  SiteHub Employees (from file): {emp_count}")

    # --- Company Blocklist ---
    block_count = 0
    seen = {n.lower() for (n,) in session.query(CompanyBlocklist.name)}
    for name in _lines(COMPANY_BLOCKLIST_FILE):
        if name not in seen:
            session.add(CompanyBlocklist(name=name))
            seen.add(name)
            block_count += 1
    print(f"  Company Blocklist: {block_count}")

    # --- Learned Patterns ---
    title_count = 0
    company_count = 0
    if LEARNED_PATTERNS_FILE.exists():
        with open(LEARNED_PATTERNS_FILE, "r", encoding="utf-8") as f:
            patterns = json.load(f)

        # Per-keyword classification tallies, keyed lowercase.
        for keyword, counts in patterns.get("title_keywords", {}).items():
            session.add(LearnedPatternTitle(
                keyword=keyword.lower(),
                icp_count=counts.get("ICP", 0),
                poi_count=counts.get("POI", 0),
                not_icp_count=counts.get("Not ICP", 0),
            ))
            title_count += 1

        for company, info in patterns.get("companies", {}).items():
            session.add(LearnedPatternCompany(
                company=company.lower(),
                status=info.get("status", "Review"),
                count=info.get("count", 0),
            ))
            company_count += 1

    print(f"  Learned Patterns (titles): {title_count}")
    print(f"  Learned Patterns (companies): {company_count}")

    # --- API Usage ---
    usage_count = 0
    if SEARCH_USAGE_FILE.exists():
        with open(SEARCH_USAGE_FILE, "r", encoding="utf-8") as f:
            usage = json.load(f)
        # Only migrate when the usage file actually recorded a month.
        if usage.get("month"):
            session.add(ApiUsage(
                service="brave_search",
                month=usage["month"],
                count=usage.get("count", 0),
                limit_notified=usage.get("limit_notified", False),
            ))
            usage_count += 1
    print(f"  API Usage records: {usage_count}")

    session.commit()


def migrate_contacts(session):
    """Step 3: Migrate contacts from the tracker's Contacts sheet.

    Returns the number of contacts inserted (used later for verification).
    """
    print("\n=== Migrating Contacts ===")

    def _text(value):
        # Coerce a cell to a stripped string; None/""/0 become "".
        # str() guards against numeric cells, which openpyxl may return
        # and on which the previous `(x or "").strip()` pattern crashed.
        return str(value).strip() if value else ""

    def _as_date(value):
        # Excel datetimes become dates; anything else passes through.
        return value.date() if isinstance(value, datetime) else value

    wb = openpyxl.load_workbook(TRACKER_PATH, read_only=True, data_only=True)
    ws = wb["Contacts"]
    headers = [cell.value for cell in ws[1]]

    contact_count = 0
    for row in ws.iter_rows(min_row=2, values_only=True):
        d = dict(zip(headers, row))
        name = _text(d.get("Name"))
        if not name:
            continue  # Skip rows without a name (blank/padding rows).

        session.add(Contact(
            name=name,
            title=_text(d.get("Title")),
            company=_text(d.get("Company")),
            status=_text(d.get("Status") or "Review"),
            company_tier=_text(d.get("Company Tier")),
            total_interactions=d.get("Total Interactions") or 0,
            first_interaction=_as_date(d.get("First Interaction")),
            latest_interaction=_as_date(d.get("Latest Interaction")),
            notes=_text(d.get("Notes")),
        ))
        contact_count += 1

    wb.close()
    session.commit()
    print(f"  Contacts migrated: {contact_count}")
    return contact_count


def migrate_interactions(session):
    """Step 4: Migrate interactions from the tracker's All Interactions sheet.

    Resolves "Source Profile" cells against SourceProfile rows (matched by
    code first, then case-insensitively by full name), inserting in batches
    of 500.  Returns the number of interactions inserted.
    """
    print("\n=== Migrating Interactions ===")
    wb = openpyxl.load_workbook(TRACKER_PATH, read_only=True, data_only=True)
    ws = wb["All Interactions"]
    headers = [cell.value for cell in ws[1]]

    # Build the source-profile lookup with a single query, keyed both by
    # code and by lowercased full name (previously queried twice).
    profiles = {}
    for p in session.query(SourceProfile).all():
        profiles[p.code] = p.id
        profiles[p.full_name.lower()] = p.id

    interaction_count = 0
    batch = []
    for row in ws.iter_rows(min_row=2, values_only=True):
        d = dict(zip(headers, row))
        name = (d.get("Name") or "").strip()
        if not name:
            continue

        dt = d.get("Date")
        if isinstance(dt, datetime):
            dt = dt.date()

        # source_raw is always a str here, so .lower() is safe directly.
        source_raw = (d.get("Source Profile") or "").strip()
        source_id = profiles.get(source_raw) or profiles.get(source_raw.lower())

        batch.append(Interaction(
            date=dt,
            post_hook=(d.get("Post Hook") or "").strip(),
            type=(d.get("Type") or "").strip(),
            name=name,
            title=(d.get("Title") or "").strip(),
            company=(d.get("Company") or "").strip(),
            profile_url=(d.get("Profile URL") or "").strip(),
            source_profile_id=source_id,
            source_profile_raw=source_raw,
        ))
        interaction_count += 1

        # Flush every 500 rows to bound session memory.
        if len(batch) >= 500:
            session.add_all(batch)
            session.flush()
            batch = []

    if batch:
        session.add_all(batch)

    wb.close()
    session.commit()
    print(f"  Interactions migrated: {interaction_count}")
    return interaction_count


def link_interactions(session):
    """Step 5: Link interactions to contacts via name matching.

    Delegates to icp_db.link_interactions_to_contacts and reports the count.
    """
    print("\n=== Linking Interactions to Contacts ===")
    from icp_db import link_interactions_to_contacts

    total_linked = link_interactions_to_contacts(session)
    print(f"  Linked: {total_linked} interactions")
    return total_linked


def verify_migration(session, expected_contacts, expected_interactions):
    """Step 6: Verify row counts match source data.

    Prints a count table from check_db_health and compares the contact and
    interaction totals against the values migrated from Excel.  Returns
    True when both match.
    """
    print("\n=== Verification ===")
    health = check_db_health(session)

    # Tables whose counts must match the Excel source exactly.
    expectations = {
        "contacts": expected_contacts,
        "interactions": expected_interactions,
    }
    table_keys = (
        "contacts", "interactions", "source_profiles", "ref_companies",
        "ref_company_aliases", "ref_job_titles", "ref_people",
        "sitehub_employees", "company_blocklist",
        "learned_patterns_titles", "learned_patterns_companies", "api_usage",
    )
    for key in table_keys:
        suffix = f"  (expected: {expectations[key]})" if key in expectations else ""
        # Label column padded to 28 chars, count right-aligned in 6.
        print(f"  {key + ':':<28}{health[key]:>6}{suffix}")

    ok = True
    for key, expected in expectations.items():
        if health[key] != expected:
            noun = key[:-1].capitalize()  # "contacts" -> "Contact"
            print(f"\n  WARNING: {noun} count mismatch! DB={health[key]}, Excel={expected}")
            ok = False

    if ok:
        print("\n  All counts match. Migration successful.")
    else:
        print("\n  Migration completed with warnings. Check counts above.")

    return ok


def main():
    """Run the full Excel -> PostgreSQL migration, then verify counts."""
    print("=" * 60)
    print("SiteHub ICP Tracker — Excel to PostgreSQL Migration")
    print("=" * 60)
    print()

    # Verify source files exist before touching the database.
    for path, label in [
        (TRACKER_PATH, "Tracker"),
        (REFERENCE_PATH, "Reference Lists"),
    ]:
        if not path.exists():
            print(f"ERROR: {label} file not found: {path}")
            sys.exit(1)

    # Test database connection
    print("Connecting to PostgreSQL...")
    try:
        session = get_session()
        # Connectivity probe.  func.now() must be wrapped in select():
        # SQLAlchemy 2.0 raises ObjectNotExecutableError on a bare function.
        session.execute(select(func.now()))
        print("  Connected successfully.\n")
    except Exception as e:
        print(f"  ERROR: Could not connect to database: {e}")
        print("  Make sure PostgreSQL is running: docker compose up -d")
        sys.exit(1)

    try:
        # Check if DB already has data; offer a destructive re-run.
        existing = session.query(Contact).count()
        if existing > 0:
            print(f"WARNING: Database already contains {existing} contacts.")
            response = input("Continue and overwrite? [y/N]: ").strip().lower()
            if response != "y":
                print("Aborted.")
                sys.exit(0)
            # Clear all data.  Child tables (Interaction, RefCompanyAlias)
            # are deleted before their parents to satisfy FK constraints.
            print("Clearing existing data...")
            for model in [
                Interaction, Contact, RefCompanyAlias, RefCompany,
                RefJobTitle, RefPerson, SitehubEmployee, CompanyBlocklist,
                LearnedPatternTitle, LearnedPatternCompany, ApiUsage,
            ]:
                session.query(model).delete()
            session.commit()

        # Run migration steps
        migrate_reference_lists(session)
        migrate_auxiliary_files(session)
        expected_contacts = migrate_contacts(session)
        expected_interactions = migrate_interactions(session)
        link_interactions(session)
        verify_migration(session, expected_contacts, expected_interactions)
    finally:
        # Release the session even on abort (sys.exit) or on error.
        session.close()

    print("\nDone.")


if __name__ == "__main__":
    main()
