#!/usr/bin/env python3
"""
Copy files/directories between machines using rclone with scp-style syntax.

Rclone provides retry, resume, and efficient transfer for large directories.

Prerequisites:
    - rclone installed: https://rclone.org/install/

Path format: Both forward slashes (/) and backslashes (\\) are accepted.

Usage:
    # Simple mode: just specify model and hosts
    python copy_artifacts.py --model psd1 10.228.200.117 10.228.203.217

    # Direct transfer via SSH (runs rclone on source Windows machine)
    python copy_artifacts.py --model psd1 --via-ssh 10.228.200.117 10.228.203.217

    # Zip mode: tar first, transfer single file, untar (better for flaky networks)
    python copy_artifacts.py --model psd1 --zip 10.228.200.117 10.228.203.217

    # Zip mode with direct SSH transfer (scp between Windows machines)
    python copy_artifacts.py --model psd1 --zip --via-ssh 10.228.200.117 10.228.203.217

    # Full scp-style syntax
    python copy_artifacts.py host1:/path/to/source host2:/path/to/dest

    # With explicit credentials
    python copy_artifacts.py --model psd1 --username Admin --password secret host1 host2
"""

import argparse
import os
import shutil
import subprocess
import sys
import uuid
from pathlib import Path, PureWindowsPath
from typing import Optional, Tuple

import paramiko


# Standard artifact path template: the Windows directory used when a host is
# given without an explicit path. "{model}" is substituted with the --model value.
ARTIFACT_PATH_TEMPLATE = r"C:\Users\Administrator\Desktop\shajaisw\{model}\waic_work_compile"
# Model names accepted by the --model command-line option.
SUPPORTED_MODELS = ["psd1", "psd2", "psd3", "psd4", "psh", "psi", "psu0", "psu1", "vit-base"]


def parse_host_path(spec: str, model: Optional[str] = None) -> Tuple[str, str]:
    """
    Split a ``host`` or ``host:path`` argument into its two components.

    Only the first colon separates host from path, so Windows drive letters
    in the path part (``host:C:/dir``) are preserved untouched.

    Args:
        spec: One of "host", "host:/path/to/file", "host:C:/Windows/path",
              or a local path starting with "/" or "." (reported with the
              pseudo-host "local" and made absolute).
        model: When given, fills in the standard artifact path for specs
               that carry no path of their own.

    Returns:
        Tuple of (host, path); host is "local" for local paths.

    Raises:
        ValueError: If the host is empty, or the path is missing and no
            model was supplied.
    """
    # A leading "/" or "." marks a path on this machine.
    if spec.startswith(("/", ".")):
        return "local", os.path.abspath(spec)

    host, colon, path = spec.partition(":")

    if not colon:
        # Bare host name: only meaningful when a model supplies the path.
        if not model:
            raise ValueError(
                f"Invalid specification: {spec}. Expected format: host:/path or use --model")
        return spec, ARTIFACT_PATH_TEMPLATE.format(model=model)

    if not host:
        raise ValueError(f"Missing host in specification: {spec}")
    if path:
        return host, path
    if model:
        # "host:" with nothing after the colon falls back to the model path.
        return host, ARTIFACT_PATH_TEMPLATE.format(model=model)
    raise ValueError(f"Missing path in specification: {spec}")


def normalize_sftp_path(path: str) -> str:
    """
    Rewrite a Windows path in the form used for SFTP access.

    Backslashes become forward slashes, and a path that begins with a drive
    letter gains a leading slash. Paths without a drive letter only get the
    slash conversion.
    Example: C:\\Users\\Admin -> /C:/Users/Admin
    """
    unix_style = path.replace("\\", "/")
    has_drive_prefix = (
        len(unix_style) >= 2 and unix_style[0].isalpha() and unix_style[1] == ":"
    )
    return f"/{unix_style}" if has_drive_prefix else unix_style


def setup_rclone_remote(host: str, username: str, password: str, remote_name: str) -> bool:
    """Configure an rclone SFTP remote dynamically.

    Args:
        host: SFTP server address stored in the remote.
        username: Login user stored in the remote.
        password: Plaintext password; it is obscured with ``rclone obscure``
            before being written to the rclone config.
        remote_name: Name under which the remote is created.

    Returns:
        True if the remote was created, False if rclone could not be run or
        exited with a non-zero status (a message is printed in both cases).
    """
    try:
        # "rclone obscure -" reads the secret from stdin. This keeps the
        # plaintext password out of the process argument list and out of any
        # CalledProcessError message, and also copes with passwords that
        # start with "-" (which would otherwise be parsed as a flag).
        obscured_pass = subprocess.run(
            ["rclone", "obscure", "-"],
            input=password,
            capture_output=True,
            text=True,
            check=True,
        ).stdout.strip()

        # Create the remote config
        # disable_hashcheck=true: Disable hash checking (md5sum fails on Windows SFTP)
        subprocess.run(
            [
                "rclone",
                "config",
                "create",
                remote_name,
                "sftp",
                f"host={host}",
                f"user={username}",
                f"pass={obscured_pass}",
                "shell_type=powershell",
                "disable_hashcheck=true",
            ],
            capture_output=True,
            text=True,
            check=True,
        )
        return True
    except subprocess.CalledProcessError as e:
        # Deliberately avoid str(e): it embeds the full argv, which includes
        # the (reversibly) obscured password. rclone's stderr is more useful.
        detail = (e.stderr or "").strip()
        message = f"Failed to setup rclone remote: rclone exited with status {e.returncode}"
        if detail:
            message += f": {detail}"
        print(message)
        return False
    except OSError as e:
        # rclone binary missing or not executable.
        print(f"Failed to setup rclone remote: {e}")
        return False


# =============================================================================
# Content Functions (What to copy)
# =============================================================================


def tar_source(
    host: str,
    path: str,
    username: str,
    password: str,
    tar_path: str,
) -> bool:
    """Create a gzipped tar archive of ``path`` on the source machine.

    The archive is written to ``tar_path`` and contains the final component
    of ``path``, archived relative to its parent directory. Returns True when
    the remote command succeeds.
    """
    source = PureWindowsPath(path)
    # -czf: create, gzip, file (Windows 10+ tar supports gzip)
    archive_cmd = f"tar -czf '{tar_path}' -C '{source.parent}' '{source.name}'"
    remote_cmd = f'powershell -Command "{archive_cmd}"'
    ok, _output = _run_ssh_command(host, username, password, remote_cmd)
    return ok


def untar_dest(
    host: str,
    dest_path: str,
    username: str,
    password: str,
    tar_path: str,
    clean: bool = False,
) -> bool:
    """Unpack a gzipped tar archive into the parent directory of ``dest_path``.

    Args:
        host: Destination host ("local" extracts on this machine with tar,
            anything else is handled over SSH with PowerShell).
        dest_path: Directory the archive is expected to produce; the archive
            is extracted into its parent.
        username: SSH username for remote hosts
        password: SSH password for remote hosts
        tar_path: Path to the tar.gz archive on the destination
        clean: If True, delete dest_path before extracting

    Returns:
        True if extraction succeeded.
    """
    if host != "local":
        # Remote Windows extraction: build one PowerShell pipeline.
        extract_root = str(PureWindowsPath(dest_path).parent)
        steps = []
        if clean:
            steps.append(
                f"if (Test-Path '{dest_path}') {{ Remove-Item -Recurse -Force '{dest_path}' -ErrorAction SilentlyContinue }}; "
            )
        steps.append(
            f"if (-not (Test-Path '{extract_root}')) {{ New-Item -ItemType Directory -Path '{extract_root}' -Force }}; "
        )
        steps.append(f"tar -xzf '{tar_path}' -C '{extract_root}'")
        remote_cmd = 'powershell -Command "' + "".join(steps) + '"'
        ok, _output = _run_ssh_command(host, username, password, remote_cmd)
        return ok

    # Local extraction
    target = Path(dest_path)
    extract_root = str(target.parent)
    if clean and target.exists():
        print(f"    Removing existing directory: {dest_path}")
        try:
            shutil.rmtree(dest_path)
        except OSError as e:
            # A stale directory is not fatal; tar extracts over it.
            print(f"    Warning: Failed to remove directory (continuing anyway): {e}")
    os.makedirs(extract_root, exist_ok=True)
    completed = subprocess.run(
        ["tar", "-xzf", tar_path, "-C", extract_root],
        check=False
    )
    return completed.returncode == 0


def cleanup_tar(
    host: str,
    username: str,
    password: str,
    tar_path: str,
) -> None:
    """Delete a temporary archive, ignoring failures.

    For host "local" the file is removed from this machine; otherwise a
    PowerShell Remove-Item is issued over SSH. Nothing is returned and a
    missing file is not an error.
    """
    if host != "local":
        # Remote cleanup
        remove_cmd = (
            f"powershell -Command \"Remove-Item '{tar_path}' -Force -ErrorAction SilentlyContinue\""
        )
        _run_ssh_command(host, username, password, remove_cmd, stream_output=False)
        return

    # Local cleanup is best effort: the file may already be gone.
    try:
        os.remove(tar_path)
    except OSError:
        pass


def get_file_size_gb(
    host: str,
    username: str,
    password: str,
    file_path: str,
) -> Optional[float]:
    """Return the size of a remote file in GB, or None if it cannot be read.

    The size is obtained by running PowerShell's Get-Item over SSH; any
    output that does not parse as a float yields None.
    """
    query = f"powershell -Command \"(Get-Item '{file_path}').Length / 1GB\""
    _ok, raw = _run_ssh_command(host, username, password, query, stream_output=False)
    try:
        size = float(raw.strip())
    except ValueError:
        return None
    return size


# =============================================================================
# Transfer Functions (How to copy)
# =============================================================================


def transfer_rclone(
    from_host: str,
    from_path: str,
    to_host: str,
    to_path: str,
    username: str,
    password: str,
) -> bool:
    """Copy with an rclone process running on this machine.

    Each non-local endpoint is reached through a temporary rclone SFTP
    remote ("copy_from" / "copy_to"); "local" endpoints are passed to rclone
    as plain paths. Remotes created here are deleted again before returning.

    Returns:
        True if rclone exited with status 0, False if a remote could not be
        configured or rclone failed.
    """
    configured = []  # remotes created by this call; removed in the finally block

    try:
        # Source endpoint
        if from_host != "local":
            if not setup_rclone_remote(from_host, username, password, "copy_from"):
                return False
            configured.append("copy_from")
            source_spec = "copy_from:" + normalize_sftp_path(from_path)
        else:
            source_spec = from_path

        # Destination endpoint
        if to_host != "local":
            if not setup_rclone_remote(to_host, username, password, "copy_to"):
                return False
            configured.append("copy_to")
            target_spec = "copy_to:" + normalize_sftp_path(to_path)
        else:
            # Ensure local directory exists
            os.makedirs(to_path, exist_ok=True)
            target_spec = to_path

        rclone_args = ["rclone", "copy", source_spec, target_spec]
        rclone_args += ["--retries", "5", "--low-level-retries", "10"]
        rclone_args += ["--transfers", "4", "--checkers", "8"]
        rclone_args += ["--progress", "--stats", "10s", "--ignore-checksum"]

        print(f"Running: {' '.join(rclone_args)}")
        return subprocess.run(rclone_args, check=False).returncode == 0

    finally:
        for name in configured:
            subprocess.run(["rclone", "config", "delete", name], capture_output=True, check=False)


def transfer_rclone_on_source(
    from_host: str,
    from_path: str,
    to_host: str,
    to_path: str,
    username: str,
    password: str,
) -> bool:
    """Transfer by running rclone on the source machine via SSH.

    Invokes ``copy_to_remote.ps1`` from a fixed working directory on
    ``from_host``. The script itself is not part of this file; presumably it
    drives rclone on the source machine (verify against the script).

    Args:
        from_host: Machine the script is executed on.
        from_path: Passed to the script as -SourcePath.
        to_host: Passed to the script as -DestHost.
        to_path: Passed to the script as -DestPath.
        username: Used both for the SSH login to ``from_host`` and as the
            script's -Username.
        password: Used both for the SSH login and as the script's -Password.

    Returns:
        True if the remote command exited successfully.
    """

    def script_call(secret: str) -> str:
        # Build the PowerShell invocation; `secret` is swapped for a mask
        # when the text is only meant for display.
        return (
            f'.\\copy_to_remote.ps1 '
            f'-SourcePath "{from_path}" '
            f'-DestHost "{to_host}" '
            f'-DestPath "{to_path}" '
            f'-Username "{username}" '
            f'-Password "{secret}"'
        )

    ps_command = script_call(password)
    work_dir = r"C:\Users\Administrator\Desktop\shajaisw"
    full_command = f'powershell -Command "Set-Location \'{work_dir}\'; {ps_command}"'

    # Show the masked form so the plaintext password does not end up in
    # terminal scrollback or CI logs.
    print(f"Running on {from_host}: {script_call('********')}")
    success, _ = _run_ssh_command(from_host, username, password, full_command)
    return success


def transfer_scp(
    from_host: str,
    from_path: str,
    to_host: str,
    to_path: str,
    username: str,
    password: str,
    file_size_gb: Optional[float] = None,
) -> bool:
    """
    Push a file from the source host to the destination with scp.

    The scp command is executed on ``from_host`` over SSH, so data flows
    directly between the two machines.

    Note: scp doesn't show progress over non-interactive SSH, so when
    ``file_size_gb`` is given it is printed up front instead.

    Returns:
        True if the remote scp command succeeded.
    """
    if file_size_gb:
        print(f"    Transferring {file_size_gb:.2f} GB (no progress available for scp over SSH)...")

    remote_target = f"{username}@{to_host}:'{to_path}'"
    scp_invocation = f"scp -o StrictHostKeyChecking=no '{from_path}' {remote_target}"
    remote_cmd = f'powershell -Command "{scp_invocation}"'

    ok, _output = _run_ssh_command(from_host, username, password, remote_cmd)
    return ok


def _run_ssh_command(
    host: str,
    username: str,
    password: str,
    command: str,
    stream_output: bool = True,
) -> Tuple[bool, str]:
    """
    Run a command on remote host via SSH.

    Args:
        host: Host to connect to (port 22, password authentication).
        username: SSH login name.
        password: SSH password.
        command: Command line executed on the remote host.
        stream_output: If True, echo the remote stdout line by line while
            the command runs.

    Returns:
        Tuple of (success, output). ``success`` is True only when the
        connection worked and the command exited with status 0. ``output``
        is the collected remote stdout, or the error text when the
        connection or SSH session failed.
    """
    # The command may embed the password (e.g. as a script argument); mask
    # it in the log line. The empty-string guard avoids str.replace("")
    # inserting the mask between every character.
    loggable = command.replace(password, "********") if password else command

    # Created outside the try block so the finally clause can always close it.
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(host, port=22, username=username, password=password)

        print(f"Executing {loggable} on {host}...")
        _stdin, stdout, stderr = client.exec_command(command)

        output_lines = []
        for line in iter(stdout.readline, ""):
            if stream_output:
                print(line, end="")
            output_lines.append(line)

        exit_code = stdout.channel.recv_exit_status()
        output = "".join(output_lines)

        if exit_code != 0:
            # Remote Windows tools may emit non-UTF-8 bytes; never let the
            # decoding of diagnostics raise.
            stderr_output = stderr.read().decode(errors="replace")
            if stderr_output:
                print(f"STDERR: {stderr_output}")
            return False, output

        return True, output

    except (paramiko.SSHException, OSError) as e:
        # OSError covers refused/timed-out/unreachable connections, which
        # are raised as socket errors rather than SSHException.
        print(f"SSH error: {e}")
        return False, str(e)
    finally:
        client.close()


# =============================================================================
# Orchestration (Combining content + transfer)
# =============================================================================


def copy(
    from_host: str,
    from_path: str,
    to_host: str,
    to_path: str,
    username: str,
    password: str,
    content: str = "direct",
    method: str = "rclone",
    clean: bool = False,
) -> bool:
    """
    Dispatch a copy to the direct or tar-based implementation.

    Args:
        content: "direct" (folder as-is) or "tar" (archive first)
        method: "rclone" (via local runner), "rclone_source" (rclone on source), or "scp" (direct scp)
        clean: If True, delete destination directory before copying (only for tar mode)

    Returns:
        The success flag reported by the selected implementation.

    Raises:
        ValueError: If ``content`` is neither "direct" nor "tar".
    """
    if content not in ("direct", "tar"):
        raise ValueError(f"Unknown content mode: {content}")

    endpoints = (from_host, from_path, to_host, to_path, username, password)
    if content == "tar":
        return _copy_tar(*endpoints, method, clean)
    return _copy_direct(*endpoints, method)


def _copy_direct(
    from_host: str,
    from_path: str,
    to_host: str,
    to_path: str,
    username: str,
    password: str,
    method: str,
) -> bool:
    """Copy the folder as-is with the requested transfer method.

    Supports "rclone" (rclone on this machine) and "rclone_source" (rclone
    started on the source host); any other method raises ValueError after
    the source/target summary has been printed.
    """
    print(f"Source: {from_host}:{from_path}")
    print(f"Target: {to_host}:{to_path}")
    print()

    if method not in ("rclone", "rclone_source"):
        raise ValueError(f"Direct copy does not support method: {method}")

    transfer = transfer_rclone if method == "rclone" else transfer_rclone_on_source
    return transfer(from_host, from_path, to_host, to_path, username, password)


def _copy_tar(
    from_host: str,
    from_path: str,
    to_host: str,
    to_path: str,
    username: str,
    password: str,
    method: str,
    clean: bool = False,
) -> bool:
    """Copy by tarring/gzipping on source, transferring archive, untarring on dest.

    The temporary archive is created next to the source directory and is
    transferred into the parent of ``to_path``. Both temporary archives are
    removed afterwards whether the transfer succeeded or failed.

    Args:
        from_host: Source machine (treated as Windows).
        from_path: Directory to archive on the source.
        to_host: Destination machine, or "local" for this machine.
        to_path: Directory the archive should unpack to on the destination.
        username: SSH username for both hosts.
        password: SSH password for both hosts.
        method: "rclone", "rclone_source" or "scp"; selects how the archive
            is moved between the hosts.
        clean: If True, delete destination directory before extracting

    Returns:
        True if archive creation, transfer and extraction all succeeded.
        Any exception (including an unsupported ``method``) is reported and
        turned into False.
    """
    tar_name = f"transfer_{uuid.uuid4().hex[:8]}.tar.gz"

    # Source is always Windows (artifacts come from Windows machines)
    source_parent = str(PureWindowsPath(from_path).parent)
    source_tar = f"{source_parent}\\{tar_name}"

    # Destination can be local Linux or remote Windows
    if to_host == "local":
        dest_parent = str(Path(to_path).parent)
        dest_tar = f"{dest_parent}/{tar_name}"
    else:
        dest_parent = str(PureWindowsPath(to_path).parent)
        dest_tar = f"{dest_parent}\\{tar_name}"

    print("=== Tar+Gzip Mode Transfer ===")
    print(f"Source: {from_host}:{from_path}")
    print(f"Dest:   {to_host}:{to_path}")
    print(f"Method: {method}")
    print()

    def run_steps() -> bool:
        # Steps 1-3: archive, transfer, extract. Returns False on the first
        # step that reports failure.
        print(f"[1/4] Creating archive on {from_host}...")
        if not tar_source(from_host, from_path, username, password, source_tar):
            print("Failed to create archive on source")
            return False

        size_gb = get_file_size_gb(from_host, username, password, source_tar)
        if size_gb is not None:
            print(f"    Archive size: {size_gb:.2f} GB")

        print(f"\n[2/4] Transferring archive to {to_host}...")
        if method == "rclone":
            transferred = transfer_rclone(
                from_host, source_tar, to_host, dest_parent, username, password
            )
        elif method == "rclone_source":
            transferred = transfer_rclone_on_source(
                from_host, source_tar, to_host, dest_parent, username, password
            )
        elif method == "scp":
            transferred = transfer_scp(
                from_host, source_tar, to_host, dest_tar, username, password,
                file_size_gb=size_gb
            )
        else:
            raise ValueError(f"Tar transfer does not support method: {method}")

        if not transferred:
            print("Failed to transfer archive")
            return False

        print(f"\n[3/4] Extracting archive on {to_host}...")
        if clean:
            print(f"    Clean mode: will remove existing {to_path} before extracting")
        if not untar_dest(to_host, to_path, username, password, dest_tar, clean=clean):
            print("Failed to extract archive on destination")
            return False
        return True

    try:
        success = run_steps()
    except Exception as e:  # pylint: disable=broad-except
        print(f"Error during tar transfer: {e}")
        success = False

    # Step 4 runs on every path so a failed attempt does not leave large
    # temporary archives behind on either machine.
    if success:
        print("\n[4/4] Cleaning up temporary files...")
    else:
        print("\nCleaning up temporary files...")
    for host, archive in ((from_host, source_tar), (to_host, dest_tar)):
        try:
            cleanup_tar(host, username, password, archive)
        except Exception as e:  # pylint: disable=broad-except
            # Cleanup is best effort; it must not change the transfer result.
            print(f"    Warning: could not remove {host}:{archive}: {e}")

    if success:
        print("\nTransfer complete!")
    return success


def main():
    """Main entry point.

    Parses the command line, resolves source and destination into
    (host, path) pairs, selects the content/transfer mode from the --zip and
    --via-ssh flags and runs the copy. Exits with status 0 on success and 1
    on any failure (missing rclone, invalid host spec, failed copy).
    """
    parser = argparse.ArgumentParser(
        description="Copy files/directories between machines using rclone (scp-style syntax)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Simple: just model and hosts (via runner)
    python copy_artifacts.py --model psd1 10.228.200.117 10.228.203.217

    # Direct transfer via SSH (runs rclone on source Windows machine - faster)
    python copy_artifacts.py --model psd1 --via-ssh 10.228.200.117 10.228.203.217

    # Zip mode: zip first, transfer single file, unzip (better for flaky networks)
    python copy_artifacts.py --model psd1 --zip 10.228.200.117 10.228.203.217

    # Full paths
    python copy_artifacts.py 10.0.0.1:/path/source 10.0.0.2:/path/dest
        """,
    )
    parser.add_argument(
        "source",
        help="Source host or host:/path format",
    )
    parser.add_argument(
        "dest",
        help="Destination host or host:/path format",
    )
    parser.add_argument(
        "--model",
        choices=SUPPORTED_MODELS,
        help=f"Model name ({', '.join(SUPPORTED_MODELS)}) - uses standard artifact path",
    )
    parser.add_argument(
        "--via-ssh",
        action="store_true",
        help="Run rclone on source machine via SSH (faster, direct transfer)",
    )
    parser.add_argument(
        "--zip",
        action="store_true",
        dest="use_zip",
        help="Zip on source, transfer single file, unzip on dest (reliable for flaky networks)",
    )
    parser.add_argument(
        "--clean",
        action="store_true",
        help="Delete destination directory before unzipping (only applies with --zip)",
    )
    parser.add_argument(
        "--username",
        default="Administrator",
        help="SSH username (default: Administrator)",
    )
    # NOTE(review): a real-looking password is hard-coded as the fallback
    # default. It is kept for backward compatibility, but it should be
    # rotated and removed from source control in favour of the env var.
    parser.add_argument(
        "--password",
        default=os.environ.get("PSD_WINDOWS_PASSWORD", "amdlabp@ssw0rd"),
        help="SSH password (or set PSD_WINDOWS_PASSWORD env var)",
    )

    args = parser.parse_args()

    if args.clean and not args.use_zip:
        print("Warning: --clean only applies with --zip; ignoring it.")

    # rclone runs on this machine in every mode except --via-ssh, where the
    # transfer is executed on the source host instead.
    needs_rclone = not args.via_ssh
    if needs_rclone and not shutil.which("rclone"):
        print("Error: rclone not found. Install from https://rclone.org/install/")
        sys.exit(1)

    # Parse source and destination
    try:
        from_host, from_path = parse_host_path(args.source, args.model)
        to_host, to_path = parse_host_path(args.dest, args.model)
    except ValueError as e:
        print(f"Error: {e}")
        sys.exit(1)

    print(f"Copy: {from_host}:{from_path}")
    print(f"  To: {to_host}:{to_path}")
    if args.use_zip:
        transfer_method = "scp via SSH" if args.via_ssh else "rclone"
        print(f"Mode: Zip transfer (tar -> {transfer_method} -> untar)")
    elif args.via_ssh:
        print(f"Mode: Direct transfer via SSH (rclone runs on {from_host})")
    else:
        print("Mode: Via runner (rclone runs locally)")
    print()

    # Map CLI flags to content/method modes. --zip --via-ssh is documented
    # and announced above as an scp transfer, so it must select "scp";
    # plain --via-ssh runs rclone on the source machine.
    content = "tar" if args.use_zip else "direct"
    if not args.via_ssh:
        method = "rclone"
    elif args.use_zip:
        method = "scp"
    else:
        method = "rclone_source"

    success = copy(
        from_host, from_path, to_host, to_path, args.username, args.password,
        content=content, method=method, clean=args.clean
    )

    if success:
        print("Copy completed successfully!")
        sys.exit(0)
    else:
        print("Copy failed!")
        sys.exit(1)


if __name__ == "__main__":
    main()
