Source code for zea.data.convert.echonetlvh.precompute_crop

"""
Script to precompute cone parameters for the EchoNet-LVH dataset.
This script should be run separately before the main conversion process.
"""

import csv
import json
from pathlib import Path

from tqdm import tqdm

from zea import log
from zea.tools.fit_scan_cone import fit_and_crop_around_scan_cone



[docs]
def load_splits(source_dir):
    """
    Read MeasurementsList.csv and group the unique AVI filenames by split.

    A hashed filename may appear on several rows of the CSV; only the split
    given on its first row is used. Filenames are returned in order of first
    appearance, each with ".avi" appended.

    Args:
        source_dir: Source directory containing MeasurementsList.csv
    Returns:
        Dictionary with keys 'train', 'val', 'test', 'rejected' and values as lists of avi filenames
    Raises:
        KeyError: If a row names a split other than the four keys above, or
            the CSV lacks the "HashedFileName" / "split" columns
    """
    measurements_csv = Path(source_dir) / "MeasurementsList.csv"

    # Map every hashed filename to the split of the first row mentioning it
    first_split_by_name = {}
    with open(measurements_csv, newline="", encoding="utf-8") as handle:
        for record in csv.DictReader(handle):
            name = record["HashedFileName"]
            split_name = record["split"]
            if name not in first_split_by_name:
                first_split_by_name[name] = split_name

    # Distribute the unique filenames over the known splits
    splits = {key: [] for key in ("train", "val", "test", "rejected")}
    for name, split_name in first_split_by_name.items():
        splits[split_name].append(name + ".avi")
    return splits




[docs]
def find_avi_file(source_dir, hashed_filename, batch=None):
    """
    Locate the AVI file for a hashed filename inside the batch directories.

    Args:
        source_dir: Source directory containing BatchX subdirectories
        hashed_filename: Hashed filename (with or without .avi extension)
        batch: Specific batch directory to search in (e.g., "Batch2"), or None to search all batches

    Returns:
        Path to the AVI file if found, else None
    """
    # Normalise the name so that exactly one ".avi" suffix is appended below
    stem = hashed_filename[:-4] if hashed_filename.endswith(".avi") else hashed_filename
    target_name = f"{stem}.avi"
    root = Path(source_dir)

    # Either the single requested batch, or every directory matching "Batch*"
    candidate_dirs = [root / batch] if batch else root.glob("Batch*")

    for directory in candidate_dirs:
        candidate = directory / target_name
        if candidate.exists():
            return candidate
    return None




[docs]
def load_first_frame(avi_file):
    """
    Read the first frame of a video file and convert it to grayscale.

    OpenCV is imported inside the function, so it is only required when this
    function is actually called.

    Args:
        avi_file: Path to the video file

    Returns:
        First frame as numpy array of shape (H, W) and dtype np.uint8 (grayscale)

    Raises:
        ImportError: If OpenCV (cv2) cannot be imported
        ValueError: If no frame could be read from the file
    """
    try:
        import cv2
    except ImportError as exc:
        install_hint = (
            "OpenCV is required for loading video files. "
            "Please install it with 'pip install opencv-python' or "
            "'pip install opencv-python-headless'."
        )
        raise ImportError(install_hint) from exc

    # Grab a single frame and release the capture handle straight away
    capture = cv2.VideoCapture(str(avi_file))
    success, bgr_frame = capture.read()
    capture.release()

    if success:
        # Convert BGR to grayscale
        return cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2GRAY)

    raise ValueError(f"Failed to read first frame from {avi_file}")




[docs]
def precompute_cone_parameters(args):
    """
    Precompute and save cone parameters for all AVI files.

    This function loads the first frame from each AVI file, applies fit_scan_cone
    to determine cropping parameters, and saves these parameters to a CSV file
    for later use during the actual data conversion.

    Every processed file gets one row in the CSV. Files for which processing
    fails are logged and written with a status of "error: <message>" and empty
    parameter columns; they are left out of the JSON file, which only contains
    the successfully processed files keyed by AVI filename.

    Args:
        args: Argument parser namespace with the following attributes:
            src: Source directory containing EchoNet-LVH data
            dst: Destination directory to save cone parameters
            batch: Specific batch to process (e.g., "Batch2") or None for all
            max_files: Maximum number of files to process (or None for all)
            force: Whether to recompute parameters if they already exist
    Returns:
        Path to the CSV file containing cone parameters
    """

    source_path = Path(args.src)
    output_path = Path(args.dst)
    output_path.mkdir(parents=True, exist_ok=True)

    # Output files for cone parameters
    cone_params_csv = output_path / "cone_parameters.csv"
    cone_params_json = output_path / "cone_parameters.json"

    # Skip the (expensive) computation when a CSV is already present
    if cone_params_csv.exists() and not args.force:
        log.warning(f"Parameters already exist at {cone_params_csv}. Use --force to recompute.")
        return cone_params_csv

    # Get list of files to process
    splits = load_splits(source_path)

    files_to_process = []
    for split_files in splits.values():
        for avi_filename in split_files:
            # Strip .avi if present
            base_filename = avi_filename[:-4] if avi_filename.endswith(".avi") else avi_filename
            avi_file = find_avi_file(args.src, base_filename, batch=args.batch)
            if avi_file:
                files_to_process.append((avi_file, avi_filename))
            else:
                log.warning(
                    f"Could not find AVI file for {base_filename} in batch "
                    f"{args.batch if args.batch else 'any'}"
                )

    # Limit files if max_files is specified
    if args.max_files is not None:
        files_to_process = files_to_process[: args.max_files]
        log.info(f"Limited to processing {args.max_files} files due to max_files parameter")

    log.info(f"Computing cone parameters for {len(files_to_process)} files")

    # Parameters of the successfully processed files, keyed by AVI filename
    all_cone_params = {}

    # Names of the cone parameters copied from the fit result into the output
    param_fields = [
        "crop_left",
        "crop_right",
        "crop_top",
        "crop_bottom",
        "apex_x",
        "new_width",
        "new_height",
        "opening_angle",
    ]

    # CSV field names - only the essential parameters needed for cropping
    fieldnames = ["avi_filename", *param_fields, "status"]

    # Open CSV file for writing
    with open(cone_params_csv, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # Process each file
        for avi_file, avi_filename in tqdm(files_to_process, desc="Computing cone parameters"):
            try:
                # Load only the first frame of video using OpenCV directly
                first_frame = load_first_frame(avi_file)

                # Detect cone parameters
                _, full_cone_params = fit_and_crop_around_scan_cone(first_frame, return_params=True)

                # Only the horizontal crop bounds are validated against the frame width
                if (
                    full_cone_params["crop_left"] < 0
                    or full_cone_params["crop_right"] > first_frame.shape[1]
                ):
                    # Each literal ends with a space so the joined message reads correctly
                    raise ValueError(
                        "Computed crop exceeds frame dimensions, meaning that either cone detection "
                        "failed, due to e.g. DICOM artifacts present in the frame, or the full scan "
                        "cone is not visible in the frame."
                    )

                # Extract only the essential parameters
                essential_params = {"avi_filename": avi_filename}
                for field in param_fields:
                    essential_params[field] = full_cone_params[field]
                essential_params["status"] = "success"

                # Save to output CSV
                writer.writerow(essential_params)

                # Store in dictionary
                all_cone_params[avi_filename] = essential_params

            except Exception as e:
                # Deliberate best-effort: one bad file must not abort the whole run
                log.error(f"Error processing {avi_file}: {e}")

                # Failure record: every parameter column is None (written as empty)
                failure_record = dict.fromkeys(fieldnames)
                failure_record["avi_filename"] = avi_filename
                failure_record["status"] = f"error: {e}"

                writer.writerow(failure_record)

    # Also save as JSON for easier programmatic access
    # NOTE(review): json.dump requires the parameter values to be JSON-serializable;
    # confirm fit_and_crop_around_scan_cone returns plain Python numbers.
    with open(cone_params_json, "w", encoding="utf-8") as jsonfile:
        json.dump(all_cone_params, jsonfile)

    log.info(f"Cone parameters saved to {cone_params_csv} and {cone_params_json}")
    return cone_params_csv