"""
Script to precompute cone parameters for the EchoNet-LVH dataset.
This script should be run separately before the main conversion process.
"""
import csv
import json
from pathlib import Path
from tqdm import tqdm
from zea import log
from zea.tools.fit_scan_cone import fit_and_crop_around_scan_cone
def load_splits(source_dir):
    """Load the train/val/test/rejected split assignment from MeasurementsList.csv.

    Args:
        source_dir: Source directory containing ``MeasurementsList.csv``.

    Returns:
        dict: Keys ``'train'``, ``'val'``, ``'test'``, ``'rejected'`` mapping to
        lists of ``.avi`` filenames belonging to that split.
    """
    measurements_csv = Path(source_dir) / "MeasurementsList.csv"
    result = {name: [] for name in ("train", "val", "test", "rejected")}
    # The CSV has one row per measurement, so a file can appear many times;
    # the first occurrence decides which split the file belongs to.
    assigned = {}
    with open(measurements_csv, newline="", encoding="utf-8") as handle:
        for record in csv.DictReader(handle):
            name = record["HashedFileName"]
            if name not in assigned:
                assigned[name] = record["split"]
    for name, split_name in assigned.items():
        result[split_name].append(f"{name}.avi")
    return result
def find_avi_file(source_dir, hashed_filename, batch=None):
    """Locate an AVI file inside the Batch* subdirectories of *source_dir*.

    Args:
        source_dir: Source directory containing ``BatchX`` subdirectories.
        hashed_filename: Hashed filename, with or without the ``.avi`` suffix.
        batch: Specific batch directory to search in (e.g. ``"Batch2"``), or
            ``None`` to search every ``Batch*`` directory.

    Returns:
        ``pathlib.Path`` to the AVI file if found, else ``None``.
    """
    # Normalize to the bare hash so the suffix is appended exactly once.
    stem = hashed_filename[:-4] if hashed_filename.endswith(".avi") else hashed_filename
    if batch:
        search_dirs = [Path(source_dir) / batch]
    else:
        search_dirs = Path(source_dir).glob("Batch*")
    for directory in search_dirs:
        candidate = directory / f"{stem}.avi"
        if candidate.exists():
            return candidate
    return None
def load_first_frame(avi_file):
    """Read only the first frame of a video and return it as grayscale.

    Args:
        avi_file: Path to the video file.

    Returns:
        First frame as a numpy array of shape (H, W) and dtype ``np.uint8``.

    Raises:
        ImportError: If OpenCV is not installed.
        ValueError: If the first frame cannot be read from the file.
    """
    # OpenCV is an optional dependency; import lazily so the rest of the
    # script works without it.
    try:
        import cv2
    except ImportError as exc:
        raise ImportError(
            "OpenCV is required for loading video files. "
            "Please install it with 'pip install opencv-python' or "
            "'pip install opencv-python-headless'."
        ) from exc
    capture = cv2.VideoCapture(str(avi_file))
    success, frame_bgr = capture.read()
    capture.release()
    if not success:
        raise ValueError(f"Failed to read first frame from {avi_file}")
    # OpenCV decodes to BGR; collapse to a single grayscale channel.
    return cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
def precompute_cone_parameters(args):
    """
    Precompute and save cone parameters for all AVI files.

    This function loads the first frame from each AVI file, applies
    ``fit_and_crop_around_scan_cone`` to determine cropping parameters, and
    saves these parameters to a CSV file (plus a JSON sidecar) for later use
    during the actual data conversion.

    Args:
        args: Argument parser namespace with the following attributes:
            src: Source directory containing EchoNet-LVH data
            dst: Destination directory to save cone parameters
            batch: Specific batch to process (e.g., "Batch2") or None for all
            max_files: Maximum number of files to process (or None for all)
            force: Whether to recompute parameters if they already exist

    Returns:
        Path to the CSV file containing cone parameters
    """
    source_path = Path(args.src)
    output_path = Path(args.dst)
    output_path.mkdir(parents=True, exist_ok=True)

    # Output files: CSV is consumed by the conversion step; JSON is a
    # convenience copy for programmatic access.
    cone_params_csv = output_path / "cone_parameters.csv"
    cone_params_json = output_path / "cone_parameters.json"

    # Skip recomputation unless explicitly forced.
    if cone_params_csv.exists() and not args.force:
        log.warning(f"Parameters already exist at {cone_params_csv}. Use --force to recompute.")
        return cone_params_csv

    # Collect (path, filename) pairs for every file listed in any split.
    splits = load_splits(source_path)
    files_to_process = []
    for split_files in splits.values():
        for avi_filename in split_files:
            # Strip .avi if present
            base_filename = avi_filename[:-4] if avi_filename.endswith(".avi") else avi_filename
            avi_file = find_avi_file(args.src, base_filename, batch=args.batch)
            if avi_file:
                files_to_process.append((avi_file, avi_filename))
            else:
                log.warning(
                    f"Could not find AVI file for {base_filename} in batch "
                    f"{args.batch if args.batch else 'any'}"
                )

    # Limit files if max_files is specified
    if args.max_files is not None:
        files_to_process = files_to_process[: args.max_files]
        log.info(f"Limited to processing {args.max_files} files due to max_files parameter")
    log.info(f"Computing cone parameters for {len(files_to_process)} files")

    # Dictionary to store parameters for each file (mirrors the CSV rows).
    all_cone_params = {}
    # CSV field names - only the essential parameters needed for cropping
    fieldnames = [
        "avi_filename",
        "crop_left",
        "crop_right",
        "crop_top",
        "crop_bottom",
        "apex_x",
        "new_width",
        "new_height",
        "opening_angle",
        "status",
    ]

    with open(cone_params_csv, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for avi_file, avi_filename in tqdm(files_to_process, desc="Computing cone parameters"):
            try:
                # Only the first frame is needed to fit the cone geometry.
                first_frame = load_first_frame(avi_file)
                _, full_cone_params = fit_and_crop_around_scan_cone(first_frame, return_params=True)
                # Sanity check: a crop outside the frame means the fit failed.
                if (
                    full_cone_params["crop_left"] < 0
                    or full_cone_params["crop_right"] > first_frame.shape[1]
                ):
                    # BUG FIX: the original implicit string concatenation was
                    # missing separating spaces ("detectionfailed", "scancone").
                    raise ValueError(
                        "Computed crop exceeds frame dimensions, meaning that either cone detection "
                        "failed, due to e.g. DICOM artifacts present in the frame, or the full scan "
                        "cone is not visible in the frame."
                    )
                # Extract only the essential parameters
                essential_params = {
                    "avi_filename": avi_filename,
                    "crop_left": full_cone_params["crop_left"],
                    "crop_right": full_cone_params["crop_right"],
                    "crop_top": full_cone_params["crop_top"],
                    "crop_bottom": full_cone_params["crop_bottom"],
                    "apex_x": full_cone_params["apex_x"],
                    "new_width": full_cone_params["new_width"],
                    "new_height": full_cone_params["new_height"],
                    "opening_angle": full_cone_params["opening_angle"],
                    "status": "success",
                }
                writer.writerow(essential_params)
                all_cone_params[avi_filename] = essential_params
            except Exception as e:
                # Record the failure in the CSV so downstream tooling can
                # see which files were skipped and why.
                log.error(f"Error processing {avi_file}: {str(e)}")
                failure_record = {
                    "avi_filename": avi_filename,
                    "status": f"error: {str(e)}",
                }
                # Fill missing fields with None
                for field in fieldnames:
                    failure_record.setdefault(field, None)
                writer.writerow(failure_record)

    # Also save as JSON for easier programmatic access (indented for humans).
    with open(cone_params_json, "w", encoding="utf-8") as jsonfile:
        json.dump(all_cone_params, jsonfile, indent=2)
    log.info(f"Cone parameters saved to {cone_params_csv} and {cone_params_json}")
    return cone_params_csv