#!/usr/bin/env python3
"""
h5_to_csv.py
Library + script:
- Import and call: hdf5_to_csv_per_key("input.h5")
- Or run directly: python h5_to_csv.py
(uses the hard-coded path in main()).
"""
from __future__ import annotations
import csv
import re
import sys
from pathlib import Path
from typing import Any, List, Tuple
import h5py
def _stringify(x: Any) -> str:
if x is None:
return ""
if isinstance(x, (bytes, bytearray)):
try:
return x.decode("utf-8")
except Exception:
return repr(x)
return str(x)
[docs]
def iter_datasets(h5: h5py.File) -> List[Tuple[str, h5py.Dataset]]:
found: List[Tuple[str, h5py.Dataset]] = []
def visitor(name: str, obj: Any) -> None:
if isinstance(obj, h5py.Dataset):
key = ("/" + name).replace("//", "/")
found.append((key, obj))
h5.visititems(visitor)
found.sort(key=lambda t: t[0])
return found
[docs]
def dataset_to_python(ds: h5py.Dataset) -> Any:
return ds[()]
[docs]
def place_cell(grid: List[List[str]], r: int, c: int, val: str) -> None:
while len(grid) <= r:
grid.append([])
row = grid[r]
if len(row) <= c:
row.extend([""] * (c + 1 - len(row)))
row[c] = val
[docs]
def normalize_grid(grid: List[List[str]]) -> List[List[str]]:
width = max((len(r) for r in grid), default=0)
for r in grid:
if len(r) < width:
r.extend([""] * (width - len(r)))
return grid
[docs]
def write_column(grid: List[List[str]], top: int, col: int, key: str, values: List[Any]) -> int:
place_cell(grid, top, col, key)
for i, v in enumerate(values):
place_cell(grid, top + 1 + i, col, _stringify(v))
return 1 + len(values)
[docs]
def write_block(grid: List[List[str]], top: int, left: int, key: str, arr2d: Any) -> Tuple[int, int]:
m = int(arr2d.shape[0])
n = int(arr2d.shape[1])
place_cell(grid, top, left, key)
for i in range(m):
for j in range(n):
place_cell(grid, top + 1 + i, left + j, _stringify(arr2d[i, j]))
return (1 + m, max(1, n))
[docs]
def key_to_filename(key: str, max_len: int = 180) -> str:
s = key.strip()
if s.startswith("/"):
s = s[1:]
if not s:
s = "root"
s = re.sub(r"[\/\\:\*\?\"<>\|\s]+", "_", s)
s = re.sub(r"_{2,}", "_", s).strip("_")
if len(s) > max_len:
s = s[:max_len].rstrip("_")
return s or "dataset"
[docs]
def write_grid_csv(path: Path, grid: List[List[str]], encoding: str = "utf-8") -> None:
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("w", newline="", encoding=encoding) as f:
w = csv.writer(f)
w.writerows(grid)
[docs]
def write_dataset_csv(out_path: Path, key: str, data: Any, encoding: str = "utf-8") -> Tuple[bool, str | None]:
"""
Returns (written, warning). written=False if skipped due to ndim>2.
"""
shape = getattr(data, "shape", ())
ndim = len(shape) if shape is not None else 0
grid: List[List[str]] = []
if ndim == 0:
write_column(grid, 0, 0, key, [data])
elif ndim == 1:
write_column(grid, 0, 0, key, list(data))
elif ndim == 2:
write_block(grid, 0, 0, key, data)
else:
return False, f"Ignoring {key}: ndim={ndim} shape={shape}"
write_grid_csv(out_path, normalize_grid(grid), encoding=encoding)
return True, None
[docs]
def hdf5_to_csv_per_key(h5_filename: str | Path, *, encoding: str = "utf-8") -> Path:
"""
Callable API (for importing).
Always:
- creates directory <input_stem>/ next to the input file
- writes one CSV per dataset key path inside that directory
Returns:
Path to the output directory.
"""
h5_path = Path(h5_filename)
out_dir = h5_path.parent / h5_path.stem
out_dir.mkdir(parents=True, exist_ok=True)
with h5py.File(h5_path, "r") as h5:
for key, ds in iter_datasets(h5):
data = dataset_to_python(ds)
out_csv = out_dir / (key_to_filename(key) + ".csv")
written, warning = write_dataset_csv(out_csv, key, data, encoding=encoding)
if warning:
print("WARNING:", warning, file=sys.stderr)
return out_dir
[docs]
def main() -> None:
# Hard-coded example path for running this file directly.
# Change this to your real file:
h5_path = Path("/home/yeager7/HP__Subset_10MHz_500ns/HP__Subset_10MHz_500ns/3_3_26_500nsPulse_10MHzSeparation_HDF5/rep_1/Limiter=DUT2__Amp=DUT6.h5")
if not h5_path.exists():
raise SystemExit(f"HDF5 file not found: {h5_path}")
out_dir = hdf5_to_csv_per_key(h5_path)
print(f"Wrote: {out_dir}")
if __name__ == "__main__":
main()