Source code for ssapy_toolkit.IO.read_3le_by_bit

import numpy as np
import pandas as pd
from typing import List, Dict, Callable, Optional

def _cast_value(raw: str, kind: str) -> object:
    """
    Convert a fixed-width slice to the requested type.

    Parameters
    ----------
    raw : the text sliced out of a record line (may be padded with blanks).
    kind : one of 'str', 'int', 'float', 'tleexp'.
        - 'str'    returns the slice with surrounding whitespace removed.
        - 'int'    parses a base-10 integer.
        - 'float'  parses a decimal number; a leading '.' (e.g. '.00020137')
                   is accepted.
        - 'tleexp' parses the TLE exponent format: a mantissa with an implied
                   leading decimal point followed by a signed 1- or 2-digit
                   power of ten, e.g. '16538-3' -> 0.00016538.

    Returns
    -------
    The converted value. Blank numeric slices and slices that cannot be
    parsed yield ``np.nan`` rather than raising.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported kinds. This is checked before
        looking at ``raw`` so a misspelled kind is reported even when the
        slice happens to be blank.
    """
    if kind not in ("str", "int", "float", "tleexp"):
        raise ValueError(f"Unknown kind: {kind}")

    s = raw.strip()
    if kind == "str":
        return s
    if s == "" or s == ".":
        return np.nan

    if kind == "int":
        try:
            return int(s)
        except ValueError:
            return np.nan

    if kind == "float":
        # float() already understands a bare leading '.', with or without sign.
        try:
            return float(s)
        except ValueError:
            return np.nan

    # kind == "tleexp"
    # '16538-3' => mantissa 0.16538, exponent -3
    # '00000-0' => 0.0
    t = s.replace(" ", "")
    if set(t) <= {"0"} or (t.endswith("-0") and set(t[:-2]) <= {"0"}):
        return 0.0

    # Split mantissa and exponent at the sign nearest the end:
    # prefer a 1-digit exponent, fall back to a 2-digit one.
    if len(t) >= 2 and t[-2] in "+-":
        mant, exps = t[:-2], t[-2:]
    elif len(t) >= 3 and t[-3] in "+-":
        mant, exps = t[:-3], t[-3:]
    else:
        # Best effort: treat the last character as an unsigned exponent.
        mant, exps = t[:-1], "+" + t[-1]

    sign = "-" if mant.startswith("-") else ""
    # Left-pad to the 5 mantissa columns (blank columns count as zeros) and
    # ignore anything beyond them.
    digits = mant.lstrip("+-").rjust(5, "0")[:5]
    try:
        exponent = int(exps)
        # Convert one decimal literal instead of multiplying two floats, so
        # the result is the correctly rounded value of the written number.
        return float(f"{sign}0.{digits}e{exponent}")
    except ValueError:
        return np.nan


def parse_fixed_width_file(
    file_path: str,
    fields: List[Dict],
    record_lines: int = 1,
    line_selector: Optional[Callable[[str], bool]] = None,
    encoding: str = "utf-8",
    errors: str = "ignore",
) -> pd.DataFrame:
    """
    Parse a text file of fixed-width records by slicing character columns.

    Parameters
    ----------
    file_path : path to the text file.
    fields : list of dicts describing slices. Each dict:
        {
          'name': 'column_name',
          'line': 0,                 # 0-based line index within a record
          'start': 0, 'end': 10,     # 0-based [start:end) slice
          'type': 'str'|'int'|'float'|'tleexp'   # defaults to 'str'
        }
        'line' is only read when record_lines > 1.
    record_lines : number of lines that form one logical record (e.g., 2 for TLE).
    line_selector : optional callable(line) -> bool to include only some lines
                    when record_lines == 1. Ignored otherwise.
    encoding, errors : file decoding controls.

    Returns
    -------
    pandas.DataFrame with one row per record and one column per field.
    When no record is found the frame is empty but still carries the
    field names as columns.

    Raises
    ------
    ValueError
        If record_lines is smaller than 1.
    IndexError
        If record_lines > 1 and a field's 'line' is outside
        [0, record_lines).

    Notes
    -----
    - A leading UTF-8 byte-order mark on the first line is removed so it
      does not shift the column positions of the first record.
    - With record_lines > 1, blank lines are skipped and an incomplete
      trailing record is dropped.
    - Slices that run past the end of a short line yield empty text.
    """
    if record_lines < 1:
        raise ValueError("record_lines must be >= 1")

    multi_line = record_lines > 1
    if multi_line:
        # The schema does not change while reading, so check it once.
        for spec in fields:
            if not 0 <= spec["line"] < record_lines:
                raise IndexError("field 'line' out of range")

    def _build_row(lines: List[str]) -> Dict:
        # Apply every field slice to the line it belongs to.
        row = {}
        for spec in fields:
            src = lines[spec["line"]] if multi_line else lines[0]
            text = src[spec["start"]:spec["end"]]
            row[spec["name"]] = _cast_value(text, spec.get("type", "str"))
        return row

    rows = []
    buf = []
    with open(file_path, "r", encoding=encoding, errors=errors) as f:
        for lineno, raw in enumerate(f):
            line = raw.rstrip("\r\n")
            if lineno == 0 and line.startswith("\ufeff"):
                line = line[1:]

            if multi_line:
                if line.strip() == "":
                    continue
                buf.append(line)
                if len(buf) == record_lines:
                    rows.append(_build_row(buf))
                    buf = []
            elif not line_selector or line_selector(line):
                rows.append(_build_row([line]))
    # Any lines left in buf form an incomplete trailing record and are ignored.

    if not rows:
        # Keep the declared schema visible even when nothing was parsed.
        names = list(dict.fromkeys(spec["name"] for spec in fields))
        return pd.DataFrame(columns=names)
    return pd.DataFrame(rows)


# --------- Ready-to-use schema for classic 2-line TLE ---------
# Column ranges below are 0-based [start:end), matching the standard TLE spec.
# Each entry below is (name, record line, start, end, type); the comprehension
# expands it into the dict form consumed by the fixed-width parsers.
TLE_FIELDS: List[Dict] = [
    {"name": name, "line": line, "start": start, "end": end, "type": kind}
    for name, line, start, end, kind in (
        # Line 1
        ("satnum",           0, 2,  7,  "int"),
        ("classification",   0, 7,  8,  "str"),
        ("intldes_year",     0, 9,  11, "int"),
        ("intldes_launch",   0, 11, 14, "int"),
        ("intldes_piece",    0, 14, 17, "str"),
        ("epoch_year",       0, 18, 20, "int"),
        ("epoch_day",        0, 20, 32, "float"),
        ("ndot_over_2",      0, 33, 43, "float"),
        ("nddot_over_6",     0, 44, 52, "tleexp"),
        ("bstar",            0, 53, 61, "tleexp"),
        ("elset_type",       0, 62, 63, "int"),
        ("elset_num",        0, 64, 68, "int"),
        # Line 2
        ("inclination_deg",  1, 8,  16, "float"),
        ("raan_deg",         1, 17, 25, "float"),
        ("eccentricity",     1, 26, 33, "float"),  # implied decimal
        ("arg_perigee_deg",  1, 34, 42, "float"),
        ("mean_anomaly_deg", 1, 43, 51, "float"),
        ("mean_motion",      1, 52, 63, "float"),
        ("rev_number",       1, 63, 68, "int"),
    )
]


[docs]
def read_3le_by_bit(file_path: str) -> pd.DataFrame:
    """
    Read a TLE/3LE file into a DataFrame using the TLE_FIELDS column schema.

    Lines are classified by their first character: '0' rows (optional
    object names) are skipped, each '1' row starts a new record, and a '2'
    row completes it only when its satellite number (columns [2:7]) equals
    that of the pending '1' row. A '2' row that does not match discards the
    pending '1' row so lines from different objects are never combined.
    All other lines are ignored.

    Parameters
    ----------
    file_path : path to the text file (decoded as UTF-8, undecodable bytes
                are dropped).

    Returns
    -------
    pandas.DataFrame
        One row per matched line-1/line-2 pair and one column per entry of
        TLE_FIELDS. The columns are present even when no pair was found.
        'eccentricity' is returned as a fraction: the 7 digits stored in
        the file carry an implied leading decimal point.
    """
    def _get_satnum(line: str) -> str:
        # columns 3-7 (1-based) => [2:7] zero-based
        return line[2:7].strip()

    # Collect well-formed (L1, L2) pairs
    pairs = []
    pending_l1 = None
    pending_sat = None

    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        for raw in f:
            line = raw.rstrip("\n\r")
            if not line:
                continue

            # Strip UTF-8 BOM if present on first line
            if line.startswith("\ufeff"):
                line = line.lstrip("\ufeff")

            tag = line[:1]
            if tag == "1":
                # start a new record (replaces any unmatched earlier line 1)
                pending_l1 = line
                pending_sat = _get_satnum(line)
            elif tag == "2" and pending_l1 is not None:
                if _get_satnum(line) == pending_sat:
                    pairs.append((pending_l1, line))
                # whether matched or not, clear pending to avoid cross-pairing
                pending_l1 = None
                pending_sat = None
            # name rows ('0') and any other tags are ignored

    # Parse each pair with the fixed-width schema. Plain slicing is already
    # safe on short lines: it yields '' (or a shorter string) instead of raising.
    rows = [
        {
            spec["name"]: _cast_value(
                (l1 if spec["line"] == 0 else l2)[spec["start"]:spec["end"]],
                spec.get("type", "str"),
            )
            for spec in TLE_FIELDS
        }
        for l1, l2 in pairs
    ]

    # Passing the column names keeps the schema stable for empty input.
    columns = list(dict.fromkeys(spec["name"] for spec in TLE_FIELDS))
    df = pd.DataFrame(rows, columns=columns)

    # Eccentricity is given without a leading decimal in TLE line 2
    # (7 digits in classic TLE). Dividing by the exactly representable 1e7
    # gives the correctly rounded fraction; NaN entries stay NaN.
    if "eccentricity" in df.columns and not df.empty:
        df["eccentricity"] = df["eccentricity"] / 1e7

    return df