Source code for sorcha.readers.EphemerisReader

import logging
import sys

from sorcha.readers.CSVReader import CSVDataReader
from sorcha.readers.HDF5Reader import HDF5DataReader
from sorcha.readers.ObjectDataReader import ObjectDataReader



[docs]
class EphemerisDataReader(ObjectDataReader):
    """A class to read in ephemeris from an external ephemeris file.

    Instead of subclassing the various readers (CSV, HDF5, etc.) individually, this class instantiates
    one of those classes in an internal ``reader`` attribute. As such all reading is passed off to the
    ``reader`` object this object owns. While this adds a level of indirection, it allows us to support
    a cross product of N file types from M ephemeris generators with M + N readers instead of M * N.
    """

    def __init__(self, filename, inputformat, **kwargs):
        """Create the underlying file reader for an ephemeris file.

        Parameters
        -----------
        filename : string
            location/name of the data file.

        inputformat : string
            format of input file ("whitespace"/"comma"/"csv"/"h5"/"hdf5"/"HDF5").

        **kwargs : dictionary, optional
            Extra arguments. These are forwarded both to the parent class
            constructor and to the underlying file reader.

        Raises
        -----------
        SystemExit
            If ``inputformat`` is not one of the recognised formats. The
            error is logged before exiting.
        """
        super().__init__(**kwargs)

        pplogger = logging.getLogger(__name__)

        # The file-type specific reader that all read calls are delegated to.
        self.reader = None

        if inputformat in ("whitespace", "comma", "csv"):
            # The format name doubles as the separator selector of the CSV reader.
            self.reader = CSVDataReader(filename, sep=inputformat, **kwargs)
        elif inputformat in ("h5", "hdf5", "HDF5"):
            self.reader = HDF5DataReader(filename, **kwargs)
        else:
            message = f"ERROR: EphemerisDataReader: unknown format for ephemeris simulation results ({inputformat})."
            pplogger.error(message)
            sys.exit(message)

    def get_reader_info(self):
        """Return a string identifying the current reader name
        and input information (for logging and output).

        Returns
        --------
        : string
            The reader information: ``"EphemerisDataReader|"`` followed by
            the information string of the underlying reader.
        """
        return f"EphemerisDataReader|{self.reader.get_reader_info()}"

    def _read_rows_internal(self, block_start=0, block_size=None, **kwargs):
        """Reads in a set number of rows from the input by delegating to
        the underlying reader's ``read_rows``.

        Parameters
        -----------
        block_start : int, optional
            The 0-indexed row number from which
            to start reading the data. For example in a CSV file
            block_start=2 would skip the first two lines after the header
            and return data starting on row=2. Default =0

        block_size : int, optional
            the number of rows to read in.
            Use block_size=None to read in all available data.
            Default = None

        **kwargs : dictionary, optional
            Extra arguments, passed through to the underlying reader.

        Returns
        -----------
        res_df : Pandas dataframe
            dataframe of the object data.

        """
        return self.reader.read_rows(block_start, block_size, **kwargs)

    def _read_objects_internal(self, obj_ids, **kwargs):
        """Read in a chunk of data corresponding to all rows for
        a given set of object IDs by delegating to the underlying
        reader's ``read_objects``.

        Parameters
        -----------
        obj_ids : list
            A list of object IDs to use.

        **kwargs : dictionary, optional
            Extra arguments, passed through to the underlying reader.

        Returns
        -----------
        res_df : pandas dataframe
            The dataframe for the object data.
        """
        return self.reader.read_objects(obj_ids, **kwargs)

    def _process_and_validate_input_table(self, input_table, **kwargs):
        """Perform ephemeris-specific processing and validation on the input table.

        Column names are stripped of surrounding whitespace, the ``V`` and
        ``V(H=0)`` columns are dropped if present, and the remaining column
        names are checked against the expected ephemeris columns.

        Parameters
        -----------
        input_table : Pandas dataframe
            A loaded table.

        **kwargs : dictionary, optional
            Extra arguments (unused here).

        Returns
        -----------
        input_table : Pandas dataframe
            A dataframe holding only the required ephemeris columns, in the
            order in which they are listed in this method. Optional columns
            are accepted in the input but are not returned.

        Raises
        -----------
        SystemExit
            If the table contains a column that is neither required nor
            optional, or if a required column is missing. The error is
            logged before exiting.

        """
        # We do not call reader.process_and_validate_input_table() or
        # super().process_and_validate_input_table() because reader's read functions have
        # already checked the table.

        input_table = input_table.rename(columns=lambda x: x.strip())
        input_table = input_table.drop(["V", "V(H=0)"], axis=1, errors="ignore")

        ephem_cols = [
            "ObjID",
            "FieldID",
            "fieldMJD_TAI",
            "Range_LTC_km",
            "RangeRate_LTC_km_s",
            "RA_deg",
            "RARateCosDec_deg_day",
            "Dec_deg",
            "DecRate_deg_day",
            "Obj_Sun_x_LTC_km",
            "Obj_Sun_y_LTC_km",
            "Obj_Sun_z_LTC_km",
            "Obj_Sun_vx_LTC_km_s",
            "Obj_Sun_vy_LTC_km_s",
            "Obj_Sun_vz_LTC_km_s",
            "Obs_Sun_x_km",
            "Obs_Sun_y_km",
            "Obs_Sun_z_km",
            "Obs_Sun_vx_km_s",
            "Obs_Sun_vy_km_s",
            "Obs_Sun_vz_km_s",
            "phase_deg",
        ]

        optional_cols = ["fieldJD_TDB"]

        present_cols = set(input_table.columns.values)
        unexpected_cols = present_cols - set(ephem_cols) - set(optional_cols)
        # A missing required column would otherwise surface as a bare KeyError
        # from the column selection below instead of the intended error message.
        missing_cols = set(ephem_cols) - present_cols

        if unexpected_cols or missing_cols:
            message = "ERROR: EphemerisDataReader: column headings do not match expected ephemeris column headings. Check format of file."
            pplogger = logging.getLogger(__name__)
            pplogger.error(message)
            sys.exit(message)

        # Return only the columns of interest.
        return input_table[ephem_cols]





[docs]
def read_full_ephemeris_table(filename, inputformat):
    """Testing helper: load a whole ephemeris file into a single table.

    Builds an ``EphemerisDataReader`` for the given file and calls its
    ``read_rows`` method with default arguments.

    Parameters
    -----------
    filename : string
        location/name of the data file.

    inputformat : string
        format of input file ("whitespace"/"comma"/"csv"/"h5"/"hdf5").

    Returns
    -----------
    res_df : pandas dataframe
        dataframe of the object data.

    """
    return EphemerisDataReader(filename, inputformat).read_rows()