Source code for dsafeatures.parsing

""" Parse a model from a CSV file. """

# Copyright (C) 2024 Juan Pablo Carbajal
# Copyright (C) 2024 Mariane Yvonne Schneider
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Author: Juan Pablo Carbajal <ajuanpi@gmail.com>
# Author: Mariane Yvonne Schneider <myschneider@meiru.ch>
from pathlib import Path
from functools import partial
from typing import Any
import pandas as pd
from sympy import Symbol, parse_expr, SparseMatrix, Matrix, Function



[docs]
def parse_symbols(
        data: pd.DataFrame,
        *,
        inplace: bool = False,
        cls=Symbol,
        assumptions: dict | None = None,
) -> pd.Series | None:
    r"""Parse variables table.

    Every entry of the ``latex`` column of ``data`` is passed to ``cls``,
    together with ``assumptions`` as keyword arguments, producing one
    symbolic object per row.

    Parameters
    ----------
    data:
        Table with variable information. It must have a ``latex`` column.
    inplace:
        If True, the parsed objects are written to the ``sympy`` column of
        ``data`` and nothing is returned. If False, ``data`` is not modified
        and the parsed objects are returned.
    cls:
        Callable applied to each ``latex`` entry (``Symbol`` by default).
    assumptions:
        Keyword arguments forwarded to ``cls`` on every call. ``None`` means
        no extra keyword arguments.

    Returns
    -------
    vars:
        parsed variables as a series named ``sympy``, or ``None`` when
        ``inplace`` is True.

    Raises
    ------
    ValueError
        If the same parsed object is produced by more than one row. The
        message lists the ``latex`` entries of the repeated rows.

    Examples
    --------
    >>> data = pd.DataFrame(data={'latex': [r'\dot{x}', r'\frac{a}{2}']})
    >>> parse_symbols(data)
    0        \dot{x}
    1    \frac{a}{2}
    Name: sympy, dtype: object

    >>> parse_symbols(data,inplace=True)
    >>> data
             latex        sympy
    0      \dot{x}      \dot{x}
    1  \frac{a}{2}  \frac{a}{2}

    """
    assumptions = {} if assumptions is None else assumptions
    _func = partial(cls, **assumptions)
    retval = data.latex.map(_func)

    # Symbols must be unique: the mask flags every repetition after the first.
    msk = retval.duplicated()
    if msk.any():
        dup = data.loc[msk, "latex"].to_list()
        raise ValueError(f"Duplicated symbols {dup}")

    if inplace:
        data.loc[:, "sympy"] = retval
        return None

    retval.name = "sympy"
    return retval




[docs]
def parse_equation(
        data: pd.DataFrame, *, context: dict = None, inplace: bool = False
) -> pd.Series | None:
    """Turn the ``equation`` column of a table into sympy expressions.

    Parameters
    ----------
    data:
        Table with an ``equation`` column; each entry is handed to
        ``parse_expr``.
    context:
        Mapping passed to ``parse_expr`` as ``local_dict``.
    inplace:
        If True, the result is written to the ``sympy_expr`` column of
        ``data`` and nothing is returned.

    Returns
    -------
    exprs:
        Parsed expressions as a series named ``sympy_expr``, or ``None``
        when ``inplace`` is True.

    Raises
    ------
    ValueError
        If the same parsed expression appears in more than one row. The
        message lists the index labels of the repeated rows.
    """
    # NOTE(review): parse_expr is documented to evaluate the text it is
    # given; equations should come from trusted model files only.
    parsed = data.equation.map(lambda text: parse_expr(text, local_dict=context))

    repeated = parsed.duplicated()
    if repeated.any():
        labels = data.index[repeated].to_list()
        raise ValueError(f"Duplicated equations {labels}")

    if not inplace:
        parsed.name = "sympy_expr"
        return parsed

    data.loc[:, "sympy_expr"] = parsed



# Keyword arguments forwarded to pandas.read_csv by read_model_file.
_default_csv_opt = {"sep": ";"}
"""Default CSV options."""



[docs]
def parse_matrix(data: pd.DataFrame, *, context: dict = None, on_rows: str = None):
    """Build a sympy sparse matrix from a table of expression strings.

    Parameters
    ----------
    data:
        Table whose cells are parsed one by one with ``parse_expr``.
    context:
        Mapping passed to ``parse_expr`` as ``local_dict``.
    on_rows:
        Name of the axis that should run along the rows; ``"state"`` when
        omitted. If that axis is found on the columns the table is
        transposed first. If no axis has that name the table is used as is.

    Returns
    -------
    matrix:
        ``SparseMatrix`` with the shape of the (possibly transposed) table.
    """
    if on_rows is None:
        on_rows = "state"

    axis_names = [axis.name for axis in data.axes]
    try:
        needs_transpose = axis_names.index(on_rows) != 0
    except ValueError:
        # TODO: log a warning for data not having an axis called on_rows
        needs_transpose = False
    if needs_transpose:
        data = data.T

    parsed = data.map(lambda cell: parse_expr(cell, local_dict=context))

    n_rows, n_cols = parsed.shape
    # SparseMatrix calls the function with sympy integers, hence the int().
    return SparseMatrix(
        n_rows, n_cols, lambda i, j: parsed.iloc[int(i), int(j)]
    )




[docs]
def read_model_file(fname: Path | str, *, is_matrix: bool = False) -> pd.DataFrame:
    """Load one model component from a CSV file.

    Parameters
    ----------
    fname:
        File to read. The module default CSV options are applied.
    is_matrix:
        If True the file is read as a matrix of strings: the first column
        is the index, the first two lines form the header (the top header
        level is then discarded) and empty cells become ``"0"``.

    Returns
    -------
    table:
        The loaded table, without the columns that are entirely empty.
    """
    if not is_matrix:
        table = pd.read_csv(fname, **_default_csv_opt)
    else:
        raw = pd.read_csv(
            fname, index_col=0, header=[0, 1], dtype=str, **_default_csv_opt
        )
        # Filling happens before the empty-column drop below, so matrix
        # columns that were entirely empty survive as columns of "0".
        table = raw.droplevel(0, axis=1).fillna("0")
    return table.dropna(how="all", axis=1)




[docs]
def parse_model(model: str, *, path: Path | str) -> tuple[dict[str, Any], dict[str, dict]]:
    """Parse all files from model.

    The following files are read from ``path``: ``{model}_states.csv``,
    ``{model}_parameters.csv``, ``{model}_processrates.csv``,
    ``{model}_matrix.csv`` and ``{model}_compositionmatrix.csv``.

    Parameters
    ----------
    model:
        Model prefix.
    path:
        Path to model folders. A plain string is accepted as well.

    Returns
    -------
    components:
        dictionary of model components like states, matrix, etc. Keys:
        ``states``, ``parameters``, ``processrates`` (tables with the parsed
        ``sympy`` column added), ``matrix``, ``compositionmatrix`` (tables
        as read), ``matrix_sympy``, ``compositionmatrix_sympy`` (parsed
        matrices), ``state_vector`` and ``rates_vector``.
    contexts:
        dictionary of context used for substitutions and other sympy operations.
        Keys ``states``, ``parameters`` and ``processrates`` map component
        names to their sympy objects; ``processrates_expr`` maps each rate,
        applied to the states it depends on, to its expression.
    """
    # read_model_file accepts strings, so do the same here: ``str / str``
    # would otherwise raise a TypeError when building the file names.
    path = Path(path)

    cp = {}
    cxt = {}
    X, p, r = "states", "parameters", "processrates"
    for c in (X, p, r):
        n_ = f"{model}_{c}.csv"
        d_ = read_model_file(path / n_)
        # Rates are parsed as functions so that they can be applied to the
        # states they depend on further below.
        parse_symbols(d_, inplace=True, cls=Function if c == r else Symbol)
        # store context
        cxt[c] = dict(zip(d_.name, d_.sympy))

        # parameter values to sympy
        if c == p:
            # Take the mask before the string conversion, which turns
            # missing values into text.
            msk = d_.value.isna()
            d_["value"] = d_.value.astype(str)
            # parameters without values must be in expressions
            d_.loc[msk, "value"] = d_.loc[msk, "expression"]
            # parse expressions
            _func = partial(parse_expr, local_dict=cxt[c])
            d_["value"] = d_.value.map(_func)

        # store model component
        cp[c] = d_

    # Rate equations may reference both states and parameters.
    parse_equation(cp[r], context=cxt[X] | cxt[p], inplace=True)

    # Assemble rates dependency on states
    dep_X = "dependency_states"
    cp[r][dep_X] = ""
    for idx, row in cp[r].iterrows():
        r_ = row.sympy_expr
        # Sorted by name so the argument order of each rate is deterministic.
        arg_ = tuple(sorted((x for x in cp[X].sympy if r_.has(x)), key=str))
        cp[r].at[idx, dep_X] = arg_

    cxt["processrates_expr"] = {
        n(a): e
        for n, a, e in zip(cp[r].sympy, cp[r].dependency_states, cp[r].sympy_expr)
    }

    # Load matrix
    n_ = f"{model}_matrix.csv"
    cp["matrix"] = read_model_file(path / n_, is_matrix=True)
    cp["matrix_sympy"] = parse_matrix(cp["matrix"], context=cxt[p])

    n_ = f"{model}_compositionmatrix.csv"
    cp["compositionmatrix"] = read_model_file(path / n_, is_matrix=True)
    cp["compositionmatrix_sympy"] = parse_matrix(
        cp["compositionmatrix"], context=cxt[p]
    )

    # State vector, ordered like the columns of the matrix table.
    X_ = cp[X].set_index("name")
    X_ = [X_.sympy[x] for x in cp["matrix"].columns]
    cp["state_vector"] = Matrix(X_)

    # Rates vector, ordered like the rows of the matrix table; each rate is
    # applied to the states it depends on.
    r_ = cp[r].set_index("name")
    r_ = [r_.sympy[x](*r_[dep_X][x]) for x in cp["matrix"].index]
    cp["rates_vector"] = Matrix(r_)

    return cp, cxt