Source code for dsafeatures.parsing

""" Parse a model from a CSV file. """

# Copyright (C) 2024 Juan Pablo Carbajal
# Copyright (C) 2024 Mariane Yvonne Schneider
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Author: Juan Pablo Carbajal <ajuanpi@gmail.com>
# Author: Mariane Yvonne Schneider <myschneider@meiru.ch>
import logging
from functools import partial
from pathlib import Path
from typing import Any

import pandas as pd
from sympy import Symbol, parse_expr, SparseMatrix, Matrix, Function


def parse_symbols(
    data: pd.DataFrame,
    *,
    inplace: bool = False,
    cls=Symbol,
    assumptions: dict | None = None,
) -> pd.Series | None:
    r"""Parse variables table.

    Every entry of the ``latex`` column of ``data`` is converted into a
    symbolic object by calling ``cls(entry, **assumptions)``.

    Parameters
    ----------
    data:
        Table with variable information. It must have a ``latex`` column.
    inplace:
        If ``True`` the parsed objects are stored in ``data`` as a column
        named ``sympy`` and nothing is returned.
    cls:
        Callable used to build each symbolic object from its latex string.
    assumptions:
        Keyword arguments forwarded to ``cls`` on every call.
        ``None`` means no extra keyword arguments.

    Returns
    -------
    vars:
        Parsed variables as a series named ``sympy``, or ``None`` when
        ``inplace`` is ``True``.

    Raises
    ------
    ValueError
        If two rows of ``data`` produce equal parsed objects.

    Examples
    --------
    >>> data = pd.DataFrame(data={'latex': [r'\dot{x}', r'\frac{a}{2}']})
    >>> parse_symbols(data)
    0        \dot{x}
    1    \frac{a}{2}
    Name: sympy, dtype: object
    >>> parse_symbols(data,inplace=True)
    >>> data
             latex        sympy
    0      \dot{x}      \dot{x}
    1  \frac{a}{2}  \frac{a}{2}
    """
    assumptions = {} if assumptions is None else assumptions
    make_symbol = partial(cls, **assumptions)
    symbols = data["latex"].map(make_symbol)

    # Equal symbols would silently collapse into one entry in any context
    # dictionary built from this table, so reject them early.
    repeated = symbols.duplicated()
    if repeated.any():
        dup = data.loc[repeated, "latex"].to_list()
        raise ValueError(f"Duplicated symbols {dup}")

    if inplace:
        data.loc[:, "sympy"] = symbols
        return None

    symbols.name = "sympy"
    return symbols
def parse_equation(
    data: pd.DataFrame, *, context: dict = None, inplace: bool = False
) -> pd.Series | None:
    """Parse equations from data.

    Every entry of the ``equation`` column of ``data`` is parsed with
    :func:`sympy.parse_expr`, using ``context`` as its ``local_dict``.

    Parameters
    ----------
    data:
        Table with an ``equation`` column holding the expressions to parse.
    context:
        Mapping passed to the parser as ``local_dict``.
    inplace:
        If ``True`` the parsed expressions are stored in ``data`` as a column
        named ``sympy_expr`` and nothing is returned.

    Returns
    -------
    exprs:
        Parsed expressions as a series named ``sympy_expr``, or ``None`` when
        ``inplace`` is ``True``.

    Raises
    ------
    ValueError
        If two rows parse to equal expressions; the message lists the index
        labels of the repeated rows.
    """
    expressions = data["equation"].map(
        lambda text: parse_expr(text, local_dict=context)
    )

    repeated = expressions.duplicated()
    if repeated.any():
        dup = data.index[repeated].to_list()
        raise ValueError(f"Duplicated equations {dup}")

    if not inplace:
        expressions.name = "sympy_expr"
        return expressions

    data.loc[:, "sympy_expr"] = expressions
    return None
_default_csv_opt = dict(sep=";") """Default CSV options."""
def parse_matrix(
    data: pd.DataFrame, *, context: dict | None = None, on_rows: str | None = None
):
    """Parse matrix from DataFrame.

    Every cell of ``data`` is parsed with :func:`sympy.parse_expr`, using
    ``context`` as its ``local_dict``, and the result is returned as a sympy
    sparse matrix.

    Parameters
    ----------
    data:
        Table whose cells hold the expressions to parse.
    context:
        Mapping passed to the parser as ``local_dict``.
    on_rows:
        Name of the axis of ``data`` that must end up on the rows of the
        matrix. Defaults to ``"state"``. If that axis is the column axis of
        ``data`` the table is transposed before parsing. If no axis has that
        name a warning is logged and ``data`` is used as given.

    Returns
    -------
    matrix:
        :class:`sympy.SparseMatrix` with the parsed entries.
    """
    on_rows = "state" if on_rows is None else on_rows
    axis_names = [axis.name for axis in data.axes]

    try:
        position = axis_names.index(on_rows)
    except ValueError:
        # Best effort: keep the orientation of the input, but tell the user.
        logging.getLogger(__name__).warning(
            "No axis named %r in data (axes are named %r); "
            "using the data without transposing.",
            on_rows,
            axis_names,
        )
    else:
        if position != 0:
            data = data.T

    parsed = data.map(partial(parse_expr, local_dict=context))

    def _entry(i, j):
        # sympy passes its own integer type; pandas needs plain ints
        return parsed.iloc[int(i), int(j)]

    return SparseMatrix(*parsed.shape, _entry)
def read_model_file(fname: Path | str, *, is_matrix: bool = False) -> pd.DataFrame:
    """Read a CSV with model components defined on it.

    Files are read with the module default CSV options.

    Parameters
    ----------
    fname:
        File to read.
    is_matrix:
        If ``True`` the file is read as a matrix: the first column is the
        index, the first two rows are the header (only the second one is
        kept), all cells are read as strings and empty cells become ``"0"``.
        Otherwise the file is read as a plain table and columns without any
        value are dropped.

    Returns
    -------
    df:
        The contents of the file.
    """
    if not is_matrix:
        table = pd.read_csv(fname, **_default_csv_opt)
        table.dropna(how="all", axis=1, inplace=True)
        return table

    raw = pd.read_csv(
        fname, index_col=0, header=[0, 1], dtype=str, **_default_csv_opt
    )
    matrix = raw.droplevel(0, axis=1)
    # Empty cells stand for zero entries; "0" keeps every cell parseable.
    matrix.fillna("0", inplace=True)
    return matrix
def parse_model(
    model: str, *, path: Path | str
) -> tuple[dict[str, Any], dict[str, dict]]:
    """Parse all files from model.

    The files ``{model}_states.csv``, ``{model}_parameters.csv``,
    ``{model}_processrates.csv``, ``{model}_matrix.csv`` and
    ``{model}_compositionmatrix.csv`` are read from ``path``.

    Parameters
    ----------
    model:
        Model prefix.
    path:
        Path to model folders.

    Returns
    -------
    components:
        dictionary of model components like states, matrix, etc.
        Keys: ``states``, ``parameters``, ``processrates`` (tables),
        ``matrix``, ``compositionmatrix`` (tables as read),
        ``matrix_sympy``, ``compositionmatrix_sympy`` (parsed matrices),
        ``state_vector`` and ``rates_vector`` (column matrices ordered like
        the columns and the index of ``matrix``, respectively).
    contexts:
        dictionary of context used for substitutions and other sympy
        operations. Keys: ``states``, ``parameters``, ``processrates``
        (name to parsed object) and ``processrates_expr`` (applied rate
        function to its parsed expression).
    """
    path = Path(path)  # accept plain strings as well
    cp = {}
    cxt = {}
    X, p, r = "states", "parameters", "processrates"

    for c in (X, p, r):
        table = read_model_file(path / f"{model}_{c}.csv")
        # Process rates are parsed with Function: they are called on the
        # states they depend on further below.
        parse_symbols(table, inplace=True, cls=Function if c == r else Symbol)
        # store context
        cxt[c] = dict(zip(table["name"], table["sympy"]))

        # parameter values to sympy
        if c == p:
            missing = table["value"].isna()
            table["value"] = table["value"].astype(str)
            # parameters without values must be in expressions; the column
            # is only looked up when needed because the reader drops it when
            # it is completely empty
            if missing.any():
                table.loc[missing, "value"] = table.loc[missing, "expression"]
            # parse expressions
            table["value"] = table["value"].map(
                partial(parse_expr, local_dict=cxt[c])
            )

        # store model component
        cp[c] = table

    rates = cp[r]
    parse_equation(rates, context=cxt[X] | cxt[p], inplace=True)

    # Assemble rates dependency on states
    dep_X = "dependency_states"
    rates[dep_X] = ""
    state_symbols = list(cp[X]["sympy"])
    for idx, expr in rates["sympy_expr"].items():
        # sorted by name, so the argument order is reproducible
        rates.at[idx, dep_X] = tuple(
            sorted((x for x in state_symbols if expr.has(x)), key=str)
        )

    # NOTE(review): the rate function is applied here to the dependency tuple
    # as ONE argument, while rates_vector below unpacks it (``f(*deps)``), so
    # these keys are not equal to the entries of rates_vector. Confirm which
    # form the users of this context expect before changing it.
    cxt["processrates_expr"] = {
        n(a): e for n, a, e in zip(rates["sympy"], rates[dep_X], rates["sympy_expr"])
    }

    # Load matrices
    for key in ("matrix", "compositionmatrix"):
        cp[key] = read_model_file(path / f"{model}_{key}.csv", is_matrix=True)
        cp[f"{key}_sympy"] = parse_matrix(cp[key], context=cxt["parameters"])

    # Vectors follow the labels of the matrix table as read
    states_by_name = cp[X].set_index("name")
    cp["state_vector"] = Matrix(
        [states_by_name["sympy"][x] for x in cp["matrix"].columns]
    )

    rates_by_name = rates.set_index("name")
    cp["rates_vector"] = Matrix(
        [
            rates_by_name["sympy"][x](*rates_by_name[dep_X][x])
            for x in cp["matrix"].index
        ]
    )

    return cp, cxt