Skip to content

Data module

These are the primary validation functions used in data module integration tests.

clio_tools.data_module.ModuleInterface

Bases: BaseModel

Schema for module INTERFACE.yaml.

pathvars: Pathvars = Pathvars()

Snakemake pathvars, allowing module input re-wiring.

wildcards: dict[str, str] = Field(default_factory=dict)

Module wildcards. If provided, each of these must appear in the default file name of at least one module user resource or result, and every wildcard used in those file names must be declared here.

check_wildcards() -> Self

Ensure wildcards are specified in file names.

Source code in src/clio_tools/data_module/io.py
@model_validator(mode="after")
def check_wildcards(self) -> Self:
    """Check that declared wildcards and file-name wildcards match exactly.

    The names returned by ``_find_between(filename, "{}")`` are gathered for
    the ``default`` value of every ``user_resources`` and ``results`` pathvar,
    then compared in both directions against the keys of ``self.wildcards``.

    Returns:
        Self: the model instance, unchanged, when both checks pass.

    Raises:
        ValueError: a name found in a file name is not a key of ``wildcards``,
            or a key of ``wildcards`` was not found in any file name.
    """
    used_in_files: set[str] = set()
    for pathvar_group in (self.pathvars.user_resources, self.pathvars.results):
        for pathvar in pathvar_group.values():
            used_in_files.update(_find_between(pathvar.default, "{}"))

    declared = self.wildcards.keys()

    # Direction 1: names used in file names but never declared as wildcards.
    undeclared = used_in_files - declared
    if undeclared:
        raise ValueError(
            f"Wildcards not specified in 'user_resources' or 'results' pathvars: {undeclared}."
        )

    # Direction 2: declared wildcards that no file name makes use of.
    unused = declared - used_in_files
    if unused:
        raise ValueError(f"Unused wildcards found: {unused}")
    return self

from_yaml(path: str | Path)

Initialise the schema from a YAML file.

Source code in src/clio_tools/data_module/io.py
@classmethod
def from_yaml(cls, path: str | Path) -> Self:
    """Initialise the schema from a YAML file.

    Args:
        path (str | Path): location of the YAML file to load.

    Returns:
        Self: instance built by passing the file's top-level mapping as
            keyword arguments to the class.

    Raises:
        TypeError: the YAML document is not a mapping (e.g., an empty file).
    """
    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # locale encoding.
    with open(Path(path), encoding="utf-8") as file:
        data = yaml.safe_load(file)
    if not isinstance(data, dict):
        # `cls(**data)` would raise an opaque TypeError here; keep the same
        # exception type but say what is actually wrong with the file.
        raise TypeError(
            f"Expected a YAML mapping at the top level of '{path}', "
            f"got {type(data).__name__}."
        )
    return cls(**data)

to_mermaid_flowchart(name: str) -> str

Convert to a mermaid diagram.

Source code in src/clio_tools/data_module/io.py
def to_mermaid_flowchart(self, name: str) -> str:
    """Render the module interface as mermaid flowchart text.

    The output has a title header and a central node ``M`` labelled with
    ``name``. If ``self.pathvars.user_resources`` is non-empty, a ``C1`` node
    listing its entries points into ``M``. An ``O1`` node listing the entries
    of ``self.pathvars.results`` is always emitted, pointed to by ``M``.

    Args:
        name (str): used as both the diagram title and the central node label.

    Returns:
        str: the mermaid diagram source.
    """
    item_sep = "\n    "

    header = dedent(f"""\
        ---
        title: {name}
        ---
        flowchart LR
        M(({name}))
        """)

    # The user node is optional: it is skipped when there are no user resources.
    user_block = ""
    user_entries = self.pathvars.user_resources
    if user_entries:
        user_lines = item_sep.join(user_entries)
        user_block = f"""C1[/"`**user**\n    {user_lines}\n    `"/] --> M\n"""

    # The results node is always written, even when there is nothing to list.
    result_lines = item_sep.join(self.pathvars.results)
    results_block = f"""M --> O1("`**results**\n    {result_lines}\n    `")"""

    return header + user_block + results_block

clio_tools.data_module.modular_rulegraph_png(snakemake_dotfile: Path | str, output_path: Path | str, prefixes: str | list[str])

Create a PNG file with a simplified DAG with a single rule per module.

Parameters:

Name Type Description Default
snakemake_dotfile Path | str

path to .dot file (e.g., a rulegraph).

required
output_path Path | str

location to save the resulting PNG.

required
prefixes str | list[str]

module prefix, or list of module prefixes, to simplify.

required

Raises:

Type Description
ValueError

input was not a .dot file.

Source code in src/clio_tools/data_module/io.py
def modular_rulegraph_png(
    snakemake_dotfile: Path | str, output_path: Path | str, prefixes: str | list[str]
):
    """Create a PNG file with a simplified DAG with a single rule per module.

    Args:
        snakemake_dotfile (Path | str): path to .dot file (e.g., a rulegraph).
        output_path (Path | str): location to save the resulting PNG.
        prefixes (str|list[str]): list of module prefixes to simplify.

    Raises:
        ValueError: input was not a .dot file.
    """
    if not str(snakemake_dotfile).endswith(".dot"):
        raise ValueError("Only .dot files can be processed.")
    if isinstance(prefixes, str):
        prefixes = [prefixes]

    rulegraph = nx.DiGraph(nx.nx_pydot.read_dot(snakemake_dotfile))
    modulegraph = _modularise_snakemake_graph(rulegraph, prefixes)
    dot_graph = nx.drawing.nx_pydot.to_pydot(modulegraph)
    dot_graph.write_png(output_path)