Source code for neuromaps_mouse.regions

"""Functions for resampling and aligning structures."""

from pathlib import Path
import subprocess
import pandas as pd
import shutil
from neuromaps_mouse.datasets import fetch_allenccfv3



[docs]
def query_structure_graph_allenccfv3(
    data, in_col="acronym", out_col="all", data_dir=None, verbose=1
):
    """Look up rows of the Allen CCFv3 structure graph.

    The structure-graph CSV is indexed by ``in_col`` and the rows labelled
    by ``data`` are selected with ``.loc``, in the order given. The index
    is then reset, so ``in_col`` is available again as a regular column.

    Parameters
    ----------
    data : array-like
        Labels to look up in ``in_col`` (e.g., region acronyms or IDs).
        The selection is done by direct label indexing, so None/NaN
        entries are not supported here.
    in_col : str, optional
        Column whose values are matched against ``data``.
        Default is 'acronym'.
    out_col : str or list of str, optional
        Column(s) to return. Use 'all' to return every column.
        Default is 'all'.
    data_dir : str or Path, optional
        Base data directory passed on to ``fetch_allenccfv3``.
        Default is None.
    verbose : int, optional
        Verbosity level passed on to ``fetch_allenccfv3``. Default is 1.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        The selected rows; restricted to ``out_col`` unless it is 'all'.
    """
    csv_path = fetch_allenccfv3(
        which="structure-graph-csv", data_dir=data_dir, verbose=verbose
    )
    structure_graph = pd.read_csv(csv_path)

    # Label-based selection; the lookup column returns as a normal column.
    selected = structure_graph.set_index(in_col).loc[data, :].reset_index()

    if out_col == "all":
        return selected
    return selected[out_col]




[docs]
def get_feature_allenccfv3(
    data, in_col="acronym", out_col="id", data_dir=None, verbose=1
):
    """Map each input value to one column of the Allen CCFv3 structure graph.

    In contrast to ``query_structure_graph_allenccfv3``, missing inputs
    (anything ``pandas.isna`` reports as missing) are tolerated and yield
    None in the output.

    Parameters
    ----------
    data : iterable
        Values to look up in ``in_col``; may contain None/NaN.
    in_col : str, optional
        Column whose values are matched against ``data``.
        Default is 'acronym'.
    out_col : str, optional
        Column from which the result values are read. Default is 'id'.
    data_dir : str or Path, optional
        Base data directory passed on to ``fetch_allenccfv3``.
        Default is None.
    verbose : int, optional
        Verbosity level passed on to ``fetch_allenccfv3``. Default is 1.

    Returns
    -------
    list
        One entry per input value, in input order; None wherever the
        input was missing.
    """
    lookup = pd.read_csv(
        fetch_allenccfv3(
            which="structure-graph-csv", data_dir=data_dir, verbose=verbose
        )
    ).set_index(in_col)

    # Missing inputs are passed through as None instead of being looked up.
    return [
        None if pd.isna(item) else lookup.loc[item, out_col] for item in data
    ]



def _get_nearest_ancestor_region_allenccfv3(
    source_structure_id_paths, target_region_ids, include_self=True
):
    matched_region_ids = []
    for p in source_structure_id_paths:
        if p is None:
            matched_region_ids.append(None)
            continue
        if include_self:
            p_list = list(map(int, p.split("/")[2:-1]))[
                ::-1
            ]  # reversed to get the nearest
        else:
            p_list = list(map(int, p.split("/")[2:-2]))[::-1]
        # print(p_list, target_region_ids)
        p_in_target = [_ in target_region_ids for _ in p_list]
        if any(p_in_target):
            _matched_id = p_list[p_in_target.index(True)]  # first match (nearest)
            matched_region_ids.append(_matched_id)
        else:
            # print(p)
            matched_region_ids.append(None)
    # print(matched_region_ids)
    return matched_region_ids


def _get_nearest_descendant_region_allenccfv3(
    source_region_ids, target_structure_id_paths, include_self=True
):
    matched_region_ids = []

    if include_self:
        tp_list = [
            list(map(int, tp.split("/")[2:-1])) for tp in target_structure_id_paths
        ]
    else:
        tp_list = [
            list(map(int, tp.split("/")[2:-2])) for tp in target_structure_id_paths
        ]

    for p in source_region_ids:
        if p is None:
            matched_region_ids.append([])
            continue
        p_in_tp = [_[-1] for _ in tp_list if p in _]

        matched_region_ids.append(p_in_tp)

    return matched_region_ids



[docs]
def align_structures_allenccfv3(acronyms_fixed, acronyms_moving, debug=False):
    """Align moving structures to fixed structures via ancestor matching.

    For each region in ``acronyms_moving``, finds its nearest ancestor
    (the region itself included) that exists in ``acronyms_fixed``.

    Parameters
    ----------
    acronyms_fixed : array-like of str
        Target region acronyms (the fixed reference set).
    acronyms_moving : array-like of str
        Source region acronyms to align to the fixed set.
    debug : bool, optional
        If True, also add the acronym of the matched ancestor and the
        descendant mappings (ids and acronyms) to the returned DataFrame.
        Default is False.

    Returns
    -------
    pandas.DataFrame
        One row per moving region with the columns 'acronym', 'id' and
        'structure_id_path', plus 'id_ancestor_fixed' (nullable ``Int64``,
        missing where no ancestor is part of the fixed set). With
        ``debug=True`` the columns 'id_ancestor_fixed_acronym',
        'id_descendant_fixed' and 'id_descendant_fixed_acronym' are added.
    """
    query_cols = ["acronym", "id", "structure_id_path"]
    df_fixed = query_structure_graph_allenccfv3(
        acronyms_fixed, in_col="acronym", out_col=query_cols, verbose=0
    )
    df_moving = query_structure_graph_allenccfv3(
        acronyms_moving, in_col="acronym", out_col=query_cols, verbose=0
    )

    df_moving["id_ancestor_fixed"] = _get_nearest_ancestor_region_allenccfv3(
        df_moving["structure_id_path"].to_list(),
        df_fixed["id"].to_list(),
        include_self=True,
    )
    # Nullable integer dtype keeps ids integral even when some are missing.
    df_moving["id_ancestor_fixed"] = df_moving["id_ancestor_fixed"].astype("Int64")

    if debug:
        df_moving["id_ancestor_fixed_acronym"] = get_feature_allenccfv3(
            df_moving["id_ancestor_fixed"].tolist(),
            in_col="id",
            out_col="acronym",
            verbose=0,
        )
        # Also list the fixed regions that lie beneath each moving region.
        df_moving["id_descendant_fixed"] = _get_nearest_descendant_region_allenccfv3(
            df_moving["id"].tolist(),
            df_fixed["structure_id_path"].tolist(),
            include_self=True,
        )
        # Only query the structure graph for rows that actually have
        # descendants; the guard must look at the row's list, not at a
        # literal (which would always be non-empty).
        df_moving["id_descendant_fixed_acronym"] = [
            get_feature_allenccfv3(
                descendant_ids, in_col="id", out_col="acronym", verbose=0
            )
            if len(descendant_ids) > 0
            else []
            for descendant_ids in df_moving["id_descendant_fixed"]
        ]

    return df_moving




[docs]
def match_structures_fuzzy_allenccfv3():
    """Match structures using fuzzy string matching.

    Placeholder: nothing is implemented yet, so calling this function has
    no effect and returns None.
    """
    return None




[docs]
def visualize_structure_alignment_allenccfv3(
    acronyms_fixed, acronyms_moving, save_path=Path("./"), save_name="graphviz"
):
    """Visualize the alignment between two sets of brain structures as a graph.

    Collects every region id that appears in the structure id paths of the
    fixed and moving regions, writes a Graphviz script with one node per
    region and one parent -> child edge per non-root region, and renders
    it to SVG with the ``dot`` executable.

    Two files are written to ``save_path``: ``<save_name>.txt`` (the
    Graphviz script) and ``<save_name>.svg`` (the rendered graph).

    Parameters
    ----------
    acronyms_fixed : array-like of str
        Fixed (reference) region acronyms, marked with a stop symbol in the graph.
    acronyms_moving : array-like of str
        Moving (source) region acronyms, marked with an arrow in the graph.
    save_path : str or Path, optional
        Directory to save output files. Default is current directory.
    save_name : str, optional
        Base filename (without extension) for the output files. Default is 'graphviz'.

    Raises
    ------
    ValueError
        If the ``dot`` executable cannot be found on the PATH.
    subprocess.CalledProcessError
        If ``dot`` exits with a non-zero status while rendering the SVG.
    """
    graphviz_path = shutil.which("dot")
    if graphviz_path is None:
        raise ValueError("Graphviz executable not found, please install graphviz")

    save_path = Path(save_path)

    struct_csv = pd.read_csv(fetch_allenccfv3(which="structure-graph-csv"))

    query_cols = ["acronym", "id", "structure_id_path"]
    df_fixed = query_structure_graph_allenccfv3(
        acronyms_fixed, in_col="acronym", out_col=query_cols, verbose=0
    )
    df_moving = query_structure_graph_allenccfv3(
        acronyms_moving, in_col="acronym", out_col=query_cols, verbose=0
    )

    # Every id that occurs anywhere on a path of a fixed or moving region.
    all_paths = (
        df_fixed["structure_id_path"].tolist()
        + df_moving["structure_id_path"].tolist()
    )
    region_ids = {
        int(token) for path in all_paths for token in path.strip("/").split("/")
    }

    # Copy so the dtype change below modifies an independent frame rather
    # than a slice of ``struct_csv`` (avoids chained-assignment warnings).
    struct_csv_filtered = struct_csv[struct_csv["id"].isin(list(region_ids))].copy()
    # Nullable integers so parent ids are written as "8" instead of "8.0".
    struct_csv_filtered["parent_structure_id"] = struct_csv_filtered[
        "parent_structure_id"
    ].astype("Int64")

    # Built once for O(1) membership checks while labelling the nodes.
    fixed_acronyms = set(df_fixed["acronym"])
    moving_acronyms = set(df_moving["acronym"])

    node_lines = []
    edge_lines = []
    for region_id, parent_id, acronym in zip(
        struct_csv_filtered["id"],
        struct_csv_filtered["parent_structure_id"],
        struct_csv_filtered["acronym"],
    ):
        curr_label = acronym
        if acronym in fixed_acronyms:
            curr_label += " ⏹️"
        if acronym in moving_acronyms:
            curr_label += " ⬅️"
        node_lines.append(f'{region_id} [label="{curr_label}"]')
        # The root row gets no incoming edge.
        if acronym != "root":
            edge_lines.append(f"    {parent_id} -> {region_id}")

    graphviz_script = [
        "digraph G {",
        'rankdir="LR";',
        'node [shape=box, fontname="Arial", fontsize=12];',
        'edge [fontname="Arial", fontsize=10];',
        *node_lines,
        *edge_lines,
        "}",
    ]

    script_file = save_path / f"{save_name}.txt"
    svg_file = save_path / f"{save_name}.svg"

    with open(script_file, "w", encoding="utf-8") as f:
        f.write("\n".join(graphviz_script))

    # check=True surfaces rendering failures instead of silently leaving
    # the SVG missing or stale.
    subprocess.run(
        [graphviz_path, "-Tsvg", str(script_file), "-o", str(svg_file)],
        check=True,
    )