Source code for neuromaps_mouse.regions

"""Functions for resampling and aligning structures."""

from pathlib import Path
import subprocess
import pandas as pd
import shutil
from neuromaps_mouse.datasets import fetch_allenccfv3


def query_structure_graph_allenccfv3(
    data, in_col="acronym", out_col="all", data_dir=None, verbose=1
):
    """Look up rows of the Allen CCFv3 structure graph.

    The structure-graph CSV is loaded, indexed by ``in_col``, and the rows
    labelled by ``data`` are selected directly with ``.loc``. Because the
    selection is a plain label lookup, ``data`` is not screened for
    None/NaN entries here.

    Parameters
    ----------
    data : array-like
        Labels to look up in ``in_col`` (e.g., region acronyms or IDs).
    in_col : str, optional
        Column used as the lookup index. Default is 'acronym'.
    out_col : str or list of str, optional
        Column(s) to return. The string 'all' returns every column.
        Default is 'all'.
    data_dir : str or Path, optional
        Base data directory forwarded to ``fetch_allenccfv3``.
        Default is None.
    verbose : int, optional
        Verbosity level forwarded to ``fetch_allenccfv3``. Default is 1.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        The selected rows, with ``in_col`` restored as a regular column
        before ``out_col`` is applied.
    """
    csv_path = fetch_allenccfv3(
        which="structure-graph-csv", data_dir=data_dir, verbose=verbose
    )
    structure_graph = pd.read_csv(csv_path)

    # Select once; the two return paths differ only in the column subset.
    selected = structure_graph.set_index(in_col).loc[data, :].reset_index()
    if out_col == "all":
        return selected
    return selected[out_col]
def get_feature_allenccfv3(
    data, in_col="acronym", out_col="id", data_dir=None, verbose=1
):
    """Fetch one structure-graph feature per input entry, tolerating gaps.

    In contrast to ``query_structure_graph_allenccfv3``, entries of ``data``
    for which ``pd.isna`` is true (None, NaN, ...) are passed through as
    None instead of being looked up.

    Parameters
    ----------
    data : array-like
        Values to look up in ``in_col``; may contain None/NaN.
    in_col : str, optional
        Column used as the lookup index. Default is 'acronym'.
    out_col : str, optional
        Column whose value is returned for each entry. Default is 'id'.
    data_dir : str or Path, optional
        Base data directory forwarded to ``fetch_allenccfv3``.
        Default is None.
    verbose : int, optional
        Verbosity level forwarded to ``fetch_allenccfv3``. Default is 1.

    Returns
    -------
    list
        One element per entry of ``data``: the ``out_col`` value of the
        matching row, or None where the entry was None/NaN.
    """
    csv_path = fetch_allenccfv3(
        which="structure-graph-csv", data_dir=data_dir, verbose=verbose
    )
    lookup = pd.read_csv(csv_path).set_index(in_col)

    return [
        None if pd.isna(entry) else lookup.loc[entry, out_col]
        for entry in data
    ]
def _get_nearest_ancestor_region_allenccfv3( source_structure_id_paths, target_region_ids, include_self=True ): matched_region_ids = [] for p in source_structure_id_paths: if p is None: matched_region_ids.append(None) continue if include_self: p_list = list(map(int, p.split("/")[2:-1]))[ ::-1 ] # reversed to get the nearest else: p_list = list(map(int, p.split("/")[2:-2]))[::-1] # print(p_list, target_region_ids) p_in_target = [_ in target_region_ids for _ in p_list] if any(p_in_target): _matched_id = p_list[p_in_target.index(True)] # first match (nearest) matched_region_ids.append(_matched_id) else: # print(p) matched_region_ids.append(None) # print(matched_region_ids) return matched_region_ids def _get_nearest_descendant_region_allenccfv3( source_region_ids, target_structure_id_paths, include_self=True ): matched_region_ids = [] if include_self: tp_list = [ list(map(int, tp.split("/")[2:-1])) for tp in target_structure_id_paths ] else: tp_list = [ list(map(int, tp.split("/")[2:-2])) for tp in target_structure_id_paths ] for p in source_region_ids: if p is None: matched_region_ids.append([]) continue p_in_tp = [_[-1] for _ in tp_list if p in _] matched_region_ids.append(p_in_tp) return matched_region_ids
def align_structures_allenccfv3(acronyms_fixed, acronyms_moving, debug=False):
    """Align moving structures to fixed structures via ancestor matching.

    For each region in ``acronyms_moving``, finds its nearest ancestor
    (the region itself included) that exists in ``acronyms_fixed``.

    Parameters
    ----------
    acronyms_fixed : array-like of str
        Target region acronyms (the fixed reference set).
    acronyms_moving : array-like of str
        Source region acronyms to align to the fixed set.
    debug : bool, optional
        If True, also add the acronym of the matched ancestor and the
        descendant mappings (ids and acronyms) to the returned DataFrame.
        Default is False.

    Returns
    -------
    pandas.DataFrame
        One row per moving region with the columns 'acronym', 'id' and
        'structure_id_path', plus 'id_ancestor_fixed' (nullable ``Int64``;
        missing where no ancestor is in the fixed set). With ``debug=True``
        the columns 'id_ancestor_fixed_acronym', 'id_descendant_fixed' and
        'id_descendant_fixed_acronym' are added as well.
    """
    query_cols = ["acronym", "id", "structure_id_path"]
    df_fixed = query_structure_graph_allenccfv3(
        acronyms_fixed, in_col="acronym", out_col=query_cols, verbose=0
    )
    df_moving = query_structure_graph_allenccfv3(
        acronyms_moving, in_col="acronym", out_col=query_cols, verbose=0
    )

    df_moving["id_ancestor_fixed"] = _get_nearest_ancestor_region_allenccfv3(
        df_moving["structure_id_path"].to_list(),
        df_fixed["id"].to_list(),
        include_self=True,
    )
    # Nullable integer dtype keeps ids integral even when some are missing.
    df_moving["id_ancestor_fixed"] = df_moving["id_ancestor_fixed"].astype("Int64")

    if debug:
        df_moving["id_ancestor_fixed_acronym"] = get_feature_allenccfv3(
            df_moving["id_ancestor_fixed"].tolist(),
            in_col="id",
            out_col="acronym",
            verbose=0,
        )
        # also get descendant
        df_moving["id_descendant_fixed"] = _get_nearest_descendant_region_allenccfv3(
            df_moving["id"].tolist(),
            df_fixed["structure_id_path"].tolist(),
            include_self=True,
        )
        # Only look up acronyms for rows that actually have descendants; the
        # previous guard measured a literal one-element list and was always
        # true, so every row triggered a lookup.
        df_moving["id_descendant_fixed_acronym"] = [
            get_feature_allenccfv3(
                descendant_ids, in_col="id", out_col="acronym", verbose=0
            )
            if len(descendant_ids) > 0
            else []
            for descendant_ids in df_moving["id_descendant_fixed"]
        ]
    return df_moving
def match_structures_fuzzy_allenccfv3():
    """Match structures using fuzzy string matching.

    Placeholder: no matching logic exists yet, so calling this function
    does nothing and returns None.
    """
    return None
def visualize_structure_alignment_allenccfv3(
    acronyms_fixed, acronyms_moving, save_path=Path("./"), save_name="graphviz"
):
    """Visualize the alignment between two sets of brain structures as a graph.

    Writes a Graphviz script containing every region that appears in the
    structure id path of a fixed or moving region, with an edge from each
    region's parent to the region, then renders it to SVG with ``dot``.
    Requires Graphviz to be installed.

    Parameters
    ----------
    acronyms_fixed : array-like of str
        Fixed (reference) region acronyms, marked with a stop symbol in the
        graph.
    acronyms_moving : array-like of str
        Moving (source) region acronyms, marked with an arrow in the graph.
    save_path : str or Path, optional
        Directory to save output files. Default is current directory.
    save_name : str, optional
        Base filename (without extension) for the output files
        (``<save_name>.txt`` and ``<save_name>.svg``). Default is 'graphviz'.

    Raises
    ------
    ValueError
        If the ``dot`` executable cannot be found on the PATH.
    subprocess.CalledProcessError
        If ``dot`` exits with a non-zero status.
    """
    graphviz_path = shutil.which("dot")
    if graphviz_path is None:
        raise ValueError("Graphviz executable not found, please install graphviz")

    save_path = Path(save_path)

    struct_csv = pd.read_csv(fetch_allenccfv3(which="structure-graph-csv"))

    query_cols = ["acronym", "id", "structure_id_path"]
    df_fixed = query_structure_graph_allenccfv3(
        acronyms_fixed, in_col="acronym", out_col=query_cols, verbose=0
    )
    df_moving = query_structure_graph_allenccfv3(
        acronyms_moving, in_col="acronym", out_col=query_cols, verbose=0
    )

    # Every id on any fixed/moving path becomes a node of the graph.
    all_paths = (
        df_fixed["structure_id_path"].tolist()
        + df_moving["structure_id_path"].tolist()
    )
    all_regions = {
        int(token) for path in all_paths for token in path.strip("/").split("/")
    }

    # Copy so the column assignment below modifies an independent frame
    # instead of a slice of ``struct_csv``.
    struct_csv_filtered = struct_csv[struct_csv["id"].isin(all_regions)].copy()
    # Nullable integers so parent ids print as "997" rather than "997.0".
    struct_csv_filtered["parent_structure_id"] = struct_csv_filtered[
        "parent_structure_id"
    ].astype("Int64")

    # Build the membership lookups once rather than once per row.
    fixed_acronyms = set(df_fixed["acronym"].tolist())
    moving_acronyms = set(df_moving["acronym"].tolist())

    graphviz_script = [
        "digraph G {",
        'rankdir="LR";',
        'node [shape=box, fontname="Arial", fontsize=12];',
        'edge [fontname="Arial", fontsize=10];',
    ]

    # Node declarations first ...
    for _i, row in struct_csv_filtered.iterrows():
        curr_label = row["acronym"]
        if row["acronym"] in fixed_acronyms:
            curr_label += " ⏹️"
        if row["acronym"] in moving_acronyms:
            curr_label += " ⬅️"
        graphviz_script.append(f'{row["id"]} [label="{curr_label}"]')

    # ... then one parent -> child edge per region; rows named "root" are
    # skipped and get no incoming edge.
    for _i, row in struct_csv_filtered.iterrows():
        if row["acronym"] == "root":
            continue
        graphviz_script.append(f" {row['parent_structure_id']} -> {row['id']}")

    graphviz_script += ["}"]

    script_file = save_path / f"{save_name}.txt"
    with open(script_file, "w", encoding="utf-8") as f:
        f.write("\n".join(graphviz_script))

    # check=True surfaces a failed render instead of silently leaving no SVG.
    subprocess.run(
        [
            graphviz_path,
            "-Tsvg",
            f"{save_path / save_name}.txt",
            "-o",
            f"{save_path / save_name}.svg",
        ],
        check=True,
    )