"""Functions for resampling and aligning structures."""
from pathlib import Path
import subprocess
import pandas as pd
import shutil
from neuromaps_mouse.datasets import fetch_allenccfv3
def query_structure_graph_allenccfv3(
    data, in_col="acronym", out_col="all", data_dir=None, verbose=1
):
    """Select rows of the Allen CCFv3 structure graph by label.

    The structure-graph CSV located by ``fetch_allenccfv3`` is read, indexed
    by ``in_col`` and sliced with ``.loc[data, :]``. Because the selection is
    plain label indexing, ``data`` is expected to contain only labels that
    exist in ``in_col``; use ``get_feature_allenccfv3`` when the input may
    contain missing values.

    Parameters
    ----------
    data
        Labels to look up in ``in_col`` (callers in this module pass a list
        of acronyms).
    in_col : str
        Column of the structure graph that ``data`` refers to.
    out_col : str or list of str
        ``"all"`` returns every column; any other value is used to index the
        selected frame (a single column name or a list of column names).
    data_dir
        Forwarded to ``fetch_allenccfv3``.
    verbose : int
        Forwarded to ``fetch_allenccfv3``.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        The selected rows, in the order given by ``data``, with ``in_col``
        restored as a regular column; restricted to ``out_col`` unless
        ``out_col == "all"``.
    """
    csv_path = fetch_allenccfv3(
        which="structure-graph-csv", data_dir=data_dir, verbose=verbose
    )
    structure_graph = pd.read_csv(csv_path)

    # Index by the lookup column, pick the requested rows, then turn the
    # lookup column back into an ordinary column.
    selected = structure_graph.set_index(in_col).loc[data, :].reset_index()

    if out_col == "all":
        return selected
    return selected[out_col]
def get_feature_allenccfv3(
    data, in_col="acronym", out_col="id", data_dir=None, verbose=1
):
    """Translate structure labels into another structure-graph column.

    Unlike ``query_structure_graph_allenccfv3`` this works element by element
    and tolerates missing inputs: any entry for which ``pandas.isna`` is true
    yields ``None`` in the output.

    Parameters
    ----------
    data : iterable
        Values to look up in ``in_col``; may contain ``None`` / NaN / NA.
    in_col : str
        Column of the structure graph that ``data`` refers to.
    out_col : str
        Column whose value is returned for each entry.
    data_dir
        Forwarded to ``fetch_allenccfv3``.
    verbose : int
        Forwarded to ``fetch_allenccfv3``.

    Returns
    -------
    list
        One item per entry of ``data``: the ``out_col`` value of the matching
        row, or ``None`` where the entry was missing.
    """
    csv_path = fetch_allenccfv3(
        which="structure-graph-csv", data_dir=data_dir, verbose=verbose
    )
    lookup = pd.read_csv(csv_path).set_index(in_col)

    return [
        None if pd.isna(key) else lookup.loc[key, out_col]
        for key in data
    ]
def _get_nearest_ancestor_region_allenccfv3(
source_structure_id_paths, target_region_ids, include_self=True
):
matched_region_ids = []
for p in source_structure_id_paths:
if p is None:
matched_region_ids.append(None)
continue
if include_self:
p_list = list(map(int, p.split("/")[2:-1]))[
::-1
] # reversed to get the nearest
else:
p_list = list(map(int, p.split("/")[2:-2]))[::-1]
# print(p_list, target_region_ids)
p_in_target = [_ in target_region_ids for _ in p_list]
if any(p_in_target):
_matched_id = p_list[p_in_target.index(True)] # first match (nearest)
matched_region_ids.append(_matched_id)
else:
# print(p)
matched_region_ids.append(None)
# print(matched_region_ids)
return matched_region_ids
def _get_nearest_descendant_region_allenccfv3(
source_region_ids, target_structure_id_paths, include_self=True
):
matched_region_ids = []
if include_self:
tp_list = [
list(map(int, tp.split("/")[2:-1])) for tp in target_structure_id_paths
]
else:
tp_list = [
list(map(int, tp.split("/")[2:-2])) for tp in target_structure_id_paths
]
for p in source_region_ids:
if p is None:
matched_region_ids.append([])
continue
p_in_tp = [_[-1] for _ in tp_list if p in _]
matched_region_ids.append(p_in_tp)
return matched_region_ids
def align_structures_allenccfv3(acronyms_fixed, acronyms_moving, debug=False):
    """Relate a "moving" set of structures to a "fixed" set via the hierarchy.

    Both acronym lists are resolved against the Allen CCFv3 structure graph.
    For every moving structure the nearest fixed structure on its id path
    (the structure itself included) and the fixed structures located beneath
    it (again including itself) are determined.

    Parameters
    ----------
    acronyms_fixed : list of str
        Acronyms of the reference structures.
    acronyms_moving : list of str
        Acronyms of the structures to be aligned to the reference.
    debug : bool
        If true, add ``id_ancestor_fixed_acronym`` with the acronym of the
        matched ancestor.

    Returns
    -------
    pandas.DataFrame
        One row per moving acronym with the columns ``acronym``, ``id``,
        ``structure_id_path``, ``id_ancestor_fixed`` (nullable ``Int64``),
        optionally ``id_ancestor_fixed_acronym``, ``id_descendant_fixed``
        (list of ids) and ``id_descendant_fixed_acronym`` (list of acronyms).
    """
    query_cols = ["acronym", "id", "structure_id_path"]
    df_fixed = query_structure_graph_allenccfv3(
        acronyms_fixed, in_col="acronym", out_col=query_cols, verbose=0
    )
    # Work on an independent copy: columns are added below and the query
    # result is a column selection of another frame.
    df_moving = query_structure_graph_allenccfv3(
        acronyms_moving, in_col="acronym", out_col=query_cols, verbose=0
    ).copy()

    df_moving["id_ancestor_fixed"] = _get_nearest_ancestor_region_allenccfv3(
        df_moving["structure_id_path"].to_list(),
        df_fixed["id"].to_list(),
        include_self=True,
    )
    # Unmatched rows hold a missing value; the nullable integer dtype keeps
    # the matched ids as integers.
    df_moving["id_ancestor_fixed"] = df_moving["id_ancestor_fixed"].astype("Int64")
    if debug:
        df_moving["id_ancestor_fixed_acronym"] = get_feature_allenccfv3(
            df_moving["id_ancestor_fixed"].tolist(),
            in_col="id",
            out_col="acronym",
            verbose=0,
        )

    # also get descendant
    # NOTE(review): the descendant columns are computed regardless of
    # `debug`; confirm that only the ancestor acronym is meant to be optional.
    df_moving["id_descendant_fixed"] = _get_nearest_descendant_region_allenccfv3(
        df_moving["id"].tolist(),
        df_fixed["structure_id_path"].tolist(),
        include_self=True,
    )
    # Only look up acronyms for rows that actually have descendants. The
    # length of the row's own list is what must be tested here, not the
    # length of a literal one-element list.
    df_moving["id_descendant_fixed_acronym"] = [
        get_feature_allenccfv3(
            descendant_ids, in_col="id", out_col="acronym", verbose=0
        )
        if len(descendant_ids) > 0
        else []
        for descendant_ids in df_moving["id_descendant_fixed"]
    ]
    return df_moving
def match_structures_fuzzy_allenccfv3():
    """Placeholder with no implementation yet; does nothing and returns None."""
    return None
def visualize_structure_alignment_allenccfv3(
    acronyms_fixed, acronyms_moving, save_path=Path("./"), save_name="graphviz"
):
    """Render the hierarchy shared by two structure sets as an SVG graph.

    All structures found on the id paths of the fixed and moving acronyms are
    drawn as nodes of a left-to-right Graphviz digraph, with an edge from each
    structure's parent to the structure. Fixed structures are tagged with
    " ⏹️" and moving structures with " ⬅️" in their labels. The Graphviz
    script is written to ``<save_path>/<save_name>.txt`` and rendered with
    the ``dot`` executable to ``<save_path>/<save_name>.svg``.

    Parameters
    ----------
    acronyms_fixed : list of str
        Acronyms of the reference structures.
    acronyms_moving : list of str
        Acronyms of the structures being aligned.
    save_path : str or pathlib.Path
        Directory that receives the script and the SVG.
    save_name : str
        File name (without extension) used for both outputs.

    Raises
    ------
    ValueError
        If no ``dot`` executable is found on the search path.
    subprocess.CalledProcessError
        If ``dot`` exits with a non-zero status.
    """
    graphviz_path = shutil.which("dot")
    if graphviz_path is None:
        raise ValueError("Graphviz executable not found, please install graphviz")
    save_path = Path(save_path)

    struct_csv = pd.read_csv(fetch_allenccfv3(which="structure-graph-csv"))
    query_cols = ["acronym", "id", "structure_id_path"]
    df_fixed = query_structure_graph_allenccfv3(
        acronyms_fixed, in_col="acronym", out_col=query_cols, verbose=0
    )
    df_moving = query_structure_graph_allenccfv3(
        acronyms_moving, in_col="acronym", out_col=query_cols, verbose=0
    )

    # Every id that appears on any of the requested structures' paths.
    id_paths = (
        df_fixed["structure_id_path"].tolist()
        + df_moving["structure_id_path"].tolist()
    )
    all_regions = {
        int(region_id)
        for id_path in id_paths
        for region_id in id_path.strip("/").split("/")
    }

    # Copy the filtered rows so the dtype change below modifies an
    # independent frame rather than a slice of `struct_csv`.
    struct_csv_filtered = struct_csv[struct_csv["id"].isin(all_regions)].copy()
    # Nullable integers so parent ids are written as "997", not "997.0".
    struct_csv_filtered["parent_structure_id"] = struct_csv_filtered[
        "parent_structure_id"
    ].astype("Int64")

    # Built once; membership is tested for every row below.
    fixed_acronyms = set(df_fixed["acronym"])
    moving_acronyms = set(df_moving["acronym"])

    node_lines = []
    edge_lines = []
    for _, row in struct_csv_filtered.iterrows():
        acronym = row["acronym"]
        curr_label = acronym
        if acronym in fixed_acronyms:
            curr_label += " ⏹️"
        if acronym in moving_acronyms:
            curr_label += " ⬅️"
        node_lines.append(f'{row["id"]} [label="{curr_label}"]')
        # The root has no parent, hence no incoming edge.
        if acronym != "root":
            edge_lines.append(f" {row['parent_structure_id']} -> {row['id']}")

    graphviz_script = [
        "digraph G {",
        'rankdir="LR";',
        'node [shape=box, fontname="Arial", fontsize=12];',
        'edge [fontname="Arial", fontsize=10];',
        *node_lines,
        *edge_lines,
        "}",
    ]

    # Use the same two paths for writing and for the dot invocation.
    script_file = save_path / f"{save_name}.txt"
    svg_file = save_path / f"{save_name}.svg"
    with open(script_file, "w", encoding="utf-8") as f:
        f.write("\n".join(graphviz_script))

    # check=True: surface a failed render instead of silently leaving no SVG.
    subprocess.run(
        [graphviz_path, "-Tsvg", str(script_file), "-o", str(svg_file)],
        check=True,
    )