# Source code for neuromaps_mouse.datasets.utils

"""Functions for working with datasets."""

import os
import json
import shutil
import importlib.resources
from pathlib import Path

try:
    # nilearn 0.10.3
    from nilearn.datasets._utils import fetch_single_file as _fetch_file, _md5_sum_file
except ImportError:
    from nilearn.datasets.utils import _fetch_file, _md5_sum_file


def get_data_dir(data_dir=None):
    """
    Get the path to the data directory, creating it if it does not exist.

    Parameters
    ----------
    data_dir : str or os.PathLike, optional
        Directory to use. If None, the value of the ``MOUSEMAPS_DATA``
        environment variable is used; if that is unset, the directory
        ``neuromaps-mouse-data`` in the user's home directory is used.
        Default: None

    Returns
    -------
    data_dir : pathlib.Path
        User-expanded path to the data directory
    """
    if data_dir is None:
        data_dir = os.environ.get(
            "MOUSEMAPS_DATA", str(Path.home() / "neuromaps-mouse-data")
        )
    data_dir = Path(data_dir).expanduser()
    # create the directory (and any missing parents) so callers can write to it
    data_dir.mkdir(parents=True, exist_ok=True)
    return data_dir
def _load_resource_json(relative_path):
    """
    Load JSON file from package resources.

    Parameters
    ----------
    relative_path : str
        Path to JSON file relative to package resources

    Returns
    -------
    resource_json : dict
        JSON file loaded as a dictionary
    """
    # handling pkg_resources.resource_filename deprecation
    if getattr(importlib.resources, "files", None) is not None:
        f_resource = importlib.resources.files("neuromaps_mouse") / relative_path
    else:
        from pkg_resources import resource_filename

        f_resource = resource_filename("neuromaps_mouse", relative_path)

    # read explicitly as UTF-8 so the result does not depend on the locale
    with open(f_resource, encoding="utf-8") as src:
        resource_json = json.load(src)

    return resource_json


MOUSEMAPS_ATLASES = _load_resource_json("datasets/data/atlases.json")["atlases"]
MOUSEMAPS_ANNOTS = _load_resource_json("datasets/data/annotations.json")["annotations"]
MOUSEMAPS_ANNOTS_META = _load_resource_json("datasets/data/annotations-meta.json")[
    "annotations-meta"
]


def _osfify_url(osf_file_id):
    """Return the OSF download URL for the file with id `osf_file_id`."""
    return f"https://osf.io/download/{osf_file_id}/"


def fetch_files(annotations, file_type="annotations", data_dir=None, verbose=1):
    """
    Make sure the requested files exist locally, downloading them if needed.

    A file already on disk is reused when its MD5 checksum matches the
    expected one; otherwise it is downloaded from OSF again.

    Parameters
    ----------
    annotations : iterable of dict
        File entries. Each entry must provide the keys ``"rel_path"``,
        ``"fname"``, ``"checksum"`` and ``"url"`` (a dict with key ``"osf"``).
    file_type : {"annotations", "annotations-meta", "atlases"}, optional
        Kind of file; selects the sub-directory of `data_dir` that is used
        (``annotations`` for the first two, ``atlases`` for the last).
        Default: "annotations"
    data_dir : str or os.PathLike, optional
        Root data directory. If None, the directory returned by
        ``get_data_dir()`` is used. Default: None
    verbose : int, optional
        If truthy, progress messages are printed; the value is also passed
        on to the download helper. Default: 1

    Returns
    -------
    targ_fname_list : list of pathlib.Path
        Local paths of the files, in the order of `annotations`

    Raises
    ------
    ValueError
        If `file_type` is not one of the supported values
    """
    # resolve the sub-directory once; it does not depend on the entry
    if file_type in ["annotations", "annotations-meta"]:
        subdir = "annotations"
    elif file_type == "atlases":
        subdir = "atlases"
    else:
        raise ValueError(f"Unknown file_type={file_type}")

    # the signature advertises data_dir=None, but Path(None) raises TypeError
    if data_dir is None:
        data_dir = get_data_dir()
    targ_path = Path(data_dir) / subdir

    targ_fname_list = []
    for annot in annotations:
        targ_fname = targ_path / annot["rel_path"] / annot["fname"]

        if targ_fname.exists():
            if _md5_sum_file(targ_fname) == annot["checksum"]:
                targ_fname_list.append(targ_fname)
                if verbose:
                    print(f"Found {targ_fname.name} at {targ_fname}")
                continue
            if verbose:
                print(f"Checksum mismatch for {targ_fname.name}, redownloading...")

        dl_fname = _fetch_file(
            _osfify_url(annot["url"]["osf"]),
            targ_fname.parent,
            resume=True,
            md5sum=annot["checksum"],
            verbose=verbose,
        )
        # the helper picks its own file name; move it to the expected one
        shutil.move(dl_fname, targ_fname)
        targ_fname_list.append(targ_fname)
        if verbose:
            print(f"Downloaded {targ_fname.name} to {targ_fname}")

    return targ_fname_list


def _annot_full_to_tuple(full_list):
    """Convert annotation dicts to ``(source, desc, space, res)`` tuples."""
    return [
        tuple([annot[key] for key in ["source", "desc", "space", "res"]])
        for annot in full_list
    ]


def _match_annots_by_tuple(annot_tuple_list):
    """
    Look up annotations in MOUSEMAPS_ANNOTS by identifying tuple.

    Parameters
    ----------
    annot_tuple_list : tuple or list of tuple
        One or more ``(source, desc, space, res)`` tuples. A single tuple
        is treated as a one-element list.

    Returns
    -------
    matched : list of dict
        First matching annotation for every tuple, in input order

    Raises
    ------
    ValueError
        If a tuple does not match any annotation
    """
    # match all then sort
    if not isinstance(annot_tuple_list, list):
        annot_tuple_list = [annot_tuple_list]

    matched = []
    for annot_tuple in annot_tuple_list:
        found = False
        for annot in MOUSEMAPS_ANNOTS:
            curr_annot_tuple = tuple(
                [annot[key] for key in ["source", "desc", "space", "res"]]
            )
            if curr_annot_tuple == annot_tuple:
                matched.append(annot)
                found = True
                break
        if not found:
            raise ValueError(f"Annotation {annot_tuple} not found in MOUSEMAPS_ANNOTS")
    return matched


def _filter_annots_by_keys(keys_dict):
    """
    Select the annotations in MOUSEMAPS_ANNOTS that satisfy every criterion.

    Parameters
    ----------
    keys_dict : dict
        Must contain the keys ``"source"``, ``"desc"``, ``"space"``,
        ``"res"``, ``"format"`` and ``"tag"``. A value of None means the
        corresponding criterion is not applied. ``"tag"`` matches when it is
        contained in the annotation's ``"tags"``.

    Returns
    -------
    filtered : list of dict
        Matching annotations, in the order of MOUSEMAPS_ANNOTS
    """
    match_keys = ["source", "desc", "space", "res", "format"]
    tag = keys_dict["tag"]

    filtered = []
    for annot in MOUSEMAPS_ANNOTS:
        # skip (rather than stop scanning) when any requested key differs
        if any(
            keys_dict[key] is not None and annot[key] != keys_dict[key]
            for key in match_keys
        ):
            continue
        if tag is not None and tag not in annot["tags"]:
            continue
        filtered.append(annot)
    return filtered


def _report_osf_entry(console, entry, osfstorage_data):
    """Print whether `entry`'s checksum and OSF id agree with `osfstorage_data`."""
    fname = entry["fname"]
    if fname not in osfstorage_data:
        console.print(f" [bold red]x[/bold red] {fname} not found in osfstorage")
        return

    remote = osfstorage_data[fname]
    if entry["checksum"] == remote["md5"]:
        console.print(" [bold green]✓[/bold green] checksum")
    else:
        console.print(
            f" [bold red]x[/bold red] checksum local: {entry['checksum']} remote: {remote['md5']}"
        )
    if entry["url"]["osf"] == remote["guid"]:
        console.print(" [bold green]✓[/bold green] url")
    else:
        console.print(
            f" [bold red]x[/bold red] url local: {entry['url']['osf']} remote: {remote['guid']}"
        )


def _check_json(osfstorage_data):
    """
    Compare the packaged JSON files against a listing of the OSF storage.

    For internal use only. Results are printed with `rich`.

    Parameters
    ----------
    osfstorage_data : dict
        Mapping of file name to a dict with keys ``"guid"`` and ``"md5"``,
        as returned by `_check_osfstorage`

    Returns
    -------
    None
    """
    from rich.console import Console

    console = Console()

    # reload the datasets and meta json files
    atlases = _load_resource_json("datasets/data/atlases.json")["atlases"]
    annots = _load_resource_json("datasets/data/annotations.json")["annotations"]
    annots_meta = _load_resource_json("datasets/data/annotations-meta.json")[
        "annotations-meta"
    ]

    console.print("ATLASES")
    for atlas_k, atlas_v in atlases.items():
        console.print(f"{atlas_k} >")
        for file_k, file_v in atlas_v["files"].items():
            console.print(f" {file_k} >")
            _report_osf_entry(console, file_v, osfstorage_data)

    console.print("\nANNOTS_META")
    for annot_meta in annots_meta:
        console.print(f"{annot_meta['source']} {annot_meta['name']} >")
        # "\\[" is rich's escape for a literal "[" in console markup
        console.print(" \\[annot files] >")
        for file_v in annot_meta["files"]:
            console.print(f" {'-'.join(file_v)} >")
            # NOTE: the lookup runs against the module-level MOUSEMAPS_ANNOTS,
            # not against the freshly reloaded `annots`
            try:
                matched = _match_annots_by_tuple([tuple(file_v)])
            except ValueError:
                console.print(" [bold red]x[/bold red] json")
                # nothing was matched, so there is no result to inspect
                continue
            if len(matched) == 1:
                console.print(" [bold green]✓[/bold green] json")
            else:
                console.print(" [bold red]x[/bold red] json")

        console.print(" \\[aux files] >")
        for aux_k, aux_v in annot_meta["aux_files"].items():
            console.print(f" {aux_k} >")
            if not isinstance(aux_v, list):
                aux_v = [aux_v]
            for file_v in aux_v:
                console.print(f" {file_v['fname']} >")
                _report_osf_entry(console, file_v, osfstorage_data)

    console.print("\nANNOTS")
    for annot in annots:
        annotstr = "-".join(
            [annot["source"], annot["desc"], annot["space"], annot["res"]]
        )
        console.print(f" {annotstr} >")
        _report_osf_entry(console, annot, osfstorage_data)


def _check_osfstorage():
    """
    List the files stored on OSF together with their GUID and MD5 checksum.

    For internal use only. The storage tree is printed with `rich` while it
    is traversed.

    Returns
    -------
    osfstorage_data : dict
        Mapping of file name to a dict with keys ``"guid"`` and ``"md5"``

    Raises
    ------
    ValueError
        If a top-level folder other than ``/atlases/`` or ``/annotations/``
        is found
    """
    import requests
    from rich.console import Console

    console = Console()

    osfstorage_data = {}

    OSF_NODEID = "uryk3"
    OSF_URL = f"https://api.osf.io/v2/nodes/{OSF_NODEID}/files/osfstorage/"

    def _get_file_href(d):
        return d["relationships"]["files"]["links"]["related"]["href"]

    def _get_full_data(url):
        # handles pagination
        resp = requests.get(url).json()
        ret = resp["data"]
        while resp["links"].get("next"):
            href = resp["links"]["next"]
            resp = requests.get(href).json()
            ret.extend(resp["data"])
        return ret

    def _record_file(file, parent_path):
        # print one file entry and store its guid/md5 under the file name
        attrs = file["attributes"]
        file_path = attrs["materialized_path"]
        console.print(f" {file_path.removeprefix(parent_path)} >")
        console.print(f" {attrs['guid']}")
        console.print(f" {attrs['extra']['hashes']['md5']}")
        if not attrs["guid"]:
            # NOTE(review): presumably requesting the file page makes OSF
            # assign a GUID - confirm. The value stored below is still the
            # empty one from this listing, so a second run is needed to see it.
            requests.get(f'https://osf.io/{OSF_NODEID}/files/osfstorage{attrs["path"]}')
        osfstorage_data[attrs["name"]] = {
            "guid": attrs["guid"],
            "md5": attrs["extra"]["hashes"]["md5"],
        }

    for kind in requests.get(OSF_URL).json()["data"]:
        kind_path = kind["attributes"]["materialized_path"]
        console.print(f"{kind_path} >")
        if kind_path == "/atlases/":
            # layout: /atlases/<source>/<version>/<file>
            for source in _get_full_data(_get_file_href(kind)):
                source_path = source["attributes"]["materialized_path"]
                console.print(f" {source_path.removeprefix(kind_path)} >")
                for version in _get_full_data(_get_file_href(source)):
                    version_path = version["attributes"]["materialized_path"]
                    console.print(f" {version_path.removeprefix(source_path)} >")
                    for file in _get_full_data(_get_file_href(version)):
                        _record_file(file, version_path)
        elif kind_path == "/annotations/":
            # layout: /annotations/<source>/<file>
            for source in _get_full_data(_get_file_href(kind)):
                source_path = source["attributes"]["materialized_path"]
                console.print(f" {source_path.removeprefix(kind_path)} >")
                for file in _get_full_data(_get_file_href(source)):
                    _record_file(file, source_path)
        else:
            raise ValueError(f"Unknown kind_path={kind_path}")

    return osfstorage_data


# def _fill_meta_json_refs(bib_file, json_file, overwrite=False, use_defaults=False):
#     """
#     Fill in citation information for references in a JSON file.
#
#     For internal use only.
#
#     Parameters
#     ----------
#     bib_file : str
#         Path to BibTeX file containing references
#     json_file : str
#         Path to JSON file containing references
#     overwrite : bool, optional
#         Whether to overwrite existing citation information. Default: False
#     use_defaults : bool, optional
#         Whether to use default paths for `bib_file` and `json_file`. Default: False
#
#     Returns
#     -------
#     None
#     """
#     if use_defaults:
#         bib_file = \
#             importlib.resources.files("neuromaps") / "datasets/data/neuromaps.bib"
#         json_file = \
#             importlib.resources.files("neuromaps") / "datasets/data/meta.json"
#
#     from pybtex import PybtexEngine
#     engine = PybtexEngine()
#
#     def _get_citation(key):
#         s = engine.format_from_file(
#             filename=bib_file, style="unsrt",
#             citations=[key], output_backend="plaintext"
#         )
#         return s.strip("\n").replace("[1] ", "")
#
#     with open(json_file) as src:
#         nm_meta = json.load(src)
#
#     for entry in nm_meta["annotations"]:
#         for bib_category in ["primary", "secondary"]:
#             for bib_item in entry["refs"][bib_category]:
#                 if bib_item["bibkey"] not in ["", None]:
#                     if bib_item["citation"] == "" or overwrite:
#                         bib_item["citation"] = _get_citation(bib_item["bibkey"])
#
#     with open(json_file, "w") as dst:
#         json.dump(nm_meta, dst, indent=4)


def _gen_doc_listofmaps_rst(listofmaps_file):
    """
    Generate a list of maps in reStructuredText format.

    For internal use only. One section is written per entry of
    MOUSEMAPS_ANNOTS_META, with one sub-section per annotation file.

    Parameters
    ----------
    listofmaps_file : str
        Path to write the list of maps

    Returns
    -------
    None
    """
    output = []

    output += [
        ".. _listofmaps:",
        "",
        "------------",
        "List of Maps",
        "------------",
        "This is a complete list of maps available in the `neuromaps_mouse` package. ",
        "\n----\n",
    ]

    for annot_meta in MOUSEMAPS_ANNOTS_META:
        title = f"{annot_meta['name']} ({annot_meta['source']})"
        output += [
            title,
            "=" * len(title),
            "",
            "**Full description**",
            "",
            f"{annot_meta['description']}",
            "",
        ]

        for file in annot_meta["files"]:
            curr_annot = _match_annots_by_tuple(tuple(file))[0]
            file_title = "-".join(file)
            key_str = ", ".join(
                [f"{k}='{curr_annot[k]}'" for k in ["source", "desc", "space", "res"]]
            )
            output += [
                file_title,
                "-" * len(file_title),
                "",
                f"**Description**: {annot_meta['file_desc'][curr_annot['desc']]}",
                "",
                f"**Format**: {curr_annot['format']}",
                "",
                "**How to use**",
                "",
                ".. code:: python",
                "",
                " # get annotation",
                f" fetch_annotation({key_str})",
                "",
                " # file location",
                f" # $MOUSEMAPS_DATA/{curr_annot['rel_path']}",
                "",
                " # file name",
                f" # {curr_annot['fname']}",
                "",
                " # region mapping file",
                f" # {curr_annot['regionmapping']}",
                "",
            ]

        output.append("**References**")
        for bib_item in annot_meta["refs"]:
            if bib_item["bibkey"] not in ["", None]:
                output += [f" - {bib_item['citation']}"]

        output.append("\n----\n")

    # drop the last element so the file does not end with a separator
    output = output[:-1]

    # write explicitly as UTF-8 so the result does not depend on the locale
    with open(listofmaps_file, "w", encoding="utf-8") as dst:
        dst.write("\n".join(output))