Source code for cluster_collections

"""
The :py:mod:`cluster_collections` module provides user access to pre-existing radial profile models of known systems.
"""
import os
from pathlib import Path

import pandas as pd
import yaml

from cluster_generator.model import ClusterModel
from cluster_generator.radial_profiles import RadialProfile
from cluster_generator.utils import _bin_directory, _get_loader, mylog

# Directory holding the bundled collection files (the ``.csv`` tables and ``.yaml`` schemas
# referenced by the collection classes in this module).
_dir = Path(_bin_directory, "collections")


def _enforce_format(schema):
    """
    Enforces formatting norms on the schema.

    Parameters
    ----------
    schema: dict
        The parsed collection schema.

    Raises
    ------
    SyntaxError
        If either of the sections ``main`` or ``schema`` is missing, or if one of the required
        headers (``collection_name``, ``collection_authors``, ``source_authors``, ``n_clusters``)
        is missing from ``schema["main"]``.
    AssertionError
        If ``schema["schema"]["build"]["method"]`` is not the name of an attribute of
        :py:class:`model.ClusterModel`.
    """
    required_sections = ("main", "schema")
    if any(section not in schema for section in required_sections):
        raise SyntaxError(
            "The schema file doesn't have either section 'main' or 'schema'"
        )

    required_headers = (
        "collection_name",
        "collection_authors",
        "source_authors",
        "n_clusters",
    )
    for main_header in required_headers:
        if main_header not in schema["main"]:
            raise SyntaxError(f"The key {main_header} is not in schema[main].")

    build_method = schema["schema"]["build"]["method"]
    if not hasattr(ClusterModel, build_method):
        # Raised explicitly instead of via ``assert`` so that the validation is not stripped
        # when Python runs with ``-O``. The exception type is kept as AssertionError so that
        # existing callers observing the failure see the same type as before.
        raise AssertionError(f"The build method {build_method} is not valid.")


class ProtoCluster:
    """
    The :py:class:`~cluster_collections.ProtoCluster` class is a precursor to the fully realized
    :py:class:`model.ClusterModel` class. These are used as minimally memory intensive placeholders
    to allow the user to easily load the full cluster model.
    """

    def __init__(self, profiles, parameters, names, load_method):
        """
        Loads the :py:class:`~cluster_collections.ProtoCluster` instance.

        Parameters
        ----------
        profiles: dict of str: callable or dict of str: str
            A dictionary containing the profile definitions for the cluster initialization. There
            are two available options for the formatting of each element in this argument:

            - If the dictionary value is a :py:class:`str` type, then it is assumed to be a
              pre-defined profile already defined in the :py:mod:`cluster_generator.radial_profiles`
              module. If a corresponding built-in profile cannot be found, an error will be raised.
            - If the dictionary value is a ``callable`` instance of any kind, it is assumed to be a
              user defined function (either explicit or lambda). It will be wrapped in a
              :py:class:`cluster_generator.radial_profiles.RadialProfile` instance during
              initialization.

        parameters: dict of str: list
            A dictionary containing the parameters for the profiles. For each profile in
            ``profiles``, there should be a corresponding key-value pair in the ``parameters``
            argument containing a :py:class:`list` with the values of each of the necessary
            parameters for the specific cluster being modeled.

            .. note::

                If the type of the elements in the list is :py:class:`float` or :py:class:`int`,
                then it will be assumed that these parameters are already following the unit
                conventions of the CGP. If there is any doubt, we recommend passing the parameters
                as :py:class:`unyt.array.unyt_quantity` instances instead. These will be processed
                to the correct units before proceeding.

        names: dict of str: str
            The names given to the wrapped profiles. An entry is only looked up for the profiles
            whose value in ``profiles`` is not a :py:class:`str` (i.e. user defined callables).
        load_method: str
            The ``load_method`` should correspond to the analogous class method on
            :py:class:`model.ClusterModel` for loading the full model instance. Typically, these
            should be something like ``from_dens_and_temp`` or ``from_dens_and_tden``.

        Raises
        ------
        ValueError
            If a profile is given as a :py:class:`str` which is not in ``RadialProfile.builtin``.
        """
        self.load_method = load_method
        self.profiles = {}

        for k in profiles:
            profile = profiles[k]

            if isinstance(profile, str):
                # This is a built-in and we should look it up.
                if profile not in RadialProfile.builtin:
                    raise ValueError(
                        f"The profile type {profile} is not recognized as a built-in profile"
                    )
                self.profiles[k] = RadialProfile.built_in(profile, *parameters[k])
            else:
                # This is a newly defined type instance. The function and its parameters are
                # bound as lambda defaults to avoid late binding of the loop variables.
                self.profiles[k] = RadialProfile(
                    lambda x, p=profile, params=parameters[k]: p(x, *params),
                    name=names[k],
                )

    def _positional_arguments(self, load_method, rmin, rmax, additional_args):
        """Collect, in signature order, the values for the required positional parameters of ``load_method``."""
        import inspect

        positional_kinds = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )

        args = []
        # Reading the parameters from the Signature object (instead of splitting its string
        # form) keeps this working for annotated signatures and for ``*`` / ``/`` markers.
        for arg, parameter in inspect.signature(load_method).parameters.items():
            if parameter.kind not in positional_kinds:
                continue
            if parameter.default is not inspect.Parameter.empty:
                continue

            if arg in self.profiles:
                args.append(self.profiles[arg])
            elif arg == "rmin":
                args.append(rmin)
            elif arg == "rmax":
                args.append(rmax)
            elif arg in additional_args:
                args.append(additional_args[arg])
            else:
                raise ValueError(
                    f"Determined that {arg} is a required item in the call signature, but it could not be found in the profiles or in the additional_args dict."
                )

        return args

    def _stellar_density(self, kwargs):
        """Remove ``stellar_density`` from ``kwargs`` and return the profile to use (own profile takes precedence)."""
        # Always pop the key: leaving it in ``kwargs`` would pass ``stellar_density`` twice to
        # the load method whenever this instance also has its own stellar density profile.
        user_profile = kwargs.pop("stellar_density", None)
        return self.profiles.get("stellar_density", user_profile)

    def load(self, rmin, rmax, additional_args=None, **kwargs):
        """
        Loads a :py:class:`model.ClusterModel` instance from this
        :py:class:`cluster_collections.ProtoCluster` instance.

        Parameters
        ----------
        rmin: float
            The minimum radius of the generation regime. (kpc)
        rmax: float
            The maximum radius of the generation regime. (kpc)
        additional_args: dict, optional
            The ``additional_args`` argument allows the user to pass additional arguments into the
            initialization function. Generally, there should be no reason to specify this unless
            some alteration has been made to the underlying source code.
        kwargs: dict, optional
            Additional key-word arguments to pass along to the initialization function.

            .. note::

                If ``kwargs`` contains a ``stellar_density`` key and corresponding profile, it will
                be overridden if the underlying :py:class:`cluster_collections.ProtoCluster`
                instance also has a stellar density profile.

        Returns
        -------
        object
            Whatever the ``load_method`` attribute of :py:class:`model.ClusterModel` returns.

        Raises
        ------
        ValueError
            If a required positional parameter of the load method is neither a profile of this
            instance, ``rmin`` / ``rmax``, nor a key of ``additional_args``.
        """
        if additional_args is None:
            additional_args = {}

        load_method = getattr(ClusterModel, self.load_method)
        args = self._positional_arguments(load_method, rmin, rmax, additional_args)
        stellar_density = self._stellar_density(kwargs)

        return load_method(*args, stellar_density=stellar_density, **kwargs)

    def keys(self):
        """Returns the profile names. Equivalent to ``self.profiles.keys()``."""
        return self.profiles.keys()

    def items(self):
        """Returns the (name, profile) pairs. Equivalent to ``self.profiles.items()``."""
        return self.profiles.items()

    def values(self):
        """Returns the profiles. Equivalent to ``self.profiles.values()``."""
        return self.profiles.values()

    def value(self):
        """Alias of :py:meth:`values`, kept for backward compatibility."""
        return self.values()
class Collection:
    """
    The :py:class:`cluster_collections.Collection` class is the base class for all of the cluster
    collections available in the CGP. Generally, this class should not be instantiated but rather
    the specific sub-class corresponding to the user's desired database.
    """

    def __init__(self, data, schema):
        """
        Initializes the :py:class:`cluster_collections.Collection` instance.

        Parameters
        ----------
        data: str or :py:class:`pathlib.Path` or :py:class:`pandas.DataFrame`
            The parameter data for the collection. If provided as a :py:class:`str` or a
            :py:class:`~pathlib.Path` object, then the resulting path should point to a ``.csv``
            file containing the relevant parameters. If a :py:class:`pandas.DataFrame` instance is
            provided, then the instance should be a table containing each of the clusters in a
            column ``"name"`` and a float value for each subsequent parameter (column), which
            correspond (IN ORDER) with the arguments of the profile functions.
        schema: str or :py:class:`pathlib.Path` or dict
            The collection schema. A :py:class:`str` or :py:class:`~pathlib.Path` is opened and
            parsed as YAML; a :py:class:`dict` is used as is.

        Raises
        ------
        FileNotFoundError
            If the schema file or the data file cannot be found.
        SystemError
            If the schema file cannot be parsed as YAML.
        TypeError
            If ``schema`` or ``data`` is not of one of the accepted types.
        """
        if isinstance(schema, (str, Path)):
            mylog.info(f"Loading collection schema from path: {schema}")
            try:
                with open(schema, "r") as f:
                    # NOTE(review): yaml.load with a project-defined loader; only point this at
                    # trusted schema files.
                    self._schema = yaml.load(f, _get_loader())
            except FileNotFoundError as exp:
                raise FileNotFoundError(
                    f"Failed to locate the schema file at {schema}."
                ) from exp
            except yaml.YAMLError as exp:
                raise SystemError(
                    f"The format the schema file does not comply with standards: {exp.__repr__()}"
                ) from exp
        elif isinstance(schema, dict):
            self._schema = schema
        else:
            raise TypeError("Input 'schema' was not str, Path, or dict.")

        _enforce_format(self._schema)
        mylog.info(
            f"Loaded schema for collection {self._schema['main']['collection_name']}."
        )
        mylog.info(
            f"Loading the dataset for {self._schema['main']['collection_name']}."
        )

        if isinstance(data, (str, Path)):
            try:
                self.db = pd.read_csv(data)
            except FileNotFoundError as exp:
                raise FileNotFoundError(
                    f"The database file {data} was not found."
                ) from exp
        elif isinstance(data, pd.DataFrame):
            self.db = data
        else:
            raise TypeError(
                f"The 'data' argument had type {type(data)} not str, Path, or pd.DataFrame."
            )

        self.clusters = {}
        self._initialize_proto_clusters()
        mylog.info(f"Initialized {self}.")

    def __len__(self):
        # Count the clusters actually held so that ``len`` always agrees with iteration and
        # ``keys()``, even when the schema's declared ``n_clusters`` does not match the data.
        return len(self.clusters)

    def __repr__(self):
        return f"< Collection - {self.name} - {len(self)} >"

    def __str__(self):
        return f"{self.name} collection"

    def __getitem__(self, item):
        return self.clusters[item]

    def __contains__(self, item):
        return item in self.clusters

    def __class_getitem__(cls, item):
        # Allows ``SomeCollection["cluster name"]`` directly on the class. This instantiates
        # ``cls`` without arguments, so it only works for sub-classes whose ``__init__`` takes
        # no arguments.
        inst = cls()
        return inst.__getitem__(item)

    def __iter__(self):
        return iter(self.clusters)

    def keys(self):
        """Returns the keys of the collection. Equivalent to ``self.clusters.keys()``"""
        return self.clusters.keys()

    def values(self):
        """Returns the values of the collection. Equivalent to ``self.clusters.values()``"""
        return self.clusters.values()

    def items(self):
        """Returns the items of the collection. Equivalent to ``self.clusters.items()``"""
        return self.clusters.items()

    def _initialize_proto_clusters(self):
        """This produces the relevant proto-clusters from the available datasets"""
        build_method = self._schema["schema"]["build"]["method"]
        # Every column except the cluster name holds a parameter value, wherever the
        # ``name`` column happens to sit in the table.
        parameter_columns = [k for k in self.db.columns if k != "name"]

        self.clusters = {k: None for k in list(self.db["name"])}

        for cluster in self.clusters:
            # Select the cluster's row once instead of once per parameter column.
            row = self.db.loc[self.db["name"] == cluster]
            parameters = {k: row[k].item() for k in parameter_columns}

            p = {}  # holds parameters after sorting.
            f = {}  # holds the functions
            n = {}  # holds the function names

            for profile, data in self.profiles.items():
                _f = data["function"]

                if isinstance(_f, str):
                    # This is a built-in and will get its name directly from the built-in name.
                    _n = ""
                else:
                    # This is the name specified in the schema.
                    _n = data["function_name"]

                f[profile] = _f
                p[profile] = [parameters[k] for k in data["parameters"]]
                n[profile] = _n

            self.clusters[cluster] = ProtoCluster(f, p, n, build_method)

    @property
    def name(self):
        """The name of the collection."""
        return self._schema["main"]["collection_name"]

    @property
    def citation(self):
        """The citation (bibtex) for the data source, or ``None`` if the schema has none."""
        try:
            return self._schema["main"]["citation"]
        except KeyError:
            mylog.warning(f"Failed to locate a citation for collection {self.name}")
            return None

    @property
    def authors(self):
        """The collection authors (not the original source author)"""
        return self._schema["main"]["collection_authors"]

    @property
    def source_authors(self):
        """The original (source) authors."""
        return self._schema["main"]["source_authors"]

    @property
    def profiles(self):
        """The profiles from the schema."""
        return self._schema["schema"]["profiles"]
class Vikhlinin06(Collection):
    r"""
    Collection of 13 low-redshift galaxy clusters using *Chandra*.

    .. rubric:: Collection Details

    .. list-table::
        :widths: 15 85

        * - Collection Type
          - X-ray Observations
        * - Observatory
          - Chandra
        * - Profiles
          - :math:`\rho_g + T_g`
        * - Authors
          - `A. Vikhlinin <https://ui.adsabs.harvard.edu/search/q=author:%22Vikhlinin%2C+A.%22&sort=date%20desc,%20bibcode%20desc>`_
            `A. Kravtsov <https://ui.adsabs.harvard.edu/search/q=author:%22Kravtsov%2C+A.%22&sort=date%20desc,%20bibcode%20desc>`_
            `W. Forman <https://ui.adsabs.harvard.edu/search/q=author:%22Forman%2C+W.%22&sort=date%20desc,%20bibcode%20desc>`_
            `C. Jones <https://ui.adsabs.harvard.edu/search/q=author:%22Jones%2C+C.%22&sort=date%20desc,%20bibcode%20desc>`_
            `M. Markevitch <https://ui.adsabs.harvard.edu/search/q=author:%22Markevitch%2C+M.%22&sort=date%20desc,%20bibcode%20desc>`_
            `S. S. Murray <https://ui.adsabs.harvard.edu/search/q=author:%22Murray%2C+S.+S.%22&sort=date%20desc,%20bibcode%20desc>`_
            `L. Van Speybroeck <https://ui.adsabs.harvard.edu/search/q=author:%22Van+Speybroeck%2C+L.%22&sort=date%20desc,%20bibcode%20desc>`_
        * - Recommended Citation
          - .. button-link:: https://iopscience.iop.org/article/10.1086/500288/meta
                :color: secondary

                Go To Citation
    """

    # Locations of the bundled parameter table and schema for this collection.
    _data = os.path.join(_dir, "Vikhlinin06.csv")
    _schema_loc = os.path.join(_dir, "Vikhlinin06.yaml")

    def __init__(self):
        """Initializes the collection from its bundled data table and schema."""
        super().__init__(data=self._data, schema=self._schema_loc)
class Ascasibar07(Collection):
    r"""
    Collection of 13 low-redshift galaxy clusters using *Chandra*.

    .. rubric:: Collection Details

    .. list-table::
        :widths: 15 85

        * - Collection Type
          - X-ray Observations [Secondary Analysis]
        * - Observatory
          - Chandra
        * - Profiles
          - :math:`\rho_g + T_g`
        * - Authors
          - `Y. Ascasibar <https://ui.adsabs.harvard.edu/search/q=author:%22Ascasibar%2C+Y.%22&sort=date%20desc,%20bibcode%20desc>`_
            `J. M. Diego <https://ui.adsabs.harvard.edu/search/q=author:%22Diego%2C+J.+M.%22&sort=date%20desc,%20bibcode%20desc>`_
        * - Recommended Citation
          - .. button-link:: https://ui.adsabs.harvard.edu/abs/2008MNRAS.383..369A/abstract
                :color: secondary

                Go To Citation
    """

    # Locations of the bundled parameter table and schema for this collection.
    _data = os.path.join(_dir, "Ascasibar07.csv")
    _schema_loc = os.path.join(_dir, "Ascasibar07.yaml")

    def __init__(self):
        """Initializes the collection from its bundled data table and schema."""
        super().__init__(data=self._data, schema=self._schema_loc)
class Sanderson10(Collection):
    r"""
    Collection of 20 low-redshift galaxy clusters using *Chandra*.

    .. rubric:: Collection Details

    .. list-table::
        :widths: 15 85

        * - Collection Type
          - X-ray Observations [Secondary]
        * - Observatory
          - Chandra
        * - Profiles
          - :math:`\rho_g + T_g`
        * - Authors
          - `A.J.R Sanderson <https://ui.adsabs.harvard.edu/search/q=author:%22Sanderson%2C+Alastair+J.+R.%22&sort=date%20desc,%20bibcode%20desc>`_
            `T.J. Ponman <https://ui.adsabs.harvard.edu/search/q=author:%22Ponman%2C+Trevor+J.%22&sort=date%20desc,%20bibcode%20desc>`_
        * - Recommended Citation
          - .. button-link:: https://ui.adsabs.harvard.edu/abs/2010MNRAS.402...65S/abstract
                :color: secondary

                Go To Citation
    """

    # Locations of the bundled parameter table and schema for this collection.
    _data = os.path.join(_dir, "Sanderson10.csv")
    _schema_loc = os.path.join(_dir, "Sanderson10.yaml")

    def __init__(self):
        """Initializes the collection from its bundled data table and schema."""
        super().__init__(data=self._data, schema=self._schema_loc)