"""The :py:mod:`cluster_collections` module provides user access to pre-existing radial profile models of known systems."""
import os
from pathlib import Path

import pandas as pd
import yaml

from cluster_generator.model import ClusterModel
from cluster_generator.radial_profiles import RadialProfile
from cluster_generator.utils import _bin_directory, _get_loader, mylog

_dir = Path(os.path.join(_bin_directory, "collections"))


def _enforce_format(schema):
    """Enforce the formatting norms on a collection schema.

    Parameters
    ----------
    schema: dict
        The parsed schema. It must contain the sections ``"main"`` and
        ``"schema"``; ``schema["main"]`` must provide ``collection_name``,
        ``collection_authors``, ``source_authors`` and ``n_clusters``; and
        ``schema["schema"]["build"]["method"]`` must name an attribute of
        :py:class:`model.ClusterModel`.

    Raises
    ------
    SyntaxError
        If a required section or a required ``main`` key is missing.
    KeyError
        If ``schema["schema"]["build"]["method"]`` cannot be looked up.
    AssertionError
        If the build method is not an attribute of ``ClusterModel``.
    """
    if any(section not in schema for section in ("main", "schema")):
        raise SyntaxError(
            "The schema file doesn't have either section 'main' or 'schema'"
        )

    for main_header in (
        "collection_name",
        "collection_authors",
        "source_authors",
        "n_clusters",
    ):
        if main_header not in schema["main"]:
            raise SyntaxError(f"The key {main_header} is not in schema[main].")

    build_method = schema["schema"]["build"]["method"]
    # Raised explicitly rather than via an ``assert`` statement: asserts are
    # stripped under ``python -O``, which would silently disable this check.
    # The exception type is kept as AssertionError for existing callers.
    if not hasattr(ClusterModel, build_method):
        raise AssertionError(f"The build method {build_method} is not valid.")
class ProtoCluster:
    """
    The :py:class:`~cluster_collections.ProtoCluster` class is a precursor to the fully realized
    :py:class:`model.ClusterModel` class. These are used as minimally memory intensive placeholders
    to allow the user to easily load the full cluster model.
    """

    def __init__(self, profiles, parameters, names, load_method):
        """
        Loads the :py:class:`~cluster_collections.ProtoCluster` instance.

        Parameters
        ----------
        profiles: dict of str: callable or dict of str: str
            A dictionary containing the profile definitions for the cluster initialization. There are two
            available options for the formatting of each element in this argument:

            - If the dictionary value is a :py:class:`str` type, then it is assumed to be a pre-defined profile
              already defined in the :py:mod:`cluster_generator.radial_profiles` module. If a corresponding
              built-in profile cannot be found, an error will be raised.
            - If the dictionary value is a ``callable`` instance of any kind, it is assumed to be a user defined
              function (either explicit or lambda). It will be wrapped in a
              :py:class:`cluster_generator.radial_profiles.RadialProfile` instance during initialization.

        parameters: dict of str: list
            A dictionary containing the parameters for the profiles. For each profile in ``profiles``, there
            should be a corresponding key-value pair in the ``parameters`` argument containing a :py:class:`list`
            with the values of each of the necessary parameters for the specific cluster being modeled.

            .. note::

                If the type of the elements in the list is :py:class:`float` or :py:class:`int`, then it will be
                assumed that these parameters are already following the unit conventions of the CGP. If there is
                any doubt, we recommend passing the parameters as :py:class:`unyt.array.unyt_quantity` instances
                instead. These will be processed to the correct units before proceeding.

        names: dict of str: str
            The names to give to user defined (callable) profiles. ``names[k]`` is only read when
            ``profiles[k]`` is not a :py:class:`str`.
        load_method: str
            The ``load_method`` should correspond to the analogous class method on :py:class:`model.ClusterModel`
            for loading the full model instance. Typically, these should be something like ``from_dens_and_temp``
            or ``from_dens_and_tden``.

        Raises
        ------
        ValueError
            If a profile is given as a :py:class:`str` which is not a recognized built-in profile.
        """
        self.load_method = load_method
        self.profiles = {}

        for k in profiles:
            if isinstance(profiles[k], str):
                # This is a built-in and we should look it up.
                # NOTE(review): membership is tested against ``RadialProfile.builtin`` while the
                # instance is built with ``RadialProfile.built_in`` -- confirm both attributes exist.
                if profiles[k] in RadialProfile.builtin:
                    self.profiles[k] = RadialProfile.built_in(
                        profiles[k], *parameters[k]
                    )
                else:
                    raise ValueError(
                        f"The profile type {profiles[k]} is not recognized as a built-in profile"
                    )
            else:
                # This is a newly defined type instance. The function and its parameters are bound
                # as lambda defaults to avoid late binding of the loop variable.
                self.profiles[k] = RadialProfile(
                    lambda x, p=profiles[k], params=parameters[k]: p(x, *params),
                    name=names[k],
                )

    @staticmethod
    def _required_positional(func):
        """Return the names of the positional parameters of ``func`` that have no default value."""
        import inspect

        positional_kinds = (
            inspect.Parameter.POSITIONAL_ONLY,
            inspect.Parameter.POSITIONAL_OR_KEYWORD,
        )
        return [
            name
            for name, parameter in inspect.signature(func).parameters.items()
            if parameter.kind in positional_kinds
            and parameter.default is inspect.Parameter.empty
        ]

    def load(self, rmin, rmax, additional_args=None, **kwargs):
        """
        Loads a :py:class:`model.ClusterModel` instance from this
        :py:class:`cluster_collections.ProtoCluster` instance.

        Parameters
        ----------
        rmin: float
            The minimum radius of the generation regime. (kpc)
        rmax: float
            The maximum radius of the generation regime. (kpc)
        additional_args: dict, optional
            The ``additional_args`` argument allows the user to pass additional arguments into the
            initialization function. Generally, there should be no reason to specify this unless some
            alteration has been made to the underlying source code.
        kwargs: dict, optional
            Additional key-word arguments to pass along to the initialization function.

            .. note::

                If ``kwargs`` contains a ``stellar_density`` key and corresponding profile, it will be
                overridden if the underlying :py:class:`cluster_collections.ProtoCluster` instance also has
                a stellar density profile.

        Returns
        -------
        object
            Whatever the ``load_method`` attribute of :py:class:`model.ClusterModel` returns.

        Raises
        ------
        ValueError
            If a required positional argument of the load method is not a profile of this instance,
            ``rmin``/``rmax``, or a key of ``additional_args``.
        """
        if additional_args is None:
            additional_args = {}

        load_method = getattr(ClusterModel, self.load_method)
        radii = {"rmin": rmin, "rmax": rmax}

        # Fill each required positional argument, preferring (in order) the profiles held by this
        # instance, the radii, and finally the user supplied additional arguments.
        args = []
        for arg in self._required_positional(load_method):
            if arg in self.profiles:
                args.append(self.profiles[arg])
            elif arg in radii:
                args.append(radii[arg])
            elif arg in additional_args:
                args.append(additional_args[arg])
            else:
                raise ValueError(
                    f"Determined that {arg} is a required item in the call signature, "
                    f"but it was not found in the profiles or in the additional_args dict."
                )

        # Always remove the caller's value from kwargs: it is passed explicitly below, and leaving
        # it in place would raise a duplicate keyword TypeError whenever this instance also holds
        # a stellar density profile.
        user_stellar_density = kwargs.pop("stellar_density", None)
        stellar_density = self.profiles.get("stellar_density", user_stellar_density)

        return load_method(*args, stellar_density=stellar_density, **kwargs)
class Collection:
    """
    The :py:class:`cluster_collections.Collection` class is the base class for all of the cluster
    collections available in the CGP. Generally, this class should not be instantiated but rather the
    specific sub-class corresponding to the user's desired database.
    """

    def __init__(self, data, schema):
        """
        Initializes the :py:class:`cluster_collections.Collection` instance.

        Parameters
        ----------
        data: str or :py:class:`pathlib.Path` or :py:class:`pandas.DataFrame`
            The parameter data for the collection. If provided as a :py:class:`str` or a
            :py:class:`~pathlib.Path` object, then the resulting path should point to a ``.csv`` file
            containing the relevant parameters. If a :py:class:`pandas.DataFrame` instance is provided,
            then the instance should be a table containing each of the clusters in a column ``"name"``
            and a float value for each subsequent parameter (column), which correspond (IN ORDER) with
            the arguments of the profile functions.
        schema: str or :py:class:`pathlib.Path` or dict
            The collection schema.

        Raises
        ------
        FileNotFoundError
            If the schema file or the database file cannot be found.
        SystemError
            If the schema file cannot be parsed as YAML.
        TypeError
            If ``schema`` or ``data`` is not one of the accepted types.
        """
        if isinstance(schema, (str, Path)):
            mylog.info(f"Loading collection schema from path: {schema}")
            try:
                with open(schema, "r") as f:
                    self._schema = yaml.load(f, _get_loader())
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    f"Failed to locate the schema file at {schema}."
                ) from err
            except yaml.YAMLError as exp:
                raise SystemError(
                    f"The format the schema file does not comply with standards: {exp.__repr__()}"
                ) from exp
        elif isinstance(schema, dict):
            self._schema = schema
        else:
            raise TypeError("Input 'schema' was not str, Path, or dict.")

        _enforce_format(self._schema)
        mylog.info(
            f"Loaded schema for collection {self._schema['main']['collection_name']}."
        )
        mylog.info(
            f"Loading the dataset for {self._schema['main']['collection_name']}."
        )

        if isinstance(data, (str, Path)):
            try:
                self.db = pd.read_csv(data)
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    f"The database file {data} was not found."
                ) from err
        elif isinstance(data, pd.DataFrame):
            self.db = data
        else:
            raise TypeError(
                f"The 'data' argument had type {type(data)} not str, Path, or pd.DataFrame."
            )

        self.clusters = {}
        self._initialize_proto_clusters()
        mylog.info(f"Initialized {self}.")

    def keys(self):
        """Returns the keys of the collection. Equivalent to ``self.clusters.keys()``"""
        return self.clusters.keys()

    def values(self):
        """Returns the values of the collection. Equivalent to ``self.clusters.values()``"""
        return self.clusters.values()

    def items(self):
        """Returns the items of the collection. Equivalent to ``self.clusters.items()``"""
        return self.clusters.items()

    def _initialize_proto_clusters(self):
        """This produces the relevant proto-clusters from the available datasets.

        Every row of ``self.db`` becomes one ``ProtoCluster`` stored in ``self.clusters`` under the
        value of its ``"name"`` column.

        Raises
        ------
        ValueError
            If the ``"name"`` column contains duplicated entries.
        """
        names = self.db["name"]
        repeated = names[names.duplicated()]
        if len(repeated) > 0:
            raise ValueError(
                f"The dataset contains duplicated cluster names: {list(repeated.unique())}."
            )

        build_method = self._schema["schema"]["build"]["method"]
        self.clusters = {}

        # A single pass over the rows; parameters are looked up by column name so that the
        # position of the "name" column in the table does not matter.
        for record in self.db.to_dict(orient="records"):
            functions = {}  # holds the functions
            parameters = {}  # holds parameters after sorting.
            function_names = {}  # holds the function names

            for profile, data in self.profiles.items():
                function = data["function"]
                functions[profile] = function
                parameters[profile] = [record[k] for k in data["parameters"]]
                # A built-in gets its name directly from the built-in name; otherwise the name
                # specified in the schema is used.
                function_names[profile] = (
                    "" if isinstance(function, str) else data["function_name"]
                )

            self.clusters[record["name"]] = ProtoCluster(
                functions, parameters, function_names, build_method
            )

    @property
    def name(self):
        """The name of the collection."""
        return self._schema["main"]["collection_name"]

    @property
    def citation(self):
        """The citation (bibtex) for the data source, or ``None`` if the schema has none."""
        try:
            return self._schema["main"]["citation"]
        except KeyError:
            mylog.warning(f"Failed to locate a citation for collection {self.name}")
            return None

    @property
    def authors(self):
        """The collection authors (not the original source author)"""
        return self._schema["main"]["collection_authors"]

    @property
    def source_authors(self):
        """The original (source) authors."""
        return self._schema["main"]["source_authors"]

    @property
    def profiles(self):
        """The profiles from the schema."""
        return self._schema["schema"]["profiles"]