import re
from pathlib import Path
from typing import Literal
import astropy.units as u
from astropy.table import Table
from astropy.units.quantity import Quantity
from astroquery.splatalogue import Splatalogue
from ..coordinates import obsfreq, restfreq
from ..util import (
append_docstr_nosections,
docstring_parameter,
get_project_data,
minimum_list_match,
minimum_string_match,
replace_col_astype,
)
# NOTE(review): not referenced by any code visible in this part of the file --
# presumably the accepted values of an "exclude" search option; confirm before relying on it.
_VALID_EXCLUDE = ("potential", "atmospheric", "probable", "known", "none")
# @todo this could be a dict with keys=colname and values=description,
# then passed to fancy docstring manipulation.
# Column names advertised to users: this list is interpolated into the query
# docstrings and returned by the `colnames` property.  "rest_frequency" and
# "obs_frequency" are the names the query methods give to the renamed
# "orderedfreq" column and to the added observed-frequency column.
_default_columns_to_return = [
    "species_id",
    "name",
    "chemical_name",
    "resolved_QNs",
    "linelist",
    "LovasASTIntensity",
    "lower_state_energy",
    "upper_state_energy",
    "sijmu2",
    "sij",
    "aij",
    "intintensity",
    "Lovas_NRAO",
    "rest_frequency",
    "obs_frequency",
    "lower_state_energy_K",
    "upper_state_energy_K",
    # "orderedFreq",
    # "measFreq",
    "upperStateDegen",
    "moleculeTag",
    "qnCode",
    "labref_Lovas_NIST",
    "rel_int_HFS_Lovas",
    "unres_quantum_numbers",
    "lineid",
    "transition_in_space",
    "transition_in_G358",
    "obsref_Lovas_NIST",
    "source_Lovas_NIST",
    "telescope_Lovas_NIST",
    # "transitionBandColor",
    "searchErrorMessage",
    # "sqlquery",
    # "requestnumber",
]
# Catalogs that are searched online.
_allowable_remote_cats = ["splatalogue"]
# Catalogs shipped with the project; localquery() reads them from the project
# data directory as "<name>.csv.gz".
_allowable_local_cats = [
    "gbtlines",  # note gbtlines does not include recombination lines
    "gbtrecomb",
    # we don't have any of the below until splatalogue is fixed or Tony R. provides.
    # "top20",
    # "planetaryatmosphere",
    # "hotcores",
    # "darkclouds",
    # "diffuseclouds",
    # "comets",
    # "agb/ppn/pn",
    # "extragalactic",
]
# Every catalog name accepted by the `cat` argument (remote first, then local).
_all_cats = _allowable_remote_cats + _allowable_local_cats
# Grab splatalogue keywords from its query method so we are always in sync with it.
# Remove description and first two frequency parameters since we have our own.
# (The trimming itself is done by the module-level statements further down.)
__splatdoc__ = Splatalogue.query_lines.__doc__
[docs]
def all_cats():
    """Return the list of every catalog name accepted by the search methods.

    This accessor lets code outside this module obtain the module-private
    ``_all_cats`` list.  The list object itself is returned, not a copy.
    """
    catalogs = _all_cats
    return catalogs
# Remove the first part of the splatalogue doc in favor of our own: keep only
# the text from its first keyword ("chemical_name") onward.  Slicing states the
# intent directly; str.replace() on the prefix would also strip any later
# repeat of that same text.
i = __splatdoc__.index("chemical_name")
__splatdoc__ = __splatdoc__[i:]
# Replace how astroquery displays the Return section with our typical format.
# Everything from the first occurrence of "Returns" to the end is dropped.
ours = "Returns\n-------\nTable\n\tAn `astropy.table.Table` containing the results of the search."
i = __splatdoc__.index("Returns")
__splatdoc__ = __splatdoc__[:i] + ours
[docs]
class SpectralLineSearchClass:
def __init__(self):
self._tables = {}
self._create_recomb_lines()
def _process_cat(self, cat: str | Path) -> str:
"""The input catalog can be a string or a Path. If it is a string, first check
if is one of the special strings indicating a local dysh catalog. Otherwise check
that it is a valid path to a file.
Returns
-------
string to a valid path or a special strings
"""
if isinstance(cat, Path):
return str(cat)
if (mc := minimum_string_match(cat.lower(), _all_cats)) is not None:
return mc
cp = Path(cat)
if cp.is_file():
return str(cp)
else:
raise ValueError(f"Unrecognized catalog {cat}. Valid catalogs are {_all_cats} or a valid path name.")
def _patch_line_lists(self, line_lists: list) -> list:
# This is to fix an inconsistency in splatalogue data.
# Although the keyword to trigger recombination line search is 'Recombination', the
# returned value in the Table is 'Recomb'. So you can't search the database and the
# result table with the same keyword! Ugh!
# We are guaranteed by minimum_list_match that line_lists will contain the full word
# if it refers to recombination.
lowerlist = list(map(str.lower, line_lists))
if "recombination" in lowerlist:
lowerlist[lowerlist.index("recombination")] = "Recomb"
return lowerlist
# order of these decorators matters!
[docs]
@append_docstr_nosections(__splatdoc__, sections=[])
@docstring_parameter(str(_all_cats), str(_default_columns_to_return))
@u.quantity_input(min_frequency=u.GHz, equivalencies=u.spectral())
@u.quantity_input(max_frequency=u.GHz, equivalencies=u.spectral())
def query_lines(
    self,
    min_frequency: Quantity,
    max_frequency: Quantity,
    # cat: Literal[*_all_cats] | Path = "splatalogue",  # * in index allowed in Python 3.11+
    cat: Literal[(x for x in _all_cats)] | Path = "splatalogue",
    columns: str | list | None = None,
    redshift: float = 0.0,
    asynchronous: bool = False,
    cache: bool = True,
    format: str = "ascii.ecsv",
    only_NRAO_recommended=True,
    **kwargs,
) -> Table:
    """Query locally or remotely for lines and return a table object.  The query returns lines
    with rest frequencies in the range [`min_frequency`,`max_frequency`].

    **Note:** If the search parameters result in no matches, a zero-length Table will be returned.

    Parameters
    ----------
    min_frequency : `~astropy.units.quantity.Quantity`
        The minimum frequency to search (or any :meth:`u.spectral` equivalent)
    max_frequency : `~astropy.units.quantity.Quantity`
        The maximum frequency to search (or any :meth:`u.spectral` equivalent)
    cat : str or Path
        The catalog to use.  One of: {0} (minimum string match) or a valid Path to a local astropy-compatible table.  The local table
        must have all the columns listed in the `columns` parameter.

            - `'gbtlines'` is a local catalog of spectral lines between 300 MHz and 120 GHz with CDMS/JPL log(intensity) > -9.

            - `'gbtrecomb'` is a local catalog of H, He, and C recombination lines between 300 MHz and 120 GHz.
    columns : str or list or None
        The query result columns to include in the returned table.  Any of {1}.  The default is None which means all columns.
    redshift : float
        Search for redshifted lines.  The given redshift is used to find lines that would be redshifted into the range
        `[min_frequency, max_frequency]`; the returned table gets an `obs_frequency` column computed with it.
    cache : bool
        For a local file query, make an in-memory copy of the input catalog to be used in subsequent queries to this catalog.
    asynchronous : bool
        Use asynchronous query (only consulted when `cat='splatalogue'`).
    format : str
        String describing the format of a local input table.  Must be a valid `astropy.io.ascii <https://docs.astropy.org/en/latest/io/ascii/index.html>`_ format string.  Default is 'ascii.ecsv'
    """
    # The docstring deliberately ends inside the Parameters section: the
    # append_docstr_nosections decorator appends the remaining astroquery
    # keyword descriptions and the Returns section.
    # NOTE(review): earlier documentation said `redshift` is not available for
    # cat='splatalogue', but the code below applies it on both paths -- confirm.
    mc = self._process_cat(cat)
    # we are overlaying this kwarg with a parameter to expose that we are changing the default.
    kwargs["only_NRAO_recommended"] = only_NRAO_recommended
    # user-friendly keywords: expand case-insensitive minimum matches to the full names
    if kwargs.get("line_lists") is not None:
        kwargs["line_lists"] = minimum_list_match(kwargs["line_lists"], Splatalogue.ALL_LINE_LISTS, casefold=True)
    if kwargs.get("line_strengths") is not None:
        kwargs["line_strengths"] = minimum_list_match(
            kwargs["line_strengths"], Splatalogue.VALID_LINE_STRENGTHS, casefold=True
        )
    if kwargs.get("intensity_type") is not None:
        kwargs["intensity_type"] = minimum_string_match(
            kwargs["intensity_type"], Splatalogue.VALID_INTENSITY_TYPES, casefold=True
        )
    minfreq = restfreq(min_frequency, redshift)
    maxfreq = restfreq(max_frequency, redshift)
    if mc == "splatalogue":
        if asynchronous:
            table = Splatalogue._parse_result(
                Splatalogue.query_lines_async(
                    minfreq,
                    maxfreq,
                    **kwargs,
                )
            )
        else:
            table = Splatalogue.query_lines(minfreq, maxfreq, **kwargs)
        table.rename_column("orderedfreq", "rest_frequency")
        # add an observed frequency column if an online query.
        # localquery() will add this itself, so keep in this if clause.
        if len(table) > 0:
            obscol = obsfreq(table["rest_frequency"], redshift)
            table.add_column(obscol, name="obs_frequency")
    else:
        # search a local table; localquery() applies the redshift itself, so it
        # gets the original frequencies.  `format` must be forwarded, otherwise
        # a caller-specified table format would be silently ignored.
        table = self.localquery(
            min_frequency,
            max_frequency,
            cat=mc,
            columns=columns,
            redshift=redshift,
            cache=cache,
            format=format,
            **kwargs,
        )
    if "intintensity" in table.colnames:
        replace_col_astype(table, "intintensity", float, -1e20)
    # An empty table may lack the added columns, so only sub-select when there are rows.
    if columns is not None and len(table) != 0:
        return table[columns]
    return table
[docs]
@docstring_parameter(str(_all_cats), str(_default_columns_to_return))
@u.quantity_input(min_frequency=u.GHz, equivalencies=u.spectral())
@u.quantity_input(max_frequency=u.GHz, equivalencies=u.spectral())
def localquery(
    self,
    min_frequency: Quantity,
    max_frequency: Quantity,
    cat: Literal[(x for x in _allowable_local_cats)] | Path = "gbtlines",
    columns: str | list | None = None,
    redshift: float = 0.0,
    chemical_name: str | None = None,
    chem_re_flags: int = re.I,
    energy_min: float | None = None,
    energy_max: float | None = None,
    energy_type: Literal[(x for x in Splatalogue.VALID_ENERGY_TYPES)] | None = None,
    intensity_lower_limit=None,
    intensity_type: Literal[(x for x in Splatalogue.VALID_INTENSITY_TYPES)] | None = None,
    line_lists: Literal[(x for x in Splatalogue.ALL_LINE_LISTS)] | None = None,
    cache: bool = False,
    format: str = "ascii.ecsv",
    **kwargs,  # ignore the rest!
) -> Table:
    """Query a local file for lines and return a table object.  The query returns lines
    with rest frequencies in the range [`min_frequency`,`max_frequency`].

    **Note:**

        - If the search parameters result in no matches, a zero-length Table will be returned.

        - Many of the keywords are only supported if `cat='splatalogue'` because local tables do not have all the columns that the Splatalogue database has.

    Parameters
    ----------
    min_frequency : `~astropy.units.quantity.Quantity`
        The minimum frequency to search (or any :meth:`u.spectral` equivalent).  No default.
    max_frequency : `~astropy.units.quantity.Quantity`
        The maximum frequency to search (or any :meth:`u.spectral` equivalent).  No default.
    cat : str
        The catalog to use.  One of: {0} (minimum string match) or a valid local astropy-compatible table.  The local table
        must have all the columns listed in the `columns` parameter.  The default is a GBT-specific line catalog, 'gbtlines'.
    columns : str or list or None
        The query result columns to include in the returned table.  Any of {1}.  The default is None which means all columns.
    redshift : float
        Search for redshifted lines.  The given redshift is used to find lines that would be redshifted into the range
        `[min_frequency, max_frequency]`.
    chemical_name : str
        Name of the chemical to search for.  Treated as a regular
        expression.  An empty set will match *any*
        species.  Examples:

        ``'H2CO'`` - 13 species have H2CO somewhere in their formula.

        ``'Formaldehyde'`` - There are 8 isotopologues of Formaldehyde
        (e.g., H213CO).

        ``'formaldehyde'`` - Thioformaldehyde,Cyanoformaldehyde.

        ``'formaldehyde',chem_re_flags=re.I`` - Formaldehyde,thioformaldehyde,
        and Cyanoformaldehyde.

        ``' H2CO '`` - Just 1 species, H2CO.  The spaces prevent including
        others.
    chem_re_flags : int
        See the `~re` module
    energy_min : `None` or float
        Lower bound of the energy range to include; `None` means no lower bound.  See `energy_type`
    energy_max : `None` or float
        Upper bound of the energy range to include; `None` means no upper bound.  See `energy_type`
    energy_type : ``'el_cm1'``, ``'eu_cm1'``, ``'eu_k'``, ``'el_k'``
        Type of energy to restrict.  L/U for lower/upper state energy,
        cm/K for *inverse* cm, i.e. wavenumber, or K for Kelvin
    intensity_lower_limit : `None` or float
        Lower limit on the intensity.  See `intensity_type`
    intensity_type : `None`, ``'CDMS/JPL (log)'``, ``'Sij-mu2'``, ``'Aij (log)'``
        The type of intensity on which to place a lower limit
    line_lists : list
        Options:
        Lovas, SLAIM, JPL, CDMS, ToyaMA, OSU, Recombination, RFI
    cache : bool
        Make an in-memory copy of the input table to be used in subsequent queries to this catalog.
    format : str
        The astropy IO format string for the input table.  Default is ECSV format.
    \\*\\*kwargs : dict
        Accepted so that Splatalogue-only keywords can be passed through by callers; ignored here.

    Returns
    -------
    Table
        An astropy table containing the results of the search

    Raises
    ------
    ValueError
        If no species matches `chemical_name`, if `line_lists` or `intensity_type` cannot be matched,
        or if `intensity_lower_limit` is given without `intensity_type`.
    """
    _cat = self._process_cat(cat)
    if _cat in _allowable_local_cats:
        # built-in catalogs live in the project data directory
        _cat = str(get_project_data() / (_cat + ".csv.gz"))
    # using the cache is about 10x faster for gbtlines
    if _cat in self._tables:
        _table = self._tables[_cat]
    else:
        _table = Table.read(_cat, format=format)
        if cache:
            self._cache_local_table(_cat, _table)

    # Resolve the species before touching the table so a bad name fails early.
    splist = None
    if chemical_name is not None:
        species = self.get_species_ids(species_regex=chemical_name, reflags=chem_re_flags)
        if len(species) == 0:
            raise ValueError(f"Unable to find species matching {chemical_name}")
        # get species id returns string but 'species_id' column in tables returned by splatalogue is int!
        splist = list(map(int, species.values()))

    # now do the work of downselecting the table.
    # The easiest way to do this is through pandas; using the Table interface
    # is too cumbersome.  to_pandas() builds a new DataFrame, so a cached
    # table is never modified by the filtering below.
    df = _table.to_pandas()
    minfreq = restfreq(min_frequency, redshift)
    maxfreq = restfreq(max_frequency, redshift)

    # Select the frequency range ("orderedfreq" is compared in MHz)
    freq = df["orderedfreq"]
    df = df[(freq >= minfreq.to("MHz").value) & (freq <= maxfreq.to("MHz").value)]

    # chemical name and re_flags via species_id
    if splist is not None:
        df = df[df["species_id"].isin(splist)]

    # energies; an unrecognized or missing energy_type applies no energy filter
    energy_columns = {
        "el_cm1": "lower_state_energy",
        "eu_cm1": "upper_state_energy",
        "el_k": "lower_state_energy_K",
        "eu_k": "upper_state_energy_K",
    }
    ecol = energy_columns.get(energy_type)
    if ecol is not None:
        # Tolerate tables that spell the Kelvin columns in all lower case.
        if ecol not in df.columns and ecol.lower() in df.columns:
            ecol = ecol.lower()
        # Apply each bound separately: this avoids the `&`/comparison
        # precedence trap and lets either bound be omitted.
        if energy_min is not None:
            df = df[df[ecol] >= energy_min]
        if energy_max is not None:
            df = df[df[ecol] <= energy_max]

    # line lists
    if line_lists is not None:
        if (line_lists := minimum_list_match(line_lists, Splatalogue.ALL_LINE_LISTS, casefold=True)) is None:
            raise ValueError(
                f"line_lists must be one or more of {Splatalogue.ALL_LINE_LISTS} (case insensitive, minimum match)."
            )
        # search keyword 'Recombination' is stored as 'Recomb' in the table
        line_lists = self._patch_line_lists(line_lists)
        df = df[df["linelist"].isin(line_lists)]

    # line strengths
    if intensity_lower_limit is not None:
        if intensity_type is None:
            raise ValueError(
                f"If you specify an intensity lower limit, you must also specify its intensity_type. One of {Splatalogue.VALID_INTENSITY_TYPES} (case insensitive, minimum_match)."
            )
        intensity_type = minimum_string_match(intensity_type, Splatalogue.VALID_INTENSITY_TYPES, casefold=True)
        if intensity_type is None:
            raise ValueError(
                f"intensity_type must be one of {Splatalogue.VALID_INTENSITY_TYPES} (case insensitive, minimum match)."
            )
        # NOTE(review): intensity_type is validated but the limit is always
        # applied to the "intintensity" column -- confirm this is intended.
        df = df[df["intintensity"] >= intensity_lower_limit]

    table = Table.from_pandas(df)
    table.rename_column("orderedfreq", "rest_frequency")
    if len(table) > 0:
        obscol = obsfreq(table["rest_frequency"], redshift)
        table.add_column(obscol, name="obs_frequency")
    # As in query_lines(): an empty table has no "obs_frequency" column, so
    # only sub-select columns when there are rows.
    if columns is not None and len(table) > 0:
        return table[columns]
    return table
[docs]
@docstring_parameter(Splatalogue.get_species_ids.__doc__)
def get_species_ids(self, species_regex, reflags=0, recache=False):
    """
    Convenience call-through to :meth:`~astroquery.splatalogue.SplatalogueClass.get_species_ids`.
    {0}
    """
    # All three arguments are handed to astroquery unchanged.
    species = Splatalogue.get_species_ids(species_regex, reflags=reflags, recache=recache)
    return species
[docs]
def clear_cache(self):
    """
    Clear the local caches: the Splatalogue cache and every local table
    that has been cached by this object.

    See https://astroquery.readthedocs.io/en/stable/splatalogue/splatalogue.html#troubleshooting

    Returns
    -------
    None.
    """
    # astroquery's own cache first ...
    Splatalogue.clear_cache()
    # ... then start over with a fresh, empty table cache.
    self._tables = dict()
def _cache_local_table(self, tablename: str, table: Table) -> None:
    """
    Remember a local table in memory so its file need not be read again.

    Parameters
    ----------
    tablename : str
        Key under which the table is stored
    table : Table
        The table to keep

    Returns
    -------
    None
    """
    # is this a bad idea? memory hog?
    # An existing entry with the same key is overwritten.
    self._tables.update({tablename: table})
@property
def colnames(self):
    """
    The column names present in the default returned table.

    Returns
    -------
    colname : list
        The module-level list of default column names (the list itself, not a copy).
        You can choose a subset of these when performing a search.
    """
    default_columns = _default_columns_to_return
    return default_columns
def _create_recomb_lines(self):
"""
Make the recombination line ascii to unicode map. Also add some convenient aliases for users.
"""
# Splatalogue wants the unicode Greek characters in recombination lines.
# Create a mapping to allow users to type in e.g. Halpha instead of H\u03B1 or Hα # noqa
self._recomb_dict = {}
unicode_map = {
"alpha": "\u03b1",
"beta": "\u03b2",
"gamma": "\u03b3",
"delta": "\u03b4",
"epsilon": "\u03b6",
"zeta": "\u0364",
}
for line in ["H", "He", "C"]:
for k, v in unicode_map.items():
self._recomb_dict[f"{line}{k}"] = f"{line}{v}"
# aliases since Splatalogue is picky about Case.
self._altrecomb = {
"H": "Hydrogen",
"C": "Carbon",
# "He" already works
"hydrogen": "Hydrogen",
"carbon": "Carbon",
"helium": "Helium",
}
[docs]
@docstring_parameter(str(_all_cats), str(_default_columns_to_return))
def recomb(
    self,
    min_frequency: Quantity,
    max_frequency: Quantity,
    line: str,
    # cat: Literal[*_all_cats] | Path = "splatalogue",  # allowed in Python 3.11+
    cat: Literal[(x for x in _all_cats)] | Path = "splatalogue",
    columns: str | list | None = None,
    redshift: float = 0.0,
    convert_to_unicode: bool = True,
    only_NRAO_recommended: bool = True,
    **kwargs,
) -> Table:
    """
    Search for recombination lines of H, He, and C.

    Parameters
    ----------
    min_frequency : `~astropy.units.quantity.Quantity`
        The minimum frequency to search (or any :meth:`u.spectral` equivalent).  No default.
    max_frequency : `~astropy.units.quantity.Quantity`
        The maximum frequency to search (or any :meth:`u.spectral` equivalent).  No default.
    line : str
        A string describing the line or series to search for, e.g. "Hydrogen", "Halpha", "Hebeta", "C", "carbon".  No default.
    cat : str or Path
        The catalog to use.  One of: {0} (minimum string match) or a valid Path to a local astropy-compatible table.  The local table
        must have all the columns listed in the `columns` parameter.  Default is 'splatalogue'.
    columns : str or list or None
        The query result columns to include in the returned table.  Any of {1}.  The default is None which means all columns.
    redshift : float
        Search for redshifted lines.  The given redshift is used to find lines that would be redshifted into the range
        `[min_frequency, max_frequency]`.
    convert_to_unicode : bool, optional
        Splatalogue stores line names using the unicode characters for Greek symbols, e.g. `\u03b1` for 'alpha'.  dysh will convert for you, if you put in e.g., 'Halpha'.
        You should only change this if a) you are inputting unicode or b) you are searching a local file that you know doesn't use unicode.  The default is True.
        The element aliases (e.g. 'H' or 'carbon') are applied regardless of this setting.
    only_NRAO_recommended : bool
        Passed through to :meth:`query_lines`.  The default is True.
    \\*\\*kwargs : dict
        Other keyword arguments supported by :meth:`query_lines` if `cat` is 'splatalogue'.

    Returns
    -------
    Table
        An astropy table containing the results of the search
    """
    if line in self._altrecomb:
        # whole-string alias for an element, e.g. "H" -> "Hydrogen"
        line = self._altrecomb[line]
    elif line is not None and convert_to_unicode:
        # spell out Greek letters as unicode, e.g. "Halpha" -> "H\u03b1"
        for ascii_name, unicode_name in self._recomb_dict.items():
            line = line.replace(ascii_name, unicode_name)
    # Case-insensitive matching is the default, but a caller-supplied
    # chem_re_flags now takes precedence instead of raising a
    # duplicate-keyword TypeError.
    kwargs.setdefault("chem_re_flags", re.I)
    return self.query_lines(
        min_frequency,
        max_frequency,
        chemical_name=line,
        cat=cat,
        line_lists=["Recomb"],
        columns=columns,
        redshift=redshift,
        only_NRAO_recommended=only_NRAO_recommended,
        **kwargs,
    )
[docs]
@docstring_parameter(str(_all_cats), str(_default_columns_to_return))
def recomball(
    self,
    min_frequency: Quantity,
    max_frequency: Quantity,
    cat: Literal[(x for x in _all_cats)] | Path = "splatalogue",
    columns: str | list | None = None,
    redshift=0.0,
    cache: bool = False,
    only_NRAO_recommended: bool = True,
    **kwargs,
) -> Table:
    """
    Return every H, He and C recombination line the catalog holds in the given frequency range.

    This is :meth:`recomb` called without a specific line.

    Parameters
    ----------
    min_frequency : `~astropy.units.quantity.Quantity`
        The minimum frequency to search (or any :meth:`u.spectral` equivalent).  No default.
    max_frequency : `~astropy.units.quantity.Quantity`
        The maximum frequency to search (or any :meth:`u.spectral` equivalent).  No default.
    cat : str or Path
        The catalog to use.  One of: {0} (minimum string match) or a valid Path to a local astropy-compatible table.  The local table
        must have all the columns listed in the `columns` parameter.  Default is 'splatalogue'.
    columns : str or list or None
        The query result columns to include in the returned table.  Any of {1}.  The default is all columns.
    redshift : float
        Search for redshifted lines.  The given redshift will used find lines that would be redshifted into the range `[min_frequency, max_frequency]`.  This option is
        not available if `cat='splatalogue'`.
    cache : bool
        For a local file query, make an in-memory copy of the input table to be used in subsequent queries to this catalog.
    \\*\\*kwargs : dict
        Other keyword arguments supported by :meth:`query_lines` if `cat` is 'splatalogue'.

    Returns
    -------
    Table
        An astropy table containing the results of the search.
    """
    # The third positional argument of recomb() is `line`; None means no
    # particular line or series is requested.
    all_lines = self.recomb(
        min_frequency,
        max_frequency,
        None,
        cat=cat,
        columns=columns,
        redshift=redshift,
        cache=cache,
        only_NRAO_recommended=only_NRAO_recommended,
        **kwargs,
    )
    return all_lines