Skip to content

WIP: VAMDC-cdms interface #658

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 22, 2016
26 changes: 26 additions & 0 deletions astroquery/vamdc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
VAMDC molecular line database
"""
from astropy import config as _config
from astropy.config import paths
import os


class Conf(_config.ConfigNamespace):
    """
    Configuration parameters for `astroquery.vamdc`.

    ``timeout`` is an astropy configuration item; ``cache_location`` is a
    plain class attribute computed once, when the class body is executed.
    """

    # Timeout, in seconds (default 60).
    timeout = _config.ConfigItem(
        defaultvalue=60,
        description="Timeout in seconds",
    )

    # Directory below the astropy cache directory used by this module.
    cache_location = os.path.join(paths.get_cache_dir(), 'astroquery/vamdc')

conf = Conf()

from .core import VamdcClass
from .core import Vamdc

__all__ = ['Vamdc', 'VamdcClass',
'Conf', 'conf',
]
135 changes: 135 additions & 0 deletions astroquery/vamdc/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
from __future__ import print_function
import time
import os.path
import getpass
import keyring
import numpy as np
import re
import tarfile
import string
import requests
import sys
from pkg_resources import resource_filename
from bs4 import BeautifulSoup

from astropy.extern.six.moves.urllib_parse import urljoin, urlparse
from astropy.extern.six import iteritems
from astropy.extern import six
from astropy.table import Table, Column
from astropy import log
from astropy.utils.console import ProgressBar
from astropy import units as u
import astropy.io.votable as votable

from ..exceptions import (RemoteServiceError, TableParseError,
InvalidQueryError)
from ..utils import commons, system_tools
from ..utils.process_asyncs import async_to_sync
from ..query import BaseQuery
from . import conf
from . load_species_table import species_lookuptable

__doctest_skip__ = ['VamdcClass.*']


@async_to_sync
class VamdcClass(BaseQuery):
    """
    Query class for the CDMS node of VAMDC, built on top of ``vamdclib``.

    The ``vamdclib`` modules are imported lazily in ``__init__`` and kept as
    private attributes, so the class can still be defined when ``vamdclib``
    is not installed.
    """

    TIMEOUT = conf.timeout
    CACHE_LOCATION = conf.cache_location

    def __init__(self, doimport=True):
        """
        Parameters
        ----------
        doimport : bool
            If True (default), import ``vamdclib`` and look up the ``cdms``
            node.  If False, skip the import entirely; the resulting
            instance has none of the ``vamdclib`` attributes set.

        Raises
        ------
        ImportError
            If ``doimport`` is True and ``vamdclib`` cannot be imported.
        """
        super(VamdcClass, self).__init__()

        if not doimport:
            # this is a hack to allow the docstrings to be produced without
            # importing the necessary modules
            return

        from vamdclib import nodes as vnodes
        from vamdclib import request as vrequest
        from vamdclib import results as vresults
        from vamdclib import specmodel

        self._vnodes = vnodes
        self._vrequest = vrequest
        self._vresults = vresults

        self._nl = vnodes.Nodelist()
        self._cdms = self._nl.findnode('cdms')

        self.specmodel = specmodel

    @property
    def species_lookuptable(self, cache=True):
        """
        Lookup table of species, loaded on first access and then kept on the
        instance as ``_lut``.

        Because this is a property, ``cache`` can never be supplied by a
        caller: the table is always loaded with ``cache=True``.
        """
        if not hasattr(self, '_lut'):
            self._lut = species_lookuptable(cache=cache)

        return self._lut

    def query_molecule(self, molecule_name, chem_re_flags=0, cache=True):
        """
        Query for the VAMDC data for a specific molecule

        Parameters
        ----------
        molecule_name : str
            The common name (including unicode characters) or the ordinary
            molecular formula (e.g., CH3OH for Methanol) of the molecule.
        chem_re_flags : int
            The re (regular expression) flags for comparison of the molecule
            name with the lookuptable keys
        cache : bool
            Use the astroquery cache to store/recover the result

        Returns
        -------
        result : ``vamdclib.request.Result``
            A vamdclib Result object that has a data attribute.  The result
            object has dictionary-like entries but has more functionality
            built on top of that

        Raises
        ------
        ValueError
            If ``molecule_name`` matches no species, or more than one.
        """
        # The cache file name encodes both the name and the regex flags.
        myhash = "{0}_re{1}".format(molecule_name, chem_re_flags)
        myhashpath = os.path.join(self.CACHE_LOCATION, myhash)

        if cache and os.path.exists(myhashpath):
            with open(myhashpath, 'rb') as fh:
                xml = fh.read()
            result = self._vresults.Result(xml=xml)
            result.populate_model()
            return result

        species_id_dict = self.species_lookuptable.find(molecule_name,
                                                        flags=chem_re_flags)
        n_matches = len(species_id_dict)
        if n_matches == 0:
            # Previously reported as "Too many species matched", which was
            # misleading for an empty match set.
            raise ValueError("No species matched: {0}"
                             .format(molecule_name))
        if n_matches > 1:
            raise ValueError("Too many species matched: {0}"
                             .format(species_id_dict))
        species_id = list(species_id_dict.values())[0]

        request = self._vrequest.Request(node=self._cdms)
        query_string = "SELECT ALL WHERE VAMDCSpeciesID='%s'" % species_id
        request.setquery(query_string)
        result = request.dorequest()

        if cache:
            # Make sure the cache directory exists before writing into it;
            # tolerate another process creating it at the same time.
            try:
                os.makedirs(self.CACHE_LOCATION)
            except OSError:
                if not os.path.isdir(self.CACHE_LOCATION):
                    raise
            with open(myhashpath, 'wb') as fh:
                fh.write(result.Xml)

        # example use of specmodel; return to this later...
        # Q = self.specmodel.calculate_partitionfunction(result.data['States'],
        #                                                temperature=tex)[species_id]
        return result


# Build the module-level default instance.  When vamdclib is missing, fall
# back to an instance created without the vamdclib imports so that importing
# this module still succeeds.
try:
    Vamdc = VamdcClass(doimport=True)
except ImportError:
    log.warning("vamdclib could not be imported; the vamdc astroquery module will not work")
    Vamdc = VamdcClass(doimport=False)
49 changes: 49 additions & 0 deletions astroquery/vamdc/load_species_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
from astropy import log
import os
import json
from ..splatalogue.load_species_table import SpeciesLookuptable
from . import Conf

def species_lookuptable(cache=True):
    """
    Get a lookuptable from chemical name + OrdinaryStructuralFormula to VAMDC
    id

    Parameters
    ----------
    cache : bool
        If True, load the table from ``species_lookuptable.json`` in the
        module cache directory when that file exists; otherwise query the
        CDMS node and write the freshly built table to that file.  If False,
        always query the CDMS node and do not write the file.

    Returns
    -------
    lookuptable : SpeciesLookuptable
        Keys are ``"<ChemicalName> <OrdinaryStructuralFormula>"`` strings,
        values are the corresponding ``VAMDCSpeciesID``.
    """
    cache_dir = Conf.cache_location

    # Create the directory unconditionally and only re-raise when it really
    # is missing afterwards; this avoids the check-then-create race.
    try:
        os.makedirs(cache_dir)
    except OSError:
        if not os.path.isdir(cache_dir):
            raise

    lut_path = os.path.join(cache_dir, 'species_lookuptable.json')

    if cache and os.path.exists(lut_path):
        log.info("Loading cached molecular line ID database")
        with open(lut_path, 'r') as fh:
            lutdict = json.load(fh)
        return SpeciesLookuptable(lutdict)

    log.info("Loading molecular line ID database")

    # Imported here so this module can be imported without vamdclib.
    from vamdclib import nodes as vnodes
    from vamdclib import request as vrequest

    nl = vnodes.Nodelist()
    cdms = nl.findnode('cdms')

    request = vrequest.Request(node=cdms)

    # Retrieve all species from CDMS
    result = request.getspecies()
    molecules = result.data['Molecules']

    lutdict = {"{0} {1}".format(mol.ChemicalName,
                                mol.OrdinaryStructuralFormula):
               mol.VAMDCSpeciesID
               for mol in (molecules[key] for key in molecules)}
    lookuptable = SpeciesLookuptable(lutdict)

    if cache:
        # Dump the plain dict: it is exactly what the cached branch reads
        # back and passes to SpeciesLookuptable.
        with open(lut_path, 'w') as fh:
            json.dump(lutdict, fh)

    return lookuptable
Empty file.
15 changes: 15 additions & 0 deletions astroquery/vamdc/tests/setup_package.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
from __future__ import absolute_import

import os


# setup paths to the test data
# can specify a single file or a list of files
def get_package_data():
    """
    Return the package-data mapping for the vamdc test sub-package.

    The single key is the test package name; its value is a list of glob
    patterns (relative to that package) selecting the test data files.
    """
    # add other extensions here; files can also be listed individually
    extensions = ('dat', 'xml')
    patterns = [os.path.join('data', '*.' + ext) for ext in extensions]
    return {'astroquery.vamdc.tests': patterns}
82 changes: 82 additions & 0 deletions astroquery/vamdc/tests/test_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
from __future__ import print_function

# astroquery uses the pytest framework for testing
# this is already available in astropy and does
# not require a separate install. Import it using:
from astropy.tests.helper import pytest

# It would be best if tests are separated in two
# modules. This module performs tests on local data
# by mocking HTTP requests and responses. To test the
# same functions on the remote server, put the relevant
# tests in the 'test_module_remote.py'

# Now import other commonly used modules for tests
import os
import requests

from numpy import testing as npt
from astropy.table import Table
import astropy.coordinates as coord
import astropy.units as u

from ...utils.testing_tools import MockResponse

# finally import the module which is to be tested
# and the various configuration items created
from ... import vamdc
from ...vamdc import conf

# Local tests should have the corresponding data stored
# in the ./data folder. This is the actual HTTP response
# one would expect from the server when a valid query is made.
# It's best to keep the data file small, so that testing is
# quicker. When running tests locally the stored data is used
# to mock the HTTP requests and response part of the query
# thereby saving time and bypassing unreliability for
# an actual remote network query.

# Maps request type -> URL -> name of the local file (in ./data) holding the
# canned response for that request.
DATA_FILES = {
    'GET': {
        # You might have a different response for each URL you're
        # querying:
        'http://dummy_server_mirror_1': 'dummy.dat',
    },
}


# ./setup_package.py helps the test function locate the data file
# define a function that can construct the full path to the file in the
# ./data directory:
def data_path(filename):
    """Return the path of *filename* inside the ``data`` directory that
    sits next to this module."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, 'data', filename)


# define a monkeypatch replacement request function that returns the
# dummy HTTP response for the dummy 'get' function, by
# reading in data from some data file:
def nonremote_request(self, request_type, url, **kwargs):
    """
    Stand-in for the query class's ``_request`` method.

    Looks up the data file registered in ``DATA_FILES`` for this request
    type and URL, reads it as bytes and wraps the content in a
    ``MockResponse``.
    """
    # kwargs are ignored in this case, but they don't have to be
    # (you could use them to define which data file to read)
    filename = DATA_FILES[request_type][url]
    with open(data_path(filename), 'rb') as handle:
        payload = handle.read()
    return MockResponse(content=payload, url=url)


# use a pytest fixture to create a dummy 'requests.get' function,
# that mocks(monkeypatches) the actual 'requests.get' function:
@pytest.fixture
def patch_request(request):
    """
    Replace ``VamdcClass._request`` with ``nonremote_request`` for the
    duration of a test and return the ``monkeypatch`` fixture used to do so.
    """
    # ``getfuncargvalue`` was renamed ``getfixturevalue`` in pytest 3.0 and
    # the old name was later removed; prefer the new name and fall back to
    # the old one so that both old and new pytest versions work.
    get_fixture = getattr(request, 'getfixturevalue', None)
    if get_fixture is None:
        get_fixture = request.getfuncargvalue

    mp = get_fixture("monkeypatch")
    mp.setattr(vamdc.core.VamdcClass, '_request',
               nonremote_request)
    return mp


# finally test the methods using the mock HTTP response
#def test_query_molecule(patch_request):
# ch3oh = vamdc.core.VamdcClass().query_molecule('CH3OH')
# assert 'SCDMS-2369983' in ch3oh.data['States']

# similarly fill in tests for each of the methods
# look at tests in existing modules for more examples
19 changes: 19 additions & 0 deletions astroquery/vamdc/tests/test_module_remote.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
from __future__ import print_function

# performs similar tests as test_module.py, but performs
# the actual HTTP request rather than monkeypatching them.
# should be disabled or enabled at will - use the
# remote_data decorator from astropy:

from astropy.tests.helper import remote_data, pytest

from ... import vamdc


@remote_data
class TestVamdcClass:
    """Tests that perform real queries against the remote service."""

    # now write tests for each method here
    def test_query_molecule(self):
        """A CH3OH query must contain the state 'SCDMS-2369983'."""
        service = vamdc.core.VamdcClass()
        methanol = service.query_molecule('CH3OH')
        states = methanol.data['States']
        assert 'SCDMS-2369983' in states
2 changes: 2 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ The following modules have been completed using a common API:
irsa/irsa_dust.rst
ned/ned.rst
splatalogue/splatalogue.rst
vamdc/vamdc.rst
ibe/ibe.rst
irsa/irsa.rst
ukidss/ukidss.rst
Expand Down Expand Up @@ -252,6 +253,7 @@ above categories.
nist/nist.rst
splatalogue/splatalogue.rst
nasa_ads/nasa_ads.rst
vamdc/vamdc.rst


Developer documentation
Expand Down
44 changes: 44 additions & 0 deletions docs/vamdc/vamdc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
.. doctest-skip-all

.. _astroquery.vamdc:

**********************************
Vamdc Queries (`astroquery.vamdc`)
**********************************

Getting Started
===============

The astroquery vamdc interface requires vamdclib_. The documentation is sparse
to nonexistent, but installation is straightforward::

pip install https://github.com/keflavich/vamdclib/archive/master.zip

This is the personal fork of the astroquery maintainer that includes astropy's
setup helpers on top of the vamdclib infrastructure. If the infrastructure is
`merged <https://github.com/VAMDC/vamdclib/pull/1>`_ into the main vamdclib
library, we'll change these instructions.

Examples
========

If you want to compute the partition function, you can do so using a combination
of astroquery and the vamdclib tools:

.. code-block:: python

>>> from astroquery.vamdc import Vamdc
>>> ch3oh = Vamdc.query_molecule('CH3OH')
>>> from vamdclib import specmodel
>>> partition_func = specmodel.calculate_partitionfunction(ch3oh.data['States'],
temperature=100)
>>> print(partition_func)
{'XCDMS-149': 1185.5304044622881}

Reference/API
=============

.. automodapi:: astroquery.vamdc
:no-inheritance-diagram:

.. _vamdclib: http://vamdclib.readthedocs.io/en/latest/