Skip to content

Commit

Permalink
add docstrings into the formula module, change all the parse_state St…
Browse files Browse the repository at this point in the history
…ate methods called by __init__ into _parse_state and the same for Formula, Reaction and StatefulSpecies. Only methods expected to be called on instantiated objects are now public.
  • Loading branch information
hanicinecm committed Jan 27, 2022
1 parent 38cda1f commit 6b97bec
Show file tree
Hide file tree
Showing 14 changed files with 236 additions and 130 deletions.
171 changes: 131 additions & 40 deletions src/pyvalem/formula.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,31 @@
"""
Defines the Formula class, representing a chemical formula (of an atom,
This module defines the `Formula` class, representing a chemical formula (of an atom,
isotope, ion, molecule, molecular-ion, isotopologue, etc.) and associated
exceptions.
Quantum states are not defined in the `Formula` framework; for chemical species
with quantum states, see the `StatefulSpecies` class.
Examples
--------
>>> from pyvalem.formula import Formula
>>> Formula("H2O+").charge
1
>>> dict(Formula("CH4").atom_stoich.items())
{'C': 1, 'H': 4}
"""

import re
from collections import defaultdict

import pyparsing as pp

from .atom_data import element_symbols, atoms, isotopes
from ._special_cases import special_cases
from .atom_data import element_symbols, atoms, isotopes

element = pp.oneOf(element_symbols)
# TODO don't allow leading 0
# TODO: don't allow leading 0
integer = pp.Word(pp.nums)
integer_or_x = integer | pp.Literal("x")
plusminus = pp.Literal("+") | pp.Literal("-")
Expand Down Expand Up @@ -152,15 +164,74 @@ class FormulaParseError(FormulaError):


class Formula:
"""
A class representing a chemical formula, with methods for parsing and
transforming its appearance as text or HTML.
"""A class representing a chemical formula without states.
Methods are implemented for parsing the `Formula` from a compatible string and for
providing *html* and *LaTeX* representations, atomic stoichiometry, species mass,
number of atoms, and a url-safe slug.
Parameters
----------
formula : str
PyValem-compatible string. No ``"_"`` or ``"^"`` symbols to indicate subscripts
and superscripts. Brackets are allowed, as well as several common prefixes.
Charges are provided by ``"+"/"-"``, or ``"+n"/"-n"``, where `n` is the charge magnitude.
Attributes
----------
formula : str
The formula string as passed to the constructor.
atoms : set of `Atom`
atom_stoich : dict[str, int]
charge : int
Charge in [e].
natoms : int
Number of atoms of the `Formula`.
rmm, mass : float
Both are the mass in [amu] (one is alias for the other).
html, latex, slug : str
Different string representations of the `Formula`. The ``slug`` is a url-safe
representation.
Raises
------
FormulaParseError
When an incompatible `formula` string is passed into the constructor
Notes
-----
The ``__repr__`` method is overloaded to provide a *canonicalised* representation
of the formula. The idea is that two formulas representing the same physical entity
will have the same ``repr(formula)`` representation.
Examples
--------
>>> Formula("H2+") # a simple chemical formula instantiation
H2+
>>> Formula("(1H)2(16O)") # isotopologue instantiation
(1H)2(16O)
>>> Formula("ortho-C6H4(CH3)2"), Formula("W+42") # more complicated situations
(ortho-C6H4(CH3)2, W+42)
>>> # Attributes available:
>>> dict(Formula("CH4").atom_stoich)
{'C': 1, 'H': 4}
>>> Formula("(2H)(3H)+").html
'<sup>2</sup>H<sup>3</sup>H<sup>+</sup>'
>>> Formula("Ar").mass
39.95
>>> # Incompatible formula string
>>> Formula("Argon")
Traceback (most recent call last):
...
pyvalem.formula.FormulaParseError: Invalid formula syntax: Argon
"""

def __init__(self, formula):
"""
Initialize the Formula object by parsing the string argument
formula.
"""Initialize the Formula object by parsing the string argument formula.
"""
self.formula = formula
self.atoms = set()
Expand All @@ -172,12 +243,11 @@ def __init__(self, formula):
self.latex = ""
self.slug = ""
self.mass = 0.0
self.parse_formula(formula)
self._parse_formula(formula)

@staticmethod
def _make_prefix_html(prefix_list):
"""
Make the prefix HTML: D- and L- prefixes get written in small caps
"""Make the prefix HTML: D- and L- prefixes get written in small caps
"""
prefix = "-".join(prefix_list)
prefix = prefix.replace("D", '<span style="font-size: 80%;">D</span>')
Expand All @@ -193,8 +263,9 @@ def _make_prefix_latex(prefix_list):

@staticmethod
def _make_prefix_slug(prefix_list):
"""
Make the prefix slug: commas are replaced by underscores and non-ASCII
"""Make the prefix slug
Commas are replaced by underscores and non-ASCII
characters swapped out according to the entries in the prefix_tokens
dictionary. For example,
1,1,3- -> 1_1_3-
Expand All @@ -219,21 +290,32 @@ def _make_prefix_slug(prefix_list):
slug_prefix = "_".join(slug_prefix_tokens)
return "{}__".format(slug_prefix)

def parse_formula(self, formula):
"""Parse the string formula into a Formula object."""
def _parse_formula(self, formula):
"""Parse the formula string into a Formula object.
The main method of the class, populates all the instance attributes.
Parameters
----------
formula : str
See the class docstring.
Raises
------
FormulaParseError
"""
if formula == "D-":
# This is a not-ideal way to deal with the fact that D- breaks
# the parser due to a clash with the D- prefix.
self.parse_formula("D-1")
self._parse_formula("D-1")
return

if any(s in formula for s in ("++", "--", "+-", "-+")):
raise FormulaParseError("Invalid formula syntax: {}".format(formula))

# We make a particular exception for various special cases, including
# photons, electrons, positrons and "M", denoting an unspecified
# "third-body" in many reactions. Note that M does not have a defined
# "third-body" in many reactions. Note that M does not have a defined
# charge or mass.
if formula in special_cases:
for attr, val in special_cases[formula].items():
Expand Down Expand Up @@ -290,7 +372,7 @@ def parse_formula(self, formula):
atom_symbol = isotope.parseString("(3H)")[0]
if isinstance(atom_symbol, pp.ParseResults):
# we got an isotope in the form '(zSy)' with z the mass
# number so symbol is the ParseResults ['z', 'Sy']:
# number so symbol is the ParseResults ['z', 'Sy']:
mass_number, atom_symbol = (int(atom_symbol[0]), atom_symbol[1])
symbol_html = "<sup>%d</sup>%s" % (mass_number, atom_symbol)
symbol_latex = r"^{{{0:d}}}\mathrm{{{1:s}}}".format(
Expand Down Expand Up @@ -428,8 +510,7 @@ def __eq__(self, other):
return repr(self.formula) == repr(other.formula)

def _stoichiometric_formula_atomic_number(self):
"""
Return a list of atoms/isotopes and their stoichiometries.
"""Return a list of atoms/isotopes and their stoichiometries.
The returned list is sorted in order of increasing atomic number.
"""
Expand All @@ -446,8 +527,7 @@ def _stoichiometric_formula_atomic_number(self):
return atom_strs

def _stoichiometric_formula_alphabetical(self):
"""
Return a list of atoms/isotopes and their stoichiometries.
"""Return a list of atoms/isotopes and their stoichiometries.
The returned list is sorted in alphabetical order.
"""
Expand All @@ -456,8 +536,7 @@ def _stoichiometric_formula_alphabetical(self):
return atom_strs

def _stoichiometric_formula_hill(self):
"""
Return a list of atoms/isotopes and their stoichiometries.
"""Return a list of atoms/isotopes and their stoichiometries.
The returned list is sorted in "Hill notation": first carbon, then
hydrogen, then all other chemical elements in alphabetical order.
Expand Down Expand Up @@ -489,11 +568,30 @@ def _stoichiometric_formula_hill(self):
return c_h_strs + atom_strs

def stoichiometric_formula(self, fmt="atomic number"):
"""
Return a string representation of the stoichiometric formula
in one of the formats specified by the fmt argument:
"""Return a string representation of the stoichiometric formula.
The formula is given in one of the formats specified by the fmt argument:
"atomic number": order atoms by increasing atomic number
"alphabetical" : order atoms in alphabetical order of atomic symbol
"hill": first C, then H, then all other chemical elements in alphabetical order,
if C not present, all alphabetical.
Parameters
----------
fmt : {"atomic number", "alphabetical", "hill"}
Returns
-------
str
Stoichiometric formula (with charges)
Examples
--------
>>> Formula("L-CH3CH(NH2)CO2H").stoichiometric_formula("atomic number")
'H7C3NO2'
>>> Formula("L-CH3CH(NH2)CO2H+").stoichiometric_formula("alphabetical")
'C3H7NO2+'
"""
# Special cases
if self.formula in {"M", "e-", "e+"}:
Expand All @@ -503,29 +601,24 @@ def stoichiometric_formula(self, fmt="atomic number"):
return "hν"

fmt = fmt.lower()
if fmt not in ("atomic number", "alphabetical", "hill"):
raise FormulaError(
"Unsupported format for stoichiometric"
" formula output: {}".format(fmt)
)

if fmt == "atomic number":
atom_strs = self._stoichiometric_formula_atomic_number()
elif fmt == "alphabetical":
atom_strs = self._stoichiometric_formula_alphabetical()
elif fmt == "hill":
atom_strs = self._stoichiometric_formula_hill()
else:
raise ValueError("Unknown fmt value!")
raise FormulaError(
"Unsupported format for stoichiometric formula output: {}".format(fmt)
)

# finally, add on the charge string, e.g. '', '-', '+2', ...
atom_strs.append(self._get_charge_string())
return "".join(atom_strs)

@staticmethod
def _get_symbol_stoich(symbol, stoich):
"""
Return Xn for element symbol X and stoichiometry n, unless n is 1,
"""Return Xn for element symbol X and stoichiometry n, unless n is 1,
in which case, just return X.
"""
if stoich is None:
Expand All @@ -535,9 +628,7 @@ def _get_symbol_stoich(symbol, stoich):
return symbol

def _get_charge_string(self):
"""
Return the string representation of the charge: '+', '-', '+2', '-3',
etc.
"""Return the string representation of the charge: '+', '-', '+2', '-3', etc.
"""
if not self.charge:
return ""
Expand Down
8 changes: 5 additions & 3 deletions src/pyvalem/stateful_species.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
States are separated from each other by semicolons (;).
"""
from pyvalem.states.atomic_configuration import AtomicConfiguration
from pyvalem.states.diatomic_molecular_configuration import DiatomicMolecularConfiguration
from pyvalem.states.diatomic_molecular_configuration import (
DiatomicMolecularConfiguration,
)
from .formula import Formula
from pyvalem.states.key_value_pair import KeyValuePair
from pyvalem.states._state_parser import state_parser, STATES
Expand All @@ -29,7 +31,7 @@ def __init__(self, s):
self.formula = Formula(s[:i])
self.states = state_parser(s[i + 1 :].split(";"))

self.verify_states()
self._verify_states()

def __repr__(self):
"""Return a canonical text representation of the StatefulSpecies."""
Expand All @@ -53,7 +55,7 @@ def __eq__(self, other):
def __hash__(self):
return hash(repr(self))

def verify_states(self):
def _verify_states(self):
"""Check that multiple states are not given where this is not allowed.
For example, a StatefulSpecies may not have two atomic configurations.
Expand Down
Loading

0 comments on commit 6b97bec

Please sign in to comment.