Skip to content

Enable refinement of lists and dictionary input parameters #190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions news/dict_lists_refinement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
**Added:**

* Functionality for refining lists and dictionaries

**Changed:**

* <news item>

**Deprecated:**

* <news item>

**Removed:**

* <news item>

**Fixed:**

* <news item>

**Security:**

* <news item>
20 changes: 19 additions & 1 deletion src/diffpy/morph/morph_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,17 @@
morph_helpers.TransformXtalRDFtoPDF,
],
qdamp=morphs.MorphResolutionDamping,
squeeze=morphs.MorphSqueeze,
funcy=morphs.MorphFuncy,
)
_default_config = dict(
scale=None, stretch=None, smear=None, baselineslope=None, qdamp=None
scale=None,
stretch=None,
smear=None,
baselineslope=None,
qdamp=None,
squeeze=None,
funcy=None,
)


Expand Down Expand Up @@ -135,6 +143,8 @@ def morph(
- 'smear'
- 'baselineslope'
- 'qdamp'
- 'squeeze'
- 'funcy'

Returns
-------
Expand Down Expand Up @@ -197,6 +207,14 @@ def morph(
if k == "smear":
[chain.append(el()) for el in morph_cls]
refpars.append("baselineslope")
elif k == "funcy":
morph_inst = morph_cls()
morph_inst.function = rv_cfg.get("function", None)
if morph_inst.function is None:
raise ValueError(
"Must provide a 'function' when using 'parameters'"
)
chain.append(morph_inst)
else:
chain.append(morph_cls())
refpars.append(k)
Expand Down
4 changes: 4 additions & 0 deletions src/diffpy/morph/morphs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@

from diffpy.morph.morphs.morph import Morph # noqa: F401
from diffpy.morph.morphs.morphchain import MorphChain # noqa: F401
from diffpy.morph.morphs.morphfuncy import MorphFuncy
from diffpy.morph.morphs.morphishape import MorphISphere, MorphISpheroid
from diffpy.morph.morphs.morphresolution import MorphResolutionDamping
from diffpy.morph.morphs.morphrgrid import MorphRGrid
from diffpy.morph.morphs.morphscale import MorphScale
from diffpy.morph.morphs.morphshape import MorphSphere, MorphSpheroid
from diffpy.morph.morphs.morphshift import MorphShift
from diffpy.morph.morphs.morphsmear import MorphSmear
from diffpy.morph.morphs.morphsqueeze import MorphSqueeze
from diffpy.morph.morphs.morphstretch import MorphStretch

# List of morphs
Expand All @@ -40,6 +42,8 @@
MorphISpheroid,
MorphResolutionDamping,
MorphShift,
MorphSqueeze,
MorphFuncy,
]

# End of file
7 changes: 4 additions & 3 deletions src/diffpy/morph/morphs/morphfuncy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class MorphFuncy(Morph):
yinlabel = LABEL_GR
xoutlabel = LABEL_RA
youtlabel = LABEL_GR
parnames = ["funcy"]

def morph(self, x_morph, y_morph, x_target, y_target):
"""General morph function that applies a user-supplied function to the
Expand Down Expand Up @@ -49,7 +50,7 @@ def morph(self, x_morph, y_morph, x_target, y_target):
and target array (x_target, y_target):
>>> morph = MorphFuncy()
>>> morph.function = sine_function
>>> morph.parameters = parameters
>>> morph.funcy = parameters
>>> x_morph_out, y_morph_out, x_target_out, y_target_out = morph.morph(
... x_morph, y_morph, x_target, y_target)

Expand All @@ -58,11 +59,11 @@ def morph(self, x_morph, y_morph, x_target, y_target):
>>> y_morph_in = morph.y_morph_in
>>> x_target_in = morph.x_target_in
>>> y_target_in = morph.y_target_in
>>> parameters_out = morph.parameters
>>> parameters_out = morph.funcy
"""
Morph.morph(self, x_morph, y_morph, x_target, y_target)

self.y_morph_out = self.function(
self.x_morph_in, self.y_morph_in, **self.parameters
self.x_morph_in, self.y_morph_in, **self.funcy
)
return self.xyallout
11 changes: 6 additions & 5 deletions src/diffpy/morph/morphs/morphsqueeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ def morph(self, x_morph, y_morph, x_target, y_target):

Configuration Variables
-----------------------
squeeze : list
The polynomial coefficients [a0, a1, ..., an] for the squeeze
squeeze : Dictionary
The polynomial coefficients {a0, a1, ..., an} for the squeeze
function where the polynomial would be of the form
a0 + a1*x + a2*x^2 and so on. The order of the polynomial is
determined by the length of the list.
determined by the length of the dictionary.

Returns
-------
Expand All @@ -46,7 +46,7 @@ def morph(self, x_morph, y_morph, x_target, y_target):
Import the squeeze morph function:
>>> from diffpy.morph.morphs.morphsqueeze import MorphSqueeze
Provide initial guess for squeezing coefficients:
>>> squeeze_coeff = [0.1, -0.01, 0.005]
>>> squeeze_coeff = {"a0":0.1, "a1":-0.01, "a2":0.005}
Run the squeeze morph given input morph array (x_morph, y_morph)
and target array (x_target, y_target):
>>> morph = MorphSqueeze()
Expand All @@ -62,7 +62,8 @@ def morph(self, x_morph, y_morph, x_target, y_target):
"""
Morph.morph(self, x_morph, y_morph, x_target, y_target)

squeeze_polynomial = Polynomial(self.squeeze)
coeffs = [self.squeeze[f"a{i}"] for i in range(len(self.squeeze))]
squeeze_polynomial = Polynomial(coeffs)
x_squeezed = self.x_morph_in + squeeze_polynomial(self.x_morph_in)
self.y_morph_out = CubicSpline(x_squeezed, self.y_morph_in)(
self.x_morph_in
Expand Down
33 changes: 29 additions & 4 deletions src/diffpy/morph/refine.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,23 @@ def __init__(self, chain, x_morph, y_morph, x_target, y_target):
self.y_target = y_target
self.pars = []
self.residual = self._residual
self.flat_to_grouped = {}
return

def _update_chain(self, pvals):
"""Update the parameters in the chain."""
pairs = zip(self.pars, pvals)
self.chain.config.update(pairs)
updated = {}
for idx, value in enumerate(pvals):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit hard to follow. I think the code is fine, I just wonder if there is a more elegant and readable way of doing this.

I guess the basic structure "before" was that we had two ordered lists of equal length containing parameter names(?) and values. This structure is a bit brittle and possibly not optimal, but ok, np. I guess the task here then is that we introduce parameters that are not singles but can themselves be lists or dicts. If they are sometimes lists and sometimes dicts, we are shooting ourselves in the foot, because we actually control this ourselves, so we could maybe decide on what the best structure is and then stick to that. If we pick a dict, which seems to be the most logical, then there is no difference between singles, doubles, etc. So I suggest that we think about this, but maybe change the basic type of our "parameter" objects. I would vote for dicts.

to be clear, I am ok parsing them out into zipped lists for passing to other parts of the code if that is what they are expecting, but just storing them in the Morph object as a dict seems to me to make the most sense.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understood your comment correctly, you're proposing to standardize all morph parameters to use dictionaries — is that across the board, or only for lists (e.g., squeeze) while keeping scalar values as-is? I agree that moving to dictionaries would improve consistency.
The only downside I see is that users would need to explicitly define each coefficient in the squeeze parameter like:
squeeze = {"a0": 0.1, "a1": -0.01, "a2": 0.005}
We'd then need to convert this dict into a list internally before passing it to Polynomial(squeeze), which expects a list of coefficients. My point is that we can make the refinement code more elegant by having all inputs as dicts, but then we will probably have to modify squeeze, which will make it less elegant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I agree with you: from the user perspective it is better to be consistent, so it will be better to have all dicts.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to have squeeze as a list I can do the corresponding modifications to the code and the tests. The refine.py will be simplified to

def _update_chain(self, pvals):
    """Update the parameters in the chain, supporting only scalars and dictionaries."""
    updated = {}
    for idx, value in enumerate(pvals):
        param, subkey = self.flat_to_grouped[idx]
        if subkey is None:
            # Scalar parameter
            updated[param] = value
        else:
            # Dictionary
            if param not in updated:
                updated[param] = {}
            updated[param][subkey] = value

    self.chain.config.update(updated)
    return

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

internals of the code and user experience are two different things. We can wrap anything. The basic conversation is "what is the best type for the basic parameter data object?" We would often define a container class to carry these things around, something like class MorphParameters but since it is basically a set we could just use a dictionary. There would be no "scalar" parameter. A scalar parameter would be a dictionary with one key.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good! So far, I’ve modified MorphSqueeze so that its parameter squeeze is now represented as a dictionary—for example: squeeze = {"a0":0, "a1":0.01, "a2":0.0001, "a3":0.001}
If we want to generalize this approach, I could go ahead and update all the morphs, tests and other functions, so that all parameters that are scalars are stored as dictionaries—for instance: scale = {"scale": 0.1}
With this change the refine code will be reduced to:

def _update_chain(self, pvals):
    """Update the parameters in the chain, assuming all parameters are dictionaries."""
    updated = {}
    for idx, value in enumerate(pvals):
        param, subkey = self.flat_to_grouped[idx]
        updated.setdefault(param, {})[subkey] = value
    self.chain.config.update(updated)
    return

However, I’m also wondering if it's worth preserving scalars as-is for simplicity and only using dictionaries where multiple sub-parameters are required. That would avoid the somewhat inelegant repetition of having to write scale = {"scale": 0.1}.
Happy to implement either approach. We can also discuss this tomorrow and choose what direction we want to take.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if we keep scalars for now, supporting both scalars and dictionaries, and create an issue for the near future? A lot of the code that was already written for the other morphs and chains will need to be modified, so I am not sure how quick of a fix this is.

param, subkey = self.flat_to_grouped[idx]
if subkey is None: # Scalar
updated[param] = value
else:
if param not in updated:
updated[param] = {}
updated[param][subkey] = value

# Apply the reconstructed grouped parameter back to config
self.chain.config.update(updated)
return

def _residual(self, pvals):
Expand Down Expand Up @@ -118,11 +129,25 @@ def refine(self, *args, **kw):
if not self.pars:
return 0.0

initial = [config[p] for p in self.pars]
# Build flat list of initial parameters and flat_to_grouped mapping
initial = []
self.flat_to_grouped = {}

for p in self.pars:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my suggested refactor probably gets rid of this too....so there is a lot of technical debt we remove with this refactor if we decide to do it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we just had scalars and dicts this would be simplified to:

for p in self.pars:
    val = config[p]
    if isinstance(val, dict):
        for k, v in val.items():
            initial.append(v)
            self.flat_to_grouped[len(initial) - 1] = (p, k)
    else:
        initial.append(val)
        self.flat_to_grouped[len(initial) - 1] = (p, None)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no scalars, just dicts.

val = config[p]
if isinstance(val, dict):
for k, v in val.items():
initial.append(v)
self.flat_to_grouped[len(initial) - 1] = (p, k)
else:
initial.append(val)
self.flat_to_grouped[len(initial) - 1] = (p, None)

sol, cov_sol, infodict, emesg, ier = leastsq(
self.residual, initial, full_output=1
)
fvec = infodict["fvec"]

if ier not in (1, 2, 3, 4):
emesg
raise ValueError(emesg)
Expand All @@ -131,7 +156,7 @@ def refine(self, *args, **kw):
vals = sol
if not hasattr(vals, "__iter__"):
vals = [vals]
self.chain.config.update(zip(self.pars, vals))
self._update_chain(vals)

return dot(fvec, fvec)

Expand Down
55 changes: 55 additions & 0 deletions tests/test_morph_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,58 @@ def test_smear_with_morph_func():
assert np.allclose(y0, y1, atol=1e-3) # numerical error -> 1e-4
# verify morphed param
assert np.allclose(smear, morphed_cfg["smear"], atol=1e-1)


def test_squeeze_with_morph_func():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests look awesome!

squeeze_init = {"a0": 0, "a1": -0.001, "a2": -0.0001, "a3": 0.0001}
x_morph = np.linspace(0, 10, 101)
y_morph = 2 * np.sin(
x_morph + x_morph * 0.01 + 0.0001 * x_morph**2 + 0.001 * x_morph**3
)
expected_squeeze = {"a0": 0, "a1": 0.01, "a2": 0.0001, "a3": 0.001}
expected_scale = 1 / 2
x_target = np.linspace(0, 10, 101)
y_target = np.sin(x_target)
cfg = morph_default_config(scale=1.1, squeeze=squeeze_init)
morph_rv = morph(x_morph, y_morph, x_target, y_target, **cfg)
morphed_cfg = morph_rv["morphed_config"]
x_morph_out, y_morph_out, x_target_out, y_target_out = morph_rv[
"morph_chain"
].xyallout
assert np.allclose(x_morph_out, x_target_out)
assert np.allclose(y_morph_out, y_target_out, atol=1e-6)
assert np.allclose(
expected_squeeze["a0"], morphed_cfg["squeeze"]["a0"], atol=1e-6
)
assert np.allclose(
expected_squeeze["a1"], morphed_cfg["squeeze"]["a1"], atol=1e-6
)
assert np.allclose(
expected_squeeze["a2"], morphed_cfg["squeeze"]["a2"], atol=1e-6
)
assert np.allclose(
expected_squeeze["a3"], morphed_cfg["squeeze"]["a3"], atol=1e-6
)
assert np.allclose(expected_scale, morphed_cfg["scale"], atol=1e-6)


def test_funcy_with_morph_func():
    """Check that ``morph`` refines the dictionary-valued ``funcy`` input.

    The target is built as ``sin(x) * 2 * x + 0.4``, which is what
    ``linear_function`` yields for ``scale=2`` and ``offset=0.4``. The
    refinement starts from ``scale=1.2`` and ``offset=0.1`` and is expected
    to reach those values within ``atol=1e-6``.
    """

    def linear_function(x, y, scale, offset):
        # scale/offset are passed by keyword from the ``funcy`` dictionary.
        scaled = (scale * x) * y
        return scaled + offset

    # Morph and target share the same grid; only the y data differ.
    x_morph = np.linspace(0, 10, 101)
    x_target = x_morph.copy()
    y_morph = np.sin(x_morph)
    y_target = np.sin(x_target) * 2 * x_target + 0.4

    # Initial guesses go in through ``funcy``; the callable is supplied
    # under the separate "function" key.
    cfg = morph_default_config(funcy={"scale": 1.2, "offset": 0.1})
    cfg["function"] = linear_function

    morph_rv = morph(x_morph, y_morph, x_target, y_target, **cfg)
    morphed_cfg = morph_rv["morphed_config"]
    chain_output = morph_rv["morph_chain"].xyallout
    x_morph_out, y_morph_out, x_target_out, y_target_out = chain_output

    # The morphed data must land on the target.
    assert np.allclose(x_morph_out, x_target_out)
    assert np.allclose(y_morph_out, y_target_out, atol=1e-6)

    # The refined parameters must match the ones used to build the target.
    fitted_parameters = morphed_cfg["funcy"]
    for key, expected in (("scale", 2), ("offset", 0.4)):
        assert np.allclose(fitted_parameters[key], expected, atol=1e-6)
2 changes: 1 addition & 1 deletion tests/test_morphfuncy.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_funcy(function, parameters, expected_function):
y_morph_expected = expected_function(x_morph, y_morph)
morph = MorphFuncy()
morph.function = function
morph.parameters = parameters
morph.funcy = parameters
x_morph_actual, y_morph_actual, x_target_actual, y_target_actual = (
morph.morph(x_morph, y_morph, x_target, y_target)
)
Expand Down
25 changes: 13 additions & 12 deletions tests/test_morphsqueeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,24 @@

from diffpy.morph.morphs.morphsqueeze import MorphSqueeze

squeeze_coeffs_list = [
# The order of coefficients is [a0, a1, a2, ..., an]
squeeze_coeffs_dic = [
# The order of coefficients is {a0, a1, a2, ..., an}
# Negative cubic squeeze coefficients
[-0.01, -0.0005, -0.0005, -1e-6],
{"a0": -0.01, "a1": -0.0005, "a2": -0.0005, "a3": -1e-6},
# Positive cubic squeeze coefficients
[0.2, 0.01, 0.001, 0.0001],
{"a0": 0.2, "a1": 0.01, "a2": 0.001, "a3": 0.0001},
# Positive and negative cubic squeeze coefficients
[0.2, -0.01, 0.002, -0.0001],
{"a0": 0.2, "a1": -0.01, "a2": 0.002, "a3": -0.0001},
# Quadratic squeeze coefficients
[-0.2, 0.005, -0.0004],
{"a0": -0.2, "a1": 0.005, "a2": -0.0004},
# Linear squeeze coefficients
[0.1, 0.3],
{"a0": 0.1, "a1": 0.3},
# 4th order squeeze coefficients
[0.2, -0.01, 0.001, -0.001, 0.0001],
{"a0": 0.2, "a1": -0.01, "a2": 0.001, "a3": -0.001, "a4": 0.0001},
# Zeros and non-zeros, the full polynomial is applied
[0, 0.03, 0, -0.0001],
{"a0": 0, "a1": 0.03, "a2": 0, "a3": -0.0001},
# Testing zeros, expect no squeezing
[0, 0, 0, 0, 0, 0],
{"a0": 0, "a1": 0, "a2": 0, "a3": 0, "a4": 0, "a5": 0},
]
morph_target_grids = [
# UCs from issue 181: https://github.com/diffpy/diffpy.morph/issues/181
Expand All @@ -41,10 +41,11 @@


@pytest.mark.parametrize("x_morph, x_target", morph_target_grids)
@pytest.mark.parametrize("squeeze_coeffs", squeeze_coeffs_list)
@pytest.mark.parametrize("squeeze_coeffs", squeeze_coeffs_dic)
def test_morphsqueeze(x_morph, x_target, squeeze_coeffs):
y_target = np.sin(x_target)
squeeze_polynomial = Polynomial(squeeze_coeffs)
coeffs = [squeeze_coeffs[f"a{i}"] for i in range(len(squeeze_coeffs))]
squeeze_polynomial = Polynomial(coeffs)
x_squeezed = x_morph + squeeze_polynomial(x_morph)
y_morph = np.sin(x_squeezed)
low_extrap = np.where(x_morph < x_squeezed[0])[0]
Expand Down
Loading