import inspect
import os
from datetime import datetime
from os.path import basename, normpath, splitext
import chemcoord as cc
import numpy as np
import sympy
from chemcoord.xyz_functions import to_molden
from scipy.optimize import minimize
from sympy import latex
from tabulate import tabulate
from chemopt import __version__, export
from chemopt.configuration import (conf_defaults, fixed_defaults,
substitute_docstr)
from chemopt.exception import ConvergenceFinished
from chemopt.interface.generic import calculate
@export
@substitute_docstr
[docs]def optimise(zmolecule, hamiltonian, basis,
symbols=None,
md_out=None, el_calc_input=None, molden_out=None,
etol=fixed_defaults['etol'],
gtol=fixed_defaults['gtol'],
max_iter=fixed_defaults['max_iter'],
backend=conf_defaults['backend'],
charge=fixed_defaults['charge'],
title=fixed_defaults['title'],
multiplicity=fixed_defaults['multiplicity'],
num_procs=None, mem_per_proc=None,
coord_fmt=fixed_defaults['coord_fmt'],
**kwargs):
"""Optimize a molecule.
Args:
zmolecule (chemcoord.Zmat):
hamiltonian (str): {hamiltonian}
basis (str): {basis}
symbols (list): A list of tuples. Each tuple consists of a
sympy symbolic expression and a starting value.
An example is: ``[(r, 1.1), (alpha, 120)]``.
Has exactly the same format as the multi-parameter substitution
in sympy.
el_calc_input (str): Specify the input filename for electronic
calculations. If it is None, the filename of the calling
python script is used (With the suffix ``.inp`` instead of ``.py``)
and the files for the electronic calculations will reside in their
own directory.
md_out (str): {md_out}
molden_out (str): {molden_out}
backend (str): {backend}
charge (int): {charge}
title (str): {title}
multiplicity (int): {multiplicity}
etol (float): {etol}
gtol (float): {gtol}
max_iter (int): {max_iter}
num_procs (int): {num_procs}
mem_per_proc (str): {mem_per_proc}
coord_fmt (str): {coord_fmt}
Returns:
list: A list of dictionaries. The last one is the optimised structure.
The keys of each dictionary depend on the used optimisation.
In any case each dictionary has at least two keys:
* 'energy': The energy in Hartree.
* 'structure': The Zmatrix.
If ``symbols`` was ``None`` a generic optimisation was performed and
the following keys are available:
* 'grad_energy': The energy gradient ('grad_energy')
in internal coordinates.
The units are Hartree / Angstrom for bonds and
Hartree / radians for angles and dihedrals.
If ``symbols`` was not ``None`` an optimisation with
reduced degrees of freedom was performed and
the following keys are available:
* 'symbols': A list of tuples containing the symbol and its value.
"""
files = _get_default_filepaths(md_out, molden_out, el_calc_input)
for filepath in files:
rename_existing(filepath)
md_out, molden_out, el_calc_input = files
if num_procs is None:
num_procs = conf_defaults['num_procs']
if mem_per_proc is None:
mem_per_proc = conf_defaults['mem_per_proc']
t1 = datetime.now()
if symbols is None:
header = _get_generic_optimise_header(
zmolecule=zmolecule, backend=backend, hamiltonian=hamiltonian,
basis=basis, charge=charge, multiplicity=multiplicity,
num_procs=num_procs, mem_per_proc=mem_per_proc,
start_time=t1, title=title,
etol=etol, gtol=gtol, max_iter=max_iter, coord_fmt=coord_fmt)
with open(md_out, 'w') as f:
f.write(header)
calculated, convergence = _zmat_generic_optimise(
zmolecule=zmolecule, el_calc_input=el_calc_input,
md_out=md_out, backend=backend, hamiltonian=hamiltonian,
basis=basis, charge=charge, title=title, multiplicity=multiplicity,
etol=etol, gtol=gtol, max_iter=max_iter,
num_procs=num_procs, mem_per_proc=mem_per_proc, **kwargs)
else:
header = _get_symb_optimise_header(
zmolecule=zmolecule, symbols=symbols, backend=backend,
hamiltonian=hamiltonian, basis=basis, charge=charge,
multiplicity=multiplicity, num_procs=num_procs,
mem_per_proc=mem_per_proc, start_time=t1, title=title,
etol=etol, gtol=gtol, max_iter=max_iter, coord_fmt=coord_fmt)
with open(md_out, 'w') as f:
f.write(header)
calculated, convergence = _zmat_symb_optimise(
zmolecule=zmolecule, symbols=symbols, el_calc_input=el_calc_input,
md_out=md_out, backend=backend, hamiltonian=hamiltonian,
basis=basis, charge=charge, title=title, multiplicity=multiplicity,
etol=etol, gtol=gtol, max_iter=max_iter,
num_procs=num_procs, mem_per_proc=mem_per_proc, **kwargs)
to_molden(
[x['structure'].get_cartesian() for x in calculated], buf=molden_out)
t2 = datetime.now()
if symbols is None:
with open(md_out, 'a') as f:
footer = _get_generic_footer(
opt_zmat=calculated[-1]['structure'], start_time=t1,
end_time=t2, molden_out=molden_out,
successful=convergence.successful, n_iter=len(calculated),
coord_fmt=coord_fmt)
f.write(footer)
else:
with open(md_out, 'a') as f:
footer = _get_symbolic_footer(
symbols=calculated[-1]['symbols'],
opt_zmat=calculated[-1]['structure'], start_time=t1,
end_time=t2, molden_out=molden_out,
successful=convergence.successful, n_iter=len(calculated),
coord_fmt=coord_fmt)
f.write(footer)
return calculated
def _zmat_generic_optimise(
zmolecule, el_calc_input, md_out, backend, hamiltonian, basis, charge,
title, multiplicity, etol, gtol, max_iter,
num_procs, mem_per_proc, **kwargs):
def _get_C_rad(zmolecule):
C_rad = zmolecule.loc[:, ['bond', 'angle', 'dihedral']].values.T
C_rad[[1, 2], :] = np.radians(C_rad[[1, 2], :])
return C_rad.flatten(order='F')
V = _get_generic_opt_V(
zmolecule=zmolecule, el_calc_input=el_calc_input,
md_out=md_out, backend=backend, hamiltonian=hamiltonian,
basis=basis, charge=charge, title=title, multiplicity=multiplicity,
etol=etol, gtol=gtol, max_iter=max_iter,
num_procs=num_procs, mem_per_proc=mem_per_proc, **kwargs)
try:
opt = minimize(V, x0=_get_C_rad(zmolecule), jac=True, method='BFGS',
options={'gtol': 1e-10})
except ConvergenceFinished as e:
convergence = e
else:
if opt.success:
convergence = ConvergenceFinished(successful=True)
else:
convergence = ConvergenceFinished(successful=False)
calculated = V(get_calculated=True)
return calculated, convergence
def _zmat_symb_optimise(
zmolecule, symbols, el_calc_input, md_out, backend,
hamiltonian, basis, charge,
title, multiplicity, etol, gtol, max_iter,
num_procs, mem_per_proc, **kwargs):
V = _get_symbolic_opt_V(
zmolecule=zmolecule, symbols=symbols, el_calc_input=el_calc_input,
md_out=md_out, backend=backend, hamiltonian=hamiltonian,
basis=basis, charge=charge, title=title, multiplicity=multiplicity,
etol=etol, gtol=gtol, max_iter=max_iter,
num_procs=num_procs, mem_per_proc=mem_per_proc, **kwargs)
try:
opt = minimize(V, x0=np.array([v for s, v in symbols], dtype='f8'),
jac=True, method='BFGS', options={'gtol': 1e-10})
except ConvergenceFinished as e:
convergence = e
else:
if opt.success:
convergence = ConvergenceFinished(successful=True)
else:
convergence = ConvergenceFinished(successful=False)
calculated = V(get_calculated=True)
return calculated, convergence
def _get_generic_opt_V(
zmolecule, el_calc_input, md_out, backend,
hamiltonian, basis, charge, title, multiplicity,
etol, gtol, max_iter, num_procs, mem_per_proc, **kwargs):
def get_new_zmat(C_rad, previous_zmat):
C_deg = C_rad.copy().reshape((3, len(C_rad) // 3), order='F').T
C_deg[:, [1, 2]] = np.rad2deg(C_deg[:, [1, 2]])
new_zm = previous_zmat.copy()
new_zm.safe_loc[zmolecule.index, ['bond', 'angle', 'dihedral']] = C_deg
return new_zm
def V(C_rad=None, get_calculated=False,
calculated=[]): # pylint:disable=dangerous-default-value
if get_calculated:
return calculated
elif C_rad is not None:
try:
previous_zmat = calculated[-1]['structure'].copy()
except IndexError:
new_zmat = zmolecule.copy()
else:
new_zmat = get_new_zmat(C_rad, previous_zmat)
result = calculate(
molecule=new_zmat, forces=True, el_calc_input=el_calc_input,
backend=backend, hamiltonian=hamiltonian, basis=basis,
charge=charge, title=title,
multiplicity=multiplicity,
num_procs=num_procs, mem_per_proc=mem_per_proc, **kwargs)
energy, grad_energy_X = result['energy'], result['gradient']
grad_energy_C = _get_grad_energy_C(new_zmat, grad_energy_X)
new_zmat.metadata['energy'] = energy
new_zmat.metadata['grad_energy'] = grad_energy_C
calculated.append({'energy': energy, 'grad_energy': grad_energy_C,
'structure': new_zmat})
with open(md_out, 'a') as f:
f.write(_get_table_row(calculated, grad_energy_X))
if is_converged(calculated, grad_energy_X, etol=etol, gtol=gtol):
raise ConvergenceFinished(successful=True)
elif len(calculated) >= max_iter:
raise ConvergenceFinished(successful=False)
return energy, grad_energy_C.flatten()
else:
raise ValueError
return V
def _get_symbolic_opt_V(
zmolecule, symbols, el_calc_input, md_out, backend,
hamiltonian, basis, charge, title, multiplicity,
etol, gtol, max_iter, num_procs, mem_per_proc, **kwargs):
# Because substitution has a sideeffect on self
zmolecule = zmolecule.copy()
value_cols = ['bond', 'angle', 'dihedral']
symbolic_expressions = [s for s, v in symbols]
def V(values=None, get_calculated=False,
calculated=[]): # pylint:disable=dangerous-default-value
if get_calculated:
return calculated
elif values is not None:
substitutions = list(zip(symbolic_expressions, values))
new_zmat = zmolecule.subs(substitutions)
result = calculate(
molecule=new_zmat, forces=True, el_calc_input=el_calc_input,
backend=backend, hamiltonian=hamiltonian, basis=basis,
charge=charge, title=title,
multiplicity=multiplicity,
num_procs=num_procs, mem_per_proc=mem_per_proc, **kwargs)
energy, grad_energy_X = result['energy'], result['gradient']
grad_energy_C = _get_grad_energy_C(new_zmat, grad_energy_X)
zm_values_rad = zmolecule.loc[:, value_cols].values
zm_values_rad[:, [1, 2]] = sympy.rad(zm_values_rad[:, [1, 2]])
energy_symb = np.sum(zm_values_rad * grad_energy_C)
grad_energy_symb = sympy.Matrix([
energy_symb.diff(arg) for arg in symbolic_expressions])
grad_energy_symb = np.array(grad_energy_symb.subs(substitutions))
grad_energy_symb = grad_energy_symb.astype('f8').flatten()
new_zmat.metadata['energy'] = energy
new_zmat.metadata['symbols'] = substitutions
calculated.append({'energy': energy, 'structure': new_zmat,
'symbols': substitutions})
with open(md_out, 'a') as f:
f.write(_get_table_row(calculated, grad_energy_symb))
if is_converged(calculated, etol=etol):
raise ConvergenceFinished(successful=True)
elif len(calculated) >= max_iter:
raise ConvergenceFinished(successful=False)
return energy, grad_energy_symb
else:
raise ValueError
return V
def _get_grad_energy_C(zmat, grad_energy_X):
grad_X = zmat.get_grad_cartesian(as_function=False, drop_auto_dummies=True)
grad_V_C = np.sum(grad_energy_X.T[:, :, None, None] * grad_X, axis=(0, 1))
for i in range(min(3, grad_V_C.shape[0])):
grad_V_C[i, i:] = 0.
return grad_V_C
def _get_generic_optimise_header(
zmolecule, backend, hamiltonian, basis, charge, title, multiplicity,
etol, gtol, max_iter, start_time, num_procs, mem_per_proc, coord_fmt):
get_header = """\
# This is ChemOpt {version} optimising a molecule in internal coordinates.
Written by Oskar Weser (oskar.weser@gmail.com)
## Input File
```python
{input_file}
```
## Settings for the calculations
{calculation_setup}
## Starting structures
### Starting structure as Zmatrix
{zmat}
### Starting structure in cartesian coordinates
{cartesian}
## Iterations
Starting {start_time}
{table_header}
""".format
settings_table = _get_settings_table(
backend=backend, hamiltonian=hamiltonian, charge=charge,
multiplicity=multiplicity, basis=basis,
etol=etol, gtol=gtol, max_iter=max_iter,
num_procs=num_procs, mem_per_proc=mem_per_proc,)
input_filepath = basename(inspect.stack()[-1][1])
# Yes I know: One should not use readlines.
# BUT: The Input file won't be large!
with open(input_filepath, 'r') as f:
input_file = ''.join(f.readlines())
header = get_header(
version=__version__,
input_file=input_file,
zmat=_get_geometry_markdown(zmolecule, coord_fmt=coord_fmt),
cartesian=_get_geometry_markdown(
zmolecule.get_cartesian(), coord_fmt=coord_fmt),
calculation_setup=settings_table,
start_time=_get_time_isostr(start_time),
table_header=_get_table_header_generic_opt())
return header
def _get_symb_optimise_header(
zmolecule, symbols, backend, hamiltonian, basis, charge, title,
multiplicity, etol, gtol, max_iter, start_time, num_procs,
mem_per_proc, coord_fmt):
get_header = """\
# This is ChemOpt {version} optimising a molecule in internal coordinates.
Written by Oskar Weser (oskar.weser@gmail.com)
## Input File
```python
{input_file}
```
## Settings for the calculations
{calculation_setup}
## Starting structures
### Starting structure as Zmatrix
{zmat}
### Symbols with starting values
{symbols}
## Iterations
Starting {start_time}
{table_header}
""".format
settings_table = _get_settings_table(
backend=backend, hamiltonian=hamiltonian, charge=charge,
multiplicity=multiplicity, basis=basis,
etol=etol, gtol=gtol, max_iter=max_iter,
num_procs=num_procs, mem_per_proc=mem_per_proc)
input_filepath = basename(inspect.stack()[-1][1])
# Yes I know: One should not use readlines.
# BUT: The Input file won't be large!
with open(input_filepath, 'r') as f:
input_file = ''.join(f.readlines())
header = get_header(
version=__version__,
input_file=input_file,
zmat=_get_geometry_markdown(zmolecule, coord_fmt=coord_fmt),
calculation_setup=settings_table,
symbols=_get_symbol_table(symbols, header=['Symbol', 'Start Value']),
start_time=_get_time_isostr(start_time),
table_header=_get_table_header_symb_opt())
return header
def _get_symbol_table(symbols, header):
latex_table = [('${}$'.format(latex(symb)), v) for symb, v in symbols]
return tabulate(latex_table,
tablefmt='pipe', headers=header)
def _get_settings_table(backend, hamiltonian, charge, multiplicity,
basis, etol, gtol, num_procs, mem_per_proc, max_iter):
data = [['Hamiltonian', hamiltonian],
['Basis', basis],
['Charge', charge],
['Spin multiplicity', multiplicity],
['Convergence energy', etol],
['Convergence gradient', gtol],
['Maximum of iterations', max_iter],
['Number of processes', num_procs],
['Memory per process', mem_per_proc]
]
return tabulate(data, tablefmt='pipe', headers=['Backend', backend])
def _get_geometry_markdown(molecule, coord_fmt=fixed_defaults['coord_fmt']):
def formatter(x):
try:
return '{{:{}}}'.format(coord_fmt).format(x)
except (TypeError, AttributeError, ValueError):
return x
latex_symb = molecule._sympy_formatter()
if isinstance(molecule, cc.Cartesian):
to_be_printed = latex_symb._frame.loc[:, ['atom', 'x', 'y', 'z']]
for col in ['x', 'y', 'z']:
to_be_printed[col] = to_be_printed[col].apply(formatter)
elif isinstance(molecule, cc.Zmat):
columns = ['atom', 'b', 'bond', 'a', 'angle', 'd', 'dihedral']
latex_symb = latex_symb._abs_ref_formatter(format_as='latex')
to_be_printed = latex_symb._frame.loc[:, columns]
for col in ['bond', 'angle', 'dihedral']:
to_be_printed[col] = to_be_printed[col].apply(formatter)
return tabulate(to_be_printed, tablefmt='pipe', headers='keys',
stralign='right')
def _get_table_header_generic_opt():
get_row = '|{:>4.4}|{:^16.16}|{:^16.16}|{:^31.31}|'.format
header = (get_row('n', r'$E [E_h]$', r'$\Delta E [E_h]$',
r'$\max(|\nabla_X E |) [E_h$/Å]')
+ '\n'
+ get_row(3 * '-' + ':', 15 * '-' + ':',
15 * '-' + ':', 30 * '-' + ':'))
return header
def _get_table_header_symb_opt():
get_row = '|{:>4.4}|{:^16.16}|{:^16.16}|{:^31.31}|'.format
header = (get_row('n', r'$E [E_h]$', r'$\Delta E [E_h]$',
r'$\max(|\nabla E |) [E_h$/Å]')
+ '\n'
+ get_row(3 * '-' + ':', 15 * '-' + ':',
15 * '-' + ':', 30 * '-' + ':'))
return header
def _get_table_row(calculated, grad_energy):
n = len(calculated)
energy = calculated[-1]['energy']
if n == 1:
delta = 0.
else:
delta = calculated[-1]['energy'] - calculated[-2]['energy']
# table header was:
# n, energy, Delta energy, max(abs(grad_energy_X))
get_str = '|{:>4}|{:16.10f}|{:16.10f}|{:31.10f}|\n'.format
return get_str(n, energy, delta, abs(grad_energy).max())
def _get_generic_footer(opt_zmat, start_time, end_time,
molden_out, successful, n_iter, coord_fmt):
get_output = """\
## Optimised Structures
### Optimised structure as Zmatrix
{zmat}
### Optimised structure in cartesian coordinates
{cartesian}
## Closing
Structures were written to {molden}.
The calculation finished {successfully}
after {n_iter} iterations at: {end_time}
and needed: {delta_time}.
""".format
output = get_output(
zmat=_get_geometry_markdown(opt_zmat, coord_fmt=coord_fmt),
cartesian=_get_geometry_markdown(
opt_zmat.get_cartesian(), coord_fmt=coord_fmt),
molden=molden_out, end_time=_get_time_isostr(end_time),
successfully='successfully' if successful else 'with errors',
n_iter=n_iter, delta_time=str(end_time - start_time).split('.')[0])
return output
def _get_symbolic_footer(symbols, opt_zmat, start_time, end_time,
molden_out, successful, n_iter, coord_fmt):
get_output = """\
## Optimised Structures
### Symbols with end values
{symbols_end_values}
### Optimised structure as Zmatrix
{zmat}
### Optimised structure in cartesian coordinates
{cartesian}
## Closing
Structures were written to {molden}.
The calculation finished {successfully}
after {n_iter} iterations at: {end_time}
and needed: {delta_time}.
""".format
symbol_table = _get_symbol_table(symbols, header=['Symbol', 'End Value'])
output = get_output(
symbols_end_values=symbol_table,
zmat=_get_geometry_markdown(opt_zmat, coord_fmt=coord_fmt),
cartesian=_get_geometry_markdown(
opt_zmat.get_cartesian(), coord_fmt=coord_fmt),
molden=molden_out, end_time=_get_time_isostr(end_time),
successfully='successfully' if successful else 'with errors',
n_iter=n_iter, delta_time=str(end_time - start_time).split('.')[0])
return output
def _get_time_isostr(time):
return time.replace(microsecond=0).isoformat()
def rename_existing(filepath):
if os.path.exists(filepath):
to_be_moved = normpath(filepath).split(os.path.sep)[0]
get_path = (to_be_moved + '_{}').format
found, end = False, 1
while not found:
if not os.path.exists(get_path(end)):
found = True
os.rename(to_be_moved, get_path(end))
else:
end += 1
@substitute_docstr
def is_converged(calculated, grad_energy_X=None,
etol=fixed_defaults['etol'], gtol=fixed_defaults['gtol']):
"""Returns if an optimization is converged.
Args:
energies (list): List of energies in hartree.
grad_energy_X (numpy.ndarray): Gradient in cartesian coordinates
in Hartree / Angstrom.
etol (float): {etol}
gtol (float): {gtol}
Returns:
bool:
"""
if len(calculated) in {0, 1, 2}:
return False
else:
delta_energy = calculated[-1]['energy'] - calculated[-2]['energy']
if grad_energy_X is None:
return abs(delta_energy) < etol
else:
return abs(delta_energy) < etol and abs(grad_energy_X).max() < gtol
def _get_default_filepaths(md_out, molden_out, el_calc_input):
if __name__ == '__main__':
if md_out is None:
raise ValueError('md_out has to be provided when executing '
'from an interactive session.')
if molden_out is None:
raise ValueError('molden_out has to be provided when executing '
'from an interactive session.')
if el_calc_input is None:
raise ValueError('el_calc_input has to be provided when executing '
'from an interactive session.')
else:
base = splitext(basename(inspect.stack()[-1][1]))[0]
if md_out is None:
md_out = '{}.md'.format(base)
if molden_out is None:
molden_out = '{}.molden'.format(base)
if el_calc_input is None:
el_calc_input = os.path.join('{}_el_calcs'.format(base),
'{}.inp'.format(base))
return md_out, molden_out, el_calc_input