Source code for moxel.utils

# This file is part of MOXελ.
# Copyright (C) 2023-2024 Antonios P. Sarikas

# MOXελ is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

r"""
This module provides helper functions for creating voxels.

.. note::
    Currently, interactions are modelled with the Lennard-Jones (LJ) potential.

.. attention::
    Consider tuning the ``n_jobs`` parameter to get the best performance
    for your system::

        from timeit import timeit

        setup = 'from moxel.utils import voxels_from_file'
        n_jobs = [1, 2, 8, 16]  # Modify this according to your system.

        for n in n_jobs:
            stmt = f'voxels_from_file("path/to/cif", n_jobs={n})'
            time = timeit(stmt=stmt, setup=setup, number=1)
            print(f'Time with {n} jobs: {time:.3f} s')
"""

import os
import itertools
from pathlib import Path
from multiprocessing import Pool
import warnings

import numpy as np
from tqdm import tqdm
from pymatgen.core import Structure

from ._params import lj_params

warnings.filterwarnings('ignore')

# Default values for voxels calculation.
GRID_SIZE = 32
CUTOFF = 10.
EPSILON = 50.
SIGMA = 2.5
CUBIC_BOX = 30.
N_JOBS = None


[docs] def mic_scale_factors(r, lattice_vectors): r""" Return scale factors to satisfy minimum image convention [MIC]_. Parameters ---------- r : float The cutoff radius used in MIC convetion. lattice_vectors : array of shape (3, 3) The lattice vectors of the unit cell. Each row corresponds to a lattice vector. Returns ------- scale_factors : array of shape (3,) ``scale_factors[i]`` scales ``lattice_vectors[i]``. References ---------- .. [MIC] W. Smith, "The Minimum Image Convention in Non-Cubic MD Cells", 1989. """ a, b, c = lattice_vectors volume = np.linalg.norm(np.dot(a, np.cross(b, c))) w_a = volume/np.linalg.norm(np.cross(b, c)) w_b = volume/np.linalg.norm(np.cross(a, c)) w_c = volume/np.linalg.norm(np.cross(a, b)) return np.ceil(2 * r / np.array([w_a, w_b, w_c]))
[docs] class Grid: r""" A 3D energy grid over a crystal structure. Parameters ---------- grid_size : int, default=32 Number of grid points along each dimension. cutoff : float, default=10.0 Cutoff radius (Å) for the LJ potential. epsilon : float, default=50.0 Epsilon value (ε/K) of the probe atom. sigma : float, default=2.5 Sigma value (σ/Å) of the probe atom. Attributes ---------- structure : :class:`pymatgen.core.structure.Structure` Available only after :meth:`Grid.load_structure` has been called. structure_name : str Available only after :meth:`Grid.load_structure` has been called. cubic_box : float or None Available only after :meth:`Grid.calculate` has been called. voxels : array of shape (grid_size,)*3 Available only after :meth:`Grid.calculate` has been called. """ def __init__( self, grid_size=GRID_SIZE, *, cutoff=CUTOFF, epsilon=EPSILON, sigma=SIGMA ): self.grid_size = grid_size self.cutoff = cutoff self.epsilon = epsilon self.sigma = sigma
[docs] def load_structure(self, pathname): r""" Load a crystal structure from a file in a format supported by :meth:`pymatgen.core.Structure.from_file`. Parameters ---------- pathname : str Pathname to the file. """ self.structure = Structure.from_file(pathname) self.structure_name = Path(pathname).stem
[docs] def calculate( self, cubic_box=CUBIC_BOX, potential='lj', n_jobs=N_JOBS, ): r""" Iterate over the grid and return voxels. For computational efficiency and to assure (approximately) the same spatial resolution, the grid is overlayed over a supercell scaled according to MIC, see :func:`mic_scale_factors`. If lattice angles are significantly different than 90°, to avoid distortions set ``cubic_box``. In this case, the grid is overlayed over a cubic box of size ``cubic_box`` centered at the origin but periodicity is no longer guaranteed. Parameters ---------- potential : str, default='lj' The potential used to calculate voxels. Currently, only the LJ potential is supported. cubic_box : float or None, default=30.0 If ``None``, the simulation box is a supercell scaled according to MIC. Otherwise, cubic box of size ``cubic_box``. n_jobs : int, optional Number of jobs to run in parallel. If ``None``, then the number returned by ``os.cpu_count()`` is used. Returns ------- voxels : array of shape (grid_size,)*3 """ self.cubic_box = cubic_box if cubic_box is not None: d = cubic_box / 2 probe_coords = np.linspace(0 - d, 0 + d, self.grid_size, endpoint=False) # Cartesian self._simulation_box = self.structure else: probe_coords = np.linspace(0, 1, self.grid_size, endpoint=False) # Fractional scale = mic_scale_factors(self.cutoff, self.structure.lattice.matrix) self._simulation_box = self.structure * scale if potential == 'lj': # Cache LJ parameters for all atoms in the simulation box. self._lj_params = np.array( [lj_params[atom.species_string] for atom in self._simulation_box] ) # Cache fractional coordinates since this is a slow function in pymatgen. self._frac_coords = self._simulation_box.frac_coords # Embarrassingly parallel. with Pool(processes=n_jobs) as p: energies = p.map( self.lj_potential, itertools.product(*(probe_coords,)*3) ) self.voxels = np.array(energies, dtype=np.float32).reshape((self.grid_size,)*3) return self.voxels
[docs] def lj_potential(self, coords): r""" Calculate LJ potential at cartesian or fractional coordinates. Parameters ---------- coordinates : array_like of shape (3,) If ``cubic_box=None`` fractional, else cartesian. Returns ------- energy : float """ if self.cubic_box is not None: cartesian_coords = coords else: cartesian_coords = self._simulation_box._lattice.get_cartesian_coords(coords) _, r_ij, indices, _ = self._simulation_box._lattice.get_points_in_sphere( self._frac_coords, cartesian_coords, self.cutoff, zip_results=False, ) # Need to check for length of r_ij because of # https://github.com/materialsproject/pymatgen/issues/3794 if len(r_ij) == 0: # No neighbor, zero energy. return 0. if np.any(r_ij < 1e-3): # Close contact, infinite energy. return np.inf es_j = self._lj_params[indices] x = (0.5 * (es_j[:, 1] + self.sigma)) / r_ij e = 4 * np.sqrt(es_j[:, 0] * self.epsilon) return np.sum(e * (x**12 - x**6))
[docs] def voxels_from_file( cif_pathname, grid_size=GRID_SIZE, *, cutoff=CUTOFF, epsilon=EPSILON, sigma=SIGMA, cubic_box=CUBIC_BOX, n_jobs=N_JOBS, only_voxels=True, ): r""" Calculate and return voxels from ``.cif`` file. Parameters ---------- cif_pathname : str Pathname to the ``.cif`` file. only_voxels : bool, default=True Determines ``out`` type. Returns ------- out : array or :class:`Grid` If ``only_voxels=True`` array, else :class:`Grid`. See Also -------- :func:`voxels_from_dir` For a description of the parameters. """ grid = Grid(grid_size, cutoff=cutoff, epsilon=epsilon, sigma=sigma) grid.load_structure(cif_pathname) grid.calculate(cubic_box=cubic_box, n_jobs=n_jobs) if only_voxels: return grid.voxels return grid
[docs] def voxels_from_files( cif_pathnames, out_pathname, grid_size=GRID_SIZE, *, cutoff=CUTOFF, epsilon=EPSILON, sigma=SIGMA, cubic_box=CUBIC_BOX, n_jobs=N_JOBS, ): r""" Calculate voxels from a list of ``.cif`` files and store them. Voxels are stored under ``out_pathname`` as ``.npy`` files. Parameters ---------- cif_pathnames : list List of pathnames to the ``.cif`` files. out_pathname : str Pathname to the directory under which voxels are stored. See Also -------- :func:`voxels_from_dir` For a description of the parameters. Notes ----- Structures that can't be processsed are omitted. """ os.mkdir(out_pathname) for file in tqdm(cif_pathnames, desc='\033[32;1mCreating energy voxels\033[0m'): try: name = Path(file).stem # Name of the structure. grid = voxels_from_file( cif_pathname=file, grid_size=grid_size, cutoff=cutoff, epsilon=epsilon, sigma=sigma, cubic_box=cubic_box, n_jobs=n_jobs, ) pathname = os.path.join(out_pathname, name) np.save(pathname, grid) # .npy extension is appended by default. except Exception as e: print(e)
[docs] def voxels_from_dir( cif_dirname: str, out_pathname: str, grid_size: int = GRID_SIZE, *, cutoff: float = CUTOFF, epsilon: float = EPSILON, sigma: float = SIGMA, cubic_box: float | None = CUBIC_BOX, n_jobs: int | None = N_JOBS, ): r""" Calculate voxels from a directory of ``.cif`` files and store them. Voxels are stored under ``out_pathname`` as ``.npy`` files. Parameters ---------- cif_dirname : str Pathname to the directory containing the ``.cif`` files. out_pathname : str Pathname of an existing directory under which voxels are stored. grid_size : int, default=32 Number of grid points along each dimension. cutoff : float, default=10.0 Cutoff radius (Å) for the LJ potential. epsilon : float, default=50.0 Epsilon value (ε/K) of the probe atom. sigma : float, default=2.5 Sigma value (σ/Å) of the probe atom. cubic_box : float or None, default=30 If ``None``, the simulation box is a supercell scaled according to MIC. Otherwise, cubic box of size ``cubic_box``. n_jobs : int, optional Number of jobs to run in parallel. If ``None``, then the number returned by ``os.cpu_count()`` is used. Notes ----- Structures that can't be processsed are omitted. """ cif_pathanmes = [os.path.join(cif_dirname, f) for f in os.listdir(cif_dirname)] voxels_from_files( cif_pathanmes, out_pathname, grid_size=grid_size, cutoff=cutoff, epsilon=epsilon, sigma=sigma, cubic_box=cubic_box, n_jobs=n_jobs, )