"""Locally averaged atomic fingerprints (LAAF) — G2, G3, G4, G5, SOAP, MBSF."""
import os
import random
import warnings
from math import cos, sqrt
import ase.io
import numpy as np
from numpy import exp
from numba import jit
from AtomicAI.descriptors.acsf import ACSF
from AtomicAI.descriptors.mbsf import MBSF
from dscribe.descriptors import SOAP
warnings.filterwarnings("ignore")
[docs]
def calculate_eta(r0: float = 0.45, cutoff_descriptor: float = 5.0, number_of_eta: int = 100) -> list:
"""
Calculate the decay function [goes from R0 to cutoff]
:return: list of eta
"""
eta_vector = np.array([r0])
multiplier_value = (cutoff_descriptor / r0) ** (1 / number_of_eta)
for _ in range(number_of_eta - 1):
eta_vector = np.append(eta_vector, np.array([multiplier_value * eta_vector[-1]]))
return eta_vector
@jit
def calculate_fingerprint_vector(
x_ij: np.float64, y_ij: np.float64, z_ij: np.float64,
atomic_type_list: np.int64,
lattice_a: np.float64, lattice_b: np.float64, lattice_c: np.float64,
# fingerprint_center_atom_i: int,
eta_all_list: np.float64,
atom_type: int,
cutoff_descriptor: float,
):
# TODO: Should go to analysis.descriptor module
"""
Calculate the fingerprint vector for a single atom i.
Original implementation of Botu descriptor.
:param x_ij:
:param y_ij:
:param z_ij:
:param atomic_type_list:
:param lattice_a:
:param lattice_b:
:param lattice_c:
:param eta_all_list:
:param atom_type:
:param cutoff_descriptor:
:return:
"""
number_of_eta = len(eta_all_list)
number_of_atoms = len(x_ij)
cutoff_squared = cutoff_descriptor * cutoff_descriptor
fingerprint_vector = np.zeros(2 * number_of_eta)
for j in range(number_of_atoms): # Check all potential neighbors
dx = 0.5 * lattice_a - ((x_ij[j] + 1.5 * lattice_a) % lattice_a)
dy = 0.5 * lattice_b - ((y_ij[j] + 1.5 * lattice_b) % lattice_b)
dz = 0.5 * lattice_c - ((z_ij[j] + 1.5 * lattice_c) % lattice_c)
if atomic_type_list[j] == atom_type: # neighbor is same type
eta_offset = 0
else: # neighbor is other type
eta_offset = number_of_eta
if max(abs(dx), abs(dy), abs(dz)) > cutoff_descriptor: # Too far --> skip
continue
dx2 = dx * dx
dy2 = dy * dy
dz2 = dz * dz
if cutoff_squared > dx2 + dy2 + dz2 > 1e-15:
r_ij = sqrt(dx2 + dy2 + dz2)
exponent_term = -(r_ij / eta_all_list) * (r_ij / eta_all_list)
cosine_term = 0.5 * (cos(np.pi * r_ij / cutoff_descriptor) + 1)
addition_term = exp(exponent_term) * cosine_term
fingerprint_vector[eta_offset:eta_offset + number_of_eta] += addition_term
return fingerprint_vector # Size = number of eta x 2
[docs]
def calculate_average_fingerprint(
cutoff_descriptor: float = 5,
cutoff_average: float = 4,
#input_file: str = "./trajectory.xyz",
traj_data: str = "traj_data",
selected_snapshots: str = ':', # Format of ase for selecting the snapshots in trajectories
output_file: str = "average_fingerprint.csv",
append: bool = False,
target_element: int = 0,
number_of_data: int = None, # generated number of data, # TODO: Confirm behaviour if value is None
number_of_eta: int = 100,
start_step: int = 0, # first step to generate data
final_step: int = None, # final step to generate data
element_conversion: dict = None,
seed: int = None,
lattice_parameters=None,
local_descriptor_type=None,
descriptor=None,
use_buffer: bool = False,
descriptor_parameters=None,
target_neighbor_element: int = None,
):
# TODO: Replace my AverageFingerprintCalculator for more flexible usage
"""
Calculate averaged fingerprints for randomly selected center atoms.
Center atomic sites are selected by target_element, neighbor atomic sites are selected by
target_neighbor_element.
REMARK: Descriptor order is different between original and acsf g2:
original is 0-0/0-1 while acsf g2 is 0-1/0-0
:param target_neighbor_element:
:param descriptor_parameters: specific parameters for descriptors. For advanced use!
:param selected_snapshots: str compliant with ase.io.read function
:param descriptor: instance of descriptor # TODO: List all available descriptors
:param local_descriptor_type: name of descriptor # TODO: Can it be inferred from "descriptor"?
:param append: If True, will append at the end of the csv file. Else overwriting it.
:param lattice_parameters: For normal xyz files
:param cutoff_descriptor:
:param cutoff_average:
:param input_file:
:param traj_data:
:param output_file:
:param target_element:
:param number_of_data:
:param number_of_eta:
:param start_step: 0 = first selected snapshot
:param final_step:
:param element_conversion:
:param use_buffer: store calculated fingerprint vectors for later random choice
:param seed:
:return:
"""
if descriptor_parameters is None:
descriptor_parameters = {}
if target_neighbor_element is None:
target_neighbor_element = target_element # Default: Centers and neighbors are of the same type
fingerprint_buffer = {}
if element_conversion is None:
element_conversion = {
'Si': 0,
'O': 1,
}
# Read input file with ASE and select snapshots
atoms = traj_data #ase.io.read(input_file, selected_snapshots)
if final_step is None: # Default is that final step is last step
final_step = len(atoms) - 1
assert final_step < len(atoms), \
f'Final step exceeds available number of snapshots. Stopping now! ({final_step} >= {len(atoms)})'
eta_list = calculate_eta(cutoff_descriptor=cutoff_descriptor, number_of_eta=number_of_eta)
# Initialize the descriptor
# Available local descriptors
local_descriptor_dict = {
'ACSF_G2': ACSF,
'ACSF_G4': ACSF,
'ACSF_G2G4': ACSF,
'SOAP': SOAP,
'MBSF': MBSF,
}
species = list(element_conversion.keys())
local_descriptor = None
if descriptor:
local_descriptor = descriptor
elif not local_descriptor_type:
print('Using default local descriptor')
elif local_descriptor_type == 'ACSF_G2':
# Set G2 parameters
# convert current $\eta$ to the $\eta$ in G2 # TODO: What does it mean?
rs_lst = [0.0]
params_g2 = []
for eta in eta_list:
for rs in rs_lst:
params_g2.append([1.0 / (eta * eta), rs])
params_g2 = np.array(params_g2)
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_g2': params_g2,
'species': species,
}
descriptor_parameter_dict.update(descriptor_parameters) # Use user input, if any
local_descriptor = local_descriptor_dict[local_descriptor_type](**descriptor_parameter_dict)
elif local_descriptor_type == 'ACSF_G2G4':
# TODO: set parameter defaults vs descriptor is given as input. Think about it.
# Set parameters
# G2
rs_lst = [0.0]
params_g2 = []
for eta in eta_list:
for rs in rs_lst:
params_g2.append([1.0 / (eta * eta), rs])
params_g2 = np.array(params_g2)
# G4
number_of_eta_g4 = 10
eta_g4_list = calculate_eta(cutoff_descriptor=cutoff_descriptor, number_of_eta=number_of_eta_g4)
zeta_lst = [1, 2, 4, 16]
lambda_list = [-1, 1]
params_g4 = []
for eta0 in eta_g4_list:
for zeta0 in zeta_lst:
for lambda0 in lambda_list:
params_g4.append([1.0 / (eta0 * eta0), zeta0, lambda0])
params_g4 = np.array(params_g4)
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_g2': params_g2,
'params_g4': params_g4,
'species': species,
}
descriptor_parameter_dict.update(descriptor_parameters) # Use user input, if any
local_descriptor = local_descriptor_dict[local_descriptor_type](**descriptor_parameter_dict)
elif local_descriptor_type == 'ACSF_G4':
# Set parameters
# G2
params_g2 = None
# G4
number_of_eta_g4 = 10
eta_g4_list = calculate_eta(cutoff_descriptor=cutoff_descriptor, number_of_eta=number_of_eta_g4)
zeta_lst = [1, 2, 4, 16]
lambda_list = [-1, 1]
params_g4 = []
for eta0 in eta_g4_list:
for zeta0 in zeta_lst:
for lambda0 in lambda_list:
params_g4.append([1.0 / (eta0 * eta0), zeta0, lambda0])
params_g4 = np.array(params_g4)
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_g2': params_g2,
'params_g4': params_g4,
'species': species,
}
descriptor_parameter_dict.update(descriptor_parameters) # Use user input, if any
local_descriptor = local_descriptor_dict[local_descriptor_type](**descriptor_parameter_dict)
elif local_descriptor_type == 'MBSF':
# set parameter for MBSF gr(g2) + ga (\zeta, \theta_s, \eta, Rs)
rs_lst = [0.0]
params_gr = []
for eta in eta_list:
for rs in rs_lst:
params_gr.append([1.0 / (eta * eta), rs])
params_gr = np.array(params_gr)
# G4
zeta_lst = [1, 2, 4, 16]
theta_s_list = np.linspace(0, 1, 5) * np.pi
number_of_eta_ga = 10
eta_ga_list = calculate_eta(
cutoff_descriptor=cutoff_descriptor,
number_of_eta=number_of_eta_ga,
)
rs_lst = [0.0]
params_ga = []
for zeta0 in zeta_lst:
for theta_s0 in theta_s_list:
for eta0 in eta_ga_list:
for rs0 in rs_lst:
params_ga.append([zeta0, theta_s0, 1.0 / (eta0 * eta0), rs0])
params_ga = np.array(params_ga)
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_gr': params_gr,
'params_ga': params_ga,
'periodic': True,
'species': species,
}
descriptor_parameter_dict.update(descriptor_parameters) # Use user input, if any
local_descriptor = local_descriptor_dict[local_descriptor_type](**descriptor_parameter_dict)
elif local_descriptor_type == 'SOAP':
descriptor_parameter_dict = {
'species': species,
'periodic': True,
'rcut': cutoff_descriptor,
'nmax': 8,
'lmax': 8,
}
descriptor_parameter_dict.update(descriptor_parameters) # Use user input, if any
local_descriptor = local_descriptor_dict[local_descriptor_type](**descriptor_parameter_dict)
else:
print('Unknown local_descriptor_type:', local_descriptor_type)
print('local_descriptor_list:', local_descriptor_type)
exit()
# If seed is defined to fix the order, mainly for DEBUG
if seed:
print(f'WARNING: As a seed was set ({seed}), this is a deterministic calculation.')
random.seed(seed)
result_str_list = []
# Generate all data points
for data_index in range(number_of_data):
#for data_index in tqdm(range(number_of_data), desc=f'LAAF of {os.path.basename(input_file)}'):
# TODO: Speedup by storing calculated fingerprint vectors as they are created, by snapshot number.
# TODO: Like a cache, if already calculated --> Reuse. Else calculate the fingerprint vector
# Select step
selected_step = random.randint(start_step, final_step)
selected_snapshot = atoms[selected_step]
# Get atomic type list
symbols = selected_snapshot.symbols
number_of_atoms = len(symbols)
atomic_type_list = [element_conversion[symbol] for symbol in symbols]
atomic_type_list = np.array(atomic_type_list)
# Get cell parameters
# Current available cell system : orthorhombic
if lattice_parameters is None:
cell = selected_snapshot.cell
lattice_a, lattice_b, lattice_c = cell[0, 0], cell[1, 1], cell[2, 2]
lattice_parameters = [lattice_a, lattice_b, lattice_c]
else:
lattice_a, lattice_b, lattice_c = lattice_parameters
# Get x, y, z coordination
positions = selected_snapshot.positions
x_list = positions[:, 0]
y_list = positions[:, 1]
z_list = positions[:, 2]
if use_buffer:
if selected_step in fingerprint_buffer.keys():
fingerprint_vector_list = fingerprint_buffer[selected_step]
elif local_descriptor:
fingerprint_vector_list = local_descriptor.create(
selected_snapshot) # TODO: call create without system?
else:
# Original Botu atomic fingerprint
fingerprint_vector_list = np.zeros((number_of_atoms, 2 * number_of_eta))
eta_list = np.array(eta_list)
for atom_i in range(number_of_atoms):
x_ij_array = x_list - x_list[atom_i]
y_ij_array = y_list - y_list[atom_i]
z_ij_array = z_list - z_list[atom_i]
fingerprint_vector_list[atom_i] = calculate_fingerprint_vector(
x_ij_array, y_ij_array, z_ij_array,
atomic_type_list,
lattice_a, lattice_b, lattice_c,
eta_list, atomic_type_list[atom_i],
cutoff_descriptor=cutoff_descriptor,
)
else:
if local_descriptor:
fingerprint_vector_list = local_descriptor.create(selected_snapshot)
else:
# Original Botu atomic fingerprint
fingerprint_vector_list = np.zeros((number_of_atoms, 2 * number_of_eta))
eta_list = np.array(eta_list)
for atom_i in range(number_of_atoms):
x_ij_array = x_list - x_list[atom_i]
y_ij_array = y_list - y_list[atom_i]
z_ij_array = z_list - z_list[atom_i]
fingerprint_vector_list[atom_i] = calculate_fingerprint_vector(
x_ij_array, y_ij_array, z_ij_array,
atomic_type_list,
lattice_a, lattice_b, lattice_c,
eta_list, atomic_type_list[atom_i],
cutoff_descriptor=cutoff_descriptor,
)
if use_buffer:
fingerprint_buffer[selected_step] = fingerprint_vector_list
for k in range(number_of_atoms):
# Select a random atom
atom_i = random.randint(0, number_of_atoms - 1)
if atomic_type_list[atom_i] == target_element:
break
# single_averaged_fingerprint = np.array([0.0 for k in range(2 * number_of_eta)])
single_averaged_fingerprint = np.zeros(len(fingerprint_vector_list[atom_i]))
number_of_fingerprints_in_average = 0
for atom_j in range(number_of_atoms):
if atomic_type_list[atom_j] != target_neighbor_element: # Only keep target elements. Skip if not
continue
buffer_dx = 0.5 * lattice_a - ((x_list[atom_j] - x_list[atom_i] + 1.5 * lattice_a) % lattice_a)
buffer_dy = 0.5 * lattice_b - ((y_list[atom_j] - y_list[atom_i] + 1.5 * lattice_b) % lattice_b)
buffer_dz = 0.5 * lattice_c - ((z_list[atom_j] - z_list[atom_i] + 1.5 * lattice_c) % lattice_c)
cutoff_average_squared = cutoff_average * cutoff_average
dx2 = buffer_dx * buffer_dx
dy2 = buffer_dy * buffer_dy
dz2 = buffer_dz * buffer_dz
r2 = dx2 + dy2 + dz2
if cutoff_average_squared > r2:
number_of_fingerprints_in_average += 1
single_averaged_fingerprint += fingerprint_vector_list[atom_j, :]
single_averaged_fingerprint = single_averaged_fingerprint / float(number_of_fingerprints_in_average)
average_fingerprint_list = list(single_averaged_fingerprint)
fingerprint_str = ["{0: >30.15f}".format(v) for v in average_fingerprint_list]
result_str_list.append(",".join(fingerprint_str) + '\n')
if append:
writing_mode = 'a'
else:
writing_mode = 'w'
with open(output_file, writing_mode) as output_file_pointer:
output_file_pointer.writelines(result_str_list)
output_file_pointer.close()
[docs]
class AverageFingerprintCalculator:
"""
Main class for calculation of LAAF
"""
def __init__(self,
cutoff_descriptor: float = 5.0,
cutoff_average: float = 4.0,
traj_data: str = 'traj_data', #"./trajectory.xyz",
selected_snapshots: str = ':', # Format of ase for selecting the snapshots in trajectories
number_of_eta: int = 100,
element_conversion: dict = None,
descriptor_type='custom',
descriptor=None,
descriptor_parameters: dict = None,
):
"""
:param cutoff_descriptor:
:param cutoff_average:
:param traj_data: #input_file:
:param selected_snapshots:
:param number_of_eta:
:param element_conversion:
:param descriptor_type:
:param descriptor: Descriptor instance (Only tested for SOAP)
:param descriptor_parameters:
"""
# TODO: Choose types of atoms to be included in averaging
if descriptor_parameters is None:
descriptor_parameters = {}
self.fingerprint_buffer = {} # Reduce redundant operations
self.cutoff_descriptor = cutoff_descriptor
self.cutoff_average = cutoff_average
#self.input_file = input_file
self.traj_data = traj_data
self.selected_snapshots = selected_snapshots
self.number_of_eta = number_of_eta
if element_conversion is None:
element_conversion = {
'Si': 0,
'O': 1,
}
self.element_conversion = element_conversion
# Read input file with ASE and select snapshots
self.atoms = traj_data #ase.io.read(input_file, selected_snapshots)
self.species = list(element_conversion.keys())
allowed_descriptor_types = [
'custom', 'ACSF_G2', 'ACSF_G3', 'ACSF_G4', 'ACSF_G5',
'ACSF_G2G4', 'ACSF_G2G4G5', 'SOAP', 'MBSF',
]
self.descriptor_type = descriptor_type
assert descriptor_type in allowed_descriptor_types, \
f'Invalid descriptor type {descriptor_type!r}. Choose from: {allowed_descriptor_types}'
local_descriptor_dict = {
'ACSF_G2': ACSF,
'ACSF_G3': ACSF,
'ACSF_G4': ACSF,
'ACSF_G5': ACSF,
'ACSF_G2G4': ACSF,
'ACSF_G2G4G5': ACSF,
'SOAP': SOAP,
'MBSF': MBSF,
}
if descriptor_type == 'custom':
assert descriptor is not None, 'For a custom descriptor type, you must provide a descriptor instance'
self.descriptor = descriptor
else:
#print('Using local descriptor. Any other descriptor in input will be discarded.')
# Initialize the list of available local descriptors
eta_list_g2 = calculate_eta(cutoff_descriptor=cutoff_descriptor, number_of_eta=number_of_eta)
if descriptor_type == 'ACSF_G2':
# Set G2 parameters
rs_lst = [0.0]
params_g2 = []
for eta in eta_list_g2:
for rs in rs_lst:
params_g2.append([1.0 / (eta * eta), rs])
params_g2 = np.array(params_g2)
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_g2': params_g2,
'species': self.species,
}
descriptor_parameter_dict.update(descriptor_parameters) # Use user input, if any
self.descriptor = local_descriptor_dict[descriptor_type](**descriptor_parameter_dict)
elif descriptor_type == 'ACSF_G3':
# G3: cosine basis functions parameterised by kappa
kappa_lst = np.linspace(0.5, 5.0, number_of_eta)
params_g3 = kappa_lst # 1-D array of kappa values
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_g3': params_g3,
'species': self.species,
}
descriptor_parameter_dict.update(descriptor_parameters)
self.descriptor = local_descriptor_dict[descriptor_type](**descriptor_parameter_dict)
elif descriptor_type == 'ACSF_G4':
params_g2 = None
number_of_eta_g4 = 10
eta_list_g4 = calculate_eta(cutoff_descriptor=cutoff_descriptor, number_of_eta=number_of_eta_g4)
zeta_lst = [1, 2, 4, 16]
lambda_list = [-1, 1]
params_g4 = np.array([
[1.0 / (e * e), z, l]
for e in eta_list_g4
for z in zeta_lst
for l in lambda_list
])
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_g2': params_g2,
'params_g4': params_g4,
'species': self.species,
}
descriptor_parameter_dict.update(descriptor_parameters)
self.descriptor = local_descriptor_dict[descriptor_type](**descriptor_parameter_dict)
elif descriptor_type == 'ACSF_G5':
# G5: same angular form as G4 but without the r_jk cutoff term
params_g2 = None
number_of_eta_g5 = 10
eta_list_g5 = calculate_eta(cutoff_descriptor=cutoff_descriptor, number_of_eta=number_of_eta_g5)
zeta_lst = [1, 2, 4, 16]
lambda_list = [-1, 1]
params_g5 = np.array([
[1.0 / (e * e), z, l]
for e in eta_list_g5
for z in zeta_lst
for l in lambda_list
])
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_g2': params_g2,
'params_g5': params_g5,
'species': self.species,
}
descriptor_parameter_dict.update(descriptor_parameters)
self.descriptor = local_descriptor_dict[descriptor_type](**descriptor_parameter_dict)
elif descriptor_type == 'ACSF_G2G4':
# Set G2 parameters
rs_lst = [0.0]
params_g2 = []
for eta in eta_list_g2:
for rs in rs_lst:
params_g2.append([1.0 / (eta * eta), rs])
params_g2 = np.array(params_g2)
# Set G4 parameters
number_of_eta_g4 = 10
eta_list_g4 = calculate_eta(cutoff_descriptor=cutoff_descriptor, number_of_eta=number_of_eta_g4)
zeta_lst = [1, 2, 4, 16]
lambda_list = [-1, 1]
params_g4 = []
for eta0 in eta_list_g4:
for zeta0 in zeta_lst:
for lambda0 in lambda_list:
params_g4.append([1.0 / (eta0 * eta0), zeta0, lambda0])
params_g4 = np.array(params_g4)
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_g2': params_g2,
'params_g4': params_g4,
'species': self.species,
}
descriptor_parameter_dict.update(descriptor_parameters) # Use user input, if any
self.descriptor = local_descriptor_dict[descriptor_type](**descriptor_parameter_dict)
elif descriptor_type == 'ACSF_G2G4G5':
# G2 (radial) + G4 (3-body with r_jk) + G5 (3-body without r_jk)
rs_lst = [0.0]
params_g2 = np.array([
[1.0 / (e * e), rs]
for e in eta_list_g2
for rs in rs_lst
])
number_of_eta_ang = 10
eta_list_ang = calculate_eta(cutoff_descriptor=cutoff_descriptor, number_of_eta=number_of_eta_ang)
zeta_lst = [1, 2, 4, 16]
lambda_list = [-1, 1]
params_g4 = np.array([
[1.0 / (e * e), z, l]
for e in eta_list_ang
for z in zeta_lst
for l in lambda_list
])
params_g5 = params_g4.copy()
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_g2': params_g2,
'params_g4': params_g4,
'params_g5': params_g5,
'species': self.species,
}
descriptor_parameter_dict.update(descriptor_parameters)
self.descriptor = local_descriptor_dict[descriptor_type](**descriptor_parameter_dict)
elif descriptor_type == 'MBSF':
# set parameter for MBSF gr(g2) + ga (\zeta, \theta_s, \eta, Rs)
rs_lst = [0.0]
params_gr = []
for eta in eta_list_g2:
for rs in rs_lst:
params_gr.append([1.0 / (eta * eta), rs])
params_gr = np.array(params_gr)
# G4
zeta_lst = [1, 2, 4, 16]
theta_s_list = np.linspace(0, 1, 5) * np.pi
number_of_eta_ga = 10
eta_ga_list = calculate_eta(
cutoff_descriptor=cutoff_descriptor,
number_of_eta=number_of_eta_ga,
)
rs_lst = [0.0]
params_ga = []
for zeta0 in zeta_lst:
for theta_s0 in theta_s_list:
for eta0 in eta_ga_list:
for rs0 in rs_lst:
params_ga.append([zeta0, theta_s0, 1.0 / (eta0 * eta0), rs0])
params_ga = np.array(params_ga)
descriptor_parameter_dict = {
'cutoff_descriptor': cutoff_descriptor,
'params_gr': params_gr,
'params_ga': params_ga,
'periodic': True,
'species': self.species,
}
descriptor_parameter_dict.update(descriptor_parameters) # Use user input, if any
self.descriptor = local_descriptor_dict[descriptor_type](**descriptor_parameter_dict)
elif descriptor_type == 'SOAP':
descriptor_parameter_dict = {
'species': self.species,
'periodic': True,
'rcut': cutoff_descriptor,
'nmax': 8,
'lmax': 8,
}
descriptor_parameter_dict.update(descriptor_parameters) # Use user input, if any
self.descriptor = local_descriptor_dict[descriptor_type](**descriptor_parameter_dict)
[docs]
def compute_averaged_fingerprints_random(
self,
output_file: str = "average_fingerprint.csv",
append: bool = False,
number_of_data: int = None, # generated number of data,
start_step: int = 0, # first step to generate data
final_step: int = None, # final step to generate data
target_element: int = 0,
target_neighbor_element: int = None,
seed: int = None,
):
if target_neighbor_element is None:
target_neighbor_element = target_element
# If seed is defined to fix the order, mainly for DEBUG
if seed:
print(f'WARNING: A seed was set ({seed}). Debugging mode...')
random.seed(seed)
atoms = self.atoms
if final_step is None: # Default is that final step is last step
final_step = len(atoms) - 1
assert final_step < len(atoms), \
f'Final step exceeds available number of snapshots. Stopping now! ({final_step} >= {len(atoms)})'
# Generate all data points
result_str_list = []
#for data_index in tqdm(range(number_of_data), desc=f'LAAF of {os.path.basename(self.input_file)}'):
for data_index in range(number_of_data):
# TODO: Select tuples of (selected_step,data_index) to avoid duplicate data
# Select step
selected_step = random.randint(start_step, final_step)
selected_snapshot = atoms[selected_step]
# Get atomic type list
symbols = selected_snapshot.symbols
number_of_atoms = len(symbols)
atomic_type_list = [self.element_conversion[symbol] for symbol in symbols]
atomic_type_list = np.array(atomic_type_list)
# Get cell parameters
# Current available cell system : orthorhombic
cell = selected_snapshot.cell
lattice_a, lattice_b, lattice_c = cell[0, 0], cell[1, 1], cell[2, 2]
# lattice_parameters = [lattice_a, lattice_b, lattice_c] # not used
# Get x, y, z coordination
positions = selected_snapshot.positions
x_list = positions[:, 0]
y_list = positions[:, 1]
z_list = positions[:, 2]
if selected_step in self.fingerprint_buffer.keys():
fingerprint_vector_list = self.fingerprint_buffer[selected_step]
else:
fingerprint_vector_list = self.descriptor.create(selected_snapshot) # TODO: call create without system?
self.fingerprint_buffer[selected_step] = fingerprint_vector_list
"""
this_species_atom_indices = [i for i in range(number_of_atoms) if
atomic_type_list[i] == target_element]
atom_i = random.choice(this_species_atom_indices)
"""
for k in range(number_of_atoms):
# Select a random atom
atom_i = random.randint(0, number_of_atoms - 1)
if atomic_type_list[atom_i] == target_element:
break
single_averaged_fingerprint = np.zeros(len(fingerprint_vector_list[atom_i]))
number_of_fingerprints_in_average = 0
for atom_j in range(number_of_atoms):
if atomic_type_list[atom_j] != target_neighbor_element: # Only keep target elements. Skip if not
continue
buffer_dx = 0.5 * lattice_a - ((x_list[atom_j] - x_list[atom_i] + 1.5 * lattice_a) % lattice_a)
buffer_dy = 0.5 * lattice_b - ((y_list[atom_j] - y_list[atom_i] + 1.5 * lattice_b) % lattice_b)
buffer_dz = 0.5 * lattice_c - ((z_list[atom_j] - z_list[atom_i] + 1.5 * lattice_c) % lattice_c)
cutoff_average_squared = self.cutoff_average * self.cutoff_average
dx2 = buffer_dx * buffer_dx
dy2 = buffer_dy * buffer_dy
dz2 = buffer_dz * buffer_dz
r2 = dx2 + dy2 + dz2
if cutoff_average_squared > r2:
number_of_fingerprints_in_average += 1
single_averaged_fingerprint += fingerprint_vector_list[atom_j, :]
single_averaged_fingerprint = single_averaged_fingerprint / float(number_of_fingerprints_in_average)
average_fingerprint_list = list(single_averaged_fingerprint)
fingerprint_str = ["{0: >30.15f}".format(v) for v in average_fingerprint_list]
result_str_list.append(",".join(fingerprint_str) + '\n')
if append:
writing_mode = 'a'
else:
writing_mode = 'w'
with open(output_file, writing_mode) as output_file_pointer:
output_file_pointer.writelines(result_str_list)
output_file_pointer.close()
[docs]
def compute_averaged_fingeprints_selection(
self,
output_file: str = "average_fingerprint.csv",
selected_atoms: list = None,
append: bool = False,
selected_steps: list = None,
target_element: int = 0,
target_neighbor_element: int = None,
):
if target_neighbor_element is None:
target_neighbor_element = target_element
atoms = self.atoms
# Generate all data points
result_str_list = []
if selected_atoms is None:
selected_atoms = range(len(atoms[0].symbols))
if selected_steps is None:
selected_steps = range(len(atoms))
#for this_step in tqdm(selected_steps):
for this_step in selected_steps:
selected_snapshot = atoms[this_step]
# Get atomic type list
symbols = selected_snapshot.symbols
number_of_atoms = len(symbols)
atomic_type_list = [self.element_conversion[symbol] for symbol in symbols]
atomic_type_list = np.array(atomic_type_list)
selected_atoms = np.where(atomic_type_list == target_element)[0]
cell = selected_snapshot.cell
lattice_a, lattice_b, lattice_c = cell[0, 0], cell[1, 1], cell[2, 2]
# Get x, y, z coordination
positions = selected_snapshot.positions
x_list = positions[:, 0]
y_list = positions[:, 1]
z_list = positions[:, 2]
if this_step in self.fingerprint_buffer.keys():
fingerprint_vector_list = self.fingerprint_buffer[this_step]
else:
fingerprint_vector_list = self.descriptor.create(selected_snapshot) # TODO: call create without system?
#fingerprint_vector_list = self.descriptor.create(selected_atoms) # TODO: call create without system?
self.fingerprint_buffer[this_step] = fingerprint_vector_list
for atom_i in selected_atoms:
# single_averaged_fingerprint = np.array([0.0 for k in range(2 * number_of_eta)])
single_averaged_fingerprint = np.zeros(len(fingerprint_vector_list[atom_i]))
number_of_fingerprints_in_average = 0
for atom_j in range(number_of_atoms):
if atomic_type_list[atom_j] != target_neighbor_element: # Only keep target elements. Skip if not
continue
buffer_dx = 0.5 * lattice_a - ((x_list[atom_j] - x_list[atom_i] + 1.5 * lattice_a) % lattice_a)
buffer_dy = 0.5 * lattice_b - ((y_list[atom_j] - y_list[atom_i] + 1.5 * lattice_b) % lattice_b)
buffer_dz = 0.5 * lattice_c - ((z_list[atom_j] - z_list[atom_i] + 1.5 * lattice_c) % lattice_c)
cutoff_average_squared = self.cutoff_average * self.cutoff_average
dx2 = buffer_dx * buffer_dx
dy2 = buffer_dy * buffer_dy
dz2 = buffer_dz * buffer_dz
r2 = dx2 + dy2 + dz2
if cutoff_average_squared > r2:
number_of_fingerprints_in_average += 1
single_averaged_fingerprint += fingerprint_vector_list[atom_j, :]
single_averaged_fingerprint = single_averaged_fingerprint / float(number_of_fingerprints_in_average)
average_fingerprint_list = list(single_averaged_fingerprint)
fingerprint_str = ["{0: >30.16e}".format(v) for v in average_fingerprint_list]
result_str_list.append("".join(fingerprint_str) + '\n')
if append:
writing_mode = 'a'
else:
writing_mode = 'w'
with open(output_file, writing_mode) as output_file_pointer:
output_file_pointer.writelines(result_str_list)
output_file_pointer.close()