Source code for amp.analysis

#!/usr/bin/env python

import os
import numpy as np
from ase.io import Trajectory

from matplotlib import pyplot
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import rcParams

from . import Amp
from .utilities import now, hash_images, make_filename, hash_gc_images


rcParams.update({'figure.autolayout': True})


[docs] def plot_sensitivity(calc, images, d=0.0001, label='sensitivity', dblabel=None, plotfile=None, overwrite=False, energy_coefficient=1.0, force_coefficient=0.04, charge_coefficient=10.0, ): """Returns the plot of loss function in terms of perturbed parameters. Takes the load file and images. Any other keyword taken by the Amp calculator can be fed to this class also. Parameters ---------- calc : Amp object or str Either an existing instantiated Amp calculator or a path for loading an existing ".amp" file. In the latter case, should be fed like 'load="filename.amp"'. images : list or str List of ASE atoms objects with positions, symbols, energies, and forces in ASE format. This can also be the path to an ASE trajectory (.traj) or database (.db) file. Energies can be obtained from any reference, e.g. DFT calculations. d : float The amount of perturbation in each parameter. label : str Default prefix/location used for all files. dblabel : str Optional separate prefix/location of database files, including fingerprints, fingerprint primes, and neighborlists, to avoid calculating them. If not supplied, just uses the value from label. plotfile : Object File for the plot. overwrite : bool If a plot or an script containing values found overwrite it. energy_coefficient : float Coefficient of energy loss in the total loss function. force_coefficient : float Coefficient of force loss in the total loss function. charge_coefficient : float Coefficient of charge loss in the total loss function. """ from amp.model import LossFunction if isinstance(calc, str): calc = Amp.load(file=calc) if plotfile is None: plotfile = make_filename(label, '-plot.pdf') if (not overwrite) and os.path.exists(plotfile): raise IOError('File exists: %s.\nIf you want to overwrite,' ' set overwrite=True or manually delete.' % plotfile) calc.dblabel = label if dblabel is None else dblabel if force_coefficient == 0.: calculate_derivatives = False else: calculate_derivatives = True calc._log('\nAmp sensitivity analysis started. ' + now() + '\n') calc._log('Descriptor: %s' % calc.descriptor.__class__.__name__) calc._log('Model: %s' % calc.model.__class__.__name__) if calc.model.__class__.__name__ == 'ChargeNeuralNetwork': images = hash_gc_images(images) else: images = hash_images(images) calc._log('\nDescriptor\n==========') calc.descriptor.calculate_fingerprints( images=images, parallel=calc._parallel, log=calc._log, calculate_derivatives=calculate_derivatives) vector = calc.model.vector.copy() lossfunction = LossFunction(energy_coefficient=energy_coefficient, force_coefficient=force_coefficient, charge_coefficient=charge_coefficient, parallel=calc._parallel, ) calc.model.lossfunction = lossfunction # Set up local loss function. if calc.model.__class__.__name__ == 'ChargeNeuralNetwork': _ = calc.model.calculate_charge_fp_append( images, calc.model.parameters.slab_metal, calc.model.parameters.surface_correction, calc.model.parameters.etas) charge_fp_append, charge_fpprime_append = _ calc.model.lossfunction.attach_model( calc.model, log=calc._log, fingerprints=calc.descriptor.fingerprints, fingerprintprimes=calc.descriptor.fingerprintprimes, charge_fp_appends=charge_fp_append, charge_fpprime_appends=charge_fpprime_append, images=images) else: calc.model.lossfunction.attach_model( calc.model, log=calc._log, fingerprints=calc.descriptor.fingerprints, fingerprintprimes=calc.descriptor.fingerprintprimes, images=images) originalloss = calc.model.lossfunction.get_loss( vector, lossprime=False)['loss'] calc._log('\n Perturbing parameters...', tic='perturb') allparameters = [] alllosses = [] num_parameters = len(vector) for count in range(num_parameters): calc._log('parameter %i out of %i' % (count + 1, num_parameters)) parameters = [] losses = [] # parameter is perturbed -d and loss function calculated. vector[count] -= d parameters.append(vector[count]) perturbedloss = calc.model.lossfunction.get_loss( vector, lossprime=False)['loss'] losses.append(perturbedloss) vector[count] += d parameters.append(vector[count]) losses.append(originalloss) # parameter is perturbed +d and loss function calculated. vector[count] += d parameters.append(vector[count]) perturbedloss = calc.model.lossfunction.get_loss( vector, lossprime=False)['loss'] losses.append(perturbedloss) allparameters.append(parameters) alllosses.append(losses) # returning back to the original value. vector[count] -= d calc._log('...parameters perturbed and loss functions calculated', toc='perturb') calc._log('Plotting loss function vs perturbed parameters...', tic='plot') with PdfPages(plotfile) as pdf: count = 0 for parameter in vector: fig = pyplot.figure() ax = fig.add_subplot(111) ax.plot(allparameters[count], alllosses[count], marker='o', linestyle='--', color='b',) xmin = allparameters[count][0] - \ 0.1 * (allparameters[count][-1] - allparameters[count][0]) xmax = allparameters[count][-1] + \ 0.1 * (allparameters[count][-1] - allparameters[count][0]) ymin = min(alllosses[count]) - \ 0.1 * (max(alllosses[count]) - min(alllosses[count])) ymax = max(alllosses[count]) + \ 0.1 * (max(alllosses[count]) - min(alllosses[count])) ax.set_xlim([xmin, xmax]) ax.set_ylim([ymin, ymax]) ax.set_xlabel('parameter no %i' % count) ax.set_ylabel('loss function') pdf.savefig(fig) pyplot.close(fig) count += 1 calc._log(' ...loss functions plotted.', toc='plot')
[docs] def plot_parity_and_error(calc, images, label_parity='parity', label_error='error', dblabel=None, xtic_angle=45., plot_forces=True, plot_charges=False, plotfile_parity=None, plotfile_error=None, color='b.', overwrite=False, returndata=False, ): """Makes a parity plot and an error plot of Amp energies and forces versus real energies and forces. Parameters ---------- calc : Amp object or str Either an existing instantiated Amp calculator or a path for loading an existing ".amp" file. In the latter case, should be fed like 'load="filename.amp"'. images : list or str List of ASE atoms objects with positions, symbols, energies, and forces in ASE format. This can also be the path to an ASE trajectory (.traj) or database (.db) file. Energies can be obtained from any reference, e.g. DFT calculations. label_parity : str Default prefix/location used for the parity plot. label_error : str Default prefix/location used for the error plot. dblabel : str Optional separate prefix/location of database files, including fingerprints, fingerprint primes, and neighborlists, to avoid calculating them. If not supplied, just uses the value from label. xtic_angle : float Set the xtics angles. Default is 45. plot_forces : bool Determines whether or not forces should be plotted as well. plot_charges : bool Determines whether or not charges should be plotted as well. plotfile_parity : Object File for the parity plot. plotfile_error : Object File for the error plot. color : str Plot color. overwrite : bool If a plot or an script containing values found overwrite it. returndata : bool Whether to return a reference to the figures and their data or not. """ if isinstance(calc, str): calc = Amp.load(file=calc, dblabel=dblabel) if plotfile_parity is None: plotfile_parity = make_filename(label_parity, '-plot.pdf') if plotfile_error is None: plotfile_error = make_filename(label_error, '-plot.pdf') if (not overwrite) and os.path.exists(plotfile_parity): raise IOError('File exists: %s.\nIf you want to overwrite,' ' set overwrite=True or manually delete.' % plotfile_parity) if (not overwrite) and os.path.exists(plotfile_error): raise IOError('File exists: %s.\nIf you want to overwrite,' ' set overwrite=True or manually delete.' % plotfile_error) if plot_forces is True: calculate_derivatives = True else: calculate_derivatives = False calc._log('\nAmp parity and error plots started. ' + now() + '\n') calc._log('Descriptor: %s' % calc.descriptor.__class__.__name__) calc._log('Model: %s' % calc.model.__class__.__name__) if calc.model.__class__.__name__ == 'ChargeNeuralNetwork': images = hash_gc_images(images) else: images = hash_images(images) calc._log('\nDescriptor\n==========') calc.descriptor.calculate_fingerprints( images=images, parallel=calc._parallel, log=calc._log, calculate_derivatives=calculate_derivatives) calc._log('Calculating potential energies...', tic='pot-energy') energy_data = {} if calc.model.__class__.__name__ == 'ChargeNeuralNetwork': calc._log('Calculating charges...', tic='total-charge') charge_data = {} _ = calc.model.calculate_charge_fp_append( images, calc.model.parameters.slab_metal, calc.model.parameters.surface_correction, calc.model.parameters.etas) charge_fp_append, charge_fpprime_append = _ for hash, image in images.items(): no_of_atoms = len(image) model_name = calc.model.__class__.__name__ if model_name == 'ChargeNeuralNetwork': energy_args = dict( fingerprints=calc.descriptor.fingerprints[hash], wf=images[hash].calc.results['electrode_potential'], qfp_append=charge_fp_append[hash]) amp_energy, amp_charge = calc.model.calculate_gc_energy( **energy_args) amp_charge *= -1. actual_energy = image.get_potential_energy(apply_constraint=False) act_energy_per_atom = actual_energy / no_of_atoms energy_error = abs(amp_energy - actual_energy) / no_of_atoms energy_data[hash] = [actual_energy, amp_energy, act_energy_per_atom, energy_error] actual_charge = image.calc.results['excess_electrons'] act_charge_per_atom = actual_charge / no_of_atoms charge_error = abs(amp_charge - actual_charge) / no_of_atoms charge_data[hash] = [actual_charge, amp_charge, act_charge_per_atom, charge_error] else: energy_args = dict( fingerprints=calc.descriptor.fingerprints[hash], ) if model_name == 'KernelRidge': if calc.model.trainingimages is not None: trainingimages = hash_images( Trajectory(calc.model.trainingimages)) energy_args['trainingimages'] = trainingimages calc.descriptor.calculate_fingerprints( images=trainingimages, parallel=calc._parallel, log=calc._log, calculate_derivatives=calculate_derivatives ) fp_trainingimages = calc.descriptor.fingerprints energy_args['fp_trainingimages'] = fp_trainingimages energy_args['hash'] = hash amp_energy = calc.model.calculate_energy(**energy_args) actual_energy = image.get_potential_energy(apply_constraint=False) act_energy_per_atom = actual_energy / no_of_atoms energy_error = abs(amp_energy - actual_energy) / no_of_atoms energy_data[hash] = [actual_energy, amp_energy, act_energy_per_atom, energy_error] calc._log('...potential energies calculated.', toc='pot-energy') # calculating minimum and maximum energies min_act_energy = min([energy_data[hash][0] for hash, image in images.items()]) max_act_energy = max([energy_data[hash][0] for hash, image in images.items()]) min_act_energy_per_atom = min([energy_data[hash][2] for hash, image in images.items()]) max_act_energy_per_atom = max([energy_data[hash][2] for hash, image in images.items()]) # calculating energy per atom rmse energy_square_error = 0. for hash, image in images.items(): energy_square_error += energy_data[hash][3] ** 2. energy_per_atom_rmse = np.sqrt(energy_square_error / len(images)) if calc.model.__class__.__name__ == 'ChargeNeuralNetwork': # calculating charge per atom rmse if using training-charge scheme calc._log('...charge calculated.', toc='total-charge') min_act_charge = min([charge_data[hash][0] for hash, image in images.items()]) max_act_charge = max([charge_data[hash][0] for hash, image in images.items()]) min_act_charge_per_atom = min([charge_data[hash][2] for hash, image in images.items()]) max_act_charge_per_atom = max([charge_data[hash][2] for hash, image in images.items()]) charge_square_error = 0. for hash, image in images.items(): charge_square_error += charge_data[hash][3] ** 2. charge_per_atom_rmse = np.sqrt(charge_square_error / len(images)) if plot_forces is True: calc._log('Calculating forces...', tic='forces') force_data = {} for hash, image in images.items(): if model_name == 'ChargeNeuralNetwork': forces_args = dict( fingerprints=calc.descriptor.fingerprints[hash], fingerprintprimes=calc.descriptor.fingerprintprimes[hash], wf=images[hash].calc.results['electrode_potential'], qfp_append=charge_fp_append[hash], qfpprime_append=charge_fpprime_append[hash] ) amp_forces = calc.model.calculate_gc_forces(**forces_args) else: forces_args = dict( fingerprints=calc.descriptor.fingerprints[hash], fingerprintprimes=calc.descriptor.fingerprintprimes[hash] ) if model_name == 'KernelRidge': if calc.model.trainingimages is not None: trainingimages = \ hash_images(Trajectory(calc.model.trainingimages)) calc.descriptor.calculate_fingerprints( images=trainingimages, calculate_derivatives=True ) t_descriptor = calc.descriptor forces_args['trainingimages'] = trainingimages forces_args['t_descriptor'] = t_descriptor amp_forces = calc.model.calculate_forces(**forces_args) actual_forces = image.get_forces(apply_constraint=False) force_data[hash] = [actual_forces, amp_forces, abs(np.array(amp_forces) - np.array(actual_forces))] calc._log('...forces calculated.', toc='forces') min_act_force = min([force_data[hash][0][index][k] for hash, image in images.items() for index in range(len(image)) for k in range(3)]) max_act_force = max([force_data[hash][0][index][k] for hash, image in images.items() for index in range(len(image)) for k in range(3)]) # calculating force rmse force_square_error = 0. for hash, image in images.items(): no_of_atoms = len(image) for index in range(no_of_atoms): for k in range(3): force_square_error += \ ((1.0 / 3.0) * force_data[hash][2][index][k] ** 2.) / \ no_of_atoms force_rmse = np.sqrt(force_square_error / len(images)) # make parity plot if plot_forces is False and plot_charges is False: fig = pyplot.figure(figsize=(5., 5.)) ax = fig.add_subplot(111) elif plot_forces is True and plot_charges is True: fig = pyplot.figure(figsize=(5., 15.)) ax = fig.add_subplot(311) else: fig = pyplot.figure(figsize=(5., 10.)) ax = fig.add_subplot(211) calc._log('Plotting energy parities...', tic='energy-plot') ax.plot(list(zip(*np.vstack(energy_data.values())))[0], list(zip(*np.vstack(energy_data.values())))[1], color) # draw line ax.plot([min_act_energy, max_act_energy], [min_act_energy, max_act_energy], 'r-', lw=0.3,) ax.set_xlabel("ab initio energy, eV") ax.set_ylabel("Amp energy, eV") ax.set_title("Energies") ax.tick_params(axis='x', rotation=xtic_angle) calc._log('...energies plotted.', toc='energy-plot') if plot_forces is True: if plot_charges: ax = fig.add_subplot(313) else: ax = fig.add_subplot(212) calc._log('Plotting forces...', tic='force-plot') ax.plot(np.hstack(force_data.values())[0].flatten(), np.hstack(force_data.values())[1].flatten(), color) # draw line ax.plot([min_act_force, max_act_force], [min_act_force, max_act_force], 'r-', lw=0.3,) ax.set_xlabel("ab initio force, eV/Ang") ax.set_ylabel("Amp force, eV/Ang") ax.set_title("Forces") ax.tick_params(axis='x', rotation=xtic_angle) calc._log('...forces plotted.', toc='force-plot') if plot_charges is True: if plot_forces: ax = fig.add_subplot(312) else: ax = fig.add_subplot(212) calc._log('Plotting charges...', tic='charge-plot') ax.plot(list(zip(*np.vstack(charge_data.values())))[0], list(zip(*np.vstack(charge_data.values())))[1], color) # draw line ax.plot([min_act_charge, max_act_charge], [min_act_charge, max_act_charge], 'r-', lw=0.3,) ax.set_xlabel(r"ab initio $N_e$") ax.set_ylabel(r"Amp $N_e$") ax.set_title("Number of Excess electrons") ax.tick_params(axis='x', rotation=xtic_angle) calc._log('...charges plotted.', toc='charge-plot') fig.tight_layout() fig.savefig(plotfile_parity) pyplot.close(fig) # make error plot if plot_forces is False and plot_charges is False: fig = pyplot.figure(figsize=(5., 5.)) ax = fig.add_subplot(111) elif plot_forces is True and plot_charges is True: fig = pyplot.figure(figsize=(5., 15.)) ax = fig.add_subplot(311) else: fig = pyplot.figure(figsize=(5., 10.)) ax = fig.add_subplot(211) calc._log('Plotting energy errors...', tic='energy-plot') ax.plot(list(zip(*np.vstack(energy_data.values())))[2], list(zip(*np.vstack(energy_data.values())))[3], color) # draw horizontal line for rmse ax.plot([min_act_energy_per_atom, max_act_energy_per_atom], [energy_per_atom_rmse, energy_per_atom_rmse], color='black', linestyle='dashed', lw=1,) ax.text(max_act_energy_per_atom, energy_per_atom_rmse, 'energy rmse = %6.5f' % energy_per_atom_rmse, ha='right', va='bottom', color='black') ax.set_xlabel("ab initio energy (eV) per atom") ax.set_ylabel("$|$ab initio energy - Amp energy$|$ / number of atoms") ax.set_title("Energies") ax.tick_params(axis='x', rotation=xtic_angle) calc._log('...energy errors plotted.', toc='energy-plot') if plot_forces is True: if plot_charges: ax = fig.add_subplot(313) else: ax = fig.add_subplot(212) calc._log('Plotting force errors...', tic='force-plot') ax.plot(np.hstack(force_data.values())[0].flatten(), np.hstack(force_data.values())[2].flatten(), color) # draw horizontal line for rmse ax.plot([min_act_force, max_act_force], [force_rmse, force_rmse], color='black', linestyle='dashed', lw=1,) ax.text(max_act_force, force_rmse, 'force rmse = %5.4f' % force_rmse, ha='right', va='bottom', color='black',) ax.set_xlabel("ab initio force, eV/Ang") ax.set_ylabel("$|$ab initio force - Amp force$|$") ax.set_title("Forces") ax.tick_params(axis='x', rotation=xtic_angle) calc._log('...force errors plotted.', toc='force-plot') if plot_charges is True: if plot_forces: ax = fig.add_subplot(312) else: ax = fig.add_subplot(212) calc._log('Plotting charge errors...', tic='charge-plot') ax.plot(list(zip(*np.vstack(charge_data.values())))[2], list(zip(*np.vstack(charge_data.values())))[3], color) # draw horizontal line for rmse ax.plot([min_act_charge_per_atom, max_act_charge_per_atom], [charge_per_atom_rmse, charge_per_atom_rmse], color='black', linestyle='dashed', lw=1,) ax.text(max_act_charge_per_atom, charge_per_atom_rmse, 'charge rmse = %6.5f' % charge_per_atom_rmse, ha='right', va='bottom', color='black') ax.set_xlabel(r"ab initio $N_e$") ax.set_ylabel(r"$|$ab initio $N_e$ - Amp $N_e |$") ax.set_title("Number of Excess electrons") ax.tick_params(axis='x', rotation=xtic_angle) calc._log('...charge errors plotted.', toc='charge-plot') fig.tight_layout() fig.savefig(plotfile_error) pyplot.close(fig) if returndata: if plot_forces is False: if plot_charges: return energy_data, charge_data else: return energy_data else: if plot_charges: return energy_data, charge_data, force_data else: return energy_data, force_data
[docs] def calc_rmse(calc_paths, images, cores=None, dblabel=None, energy_coefficient=1.0, force_coefficient=0.04, charge_coefficient=10.0, ): """Calculates energy and force RMSEs for a set of Amp calculators. All calculators must have the same descriptors and models. Parameters ---------- calc_paths : list List of paths for loading existing ".amp" files. images : list or str List of ASE atoms objects with positions, symbols, energies, and forces in ASE format. This can also be the path to an ASE trajectory (.traj) or database (.db) file. Energies can be obtained from any reference, e.g. DFT calculations. cores : int Can specify cores to use for parallel processing; if None, will determine from environment dblabel : str Optional separate prefix/location of database files, including fingerprints, fingerprint primes, and neighborlists, to avoid calculating them. If not supplied, just uses the value from label. energy_coefficient : float Coefficient of energy loss in the total loss function. force_coefficient : float Coefficient of force loss in the total loss function. charge_coefficient : float Coefficient of charge loss in the total loss function. """ from amp.model import LossFunction calcs = [] calc = Amp.load(file=calc_paths[0], cores=cores, dblabel=dblabel) calcs.append(calc) for i in range(1, len(calc_paths)): calcs.append(Amp.load(file=calc_paths[i], cores=cores, dblabel=dblabel, logging=False)) calc._log('Loaded file: %s' % calc_paths[i]) if charge_coefficient == 0.: if force_coefficient == 0.: calculate_derivatives = False convergence = {'energy_rmse': 0.001} else: calculate_derivatives = True convergence = {'energy_rmse': 0.001, 'force_rmse': 0.01} else: if force_coefficient == 0.: calculate_derivatives = False convergence = {'energy_rmse': 0.001, 'charge_rmse': 0.001} else: calculate_derivatives = True convergence = {'energy_rmse': 0.001, 'charge_rmse': 0.001, 'force_rmse': 0.01} # Setting the convergence is a kludgy way to keep LossFunction.__init__() # from resetting the force_coefficient to 0 calc._log('\nAmp calc_rmse started. ' + now() + '\n') calc._log('Descriptor: %s' % calc.descriptor.__class__.__name__) calc._log('Model: %s' % calc.model.__class__.__name__) if calc.model.__class__.__name__ == 'ChargeNeuralNetwork': images = hash_gc_images(images) else: images = hash_images(images) calc._log('\nDescriptor\n==========') calc.descriptor.calculate_fingerprints( images=images, parallel=calc._parallel, log=calc._log, calculate_derivatives=calculate_derivatives) lossfunction = LossFunction(energy_coefficient=energy_coefficient, force_coefficient=force_coefficient, charge_coefficient=charge_coefficient, parallel=calc._parallel, raise_ConvergenceOccurred=False, convergence=convergence, ) calc.model.lossfunction = lossfunction # Set up local loss function. if calc.model.__class__.__name__ == 'ChargeNeuralNetwork': _ = calc.model.calculate_charge_fp_append( images, calc.model.parameters.slab_metal, calc.model.parameters.surface_correction, calc.model.parameters.etas) charge_fp_append, charge_fpprime_append = _ if force_coefficient == 0.: calc.model.lossfunction.attach_model( calc.model, log=calc._log, fingerprints=calc.descriptor.fingerprints, charge_fp_appends=charge_fp_append, charge_fpprime_appends=charge_fpprime_append, images=images) else: calc.model.lossfunction.attach_model( calc.model, log=calc._log, fingerprints=calc.descriptor.fingerprints, fingerprintprimes=calc.descriptor.fingerprintprimes, charge_fp_appends=charge_fp_append, charge_fpprime_appends=charge_fpprime_append, images=images) else: if force_coefficient == 0.: calc.model.lossfunction.attach_model( calc.model, log=calc._log, fingerprints=calc.descriptor.fingerprints, images=images) else: calc.model.lossfunction.attach_model( calc.model, log=calc._log, fingerprints=calc.descriptor.fingerprints, fingerprintprimes=calc.descriptor.fingerprintprimes, images=images) steps = [] loss = [] energy_loss = [] force_loss = [] charge_loss = [] energy_maxresid = [] force_maxresid = [] charge_maxresid = [] energy_rmse = [] force_rmse = [] charge_rmse = [] for i in range(len(calc_paths)): steps.append(int(os.path.basename(calc_paths[i])[:-4])) vector = calcs[i].model.vector.copy() results = calc.model.lossfunction.get_loss( vector, lossprime=calculate_derivatives) loss.append(results['loss']) energy_loss.append(results['energy_loss']) force_loss.append(results['force_loss']) charge_loss.append(results['charge_loss']) energy_maxresid.append(results['energy_maxresid']) force_maxresid.append(results['force_maxresid']) charge_maxresid.append(results['charge_maxresid']) energy_rmse.append(np.sqrt(energy_loss[i] / len(images))) if charge_coefficient == 0.: if force_coefficient == 0.: calc._log('%5i %19s %12.4e %10.4e %10.4e' % (steps[i], now(), loss[i], energy_rmse[i], energy_maxresid[i])) else: force_rmse.append(np.sqrt(force_loss[i] / len(images))) calc._log('%5i %19s %12.4e %10.4e ' ' %10.4e %10.4e %10.4e' % (steps[i], now(), loss[i], energy_rmse[i], energy_maxresid[i], force_rmse[i], force_maxresid[i])) else: charge_rmse.append(np.sqrt(charge_loss[i] / len(images))) if force_coefficient == 0.: calc._log( '%5i %19s %12.4e %10.4e %10.4e %10.4e %10.4e' % (steps[i], now(), loss[i], energy_rmse[i], energy_maxresid[i], charge_rmse[i], charge_maxresid[i])) else: force_rmse.append(np.sqrt(force_loss[i] / len(images))) calc._log('%5i %19s %12.4e %10.4e ' ' %10.4e %10.4e %10.4e ' ' %10.4e %10.4e' % (steps[i], now(), loss[i], energy_rmse[i], energy_maxresid[i], charge_rmse[i], charge_maxresid[i], force_rmse[i], force_maxresid[i])) data = {} data['steps'] = steps data['loss'] = loss data['energy_loss'] = energy_loss data['force_loss'] = force_loss data['charge_loss'] = charge_loss data['energy_maxresid'] = energy_maxresid data['force_maxresid'] = force_maxresid data['charge_maxresid'] = charge_maxresid data['energy_rmse'] = energy_rmse if force_coefficient != 0.: data['force_rmse'] = force_rmse if charge_coefficient != 0.: data['charge_rmse'] = charge_rmse return data
[docs] def read_trainlog(logfile, verbose=True, multiple=0, _shared=None): """Reads the log file from the training process, returning the relevant parameters. Parameters ---------- logfile : str Name or path to the log file. verbose : bool Write out logfile during analysis. multiple : int or True If multiple training sessions are recorded in the same log file, return session number <multiple> (counting from 0). If set to True, returns all sessions as list. _shared : dict or None Internal use. Metadata (no_images, convergence params) inherited from the first attempt when parsing retry attempts that lack headers. """ data = {} with open(logfile, 'r') as f: lines = f.read().splitlines() # Get number of training sets. Two types of boundaries: # 1. 'Amp training started.': a full restart (resume from checkpoint, etc.) # 2. '; retrying with new random weights.': an in-process retry; these # lack their own header lines so metadata is inherited from attempt 0. multiple_starts = [] multiple_is_retry = [] for index, line in enumerate(lines): if line.startswith('Amp training started.'): multiple_starts.append(index) multiple_is_retry.append(False) elif '; retrying with new random weights.' in line: multiple_starts.append(index + 1) multiple_is_retry.append(True) if multiple is True: # Parse attempt 0 first to collect shared metadata for retry attempts. base_data = read_trainlog(logfile, verbose, multiple=0) datalist = [base_data] for index in range(1, len(multiple_starts)): shared = base_data if multiple_is_retry[index] else None datalist.append(read_trainlog(logfile, verbose, multiple=index, _shared=shared)) return datalist else: lines = lines[multiple_starts[multiple]:] def print_(text): if verbose: print(text) # Get number of images. Retry attempts don't reprint this, so fall back # to inherited value from the first attempt. no_images = None for line in lines: if 'unique images after hashing' in line: no_images = int(line.split()[0]) break if no_images is None and _shared is not None: no_images = _shared.get('no_images') data['no_images'] = no_images # Find where convergence data starts. Retry attempts don't reprint the # convergence criteria block, so fall back to inherited data. startline = None _params_already_set = False for index, line in enumerate(lines): if 'Loss function convergence criteria:' in line: startline = index data['convergence'] = {} d = data['convergence'] break else: if _shared is not None and 'convergence' in _shared: # Retry attempt: inherit convergence params, find step-data start # via 'Starting parameter optimization.' import copy data['convergence'] = copy.deepcopy(_shared['convergence']) d = data['convergence'] startline = None for index, line in enumerate(lines): if 'Starting parameter optimization.' in line: startline = index + 3 # skip 'Optimizer:' and kwargs lines break if startline is None: return data # Skip any non-data lines (e.g. empty lines) right after start. while startline < len(lines) and not lines[startline].strip(): startline += 1 trainforces = d.get('force_rmse') is not None traincharges = d.get('charge_rmse') is not None _params_already_set = True else: return data # Get convergence parameters (skipped for retry attempts that inherit them) if not _params_already_set: ready = [False] * 10 for index, line in enumerate(lines[startline:]): if 'energy_rmse:' in line: ready[0] = True d['energy_rmse'] = float(line.split(':')[-1]) elif 'force_rmse:' in line: ready[1] = True _ = line.split(':')[-1].strip() if _ == 'None': d['force_rmse'] = None trainforces = False else: d['force_rmse'] = float(line.split(':')[-1]) trainforces = True print_('train forces: %s' % trainforces) elif 'charge_rmse:' in line: ready[7] = True _ = line.split(':')[-1].strip() if _ == 'None': d['charge_rmse'] = None traincharges = False else: d['charge_rmse'] = float(line.split(':')[-1]) traincharges = True print_('train charges: %s' % traincharges) elif 'force_coefficient:' in line: ready[2] = True _ = line.split(':')[-1].strip() if _ == 'None': d['force_coefficient'] = 0. else: d['force_coefficient'] = float(_) elif 'charge_coefficient:' in line: ready[8] = True _ = line.split(':')[-1].strip() if _ == 'None': d['charge_coefficient'] = 0. else: d['charge_coefficient'] = float(_) elif 'energy_coefficient:' in line: ready[3] = True d['energy_coefficient'] = float(line.split(':')[-1]) elif 'energy_maxresid:' in line: ready[5] = True _ = line.split(':')[-1].strip() if _ == 'None': d['energy_maxresid'] = None else: d['energy_maxresid'] = float(_) elif 'force_maxresid:' in line: ready[6] = True _ = line.split(':')[-1].strip() if _ == 'None': d['force_maxresid'] = None else: d['force_maxresid'] = float(_) elif 'charge_maxresid:' in line: ready[9] = True _ = line.split(':')[-1].strip() if _ == 'None': d['charge_maxresid'] = None else: d['charge_maxresid'] = float(_) elif 'Step' in line and 'Time' in line: ready[4] = True startline += index + 2 if ready == [True] * 7: break for _ in d.items(): print_('{}: {}'.format(_[0], _[1])) E = d['energy_rmse']**2 * no_images if trainforces: F = d['force_rmse']**2 * no_images else: F = 0. if traincharges: Q = d['charge_rmse']**2 * no_images else: Q = 0. costfxngoal = d['energy_coefficient'] * E + d['force_coefficient'] * F + \ d['charge_coefficient'] * Q d['costfxngoal'] = costfxngoal # Extract data (emrs and fmrs are max residuals). steps = [] es = [] qs = [] fs = [] emrs = [] qmrs = [] fmrs = [] costfxns = [] costfxnEs = [] costfxnQs = [] costfxnFs = [] index = startline d['converged'] = None while index < len(lines): line = lines[index] if 'Saving checkpoint data.' in line: index += 1 continue elif 'Overwriting file' in line: index += 1 continue elif 'optimization completed successfully.' in line: # old version d['converged'] = True break elif '...optimization successful.' in line: d['converged'] = True break elif 'could not find parameters for the' in line: break elif '...optimization unsuccessful.' in line: d['converged'] = False break elif len(line) == 0: # Job apparently timed out. break print_(line) if traincharges: if trainforces: (step, time, costfxn, e, _, emr, _, q, _, qmr, _, f, _, fmr, _) = line.split() fs.append(float(f)) fmrs.append(float(fmr)) F = float(f)**2 * no_images costfxnFs.append(d['force_coefficient'] * F / float(costfxn)) else: step, time, costfxn, e, _, emr, _, q, _, qmr, _ = line.split() qs.append(float(q)) qmrs.append(float(qmr)) Q = float(q) ** 2. * no_images costfxnQs.append(d['charge_coefficient'] * Q / float(costfxn)) else: if trainforces: step, time, costfxn, e, _, emr, _, f, _, fmr, _ = line.split() fs.append(float(f)) fmrs.append(float(fmr)) F = float(f)**2 * no_images costfxnFs.append(d['force_coefficient'] * F / float(costfxn)) else: step, time, costfxn, e, _, emr, _ = line.split() steps.append(int(step)) es.append(float(e)) emrs.append(float(emr)) costfxns.append(float(costfxn)) E = float(e)**2 * no_images costfxnEs.append(d['energy_coefficient'] * E / float(costfxn)) index += 1 d['steps'] = steps d['es'] = es d['qs'] = qs d['fs'] = fs d['emrs'] = emrs d['qmrs'] = qmrs d['fmrs'] = fmrs d['costfxns'] = costfxns d['costfxnEs'] = costfxnEs d['costfxnFs'] = costfxnFs d['costfxnQs'] = costfxnQs return data
[docs] def plot_convergence(data, plotfile='convergence.pdf'): """Makes a plot of the convergence of the cost function and its energy and force components. Parameters ---------- data : dict Convergence data dictionary as returned by read_trainlog. plotfile : str or None Name or path to the plot file. If None, instead returns reference to the created figure. """ # Find if multiple runs contained in data set. d = data['convergence'] steps = range(len(d['steps'])) breaks = [] for index, step in enumerate(d['steps'][1:]): if step < d['steps'][index]: breaks.append(index) # Make plots. fig = pyplot.figure(figsize=(6., 8.)) # Margins, vertical gap, and top-to-bottom ratio of figure. lm, rm, bm, tm, vg, tb = 0.12, 0.05, 0.08, 0.03, 0.08, 4. bottomaxheight = (1. - bm - tm - vg) / (tb + 1.) ax = fig.add_axes((lm, bm + bottomaxheight + vg, 1. - lm - rm, tb * bottomaxheight)) ax.semilogy(steps, d['es'], 'b', lw=2, label='energy rmse') ax.semilogy(steps, d['emrs'], 'b:', lw=2, label='energy maxresid') if d['force_rmse']: ax.semilogy(steps, d['fs'], 'g', lw=2, label='force rmse') ax.semilogy(steps, d['fmrs'], 'g:', lw=2, label='force maxresid') if d['charge_rmse']: ax.semilogy(steps, d['qs'], 'r', lw=2, label='charge rmse') ax.semilogy(steps, d['qmrs'], 'r:', lw=2, label='charge maxresid') ax.semilogy(steps, d['costfxns'], color='0.5', lw=2, label='loss function') # Targets. if d['energy_rmse']: ax.semilogy([steps[0], steps[-1]], [d['energy_rmse']] * 2, color='b', linestyle='-', alpha=0.5) if d['energy_maxresid']: ax.semilogy([steps[0], steps[-1]], [d['energy_maxresid']] * 2, color='b', linestyle=':', alpha=0.5) if d['charge_rmse']: ax.semilogy([steps[0], steps[-1]], [d['charge_rmse']] * 2, color='r', linestyle='-', alpha=0.5) if d['charge_maxresid']: ax.semilogy([steps[0], steps[-1]], [d['charge_maxresid']] * 2, color='r', linestyle=':', alpha=0.5) if d['force_rmse']: ax.semilogy([steps[0], steps[-1]], [d['force_rmse']] * 2, color='g', linestyle='-', alpha=0.5) if d['force_maxresid']: ax.semilogy([steps[0], steps[-1]], [d['force_maxresid']] * 2, color='g', linestyle=':', alpha=0.5) ax.set_ylabel('error') ax.legend(loc='best', fontsize=9.) if len(breaks) > 0: ylim = ax.get_ylim() for b in breaks: ax.plot([b] * 2, ylim, '--k') if d['charge_rmse']: if d['force_rmse']: # Loss function component plot. axf = fig.add_axes((lm, bm, 1. - lm - rm, bottomaxheight)) axf.fill_between(x=np.array(steps), y1=d['costfxnEs'], color='blue') axf.fill_between(x=np.array(steps), y1=d['costfxnEs'], y2=np.array(d['costfxnEs']) + np.array(d['costfxnQs']), color='red') axf.fill_between(x=np.array(steps), y1=np.array(d['costfxnEs']) + np.array(d['costfxnQs']), y2=np.array(d['costfxnEs']) + np.array(d['costfxnQs']) + np.array(d['costfxnFs']), color='green') axf.set_ylabel('loss function component') axf.set_xlabel('loss function call') axf.set_ylim(0, 1) else: ax.set_xlabel('loss function call') else: if d['force_rmse']: # Loss function component plot. axf = fig.add_axes((lm, bm, 1. - lm - rm, bottomaxheight)) axf.fill_between(x=np.array(steps), y1=d['costfxnEs'], color='blue') axf.fill_between(x=np.array(steps), y1=d['costfxnEs'], y2=np.array(d['costfxnEs']) + np.array(d['costfxnFs']), color='green') axf.set_ylabel('loss function component') axf.set_xlabel('loss function call') axf.set_ylim(0, 1) else: ax.set_xlabel('loss function call') if plotfile is None: return fig fig.savefig(plotfile) pyplot.close(fig)