Source code for pquad.pquad

from __future__ import absolute_import, division, print_function
import os
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import pkg_resources

if __name__ == '__main__':
    import error_metrics
else:
    from . import error_metrics

#default values
data_path = pkg_resources.resource_filename(__name__, 'data/')

[docs]def get_defaults(): """ Returns default frequencies to project intensities onto as well as default paths for locations of the pure and mixture spectroscopic data. Returns ------- frequency_range: numpy.ndarray Frequencies over which to project the intensities. pure_data_path : str Directory location where pure-component spectra are stored. mixture_data_path : str Directory location where mixed-component spectra are stored. """ pure_data_path = os.path.join(data_path, 'pure_components/') mixture_data_path = os.path.join(data_path, 'mixed_components/') reaction_data_path = os.path.join(data_path, 'reaction/') frequency_range = np.linspace(850,1850,num=501,endpoint=True) return frequency_range, pure_data_path, mixture_data_path, reaction_data_path
[docs]class IR_DECONV: """Class for generating functions used to to deconvolute spectra""" def __init__(self, frequency_range, pure_data_path): """ Parameters ---------- frequency_range : numpy.narray Frequencies over which to project the intensities. pure_data_path : str Directory location where pure component spectra are stored. Attributes ---------- NUM_TARGETS : int Number of different pure commponent species. PURE_DATA : list[numpy.ndarray] Original values of the of the experimental pure-component spectra. There is a separate numpy.ndarray for each pure-component. Each array has shape $(m+1)$x$n$ where $m$ is the number of spectra for a pure component and $n$ is the number of discrete intensities sampled by the spectrometer. nump.ndarray[0] corresponds to the frequencies over which the intensities are measured. PURE_CONCENTRATIONS : list[numpy.ndarray] Concentrations (M) for each pure-component solution measured. There is a separate numpy.ndarry for each experimental pure-component and each array is of length $m$. PURE_FILES : list[str] Location to each file in pure_data_path. FREQUENCY_RANGE : numpy.ndarray Numpy array of frequencies to project each spectra onto. NUM_PURE_SPECTRA : list[int] Number of spectra for each pure-component. PURE_STANDARDIZED : list[numpy.ndarray] List containing standardized sets of pure spectra where each set of spectra is represented by a $m$x$n$ array where $m$ is the the number of spectra for each pure-componet species and $n$ is the length of FREQUENCY_RANGE. PURE_NAMES : list[str] List of pure species names. """ PURE_CONCENTRATIONS = [] PURE_DATA = [] if os.path.isdir(pure_data_path) == True: PURE_FILES = os.listdir(pure_data_path) elif os.path.isfile(pure_data_path) == True: PURE_FILES = [os.path.basename(pure_data_path)] for component in PURE_FILES: if os.path.isdir(pure_data_path) == True: file_path = os.path.join(pure_data_path,component) elif os.path.isfile(pure_data_path) == True: file_path = pure_data_path data = np.loadtxt(file_path, delimiter=',', skiprows=1).T PURE_DATA.append(data) concentrations = np.genfromtxt(file_path, delimiter=','\ , skip_header=0,usecols=np.arange(1,data.shape[0]),max_rows=1,dtype=float) PURE_CONCENTRATIONS.append(concentrations) NUM_PURE_SPECTRA = [len(i) for i in PURE_CONCENTRATIONS] PURE_NAMES = np.char.replace(PURE_FILES,'_DATA.csv','').tolist() self.NUM_TARGETS = len(PURE_FILES) self.PURE_DATA = PURE_DATA self.PURE_CONCENTRATIONS = PURE_CONCENTRATIONS self.PURE_FILES = PURE_FILES self.PURE_NAMES = PURE_NAMES self.FREQUENCY_RANGE = frequency_range self.NUM_PURE_SPECTRA = NUM_PURE_SPECTRA self.PURE_STANDARDIZED = self.get_standardized_spectra(PURE_DATA)
[docs] def _get_pure_single_spectra(self): """ Returns the pure spectra and concentrations in a format X and y, respectively, where X is all spectra and y is the corresponding concentration vectors. Returns ------- X : numpy.ndarray Array of concatenated pure-component spectra of dimensions $m$x$n$ where $m$ is the number of spectra and $n$ is the number of discrete frequencies y : numpy.ndarray Array of concatenated pure-component concentrations of dimensions $m$x$n$ where $m$ is the number of spectra and $n$ the number of pure-component species. """ NUM_TARGETS = self.NUM_TARGETS FREQUENCY_RANGE = self.FREQUENCY_RANGE PURE_STANDARDIZED = self.PURE_STANDARDIZED PURE_CONCENTRATIONS = self.PURE_CONCENTRATIONS NUM_PURE_SPECTRA = self.NUM_PURE_SPECTRA X = np.zeros((np.sum(NUM_PURE_SPECTRA),FREQUENCY_RANGE.size)) y = np.zeros((np.sum(NUM_PURE_SPECTRA),NUM_TARGETS)) for i in range(NUM_TARGETS): y[np.sum(NUM_PURE_SPECTRA[0:i],dtype='int'):np.sum(NUM_PURE_SPECTRA[0:i+1],dtype='int'),i] = PURE_CONCENTRATIONS[i] X[np.sum(NUM_PURE_SPECTRA[0:i],dtype='int'):np.sum(NUM_PURE_SPECTRA[0:i+1],dtype='int')] = PURE_STANDARDIZED[i] return X, y
[docs] def get_standardized_spectra(self, DATA): """ Returns standardize spectra by projecting the intensities onto the same set of frequencies. Parameters ---------- DATA : list[numpy.ndarray] Spectra to be standardized. Must contain the experimental frequencies of the spectra to be standardized as the first entry of each ndarry and all following entries are the spectra. Each ndarray should there for be of shape (>1,n) where n is the number of frequencies sampled by the spectrometer. Returns ------- STANDARDIZED_SPECTRA : list[numpy.ndarray] List containing standardized sets of spectra in DATA projected onto FREQUENCY_RANGE. """ FREQUENCY_RANGE = self.FREQUENCY_RANGE if len(DATA[0].shape) == 1: DATA = [np.copy(DATA)] NUM_SPECTRA = [len(i)-1 for i in DATA] STANDARDIZED_SPECTRA = [] for i in range(len(DATA)): if NUM_SPECTRA[i] == 1: STANDARDIZED_SPECTRA.append(np.zeros(FREQUENCY_RANGE.size)) STANDARDIZED_SPECTRA[i] = np.interp(FREQUENCY_RANGE, DATA[i][0], DATA[i][1], left=None, right=None, period=None) else: STANDARDIZED_SPECTRA.append(np.zeros((NUM_SPECTRA[i],FREQUENCY_RANGE.size))) for ii in range(NUM_SPECTRA[i]): STANDARDIZED_SPECTRA[i][ii] = np.interp(FREQUENCY_RANGE, DATA[i][0], DATA[i][ii+1], left=None, right=None, period=None) return STANDARDIZED_SPECTRA
[docs] def _get_concentrations_2_pure_spectra(self): """ Returns regressed parameters for computing pure component spectra from individual concentrations Returns ------- CONCENTRATIONS_2_PURE_SPECTRA : list[numpy.ndarray] List of parameters to estimate pure-spectra given its concentration. """ NUM_TARGETS = self.NUM_TARGETS PURE_SPECTRA = self.PURE_STANDARDIZED PURE_CONCENTRATIONS = self.PURE_CONCENTRATIONS _get_concentration_coefficients = self._get_concentration_coefficients CONCENTRATIONS_2_PURE_SPECTRA = [] CONCENTRATION_COEFFICIENTS = [] for i in range(NUM_TARGETS): concentration_coefficients = _get_concentration_coefficients(PURE_CONCENTRATIONS[i]) CONCENTRATION_COEFFICIENTS.append(concentration_coefficients) concentrations_2_pure_spectra, res, rank, s = np.linalg.lstsq(concentration_coefficients, PURE_SPECTRA[i], rcond=None) CONCENTRATIONS_2_PURE_SPECTRA.append(concentrations_2_pure_spectra) return CONCENTRATIONS_2_PURE_SPECTRA
[docs] def _get_PC_loadings(self,NUM_PCs): """ Returns principal component loadings after performing SVD on the matrix of pure spectra where $pure-single_spectra = USV^T$ Parameters ---------- NUM_PCs : int The number of principal components of the spectra to keep. Attributes ---------- TOTAL_EXPLAINED_VARIANCE : numpy.ndarray Total explained variance by the $n$ principal components where $n=NUM_PCs$ Returns ------- PC_loadings : numpy.ndarray The first loadings of the first $N$ principal components where $N$ is equal to NUM_PCs. $PC_loadings = V$ """ _get_pure_single_spectra = self._get_pure_single_spectra pure_spectra, concentrations = _get_pure_single_spectra() U, S, V = np.linalg.svd(pure_spectra, full_matrices=False) PC_loadings = V[:NUM_PCs] self.TOTAL_EXPLAINED_VARIANCE = np.sum(S[:NUM_PCs]**2)/np.sum(S**2) return PC_loadings
[docs] def _get_PCs_and_regressors(self,NUM_PCs): """ Returns principal component loadings of the spectra as well as the matrix that multiplies the principal components of a given mixed spectra to return. Parameters ---------- NUM_PCs : int The number of principal components of the spectra to keep. Returns ------- PC_loadings : numpy.ndarray The first loadings of the first $N$ principal components where $N$ is equal to the number of pure-component species on which model is trained. PCs_2_concentrations : numpy.ndarray Regressed matrix to compute concentrations given the principal components of a mixed spectra. """ _get_pure_single_spectra = self._get_pure_single_spectra _get_PC_loadings = self._get_PC_loadings pure_spectra, concentrations = _get_pure_single_spectra() pca_loadings = _get_PC_loadings(NUM_PCs) PCs = np.dot(pure_spectra,pca_loadings.T) PCs_2_concentrations, res, rank, s = np.linalg.lstsq(PCs,concentrations,rcond=None) self.res = res return pca_loadings, PCs_2_concentrations
[docs] def _get_concentration_coefficients(self,concentrations): """ Get coefficients used in computing the individual spectra given their pure component conentrations. Also used in regressing parameters for computing pure component spectra. Parameters ---------- concentrations : float, np.ndarray, or list The concentration(s) whose pure-component spectra must be computed Returns ------- concentration_coefficients : numpy.ndarray set of coefficients for computing pure-component spectra given the concentration of that pure-component """ #concentration_coefficients = np.concatenate((np.ones_like(concentrations).reshape(-1,1), concentrations.reshape(-1,1), concentrations.reshape(-1,1)**2, concentrations.reshape(-1,1)**3),axis=1) concentration_coefficients = np.concatenate((concentrations.reshape(-1,1), concentrations.reshape(-1,1)**2),axis=1) return concentration_coefficients
[docs]class IR_Results(IR_DECONV): """Child class of IR_DECONV for deconvoluting experimental spectra whose intensity increaeses monotonically with concentration.""" def __init__(self, NUM_PCs, frequency_range, pure_data_path): """ Parameters ---------- NUM_PCs : int The number of principal components of the spectra to keep. frequency_range : numpy.narray Frequencies over which to project the intensities. pure_data_path : str Directory location where pure component spectra are stored. Attributes ---------- pca_loadings : numpy.ndarray The first loadings of the first $N$ principal components where $N$ is equal to the number of pure-component species on which model is trained. PCs_2_concentrations : numpy.ndarray Regressed matrix to compute concentrations given the principal components of a mixed spectra. """ IR_DECONV.__init__(self, frequency_range, pure_data_path) self.pca_loadings, self.PCs_2_concentrations = self._get_PCs_and_regressors(NUM_PCs)
[docs] def set_mixture_data(self, mixture_data_path, contains_concentrations=True): """ Instantiates the mixture data that needs to deconvoluted from the file(s) in mixture_data_path. A path to a folder of files, a single file, or a numpy array can be passed. Parameters ---------- mixture_data_path : str or numpy.array Directory or file where mixture data is stored. A numpy.array is also accepted. Attributes ---------- PURE_DATA_IN_MIXTURE : list[numpy.ndarray] List containing unstandardized single-spectra data. Read from the mixture files by numpy.load.txt. Instantiated if contains_concentrations=True. MIXTURE_DATA : list[numpy.ndarray] List containing unstandardized mixture spectra directly from .csv files. Read by numpy.loadtxt. MIXTURE_CONCENTRATIONS : list[numpy.ndarray] List containing pure-component concentrations. Instantiated if contains_concentrations=True. MIXTURE_FILES : list[str] List containing file(s) in mixture_data_path. PURE_DATA_IN_MIXTURE_STANDARDIZED : list[numpy.ndarray] List containing standardized pure-component spectra projected onto FREQUENCY_RANGE. Instantiated if contains_concentrations=True. MIXTURE_STANDARDIZED : numpy.ndarray List containing standardized mixture spectra projected onto FREQUENCY_RANGE. If data is from mixture(s) with concentration data then it is a 2-D array. If data is from reaction data it is either an array of objects which obejct is a numpy or it is a 3-D array if all reaction files have the same number of spectra. NUM_MIXED : int Length of MIXTURE_FILES MIXTURE_INFO : list[str] Data from the first row of the mixture.csv files if contains_concentrations=False """ PURE_FILES = self.PURE_FILES NUM_TARGETS = self.NUM_TARGETS MIXTURE_DATA = [] if contains_concentrations == True: MIXTURE_CONCENTRATIONS = [] PURE_DATA_IN_MIXTURE = [] else: MIXTURE_INFO = [] if type(mixture_data_path) == str: if os.path.isdir(mixture_data_path) == True: MIXTURE_FILES = os.listdir(mixture_data_path) elif os.path.isfile(mixture_data_path) == True: MIXTURE_FILES = [os.path.basename(mixture_data_path)] else: try: open(mixture_data_path) except: print('mixture_data_path is not a valide file or directory') raise for file in MIXTURE_FILES: if os.path.isdir(mixture_data_path) == True: file_path = os.path.join(mixture_data_path,file) elif os.path.isfile(mixture_data_path) == True: file_path = mixture_data_path if contains_concentrations == True: index_list = np.zeros(len(PURE_FILES),dtype=int) component = np.genfromtxt(file_path, delimiter=','\ , skip_header=0,usecols=np.arange(2,2+NUM_TARGETS),max_rows=1\ ,autostrip=True,dtype=str,replace_space='_') for i in range(component.size): component[i] = component[i].replace(' ','_') for count, ii in enumerate(PURE_FILES): if component[i].lower() in ii.lower(): index_list[count] = i individual_spectra = np.loadtxt(file_path, delimiter=',', skiprows=2,usecols=[0]+np.arange(2,2+NUM_TARGETS).tolist()).T PURE_DATA_IN_MIXTURE.append(np.concatenate((np.array([individual_spectra[0]]),individual_spectra[1:][index_list]),axis=0)) concentration = np.genfromtxt(file_path, delimiter=','\ , skip_header=1,usecols=np.arange(2,2+NUM_TARGETS),max_rows=1\ ,dtype=float) MIXTURE_CONCENTRATIONS.append(concentration[index_list]) data = np.loadtxt(file_path, delimiter=',', skiprows=2,usecols=[0,1]).T else: data = np.loadtxt(file_path, delimiter=',', skiprows=1).T try: mixture_info = np.loadtxt(file_path, delimiter=',', skiprows=0,max_rows=1,usecols=np.arange(1,len(data)),dtype=float) except ValueError: mixture_info = np.loadtxt(file_path, delimiter=',', skiprows=0,max_rows=1,usecols=np.arange(1,len(data)),dtype=str) MIXTURE_INFO.append(mixture_info.reshape(-1)) MIXTURE_DATA.append(data) else: MIXTURE_DATA = mixture_data_path MIXTURE_STANDARDIZED = self.get_standardized_spectra(MIXTURE_DATA) self.MIXTURE_DATA = MIXTURE_DATA self.NUM_MIXED = len(MIXTURE_STANDARDIZED) self.MIXTURE_STANDARDIZED = np.copy(MIXTURE_STANDARDIZED) if type(mixture_data_path) == str: self.MIXTURE_FILES = MIXTURE_FILES if contains_concentrations == True: self.PURE_DATA_IN_MIXTURE = PURE_DATA_IN_MIXTURE self.MIXTURE_CONCENTRATIONS = MIXTURE_CONCENTRATIONS self.PURE_IN_MIXTURE_STANDARDIZED = self.get_standardized_spectra(PURE_DATA_IN_MIXTURE) else: self.MIXTURE_INFO = MIXTURE_INFO
[docs] def get_mixture_figures(self, figure_directory): """ Returns parity plot and visualizes input data for the case the case where concentrations in the mixture are known. Parameters ---------- figure_directory : str Directory where figures should be saved. In place of a directory, keywords 'fit' or 'print' can also be provided. These do not save the images but sends them to the gui; 'fit' adjusts the figure size to the screen it is being viewed on. Returns ------- Figures describing the data and results of the model. """ try: self.MIXTURE_CONCENTRATIONS is not None except: print('You must run IR_Results.set_mixture_data with\ conctains_concentrations=True before running this method') return self._visualize_data(figure_directory) self.plot_parity_plot(figure_directory) self.plot_deconvoluted_spectra(figure_directory)
[docs] def get_reaction_figures(self, figure_directory='fit'): """ Returns parity plot and visualizes input data for the case the case where concentrations in the mixture are unknown. Parameters ---------- figure_directory : str Directory where figures should be saved. In place of a directory, keywords 'fit' or 'print' can also be provided. These do not save the images but sends them to the gui; 'fit' adjusts the figure size to the screen it is being viewed on. Returns ------- Figures describing the data and results of the model. """ try: self.MIXTURE_INFO is not None except: print('You must run IR_Results.set_mixture_data with\ conctains_concentrations=False before running this method') raise NUM_TARGETS = self.NUM_TARGETS PURE_NAMES = self.PURE_NAMES MIXTURE_INFO = self.MIXTURE_INFO predictions = self.get_predictions(self.MIXTURE_STANDARDIZED) errors = self.get_95PI(self.MIXTURE_STANDARDIZED) if type(predictions) is not list: predictions = [predictions] errors = [errors] for ii in range(len(predictions)): Markers = ['o','s','D','^'] Colors = ['orange','g','b','r'] linestyle = ['-','--',':','-.'] if figure_directory=='fit': plt.figure(ii+1) else: plt.figure(ii+1, figsize=(3.5,2.5),dpi=400) for i in range(NUM_TARGETS): plt.plot(MIXTURE_INFO[ii], predictions[ii][:,i],marker=Markers[i],color=Colors[i],linestyle=linestyle[i]) plt.errorbar(MIXTURE_INFO[ii], predictions[ii][:,i], yerr=errors[ii][:,i], xerr=None, fmt='none', ecolor='k',elinewidth=1,capsize=3) plt.errorbar(MIXTURE_INFO[ii], predictions[ii][:,i], yerr=errors[ii][:,i], xerr=None, fmt='none', ecolor='k', barsabove=True,elinewidth=1,capsize=3) #plt.legend([i.replace('_',' ') for i in PURE_NAMES]) plt.xlabel('Time') plt.ylabel('Predicted Concentration') plt.tight_layout() if figure_directory in ['print','fit']: plt.show() else: plt.savefig(figure_directory+'/Model_Validation_v'+str(ii)+'.png', format='png') plt.close()
[docs] def save_reaction_data(self, figure_directory='fit'): """ Returns parity plot and visualizes input data for the case the case where concentrations in the mixture are unknown. Parameters ---------- figure_directory : str Directory where figures should be saved. Returns ------- Saves data describing predicted concentrations. """ try: self.MIXTURE_INFO is not None except: print('You must run IR_Results.set_mixture_data with\ conctains_concentrations=False before running this method') raise FREQUENCY_RANGE = self.FREQUENCY_RANGE PURE_NAMES = self.PURE_NAMES MIXTURE_INFO = self.MIXTURE_INFO MIXTURE_STANDARDIZED = self.MIXTURE_STANDARDIZED predictions = self.get_predictions(MIXTURE_STANDARDIZED) errors = self.get_95PI(MIXTURE_STANDARDIZED) deconvoluted_spectra = self.get_deconvoluted_spectra(MIXTURE_STANDARDIZED) if type(predictions) != list: predictions = [predictions] errors = [errors] deconvoluted_spectra = [deconvoluted_spectra] mixed_spectra = [MIXTURE_STANDARDIZED] else: mixed_spectra = MIXTURE_STANDARDIZED for count, array_info in enumerate(MIXTURE_INFO): data_to_save = np.concatenate((array_info.reshape((-1,1)),predictions[count],errors[count]),axis=1) Titles = np.concatenate((np.array(['Time']), PURE_NAMES, [i+'_errors' for i in PURE_NAMES])) new_data_to_save = np.concatenate((Titles.reshape(1,-1),data_to_save),axis=0) np.savetxt(figure_directory+'/concentration_data_v'+str(count)+'.csv',new_data_to_save,delimiter=',',fmt="%s") for count2, time in enumerate(array_info.reshape(-1,1)): data_to_save = np.concatenate((FREQUENCY_RANGE.reshape((-1,1)),mixed_spectra[count][count2].reshape((-1,1))\ ,deconvoluted_spectra[count][count2].T),axis=1) Titles = np.concatenate((np.array(['Frequency','Mixed_Spectra']),PURE_NAMES)) new_data_to_save = np.concatenate((Titles.reshape(1,-1),data_to_save),axis=0) np.savetxt(figure_directory+'/'+'deconvolution_v'+str(count)+'_t'+str(count2)+'_deconv'+'.csv'\ ,new_data_to_save,delimiter=',',fmt="%s")
[docs] def _visualize_data(self,figure_directory): """ Plots visualization of the input data. Parameters ---------- figure_directory : str Directory where figures should be saved. In place of a directory, keywords 'fit' or 'print' can also be provided. These do not save the images but sends them to the gui; 'fit' adjusts the figure size to the screen it is being viewed on. Returns ------- Figures describing the data and results of the model. """ NUM_TARGETS = self.NUM_TARGETS PURE_CONCENTRATIONS = self.PURE_CONCENTRATIONS PURE_NAMES = self.PURE_NAMES PURE_STANDARDIZED = self.PURE_STANDARDIZED PURE_IN_MIXTURE_STANDARDIZED = self.PURE_IN_MIXTURE_STANDARDIZED MIXTURE_STANDARDIZED = self.MIXTURE_STANDARDIZED _get_concentration_coefficients = self._get_concentration_coefficients _get_concentrations_2_pure_spectra = self._get_concentrations_2_pure_spectra PURE_IN_MIX_SUMMED = np.array([np.sum(i,axis=0) for i in PURE_IN_MIXTURE_STANDARDIZED]) CONCENTRATION_COEFFICIENTS = [] for i in range(NUM_TARGETS): concentration_coefficients = _get_concentration_coefficients(PURE_CONCENTRATIONS[i]) CONCENTRATION_COEFFICIENTS.append(concentration_coefficients) CONCENTRATIONS_2_PURE_SPECTRA = _get_concentrations_2_pure_spectra() pure_flattend = PURE_IN_MIX_SUMMED.flatten() markers = ['o','s','D','^'] if figure_directory=='fit': plt.figure(0) else: plt.figure(0, figsize=(3.5,3.5),dpi=400) #print('Comparing regression to pure species spectra') for i in range(NUM_TARGETS): fit_values = np.dot(CONCENTRATION_COEFFICIENTS[i], CONCENTRATIONS_2_PURE_SPECTRA[i]).flatten() plt.plot(PURE_STANDARDIZED[i].flatten()\ ,fit_values-PURE_STANDARDIZED[i].flatten()\ ,markers[i]) plt.legend([PURE_NAMES[i].replace('_',' ') for i in range(NUM_TARGETS)]+['Parity']) plt.xlabel('Experimental Pure Component Intensities') plt.ylabel('Regressed Intensities') if figure_directory in ['print','fit']: plt.show() else: plt.savefig(figure_directory+'/Regressed_vs_Experimental.png', format='png') plt.close() plt.figure(0, figsize=(7.2,5),dpi=400) plt.plot((np.min(MIXTURE_STANDARDIZED),np.max(MIXTURE_STANDARDIZED)),(np.min(MIXTURE_STANDARDIZED),np.max(MIXTURE_STANDARDIZED)),'k',zorder=0) plt.plot(MIXTURE_STANDARDIZED.flatten(),pure_flattend,'o') plt.xlabel('Mixture Intensities') plt.ylabel('Summed Pure\n Component Intensities') if figure_directory == 'print': plt.show() else: plt.savefig(figure_directory+'/Summed_vs_Mixed.png', format='png') plt.close()
[docs] def get_deconvoluted_spectra(self, spectra): """ Given standardized mixed spectra return the corresponding deconvolute and return the corresponding pure-component spectra Parameters ---------- spectra : numpy.ndarray Standardized mixed spectra. Usually this is IR_Results.MIXTURE_STANDARDIZED. Returns ------- reordered_spectra : list[numpy.ndarray] or list[list[numpy.ndarray]] Pure-component spectra that make up the mixed spectra. If multiple mixed spectra are provided the numpy arrays are ordered alphabetically by IR_Results.MIXTURE_FILES. Each numpy array is ordered alphabetically by IR_Results.PURE_FILES. """ spectra = np.copy(spectra) if len(spectra.shape)==1 and len(spectra[0].shape)==0: spectra = spectra.reshape(1,-1) elif len(spectra.shape)==1 or len(spectra.shape)==3: output_list = [] for value in spectra: output_list.append(self.get_deconvoluted_spectra(value)) return output_list NUM_TARGETS = self.NUM_TARGETS FREQUENCY_RANGE = self.FREQUENCY_RANGE _get_concentrations_2_pure_spectra = self._get_concentrations_2_pure_spectra _get_concentration_coefficients = self._get_concentration_coefficients get_predictions = self.get_predictions CONCENTRATIONS_2_PURE_SPECTRA = _get_concentrations_2_pure_spectra() predictions = get_predictions(spectra) deconvoluted_spectra = [] reordered_spectra = [] for i in range(NUM_TARGETS): concentration_coefficients = _get_concentration_coefficients(predictions[:,i]) deconvoluted_spectra.append(np.dot(concentration_coefficients,CONCENTRATIONS_2_PURE_SPECTRA[i])) for i in range(spectra.shape[0]): reordered_spectra_i = np.zeros((NUM_TARGETS, FREQUENCY_RANGE.size)) for ii in range(NUM_TARGETS): reordered_spectra_i[ii] = deconvoluted_spectra[ii][i] reordered_spectra.append(reordered_spectra_i) return reordered_spectra
[docs] def plot_parity_plot(self,figure_directory='print'): """ Returns parity plot for the case the case where concentrations in the mixture are known. Parameters ---------- figure_directory : str Directory where figures should be saved. In place of a directory, keywords 'fit' or 'print' can also be provided. These do not save the images but sends them to the gui; 'fit' adjusts the figure size to the screen it is being viewed on. Returns ------- Parity Plot of predicted vs. actual concentrations. """ if self.MIXTURE_CONCENTRATIONS is None: print('You must run IR_Results.set_mixture_data with\ conctains_concentrations=True before running this method') raise NUM_TARGETS = self.NUM_TARGETS PURE_NAMES = self.PURE_NAMES MIXED_SPECTRA = self.MIXTURE_STANDARDIZED predictions = self.get_predictions(MIXED_SPECTRA) errors = self.get_95PI(MIXED_SPECTRA) True_value = np.array(self.MIXTURE_CONCENTRATIONS) Markers = ['o','s','D','^'] Colors = ['orange','g','b','r'] if figure_directory=='fit': plt.figure(0) else: plt.figure(0, figsize=(7.2,5),dpi=400) for i in range(NUM_TARGETS): plt.plot(True_value[:,i], predictions[:,i],marker=Markers[i],color=Colors[i],linestyle='None') plt.errorbar(True_value[:,i], predictions[:,i], yerr=errors[:,i], xerr=None, fmt='none', ecolor='k',elinewidth=1,capsize=3) plt.errorbar(True_value[:,i], predictions[:,i], yerr=errors[:,i], xerr=None, fmt='none', ecolor='k', barsabove=True,elinewidth=1,capsize=3) plt.plot((np.min(True_value),np.max(True_value)),(np.min(True_value),np.max(True_value)),'k',zorder=0) plt.legend([i.replace('_',' ') for i in PURE_NAMES]) plt.xlabel('Exeprimentally Measured Concentration') plt.ylabel('Predicted Concentration') y_true = np.array(self.MIXTURE_CONCENTRATIONS).flatten() y_pred = self.get_predictions(self.MIXTURE_STANDARDIZED).flatten() print('R2 of mixed prediction: ' + str(error_metrics.get_r2(y_true, y_pred))) print('RMSE of mixed prediction: ' + str(error_metrics.get_rmse(y_true, y_pred))) print('Max Error mixed prediction: ' + str(error_metrics.get_max_error(y_true, y_pred))) plt.tight_layout() if figure_directory in ['print','fit']: plt.show() else: plt.savefig(figure_directory+'/Model_Validation.png', format='png') plt.close()
[docs] def save_parity_data(self,figure_directory): """ saves parity data for the case the case where concentrations in the mixture are known. Parameters ---------- figure_directory : str Directory where data should be saved. Returns ------- saves parity plot. """ if self.MIXTURE_CONCENTRATIONS is None: print('You must run IR_Results.set_mixture_data with\ conctains_concentrations=True before running this method') raise PURE_NAMES = self.PURE_NAMES MIXTURE_FILES = self.MIXTURE_FILES MIXED_SPECTRA = self.MIXTURE_STANDARDIZED predictions = self.get_predictions(MIXED_SPECTRA) errors = self.get_95PI(MIXED_SPECTRA) True_value = np.array(self.MIXTURE_CONCENTRATIONS) data_to_save = np.concatenate((np.array(MIXTURE_FILES).reshape(-1,1),predictions,True_value,errors),axis=1) Titles = np.concatenate((np.array(['File_name']),[i+'_pred' for i in PURE_NAMES]\ ,[i+'_real' for i in PURE_NAMES],[i+'_errors' for i in PURE_NAMES])) new_data_to_save = np.concatenate((Titles.reshape(1,-1),data_to_save),axis=0) np.savetxt(figure_directory+'/Model_Validation.csv',new_data_to_save,delimiter=',',fmt="%s")
[docs] def plot_deconvoluted_spectra(self,figure_directory='print'): """ Returns deconvoluted spectra plot for when concentrations in the mixture are known. Parameters ---------- figure_directory : str Directory where figures should be saved. In place of a directory, keywords 'fit' or 'print' can also be provided. These do not save the images but sends them to the gui; 'fit' adjusts the figure size to the screen it is being viewed on. Returns ------- Comparison plot of deconvolated spectra vs the spectra for the pure-species in the mixture """ try: self.MIXTURE_CONCENTRATIONS is not None except: print('You must run IR_Results.set_mixture_data with\ conctains_concentrations=True before running this method') raise MIXTURE_FILES = self.MIXTURE_FILES FREQUENCY_RANGE = self.FREQUENCY_RANGE NUM_MIXED = self.NUM_MIXED NUM_TARGETS = self.NUM_TARGETS PURE_NAMES = self.PURE_NAMES MIXED_SPECTRA = self.MIXTURE_STANDARDIZED PURE_IN_MIXTURE_STANDARDIZED = self.PURE_IN_MIXTURE_STANDARDIZED get_deconvoluted_spectra = self.get_deconvoluted_spectra deconvoluted_spectra = get_deconvoluted_spectra(MIXED_SPECTRA) Colors = ['orange','g','b','r'] for i in range(NUM_MIXED): if figure_directory=='fit': plt.figure(i+1, figsize=(9.9,5)) else: plt.figure(i+1, figsize=(9.9,5),dpi=400) plt.plot(FREQUENCY_RANGE,MIXED_SPECTRA[i],'k') for ii in range(NUM_TARGETS): plt.plot(FREQUENCY_RANGE,deconvoluted_spectra[i][ii],color=Colors[ii],linestyle = '-') plt.plot(FREQUENCY_RANGE,np.sum(PURE_IN_MIXTURE_STANDARDIZED[i],axis=0),'k:') for ii in range(NUM_TARGETS): plt.plot(FREQUENCY_RANGE,PURE_IN_MIXTURE_STANDARDIZED[i][ii],color=Colors[ii],linestyle = ':') plt.legend(['Mixture Spectra']+[i.replace('_',' ') +' - deconvoluted' for i in PURE_NAMES]\ +['Summed Pure Component Spectra']+[i.replace('_',' ') +' - pure' for i in PURE_NAMES],ncol=2) plt.xlabel('Frequency [cm$^{-1}$]') plt.ylabel('Intensity') plt.ylim([0, MIXED_SPECTRA[i].max()*1.75]) #plt.title(MIXTURE_FILES[i][:-4]) plt.tight_layout() if figure_directory in ['print','fit']: plt.show() else: plt.savefig(figure_directory+'/'+MIXTURE_FILES[i][:-4]+'.png', format='png') plt.close()
[docs] def save_deconvoluted_spectra(self,figure_directory='print'): """ Saves deconvoluted spectra for when concentrations in the mixture are known. Parameters ---------- figure_directory : str Directory where figures should be saved. Returns ------- Saves deconvoluted and pure spectra for the species. """ try: self.MIXTURE_CONCENTRATIONS is not None except: print('You must run IR_Results.set_mixture_data with\ conctains_concentrations=True before running this method') raise MIXTURE_FILES = self.MIXTURE_FILES FREQUENCY_RANGE = self.FREQUENCY_RANGE NUM_MIXED = self.NUM_MIXED PURE_NAMES = self.PURE_NAMES MIXED_SPECTRA = self.MIXTURE_STANDARDIZED PURE_IN_MIXTURE_STANDARDIZED = self.PURE_IN_MIXTURE_STANDARDIZED get_deconvoluted_spectra = self.get_deconvoluted_spectra deconvoluted_spectra = get_deconvoluted_spectra(MIXED_SPECTRA) for i in range(NUM_MIXED): data_to_save = np.concatenate((FREQUENCY_RANGE.reshape((-1,1)),MIXED_SPECTRA[i].reshape((-1,1))\ ,deconvoluted_spectra[i].T,PURE_IN_MIXTURE_STANDARDIZED[i].T),axis=1) Titles = np.concatenate((np.array(['Frequency','Mixed_Spectra'])\ ,[ii+'_deconvoluted' for ii in PURE_NAMES]\ ,[ii+'_pure_spectra' for ii in PURE_NAMES])) new_data_to_save = np.concatenate((Titles.reshape(1,-1),data_to_save),axis=0) np.savetxt(figure_directory+'/'+MIXTURE_FILES[i][:-4]+'_deconv'+'.csv'\ ,new_data_to_save,delimiter=',',fmt="%s")
[docs] def get_predictions(self, spectra): """ Returns predicted concentrations of the pure-component species that make up the mixed spectra. Parameters ---------- spectra : numpy.ndarray Standardized mixed spectra. Usually this is IR_Results.MIXTURE_STANDARDIZED. Returns ------- predictions : numpy.ndarray or list[numpy.ndarray] Predicted concentrations """ spectra = np.copy(spectra) if len(spectra.shape)==1 and len(spectra[0].shape)==0: spectra = spectra.reshape(1,-1) elif len(spectra.shape)==1 or len(spectra.shape)==3: output_list = [] for value in spectra: output_list.append(self.get_predictions(value)) return output_list PCs_2_concentrations = self.PCs_2_concentrations pca_loadings = self.pca_loadings PCs = np.dot(spectra,pca_loadings.T) predictions = np.dot(PCs,PCs_2_concentrations) return predictions
[docs] def get_95PI(self, spectra): """ Returns prediction interval of the predicted concentrations. Parameters ---------- spectra : numpy.ndarray Standardized mixed spectra. Usually this is IR_Results.MIXTURE_STANDARDIZED. Returns ------- prediction_interval : numpy.ndarray or list[numpy.ndarray] Prediction interval at the 95% confidence level such that 95% of error bars of the predicted concentrations should overlap the parity line. """ spectra = np.copy(spectra) if len(spectra.shape)==1 and len(spectra[0].shape)==0: spectra = spectra.reshape(1,-1) elif len(spectra.shape)==1 or len(spectra.shape)==3: output_list = [] for value in spectra: output_list.append(self.get_95PI(value)) return output_list pure_spectra, pure_concentrations = self._get_pure_single_spectra() pca_loadings = self.pca_loadings y_fit = self.get_predictions(spectra=pure_spectra) NUM_TARGETS = self.NUM_TARGETS Xnew = np.dot(spectra,pca_loadings.T) Xfit = np.dot(pure_spectra,pca_loadings.T) var_yfit = np.zeros(NUM_TARGETS) var_ynew = np.zeros((spectra.shape[0],NUM_TARGETS)) for i in range(NUM_TARGETS): var_yfit[i] = np.var(pure_concentrations[:,i]-y_fit[:,i],ddof=pca_loadings.shape[0]) var_estimators = np.linalg.inv(np.dot(Xfit.T,Xfit))*var_yfit[i] for ii in range(spectra.shape[0]): x1 = np.dot(Xnew[ii],var_estimators) x2 = np.dot(x1,Xnew[ii].reshape(-1,1))[0] var_ynew[ii][i] = var_yfit[i]+x2 prediction_interval = stats.t.ppf(1-0.025,y_fit.shape[0]-pca_loadings.shape[0])*var_ynew**0.5 return prediction_interval