Source code for MESSENGERuvvs.MESSENGERdata

"""MESSENGER UVVS data class"""
import numpy as np
import pandas as pd
import bokeh.plotting as bkp
from bokeh.models import HoverTool, Whisker
from bokeh.palettes import Set1
from bokeh.io import export_png
from astropy import units as u
from astropy.visualization import PercentileInterval
from .database_setup import database_connect
from nexoclom import Input


[docs]class InputError(Exception): """Raised when a required parameter is not included in the inputfile.""" def __init__(self, expression, message): self.expression = expression self.message = message
[docs]class MESSENGERdata: """Retrieve MESSENGER data from database. Given a species and set of comparisons, retrieve MESSSENGER UVVS data from the database. The list of searchable fields is given at :doc:`database_fields`. Returns a MESSENGERdata Object. **Parameters** species Species to search. This is required because the data from each species is stored in a different database table. query A SQL-style list of comparisons. The data in the object created is extracted from the database tables using the query: :: SELECT * FROM <species>uvvsdata, <species>pointing WHERE <query> See examples below. **Class Atributes** species The object can only contain a single species. frame Coordinate frame for the data, either MSO or Model. query SQL query used to search the database and create the object. data Pandas dataframe containing result of SQL query. Columns in the dataframe are the same as in the database except *frame* and *species* have been dropped as they are redundant. If models have been run, there are also columns in the form modelN for the Nth model run. taa Median true anomaly for the data in radians. model_label If *N* models have been run, this is a dictionary in the form `{'model0':label0, ..., 'modelN':labelN}` containing descriptions for the models. model_strength If *N* models have been run, this is a dictionary in the form `{'model0':strength0, ..., 'modelN':strengthN}` containing modeled source rates in units of :math:`10^{26}` atoms/s. **Examples** 1. Loading data :: >>> from MESSENGERuvvs import MESSENGERdata >>> CaData = MESSENGERdata('Ca', 'orbit = 36') >>> print(CaData) Species: Ca Query: orbit = 36 Frame: MSO Object contains 581 spectra. >>> NaData = MESSENGERdata('Na', 'orbit > 100 and orbit < 110') >>> print(NaData) Species: Na Query: orbit > 100 and orbit < 110 Frame: MSO Object contains 3051 spectra. >>> MgData = MESSENGERdata('Mg', 'loctimetan > 5.5 and loctimetan < 6.5 and alttan < 1000') >>> print(len(MgData)) 45766 2. Accessing data. * The observations are stored within the MESSENGERdata object in a `pandas <https://pandas.pydata.org>`_ dataframe attribute called *data*. Please see the `pandas documentation <https://pandas.pydata.org>`_ for more information on how to work with dataframes. :: >>> print(CaData.data.head(5)) utc orbit merc_year ... loctimetan slit utcstr unum ... 3329 2011-04-04 21:24:11.820 36 0 ... 14.661961 Atmospheric 2011-04-04T21:24:11 3330 2011-04-04 21:25:08.820 36 0 ... 12.952645 Atmospheric 2011-04-04T21:25:08 3331 2011-04-04 21:26:05.820 36 0 ... 12.015670 Atmospheric 2011-04-04T21:26:05 3332 2011-04-04 21:27:02.820 36 0 ... 12.007919 Atmospheric 2011-04-04T21:27:02 3333 2011-04-04 21:27:59.820 36 0 ... 12.008750 Atmospheric 2011-04-04T21:27:59 [5 rows x 29 columns] * Individual observations can be extracted using standard Python slicing techniques: :: >>> print(CaData[3:8]) Species: Ca Query: orbit = 36 Frame: MSO Object contains 5 spectra. >>> print(CaData[3:8].data['taa']) unum 3332 1.808107 3333 1.808152 3334 1.808198 3335 1.808243 3336 1.808290 Name: taa, dtype: float64 3. Modeling data :: >>> inputs = Input('Ca.spot.Maxwellian.input') >>> CaData.model(inputs, 1e5, label='Model 1') >>> inputs..speeddist.temperature /= 2. # Run model with different temperature >>> CaData.model(inputs, 1e5, label='Model 2') 4. Plotting data :: >>> CaData.plot('Ca.orbit36.models.html') 5. Exporting data to a file :: >>> CaData.export('modelresults.csv') >>> CaData.export('modelresults.html', columns=['taa']) """ def __init__(self, species=None, comparisons=None): allspecies = ['Na', 'Ca', 'Mg'] self.species = None self.frame = None self.query = None self.data = None self.taa = None self.inputs = None self.model_strength = None self.model_label = None if species is None: pass elif species not in allspecies: # Return list of valid species print(f"Valid species are {', '.join(allspecies)}") elif comparisons is None: # Return list of queryable fields with database_connect() as con: columns = pd.read_sql( f'''SELECT * from {species}uvvsdata, {species}pointing WHERE 1=2''', con) print('Available fields are:') for col in columns.columns: print(f'\t{col}') else: # Run the query and try to make the object query = f'''SELECT * from {species}uvvsdata, {species}pointing WHERE unum=pnum and {comparisons} ORDER BY unum''' try: with database_connect() as con: data = pd.read_sql(query, con) except: raise Input('MESSENGERdata.__init__', 'Problem with comparisons given.') if len(data) > 0: self.species = species self.frame = data.frame[0] self.query = comparisons data.drop(['species', 'frame'], inplace=True, axis=1) self.data = data self.data.set_index('unum', inplace=True) self.taa = np.median(data.taa) else: print(query) print('No data found') def __str__(self): result = (f'Species: {self.species}\n' f'Query: {self.query}\n' f'Frame: {self.frame}\n' f'Object contains {len(self)} spectra.') return result def __repr__(self): result = ('MESSENGER UVVS Data Object\n' f'Species: {self.species}\n' f'Query: {self.query}\n' f'Frame: {self.frame}\n' f'Object contains {len(self)} spectra.') return result def __len__(self): try: return len(self.data) except: return 0 def __getitem__(self, q_): if isinstance(q_, int): q = slice(q_, q_+1) elif isinstance(q_, slice): q = q_ elif isinstance(q_, pd.Series): q = np.where(q_)[0] else: raise TypeError new = MESSENGERdata() new.species = self.species new.frame = self.frame new.query = self.query new.taa = self.taa new.data = self.data.iloc[q].copy() new.model_strength = self.model_strength new.model_label = self.model_label new.inputs = self.inputs return new def __iter__(self): for i in range(len(self.data)): yield self[i]
[docs] def keys(self): """Return all keys in the object, including dataframe columns""" keys = list(self.__dict__.keys()) keys.extend([f'data.{col}' for col in self.data.columns]) return keys
[docs] def set_frame(self, frame=None): """Convert between MSO and Model frames. More frames could be added if necessary. If Frame is not specified, flips between MSO and Model.""" if (frame is None) and (self.frame == 'MSO'): frame = 'Model' elif (frame is None) and (self.frame == 'Model'): frame = 'MSO' else: pass allframes = ['Model', 'MSO'] if frame not in allframes: print('{} is not a valid frame.'.format(frame)) return None elif frame == self.frame: pass elif (self.frame == 'MSO') and (frame == 'Model'): # Convert from MSO to Model self.data.x, self.data.y = self.data.y.copy(), -self.data.x.copy() self.data.xbore, self.data.ybore = (self.data.ybore.copy(), -self.data.xbore.copy()) self.data.xtan, self.data.ytan = (self.data.ytan.copy(), -self.data.xtan.copy()) self.frame = 'Model' elif (self.frame == 'Model') and (frame == 'MSO'): self.data.x, self.data.y = -self.data.y.copy(), self.data.x.copy() self.data.xbore, self.data.ybore = (-self.data.ybore.copy(), self.data.xbore.copy()) self.data.xtan, self.data.ytan = (-self.data.ytan.copy(), self.data.xtan.copy()) self.frame = 'MSO' else: assert 0, 'You somehow picked a bad combination.'
def model(self, inputs_, npackets, quantity='radiance', dphi=3*u.deg, overwrite=False, filenames=None, label=None): if isinstance(inputs_, str): inputs = Input(inputs_) elif hasattr(inputs_, 'line_of_sight'): inputs = inputs_ else: raise InputError('MESSENGERdata.model', 'Problem with the inputs.') # TAA needs to match the data oldtaa = inputs.geometry.taa if len(set(self.data.orbit.tolist())) == 1: inputs.geometry.taa = np.median(self.data.taa)*u.rad elif np.max(self.data.taa)-np.min(self.data.taa) < 3*np.pi/180.: inputs.geometry.taa = np.median(self.data.taa)*u.rad else: assert 0, 'Too wide a range of taa' # Run the model inputs.run(npackets, overwrite=overwrite) # Simulate the data if self.inputs is None: self.inputs = [inputs] else: self.inputs.append(inputs) self.set_frame('Model') model_result = inputs.line_of_sight(self.data, quantity, dphi=dphi, filenames=filenames, overwrite=overwrite) # modkey is the number for this model modkey = f'model{len(self.inputs)-1:00d}' packkey = f'packets{len(self.inputs)-1:00d}' self.data[modkey] = model_result.radiance/1e3 # Convert to kR self.data[packkey] = model_result.packets # Estimate model strength (source rate) by mean of middle 50% interval = PercentileInterval(50) lim = interval.get_limits(self.data.radiance) mask = ((self.data.radiance >= lim[0]) & (self.data.radiance <= lim[1])) strunit = u.def_unit('10**26 atoms/s', 1e26/u.s) m_data = np.mean(self.data.radiance[mask]) m_model = np.mean(self.data[modkey][mask]) model_strength = m_data/m_model * strunit * strunit self.data[modkey] = self.data[modkey] * model_strength.value if self.model_strength is None: self.model_strength = {modkey: model_strength} else: self.model_strength[modkey] = model_strength if label is None: label = modkey.capitalize() else: pass if self.model_label is None: self.model_label = {modkey: label} else: self.model_label[modkey] = label print(f'Model strength for {label} = {model_strength}') # Put the old TAA back in. inputs.geometry.taa = oldtaa def plot(self, filename=None, show=True, **kwargs): if filename is not None: if not filename.endswith('.html'): filename += '.html' else: pass bkp.output_file(filename) else: pass # Format the date correction self.data['utcstr'] = self.data['utc'].apply( lambda x: x.isoformat()[0:19]) # Put the dataframe in a useable form self.data['lower'] = self.data.radiance - self.data.sigma self.data['upper'] = self.data.radiance + self.data.sigma source = bkp.ColumnDataSource(self.data) # Make the figure fig = bkp.figure(plot_width=1200, plot_height=800, x_axis_type='datetime', title=f'{self.species}, {self.query}', x_axis_label='UTC', y_axis_label='Radiance (kR)', tools=['pan', 'box_zoom', 'reset', 'save']) # plot the data dplot = fig.circle(x='utc', y='radiance', size=7, color='black', legend='Data', hover_color='white', source=source) # Add error bars fig.add_layout(Whisker(source=source, base='utc', upper='upper', lower='lower')) # tool tips tips = [('index', '$index'), ('UTC', '@utcstr'), ('Radiance', '@radiance{0.2f} kR')] if self.model_label is not None: for modkey, modlabel in self.model_label.items(): tips.append((modlabel, f'@{modkey}{{0.2f}} kR')) datahover = HoverTool(tooltips=tips, renderers=[dplot]) fig.add_tools(datahover) # Plot the model col = (c for c in Set1[9]) if self.model_label is not None: for modkey, modlabel in self.model_label.items(): try: c = next(col) except StopIteration: col = (c for c in Set1[9]) c = next(col) f = fig.line(x='utc', y=modkey, source=source, legend=modlabel, color=c) f = fig.circle(x='utc', y=modkey, size=7, source=source, legend=modlabel, color=c) datahover.renderers.append(f) print(modlabel) # Labels, etc. fig.title.align = 'center' fig.title.text_font_size = '16pt' fig.axis.axis_label_text_font_size = '16pt' fig.axis.major_label_text_font_size = '16pt' fig.legend.label_text_font_size = '16pt' fig.legend.click_policy = 'hide' if filename is not None: bkp.output_file(filename) export_png(fig, filename=filename.replace('.html', '.png')) bkp.save(fig) else: pass if show: bkp.show(fig)
[docs] def export(self, filename, columns=['utc', 'radiance']): """Export data and models to a file. **Parameters** filename Filename to export model results to. The file extension determines the format. Formats available: csv, pkl, html, tex columns Columns from the data dataframe to export. Available columns can be found by calling the `keys()` method on the data object. Default = ['utc', 'radiance'] and all model result columns. Note: The default columns are always included in the output regardless of whether they are specified. **Returns** No outputs. """ columns_ = columns.copy() if self.model_label is not None: columns_.extend(self.model_label.keys()) else: pass # Make sure radiance is in there if 'radiance' not in columns_: columns_.append('radiance') else: pass # Make sure UTC is in there if 'utc' not in columns_: columns_.append('utc') else: pass if len(columns_) != len(set(columns_)): columns_ = list(set(columns_)) else: pass for col in columns_: if col not in self.data.columns: columns_.remove(col) else: pass subset = self.data[columns_] if filename.endswith('.csv'): subset.to_csv(filename) elif filename.endswith('.pkl'): subset.to_pickle(filename) elif filename.endswith('.html'): subset.to_html(filename) elif filename.endswith('.tex'): subset.to_latex(filename) else: print('Valid output formats = csv, pkl, html, tex')