Source code for earthdiagnostics.statistics.climatologicalpercentile

# coding=utf-8
"""Calculates the climatological percentiles for the given leadtime"""
import iris
import iris.coord_categorisation
import iris.coords
import iris.exceptions
import numpy as np
import six
from bscearth.utils.log import Log

from earthdiagnostics.diagnostic import Diagnostic, DiagnosticVariableOption, DiagnosticDomainOption, \
    DiagnosticIntOption, DiagnosticListIntOption
from earthdiagnostics.frequency import Frequencies
from earthdiagnostics.utils import TempFile
from earthdiagnostics.variable import VariableType


class ClimatologicalPercentile(Diagnostic):
    """
    Calculates the climatological percentiles for the given leadtime

    :param data_manager: data management object
    :type data_manager: DataManager
    :param domain: domain of the variable
    :param variable: variable to average
    :type variable: str
    :param start_year: first year of the climatological period
    :type start_year: int
    :param end_year: last year of the climatological period
    :type end_year: int
    :param forecast_month: forecast month the percentiles are computed for
    :type forecast_month: int
    :param experiment_config:
    :type experiment_config: ExperimentConfig
    """

    alias = 'climpercent'
    "Diagnostic alias for the configuration file"

    # Percentile levels extracted from the accumulated distribution
    Percentiles = np.array([0.1, 0.25, 0.33, 0.5, 0.66, 0.75, 0.9])

    def __init__(self, data_manager, domain, variable, start_year, end_year,
                 forecast_month, experiment_config):
        Diagnostic.__init__(self, data_manager)
        self.variable = variable
        self.domain = domain
        self.experiment_config = experiment_config
        self.start_year = start_year
        self.end_year = end_year
        self.forecast_month = forecast_month
        # Accumulated histogram cube, filled in by _get_distribution()
        self.distribution = None
        # Maps startdate string -> data request for that startdate
        self.leadtime_files = {}

    def __eq__(self, other):
        # NOTE(review): __eq__ is defined without a matching __hash__ — under
        # Python 3 this makes instances unhashable unless the base class
        # compensates; confirm against Diagnostic.
        if self._different_type(other):
            return False
        return self.domain == other.domain and self.variable == other.variable and \
            self.start_year == other.start_year and self.end_year == other.end_year and \
            self.forecast_month == other.forecast_month

    def __str__(self):
        return 'Climatological percentile Variable: {0.domain}:{0.variable} Period: {0.start_year}-{0.end_year} ' \
               'Forecast month: {0.forecast_month}'.format(self)
    @classmethod
    def generate_jobs(cls, diags, options):
        """
        Create a job for each requested forecast month to compute the diagnostic

        :param diags: Diagnostics manager class
        :type diags: Diags
        :param options: domain, variable, start_year, end_year, forecast_month
        :type options: list[str]
        :return: one ClimatologicalPercentile job per forecast month
        """
        options_available = (DiagnosticDomainOption(),
                             DiagnosticVariableOption(diags.data_manager.config.var_manager),
                             DiagnosticIntOption('start_year'),
                             DiagnosticIntOption('end_year'),
                             DiagnosticListIntOption('forecast_month'),
                             )
        options = cls.process_options(options, options_available)

        job_list = list()
        # forecast_month is a list option: fan out into one job per month
        for forecast_month in options['forecast_month']:
            job_list.append(ClimatologicalPercentile(diags.data_manager, options['domain'], options['variable'],
                                                     options['start_year'], options['end_year'],
                                                     forecast_month, diags.config.experiment))
        return job_list
[docs] def requested_startdates(self): """ Required startdates to compute the percentile Returns ------- list of str """ return ['{0}{1:02}01'.format(year, self.forecast_month) for year in range(self.start_year, self.end_year + 1)]
[docs] def request_data(self): """Request data required by the diagnostic""" for startdate in self.requested_startdates(): if startdate not in self.leadtime_files: self.leadtime_files[startdate] = {} Log.debug('Retrieving startdate {0}', startdate) self.leadtime_files[startdate] = self.request_chunk(self.domain, '{0}_dis'.format(self.variable), startdate, None, None, vartype=VariableType.STATISTIC)
    def declare_data_generated(self):
        """Declare data to be generated by the diagnostic"""
        # NOTE(review): the first forecast_month is formatted without zero
        # padding while the second uses :02d — looks inconsistent; confirm
        # which form downstream file naming actually expects before changing.
        var_name = '{0.variable}prct{0.start_year}{0.forecast_month}-{0.end_year}{0.forecast_month:02d}'.format(self)
        self.percentiles_file = self.declare_chunk(self.domain, var_name, None, None, None,
                                                   frequency=Frequencies.climatology,
                                                   vartype=VariableType.STATISTIC)
    def compute(self):
        """Run the diagnostic"""
        # Opt in to the future iris netCDF behaviour before any load/save
        iris.FUTURE.netcdf_promote = True
        # Accumulate the per-startdate histograms into self.distribution
        self._get_distribution()
        # Reduce the accumulated distribution to the requested percentiles
        percentile_values = self._calculate_percentiles()
        self._save_results(percentile_values)
def _save_results(self, percentile_values): temp = TempFile.get() iris.FUTURE.netcdf_no_unlimited = True iris.save(percentile_values.merge_cube(), temp, zlib=True) self.percentiles_file.set_local_file(temp, rename_var='percent') def _calculate_percentiles(self): Log.debug('Calculating percentiles') bins = self.distribution.coord('bin').points def calculate(point_distribution): cs = np.cumsum(point_distribution) total = cs[-1] percentile_values = ClimatologicalPercentile.Percentiles * total index = np.searchsorted(cs, percentile_values) return [bins[i] for i in index] results = iris.cube.CubeList() percentile_coord = iris.coords.DimCoord(ClimatologicalPercentile.Percentiles, long_name='percentile') print(self.distribution) for leadtime_slice in self.distribution.slices_over('leadtime'): result = iris.cube.Cube(np.apply_along_axis(calculate, 0, leadtime_slice.data), var_name='percent', units=self.distribution.coord('bin').units) result.add_dim_coord(percentile_coord, 0) result.add_dim_coord(leadtime_slice.coord('latitude'), 1) result.add_dim_coord(leadtime_slice.coord('longitude'), 2) result.add_aux_coord(leadtime_slice.coord('leadtime')) results.append(result) return results def _get_distribution(self): for startdate, startdate_file in six.iteritems(self.leadtime_files): Log.info('Getting data for startdate {0}', startdate) data_cube = iris.load_cube(startdate_file.local_file) if self.distribution is None: self.distribution = data_cube else: self.distribution += data_cube if len(self.distribution.coords('leadtime')) == 0: self.distribution.add_aux_coord(iris.coords.AuxCoord(1, var_name='leadtime', units='months'))