Source code for metobs_toolkit.dataset_visuals

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 16 15:13:17 2024

@author: thoverga
"""


import logging
import os
from metobs_toolkit import Dataset

from metobs_toolkit.plotting_functions import (
    geospatial_plot,
    timeseries_plot,
    folium_plot,
    add_stations_to_folium_map,
    make_folium_html_plot,
)

from metobs_toolkit.landcover_functions import connect_to_gee, _validate_metadf

from metobs_toolkit.df_helpers import (
    multiindexdf_datetime_subsetting,
    fmt_datetime_argument,
    init_multiindex,
    init_multiindexdf,
    init_triple_multiindexdf,
    metadf_to_gdf,
    conv_applied_qc_to_df,
    get_freqency_series,
    value_labeled_doubleidxdf_to_triple_idxdf,
    xs_save,
    concat_save,
)


logger = logging.getLogger(__name__)


class Dataset(Dataset):
    """Extension on the metobs_toolkit.Dataset class with visualisation methods"""

    def make_plot(
        self,
        stationnames=None,
        obstype="temp",
        colorby="name",
        starttime=None,
        endtime=None,
        title=None,
        y_label=None,
        legend=True,
        show_outliers=True,
        show_filled=True,
        _ax=None,  # needed for GUI, not recommended use
    ):
        """
        This function creates a timeseries plot for the dataset. The variable
        observation type is plotted for all stationnames from a starttime to
        an endtime.

        All styling attributes are extracted from the Settings.

        Parameters
        ----------
        stationnames : list or str, optional
            A list with stationnames to include in the timeseries. A single
            stationname given as a string is treated as a one-element list.
            If None is given, all the stations are used, defaults to None.
        obstype : string, optional
            Fieldname to visualise. This can be an observation or station
            attribute. The default is 'temp'.
        colorby : 'label' or 'name', optional
            Indicate how colors should be assigned to the lines. 'label' will
            color the lines by their quality control label. 'name' will color
            by each station, defaults to 'name'.
        starttime : datetime.datetime, optional
            Specify the start datetime for the plot. If None is given it will
            use the start datetime of the dataset, defaults to None.
        endtime : datetime.datetime, optional
            Specify the end datetime for the plot. If None is given it will
            use the end datetime of the dataset, defaults to None.
        title : string, optional
            Title of the figure, if None a default title is generated. The
            default is None.
        y_label : string, optional
            y-axes label of the figure, if None a default label is generated.
            The default is None.
        legend : bool, optional
            If True, a legend is added to the plot. The default is True.
        show_outliers : bool, optional
            If true the observations labeled as outliers will be included in
            the plot. This is only true when colorby == 'name'. The default
            is True.
        show_filled : bool, optional
            If true the filled values for gaps and missing observations will
            be included in the plot. This is only true when colorby == 'name'.
            The default is True.

        Returns
        -------
        axis : matplotlib.pyplot.axes
            The timeseries axes of the plot is returned.

        Note
        --------
        If a timezone unaware datetime is given as an argument, it is
        interpreted as if it has the same timezone as the observations.

        Examples
        --------

        .. code-block:: python

            >>> import metobs_toolkit
            >>>
            >>> # Import data into a Dataset
            >>> dataset = metobs_toolkit.Dataset()
            >>> dataset.update_settings(
            ...                         input_data_file=metobs_toolkit.demo_datafile,
            ...                         input_metadata_file=metobs_toolkit.demo_metadatafile,
            ...                         template_file=metobs_toolkit.demo_template,
            ...                         )
            >>> dataset.import_data_from_file()
            >>>
            >>> # Make plot
            >>> dataset.make_plot(stationnames=['vlinder02', 'vlinder16'],
            ...                   obstype='temp',
            ...                   colorby='label')
            <Axes: ...

        """
        # A bare string is not list-like, so the membership filter below
        # would reject it; treat it as a single station.
        if isinstance(stationnames, str):
            stationnames = [stationnames]

        if stationnames is None:
            logger.info(f"Make {obstype}-timeseries plot for all stations")
        else:
            logger.info(f"Make {obstype}-timeseries plot for {stationnames}")

        # combine all dataframes
        mergedf = self.combine_all_to_obsspace()

        # subset to obstype
        mergedf = xs_save(mergedf, obstype, level="obstype")

        # Subset on stationnames
        if stationnames is not None:
            in_selection = mergedf.index.get_level_values("name").isin(stationnames)
            mergedf = mergedf[in_selection]

        # Subset on start and endtime
        timezone = self.settings.time_settings["timezone"]
        starttime = fmt_datetime_argument(starttime, timezone)
        endtime = fmt_datetime_argument(endtime, timezone)
        mergedf = multiindexdf_datetime_subsetting(mergedf, starttime, endtime)

        # Get plot styling attributes
        if title is None:
            orig_name = self.obstypes[obstype].get_orig_name()
            if stationnames is not None:
                title = orig_name + " for stations: " + str(stationnames)
            elif self._istype == "Dataset":
                title = orig_name + " for all stations. "
            elif self._istype == "Station":
                title = orig_name + " of " + self.name
            # any other _istype leaves the title as None

        # create y label
        if y_label is None:
            y_label = self.obstypes[obstype].get_plot_y_label()

        # Make plot
        ax, _colmap = timeseries_plot(
            mergedf=mergedf,
            title=title,
            ylabel=y_label,
            colorby=colorby,
            show_legend=legend,
            show_outliers=show_outliers,
            show_filled=show_filled,
            settings=self.settings,
            _ax=_ax,
        )

        return ax

    def make_interactive_plot(
        self,
        obstype="temp",
        save=True,
        outputfile=None,
        starttime=None,
        endtime=None,
        vmin=None,
        vmax=None,
        mpl_cmap_name="viridis",
        radius=13,
        fill_alpha=0.6,
        max_fps=4,
        outlier_col="red",
        ok_col="black",
        gap_col="orange",
        fill_col="yellow",
    ):
        """Make interactive geospatial plot with time evolution.

        This function uses the folium package to make an interactive
        geospatial plot to illustrate the time evolution.

        Parameters
        ----------
        obstype : str or metobs_toolkit.Obstype, optional
            The observation type to plot. The default is 'temp'.
        save : bool, optional
            If true, the figure will be saved as an html-file. The default
            is True.
        outputfile : str, optional
            The path of the output html-file. The figure will be saved here,
            if save is True. If outputfile is not given, and save is True,
            then the figure will be saved in the default outputfolder (if
            given). The default is None.
        starttime : datetime.datetime, optional
            Specify the start datetime for the plot. If None is given it will
            use the start datetime of the dataset, defaults to None.
        endtime : datetime.datetime, optional
            Specify the end datetime for the plot. If None is given it will
            use the end datetime of the dataset, defaults to None.
        vmin : numeric, optional
            The value corresponding with the minimum color. If None, the
            minimum of the presented observations is used. The default is
            None.
        vmax : numeric, optional
            The value corresponding with the maximum color. If None, the
            maximum of the presented observations is used. The default is
            None.
        mpl_cmap_name : str, optional
            The name of the matplotlib colormap to use. The default is
            'viridis'.
        radius : int, optional
            The radius (in pixels) of the scatters. The default is 13.
        fill_alpha : float ([0;1]), optional
            The alpha of the fill color for the scatters. The default is 0.6.
        max_fps : int (>0), optional
            The maximum allowed frames per second for the time evolution. The
            default is 4.
        outlier_col : str, optional
            The edge color of the scatters to identify an outlier. The
            default is 'red'.
        ok_col : str, optional
            The edge color of the scatters to identify an ok observation. The
            default is 'black'.
        gap_col : str, optional
            The edge color of the scatters to identify a missing/gap
            observation. The default is 'orange'.
        fill_col : str, optional
            The edge color of the scatters to identify a filled observation.
            The default is 'yellow'.

        Returns
        -------
        m : folium.folium.map
            The interactive folium map. None is returned (and an error is
            logged) when the obstype is unknown or not present in the data,
            when no output location can be determined while save is True, or
            when stations without coordinates are found.

        Note
        -------
        The figure will only appear when this is run in notebooks. If you do
        not run this in a notebook, make sure to save the html file, and open
        it with a browser.

        Examples
        --------

        .. code-block:: python

            >>> import metobs_toolkit
            >>>
            >>> # Import data into a Dataset
            >>> dataset = metobs_toolkit.Dataset()
            >>> dataset.update_settings(
            ...                         input_data_file=metobs_toolkit.demo_datafile,
            ...                         input_metadata_file=metobs_toolkit.demo_metadatafile,
            ...                         template_file=metobs_toolkit.demo_template,
            ...                         )
            >>> dataset.import_data_from_file()
            >>>
            >>> # Make default interactive geospatial plot
            >>> dataset.make_interactive_plot()

        """
        # Check if obstype is known
        if isinstance(obstype, str):
            if obstype not in self.obstypes.keys():
                logger.error(
                    f"{obstype} is not found in the knonw observation types: {list(self.obstypes.keys())}"
                )
                return None
            obstype = self.obstypes[obstype]

        if save:
            if outputfile is None:
                output_folder = self.settings.IO["output_folder"]
                if output_folder is None:
                    logger.error(
                        "No outputfile is given, and there is no default outputfolder specified."
                    )
                    return None
                # Build the path from the same settings entry that was just
                # validated (make_gee_plot resolves the folder the same way).
                outputfile = os.path.join(output_folder, "interactive_figure.html")
            elif not outputfile.endswith(".html"):
                # Check if outputfile has .html extension
                outputfile = outputfile + ".html"
                logger.warning(
                    f"The .hmtl extension is added to the outputfile: {outputfile}"
                )

        # Check if the obstype is present in the data
        if obstype.name not in self.df.columns:
            logger.error(f"{obstype.name} is not found in your the Dataset.")
            return None

        # Check if geospatial data is available
        if self.metadf["lat"].isnull().any():
            _sta = self.metadf[self.metadf["lat"].isnull()]["lat"]
            logger.error(f"Stations without coordinates detected: {_sta}")
            return None
        if self.metadf["lon"].isnull().any():
            _sta = self.metadf[self.metadf["lon"].isnull()]["lon"]
            logger.error(f"Stations without coordinates detected: {_sta}")
            return None

        # Construct dataframe
        combdf = self.combine_all_to_obsspace()
        combdf = xs_save(combdf, obstype.name, level="obstype")

        # Merge geospatial info
        combgdf = combdf.merge(
            self.metadf, how="left", left_on="name", right_index=True
        )

        # Subset on start and endtime
        timezone = self.settings.time_settings["timezone"]
        starttime = fmt_datetime_argument(starttime, timezone)
        endtime = fmt_datetime_argument(endtime, timezone)
        combgdf = multiindexdf_datetime_subsetting(combgdf, starttime, endtime)
        combgdf = combgdf.reset_index()

        # to gdf
        combgdf = metadf_to_gdf(combgdf, crs=4326)

        # Make label color mapper. The insertion order matters: a label that
        # occurs in more than one category gets the color assigned last.
        label_col_map = {"ok": ok_col}

        # outlier labels
        for val in self.settings.qc["qc_checks_info"].values():
            label_col_map[val["outlier_flag"]] = outlier_col

        # missing labels (gaps and missing values)
        for val in self.settings.gap["gaps_info"].values():
            label_col_map[val["outlier_flag"]] = gap_col

        # fill labels
        for val in self.settings.missing_obs["missing_obs_fill_info"]["label"].values():
            label_col_map[val] = fill_col
        for val in self.settings.gap["gaps_fill_info"]["label"].values():
            label_col_map[val] = fill_col

        # make time estimation (records per second measured on a normal laptop)
        est_seconds = combgdf.shape[0] / 2411.5
        logger.info(
            f"The figure will take approximatly (laptop) {est_seconds:.1f} seconds to make."
        )

        # Making the figure
        m = make_folium_html_plot(
            gdf=combgdf,
            variable_column="value",
            var_display_name=obstype.name,
            var_unit=obstype.get_standard_unit(),
            label_column="label",
            label_col_map=label_col_map,
            vmin=vmin,
            vmax=vmax,
            radius=radius,
            fill_alpha=fill_alpha,
            mpl_cmap_name=mpl_cmap_name,
            max_fps=int(max_fps),
        )

        if save:
            logger.info(f"Saving the htlm figure at {outputfile}")
            m.save(outputfile)

        return m

    def make_geo_plot(
        self,
        variable="temp",
        title=None,
        timeinstance=None,
        legend=True,
        vmin=None,
        vmax=None,
        legend_title=None,
        boundbox=[],
    ):
        """Make geospatial plot.

        This function creates a geospatial plot for a field (observations or
        attributes) of all stations.

        If the field is time-dependent, then the timeinstance is used to plot
        the field status at that datetime.

        If the field is categorical then the legend will have categorical
        values, else a colorbar is used.

        All styling attributes are extracted from the Settings.

        Parameters
        ----------
        variable : string, optional
            Fieldname to visualise. This can be an observation type or
            station or 'lcz'. The default is 'temp'.
        title : string, optional
            Title of the figure, if None a default title is generated. The
            default is None.
        timeinstance : datetime.datetime, optional
            Datetime moment of the geospatial plot. If None, the first
            occurring (not Nan) record of the plotted variable is used. When
            the variable is not an observation column, the first (not Nan)
            'temp' record is used instead. The default is None.
        legend : bool, optional
            If True, a legend is added to the plot. The default is True.
        vmin : numeric, optional
            The value corresponding with the minimum color. If None, the
            minimum of the presented observations is used. The default is
            None.
        vmax : numeric, optional
            The value corresponding with the maximum color. If None, the
            maximum of the presented observations is used. The default is
            None.
        legend_title : string, optional
            Title of the legend, if None a default title is generated. The
            default is None.
        boundbox : [lon-west, lat-south, lon-east, lat-north], optional
            The boundbox to indicate the domain to plot. The elements are
            numeric. If the list is empty, a boundbox is created
            automatically. The default is [].

        Returns
        -------
        axis : matplotlib.pyplot.geoaxes
            The geoaxes of the plot is returned. None is returned when
            stations without coordinates (or without lcz, when variable is
            'lcz') are found.

        Note
        --------
        If a timezone unaware datetime is given as an argument, it is
        interpreted as if it has the same timezone as the observations.

        Examples
        --------

        .. code-block:: python

            >>> import metobs_toolkit
            >>>
            >>> # Import data into a Dataset
            >>> dataset = metobs_toolkit.Dataset()
            >>> dataset.update_settings(
            ...                         input_data_file=metobs_toolkit.demo_datafile,
            ...                         input_metadata_file=metobs_toolkit.demo_metadatafile,
            ...                         template_file=metobs_toolkit.demo_template,
            ...                         )
            >>> dataset.import_data_from_file()
            >>>
            >>> # Make default geospatial plot
            >>> dataset.make_geo_plot()
            <GeoAxes:...

        """
        # get first (Not Nan) timeinstance of the dataset if not given
        timeinstance = fmt_datetime_argument(
            timeinstance, self.settings.time_settings["timezone"]
        )
        if timeinstance is None:
            # Look for the first valid record of the plotted variable itself;
            # fields that are not observation columns keep the "temp" fallback.
            ref_column = variable if variable in self.df.columns else "temp"
            # index entries are tuples whose second element is the datetime
            timeinstance = self.df.dropna(subset=[ref_column]).index[0][1]

        logger.info(f"Make {variable}-geo plot at {timeinstance}")

        # check coordinates if available
        if self.metadf["lat"].isnull().any():
            _sta = self.metadf[self.metadf["lat"].isnull()]["lat"]
            logger.error(f"Stations without coordinates detected: {_sta}")
            return None
        if self.metadf["lon"].isnull().any():
            _sta = self.metadf[self.metadf["lon"].isnull()]["lon"]
            logger.error(f"Stations without coordinates detected: {_sta}")
            return None

        # The shared default list is only ever rebound here, never mutated.
        if bool(boundbox) and len(boundbox) != 4:
            logger.warning(
                f"The boundbox ({boundbox}) does not contain 4 elements! The default boundbox is used!"
            )
            boundbox = []

        # Check if LCZ if available
        if variable == "lcz":
            if self.metadf["lcz"].isnull().any():
                _sta = self.metadf[self.metadf["lcz"].isnull()]["lcz"]
                logger.warning(f"Stations without lcz detected: {_sta}")
                return None
            title = f"Local climate zones at {timeinstance}."
            legend_title = ""

        # subset to timeinstance
        plotdf = xs_save(self.df, timeinstance, level="datetime")

        # merge metadata
        plotdf = plotdf.merge(
            self.metadf, how="left", left_index=True, right_index=True
        )

        # titles
        if title is None:
            try:
                title = f"{self.obstypes[variable].get_orig_name()} at {timeinstance}."
            except KeyError:
                title = f"{variable} at {timeinstance}."

        if legend and legend_title is None:
            try:
                legend_title = f"{self.obstypes[variable].get_standard_unit()}"
            except KeyError:
                # Not a known observation type, so there is no unit to show;
                # use the field name, as is done for the title.
                legend_title = f"{variable}"

        axis = geospatial_plot(
            plotdf=plotdf,
            variable=variable,
            timeinstance=timeinstance,
            title=title,
            legend=legend,
            legend_title=legend_title,
            vmin=vmin,
            vmax=vmax,
            plotsettings=self.settings.app["plot_settings"],
            categorical_fields=self.settings.app["categorical_fields"],
            static_fields=self.settings.app["static_fields"],
            display_name_mapper=self.settings.app["display_name_mapper"],
            boundbox=boundbox,
        )

        return axis

    def make_gee_plot(self, gee_map, show_stations=True, save=False, outputfile=None):
        """Make an interactive plot of a google earth dataset.

        The location of the stations can be plotted on top of it.

        Parameters
        ----------
        gee_map : str
            The name of the dataset to use. This name should be present in
            the settings.gee['gee_dataset_info'], and its entry must hold a
            'colorscheme' and a 'categorical_mapper'.
        show_stations : bool, optional
            If True, the stations will be plotted as markers. The default is
            True.
        save : bool, optional
            If True, the map will be saved as an html file in the
            output_folder as defined in the settings if the outputfile is not
            set. The default is False.
        outputfile : str, optional
            Specify the path of the html file if save is True. If None, and
            save is true, the html file will be saved in the output_folder.
            The default is None.

        Returns
        -------
        Map : geemap.foliumap.Map
            The folium Map instance.

        Warning
        ---------
        To display the interactive map a graphical backend is required, which
        is often missing on (free) cloud platforms. Therefore it is better to
        set save=True, and open the .html in your browser

        """
        # Connect to GEE
        connect_to_gee()

        # get the mapinfo
        mapinfo = self.settings.gee["gee_dataset_info"][gee_map]

        # Read in covers, numbers and labels
        covernum = list(mapinfo["colorscheme"].keys())
        colors = list(mapinfo["colorscheme"].values())
        covername = [mapinfo["categorical_mapper"][covnum] for covnum in covernum]

        # create visparams
        vis_params = {
            "min": min(covernum),
            "max": max(covernum),
            "palette": colors,  # hex colors!
        }

        band = mapinfo["band_of_use"] if "band_of_use" in mapinfo else None

        Map = folium_plot(
            mapinfo=mapinfo,
            band=band,
            vis_params=vis_params,
            labelnames=covername,
            layername=gee_map,
            legendname=f"{gee_map} covers",
        )

        if show_stations:
            if not _validate_metadf(self.metadf):
                logger.warning(
                    "Not enough coordinates information is provided to plot the stations."
                )
            else:
                Map = add_stations_to_folium_map(Map=Map, metadf=self.metadf)

        # Save if needed
        if save:
            if outputfile is None:
                # Try to save in the output folder
                output_folder = self.settings.IO["output_folder"]
                if output_folder is None:
                    logger.warning(
                        "The outputfolder is not set up, use the update_settings to specify the output_folder."
                    )
                    # There is no location to write to: skip saving instead of
                    # failing on an undefined file path.
                    return Map
                filepath = os.path.join(output_folder, f"gee_{gee_map}_figure.html")
            else:
                # outputfile is specified
                # 1. check extension
                if not outputfile.endswith(".html"):
                    outputfile = outputfile + ".html"
                filepath = outputfile

            print(f"Gee Map will be save at {filepath}")
            logger.info(f"Gee Map will be save at {filepath}")
            Map.save(filepath)

        return Map