# Source code for deeprootgen.statistics.summary_statistics

"""Contains utilities for producing summary statistics.

This module defines utility functions for producing summary statistics that can be used to compare simulated and observational data.
"""

# mypy: ignore-errors

from abc import ABC, abstractmethod
from pydoc import locate
from typing import Callable

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.spatial import ConvexHull

# Use the ggplot2 template as the default theme for Plotly Express figures.
px.defaults.template = "ggplot2"


[docs] class SummaryStatisticBase(ABC): """The summary statistic abstract class.""" def __init__(self, **_) -> None: """SummaryStatisticBase constructor.""" super().__init__() self.statistic = "" self.unit = ""
[docs] @abstractmethod def calculate(self, df: pd.DataFrame) -> float | np.ndarray: """Calculate the summary statistic. Args: df (pd.DataFrame): The root dataframe. Raises: NotImplementedError: Error raised for the unimplemented abstract method. """ raise NotImplementedError("calculate() method not implemented.")
[docs] def get_xy_comparison_data( self, df: pd.DataFrame, n_elements: int = 10 ) -> np.ndarray: """Get summary statistic data for comparing against another summary statistic. Args: df (pd.DataFrame): The dataframe of root data. n_elements (int, optional): The number of elements for the comparison. Defaults to 10. Returns: np.ndarray: The comparison data. """ comparison_data, _ = self.calculate_statistic_per_layer(df) comparison_data = comparison_data[:n_elements] return comparison_data
[docs] def get_number_of_roots(self, df: pd.DataFrame) -> int: """Get the number of roots in the root system. Args: df (pd.DataFrame): The root dataframe. Returns: int: The number of roots. """ df = df.query("order > 0") n = len(df.organ_id.unique()) return n
[docs] def calculate_statistic_per_layer( self, df: pd.DataFrame, layer_decrement: int = 10 ) -> tuple: """Calculate a summary statistic per soil layer. Args: df (pd.DataFrame): The root dataframe. layer_decrement (int, optional): The depth to decrement each soil layer. Defaults to 10. Returns: tuple: The list of summary statistics and soil layers. """ soil_layers = range(0, df.z.min().astype("int"), -layer_decrement) soil_layers = np.array(soil_layers) statistics_per_layer: list[float] = [] for soil_layer in soil_layers: layer_df = df.query( f"z > {soil_layer - layer_decrement} & z < {soil_layer}" ) statistic = self.calculate(layer_df) statistics_per_layer.append(statistic) return statistics_per_layer, soil_layers
[docs] def visualise(self, df: pd.DataFrame, layer_decrement: int = 10, **_) -> go.Figure: """Visualise the summary statistic by soil depth. Args: df (pd.DataFrame): The root dataframe. layer_decrement (int, optional): The depth to decrement each soil layer. Defaults to 10. Returns: go.Figure: The visualisation of the total root volume. """ statistics, soil_layers = self.calculate_statistic_per_layer( df, layer_decrement ) fig = px.scatter( title=f"{self.statistic} per soil layer ({layer_decrement} cm)", x=statistics, y=abs(soil_layers), ).update_layout( xaxis_title=f"{self.statistic} ({self.unit})", yaxis_title="Soil depth (cm)" ) return fig
class DepthDistribution(SummaryStatisticBase):
    """The DepthDistribution summary statistic."""

    def calculate(self, df: pd.DataFrame, bins: int = 10) -> tuple:
        """Get the cumulative root distribution by soil depth.

        Depth is the absolute value of the ``z`` column.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.
            bins (int, optional):
                The number of bins for a histogram. Defaults to 10.

        Returns:
            tuple:
                The cumulative fractions (with a leading 0) and the histogram
                bin edges.
        """
        depths = df.z.abs()
        counts, bin_edges = np.histogram(depths, bins=bins)

        fractions = counts / counts.sum()
        # Prepend 0 so there is one cumulative value per bin edge.
        cumulative = np.insert(np.cumsum(fractions), 0, 0)

        return cumulative, bin_edges

    def visualise(self, df: pd.DataFrame, bins: int = 10) -> go.Figure:
        """Visualise the cumulative root distribution by soil depth.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.
            bins (int, optional):
                The number of bins for a histogram. Defaults to 10.

        Returns:
            go.Figure:
                A scatter plot of cumulative root fraction against soil depth.
        """
        cumulative, bin_edges = self.calculate(df, bins)

        fig = px.scatter(
            title="Cumulative root distribution by soil depth",
            x=cumulative,
            y=bin_edges,
        )
        return fig.update_layout(
            xaxis_title="Cumulative root fraction", yaxis_title="Soil depth (cm)"
        )

    def get_xy_comparison_data(
        self, df: pd.DataFrame, n_elements: int = 10
    ) -> np.ndarray:
        """Get summary statistic data for comparing against another summary statistic.

        The histogram is built with ``n_elements`` bins and the first
        ``n_elements`` bin edges are returned.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.
            n_elements (int, optional):
                The number of elements for the comparison. Defaults to 10.

        Returns:
            np.ndarray:
                The comparison data.
        """
        bin_edges = self.calculate(df, n_elements)[1]
        return bin_edges[:n_elements]
class RadialDistribution(SummaryStatisticBase):
    """The RadialDistribution summary statistic."""

    def calculate(self, df: pd.DataFrame, bins: int = 10) -> tuple:
        """Get the cumulative root distribution by horizontal distance.

        The ``x`` and ``y`` columns are pooled into one series and their
        absolute values are binned.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.
            bins (int, optional):
                The number of bins for a histogram. Defaults to 10.

        Returns:
            tuple:
                The cumulative fractions (with a leading 0) and the histogram
                bin edges.
        """
        pooled = df.melt(value_vars=["x", "y"])
        distances = pooled.value.abs()
        counts, bin_edges = np.histogram(distances, bins=bins)

        fractions = counts / counts.sum()
        # Prepend 0 so there is one cumulative value per bin edge.
        cumulative = np.insert(np.cumsum(fractions), 0, 0)

        return cumulative, bin_edges

    def visualise(self, df: pd.DataFrame, bins: int = 10) -> go.Figure:
        """Visualise the cumulative root distribution by horizontal distance.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.
            bins (int, optional):
                The number of bins for a histogram. Defaults to 10.

        Returns:
            go.Figure:
                A scatter plot of cumulative root fraction against horizontal
                distance.
        """
        cumulative, bin_edges = self.calculate(df, bins)

        fig = px.scatter(
            title="Cumulative root distribution by horizontal distance",
            x=cumulative,
            y=bin_edges,
        )
        return fig.update_layout(
            xaxis_title="Cumulative root fraction",
            yaxis_title="Horizontal root distance (cm)",
        )

    def get_xy_comparison_data(
        self, df: pd.DataFrame, n_elements: int = 10
    ) -> np.ndarray:
        """Get summary statistic data for comparing against another summary statistic.

        The histogram is built with ``n_elements`` bins and the first
        ``n_elements`` bin edges are returned.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.
            n_elements (int, optional):
                The number of elements for the comparison. Defaults to 10.

        Returns:
            np.ndarray:
                The comparison data.
        """
        bin_edges = self.calculate(df, n_elements)[1]
        return bin_edges[:n_elements]
class TotalVolume(SummaryStatisticBase):
    """The TotalVolume summary statistic."""

    def __init__(self, **_) -> None:
        """TotalVolume constructor."""
        super().__init__()
        self.statistic = "Total root volume"
        self.unit = "cm^3"

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the total root volume.

        Each row is treated as a cylinder built from its ``diameter`` and
        ``length`` columns.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The total root volume.
        """
        segment_volumes = np.pi * (df.diameter / 2) ** 2 * df.length
        return np.sum(segment_volumes)
class AverageVolume(SummaryStatisticBase):
    """The AverageVolume summary statistic."""

    def __init__(self, **_) -> None:
        """AverageVolume constructor."""
        super().__init__()
        self.statistic = "Average root volume"
        self.unit = "cm^3"

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the average root volume.

        The summed cylinder volume of all rows is divided by the number of
        roots reported by ``get_number_of_roots``.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The average root volume.
        """
        segment_volumes = np.pi * (df.diameter / 2) ** 2 * df.length
        root_count = self.get_number_of_roots(df)
        return np.sum(segment_volumes) / root_count
class TotalLength(SummaryStatisticBase):
    """The TotalLength summary statistic."""

    def __init__(self, **_) -> None:
        """TotalLength constructor."""
        super().__init__()
        self.statistic = "Total root length"
        self.unit = "cm"

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the total root length.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The sum of the ``length`` column.
        """
        lengths = df.length
        return lengths.sum()
class AverageLength(SummaryStatisticBase):
    """The AverageLength summary statistic."""

    def __init__(self, **_) -> None:
        """AverageLength constructor."""
        super().__init__()
        self.statistic = "Average root length"
        self.unit = "cm"

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the average root length.

        The summed ``length`` column is divided by the number of roots
        reported by ``get_number_of_roots``.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The average root length.
        """
        summed_length = df.length.sum()
        root_count = self.get_number_of_roots(df)
        return summed_length / root_count
class TotalDiameter(SummaryStatisticBase):
    """The TotalDiameter summary statistic."""

    def __init__(self, **_) -> None:
        """TotalDiameter constructor."""
        super().__init__()
        self.statistic = "Total root diameter"
        self.unit = "cm"

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the total root diameter.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The sum of the ``diameter`` column.
        """
        diameters = df.diameter
        return diameters.sum()
class AverageDiameter(SummaryStatisticBase):
    """The AverageDiameter summary statistic."""

    def __init__(self, **_) -> None:
        """AverageDiameter constructor."""
        super().__init__()
        self.statistic = "Average root diameter"
        self.unit = "cm"

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the average root diameter.

        The summed ``diameter`` column is divided by the number of roots
        reported by ``get_number_of_roots``.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The average root diameter.
        """
        summed_diameter = df.diameter.sum()
        root_count = self.get_number_of_roots(df)
        return summed_diameter / root_count
class TotalSpecificRootLength(SummaryStatisticBase):
    """The TotalSpecificRootLength statistic."""

    def __init__(self, root_tissue_density: float, **_) -> None:
        """TotalSpecificRootLength constructor.

        Args:
            root_tissue_density (float):
                The root tissue density of the root system.
        """
        # Initialise the base attributes first, as the other statistics do.
        super().__init__()
        self.statistic = "Total specific root length"
        # Specific root length is length per unit mass, not a plain length.
        # NOTE(review): assumes mass is in grams, as in the weight statistics.
        self.unit = "cm/g"
        self.root_tissue_density = root_tissue_density

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the total specific root length.

        Each row is treated as a cylinder; its mass is the cylinder volume
        times the root tissue density, and its specific root length is
        ``length / mass``. The per-row values are then summed.

        Note:
            The reduction is delegated to pandas, which skips NaN entries by
            default (for example the 0/0 produced by a zero-length row). The
            builtin ``sum`` used previously propagated them.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The total specific root length.
        """
        radius = df.diameter / 2
        volume = np.pi * radius**2 * df.length
        mass = volume * self.root_tissue_density
        specific_root_length = df.length / mass
        return np.sum(specific_root_length)
class AverageSpecificRootLength(SummaryStatisticBase):
    """The AverageSpecificRootLength statistic."""

    def __init__(self, root_tissue_density: float, **_) -> None:
        """AverageSpecificRootLength constructor.

        Args:
            root_tissue_density (float):
                The root tissue density of the root system.
        """
        # Initialise the base attributes first, as the other statistics do.
        super().__init__()
        self.statistic = "Average specific root length"
        # Specific root length is length per unit mass, not a plain length.
        # NOTE(review): assumes mass is in grams, as in the weight statistics.
        self.unit = "cm/g"
        self.root_tissue_density = root_tissue_density

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the average specific root length.

        Each row is treated as a cylinder; its mass is the cylinder volume
        times the root tissue density, and its specific root length is
        ``length / mass``. The summed per-row values are divided by the number
        of roots reported by ``get_number_of_roots``.

        Note:
            The total is computed with ``np.sum``, as in ``AverageVolume``,
            instead of the builtin ``sum``. The builtin returned the integer 0
            for an empty dataframe, so dividing by a root count of zero raised
            ``ZeroDivisionError``. The reduction also skips NaN entries
            (pandas default).

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The average specific root length.
        """
        radius = df.diameter / 2
        volume = np.pi * radius**2 * df.length
        mass = volume * self.root_tissue_density
        specific_root_length = df.length / mass

        total = np.sum(specific_root_length)
        root_count = self.get_number_of_roots(df)
        return total / root_count
class TotalWeight(SummaryStatisticBase):
    """The TotalWeight statistic."""

    def __init__(self, root_tissue_density: float, **_) -> None:
        """TotalWeight constructor.

        Args:
            root_tissue_density (float):
                The root tissue density of the root system.
        """
        # Initialise the base attributes first, as the other statistics do.
        super().__init__()
        self.statistic = "Total root weight"
        self.unit = "g"
        self.root_tissue_density = root_tissue_density

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the total root weight.

        Each row is treated as a cylinder; its mass is the cylinder volume
        times the root tissue density.

        Note:
            The total is computed with ``np.sum``, as in ``TotalVolume``,
            instead of the builtin ``sum``. The reduction skips NaN entries
            (pandas default).

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The total root weight.
        """
        radius = df.diameter / 2
        volume = np.pi * radius**2 * df.length
        mass = volume * self.root_tissue_density
        return np.sum(mass)
class AverageWeight(SummaryStatisticBase):
    """The AverageWeight statistic."""

    def __init__(self, root_tissue_density: float, **_) -> None:
        """AverageWeight constructor.

        Args:
            root_tissue_density (float):
                The root tissue density of the root system.
        """
        # Initialise the base attributes first, as the other statistics do.
        super().__init__()
        self.statistic = "Average root weight"
        self.unit = "g"
        self.root_tissue_density = root_tissue_density

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the average root weight.

        Each row is treated as a cylinder; its mass is the cylinder volume
        times the root tissue density. The summed mass is divided by the
        number of roots reported by ``get_number_of_roots``.

        Note:
            The total is computed with ``np.sum``, as in ``AverageVolume``,
            instead of the builtin ``sum``. The builtin returned the integer 0
            for an empty dataframe, so dividing by a root count of zero raised
            ``ZeroDivisionError``. The reduction also skips NaN entries
            (pandas default).

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The average root weight.
        """
        radius = df.diameter / 2
        volume = np.pi * radius**2 * df.length
        mass = volume * self.root_tissue_density

        total = np.sum(mass)
        root_count = self.get_number_of_roots(df)
        return total / root_count
class ConvexHullArea(SummaryStatisticBase):
    """The ConvexHullArea statistic."""

    def __init__(self, **_) -> None:
        """ConvexHullArea constructor."""
        # Initialise the base attributes first, as the other statistics do.
        super().__init__()
        self.statistic = "Convex hull area"
        self.unit = "cm^2"

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the area of the convex hull.

        The hull is built from the ``x``, ``y`` and ``z`` columns.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The area of the convex hull.

        Raises:
            scipy.spatial.QhullError:
                Propagated from SciPy when the hull cannot be constructed,
                e.g. for too few or degenerate points.
        """
        coordinates = df[["x", "y", "z"]].to_numpy()
        hull = ConvexHull(points=coordinates)
        return hull.area
class ConvexHullVolume(SummaryStatisticBase):
    """The ConvexHullVolume statistic."""

    def __init__(self, **_) -> None:
        """ConvexHullVolume constructor."""
        # Initialise the base attributes first, as the other statistics do.
        super().__init__()
        self.statistic = "Convex hull volume"
        self.unit = "cm^3"

    def calculate(self, df: pd.DataFrame) -> float:
        """Calculate the volume of the convex hull.

        The hull is built from the ``x``, ``y`` and ``z`` columns.

        Args:
            df (pd.DataFrame):
                The dataframe of root data.

        Returns:
            float:
                The volume of the convex hull.

        Raises:
            scipy.spatial.QhullError:
                Propagated from SciPy when the hull cannot be constructed,
                e.g. for too few or degenerate points.
        """
        coordinates = df[["x", "y", "z"]].to_numpy()
        hull = ConvexHull(points=coordinates)
        return hull.volume
def get_summary_statistic_func(summary_statistic: str) -> Callable:
    """Get the summary statistic function by name.

    The snake_case name is converted to the matching class name (for example
    ``total_volume`` becomes ``TotalVolume``) and looked up in this module
    with ``pydoc.locate``.

    Args:
        summary_statistic (str):
            The summary statistic name.

    Returns:
        Callable:
            The summary statistic function, or whatever ``pydoc.locate``
            yields (``None``) when nothing matches.
    """
    spaced_title = summary_statistic.replace("_", " ").title()
    class_name = spaced_title.replace(" ", "")
    dotted_path = ".".join(("deeprootgen.statistics.summary_statistics", class_name))
    return locate(dotted_path)
def get_summary_statistics() -> list[dict]:
    """Get a list of available summary statistics and labels.

    Each entry maps ``value`` to the snake_case statistic name and ``label``
    to its title-cased, space-separated form.

    Returns:
        list[dict]:
            A list of available summary statistics and labels.
    """
    # The specific root length statistics ("total_specific_root_length" and
    # "average_specific_root_length") are currently left out of this list.
    names: tuple[str, ...] = (
        "total_volume",
        "average_volume",
        "total_length",
        "average_length",
        "total_diameter",
        "average_diameter",
        "total_weight",
        "average_weight",
        "depth_distribution",
        "radial_distribution",
        "convex_hull_area",
        "convex_hull_volume",
    )
    return [
        {"value": name, "label": name.replace("_", " ").title()} for name in names
    ]