Skip to content

PGD Subtools

These are subtools for the PresolarGrains class. All of them must be invoked from the PresolarGrains class!

Sub tool to retrieve data from the filtered database.

Data

Data retrieving class.

By default, the length of any of the returned data is not necessarily the same as the length of the parent class database. This is because empty values are filtered out. For certain methods you can specify dropnan=False in order to avoid this behavior. Properties that automatically drop empty values generally have a NAME_all counterpart that does not drop empty values. See the documentation of individual routines to see how data are filtered.

Source code in pgdtools/sub_tools/data.py
class Data:
    """Data retrieving class.

    By default, the length of any of the returned data is not necessarily the same
    as the length of the parent class database. This is because empty values are
    filtered out. For certain methods you can specify `dropnan=False` in order to
    avoid this behavior. Properties that automatically drop empty values generally
    have a `NAME_all` counterpart that does not drop empty values.
    See the documentation of individual routines to see how data are filtered.
    """

    def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
        """Initialize the Data class.

        :param parent: Parent class, must be of type ``PresolarGrains``.

        :raises TypeError: Parent class is not of type ``PresolarGrains``.
        """
        if not isinstance(parent, pgdtools.PresolarGrains):
            raise TypeError("Parent class must be of type PresolarGrains.")

        self.parent = parent

    @property
    def notes(self) -> pd.Series:
        """Retrieve the notes from the filtered database.

        Note: Values with empty notes will be dropped. To avoid this, use
        the `notes_all` property.

        :return: Series with notes. Index is the PGD ID.
        """
        return self.notes_all.dropna()

    @property
    def notes_all(self) -> pd.Series:
        """Retrieve the notes from the filtered database.

        Note: This routine does not drop any values, even if no notes are present.

        :return: Series with notes. Index is the PGD ID.
        """
        return self.parent.db["Notes"]

    @property
    def size(self) -> pd.DataFrame:
        """Retrieve the size data from the filtered database.

        This retrieves the grain sizes in µm from the presolar grain database.
        Two columns are returned, "Size a" and "Size b".

        Note: This routine will drop the rows that have no size information.
        Note that `Size a (µm)` is always the longer or average reported dimension.
        `Size b (µm)` is either the shorter dimension or - if not available in the
        database - set equal here to `Size a`.

        :return: Two columns of size information.
        """
        # `size_all` already mirrors "Size a" into an empty "Size b", so a row is
        # only dropped here when neither dimension was reported.
        return self.size_all.dropna(how="all")

    @property
    def size_all(self) -> pd.DataFrame:
        """Retrieve the size data from the filtered database.

        This retrieves the grain sizes in µm from the presolar grain database.
        Two columns are returned, "Size a" and "Size b".

        Note: This routine does not drop any values, even if no size information is
        present.
        Note that `Size a (µm)` is always the longer or average reported dimension.
        `Size b (µm)` is either the shorter dimension or - if not available in the
        database - set equal here to `Size a`.

        :return: Two columns of size information.
        """
        # work on a copy so that filling "Size b" never touches the parent database
        ret_db = self.parent.db[["Size a (µm)", "Size b (µm)"]].copy()
        ret_db["Size b (µm)"] = ret_db["Size b (µm)"].fillna(ret_db["Size a (µm)"])
        return ret_db

    # METHODS

    def ratio(
        self, rat: Tuple[str, str], dropnan: bool = True
    ) -> Tuple[
        pd.Series,
        pd.Series,
        pd.Series,
    ]:
        """Retrieve a given isotope ratio and its uncertainties from the database.

        The uncertainty headers of the parent are read as
        ``(symmetric, positive, negative)``. Where a one-sided uncertainty is
        missing (no column at all, or an empty entry), the symmetric uncertainty
        is used in its place.

        :param rat: Isotope ratio to retrieve. Tuple of two strings.
            Each string represents an isotope. Example: ("29Si", "28Si").
        :param dropnan: Drop the rows in which the ratio and all of its uncertainty
            columns are empty. Defaults to `True`.

        :return: Tuple of three Series: the isotope ratio, its positive
            uncertainty, and its negative uncertainty.

        :raises ValueError: `rat` is not of length 2, the isotope ratio is not
            available in the database, or no uncertainty column exists from which
            a one-sided uncertainty could be derived.
        """
        if len(rat) != 2:
            raise ValueError("Isotope ratio names must be a tuple of length 2.")

        parent_header = self.parent._header(rat[0], rat[1])
        iso_rat, _ = parent_header.ratio
        iso_unc_none = parent_header.uncertainty
        iso_unc = [v for v in iso_unc_none if v is not None]

        if iso_rat is None:
            raise ValueError(
                f"Isotope ratio {rat[0]}/{rat[1]} not available in the database."
            )

        df = self.parent.db[[iso_rat] + iso_unc]

        if dropnan:
            df = df.dropna(how="all")

        unc_sym = df[iso_unc_none[0]] if iso_unc_none[0] else None

        def _one_sided(header, suffix):
            """Return one asymmetric uncertainty, backed by the symmetric one."""
            if header is not None:
                unc = df[header].copy()
                if unc_sym is not None:
                    # both kinds exist: keep asymmetric values, fill gaps with sym
                    unc = unc.where(unc.notna(), unc_sym)
                return unc
            if unc_sym is None:
                # previously this path failed with an AttributeError on None
                raise ValueError(
                    f"No uncertainty for isotope ratio {rat[0]}/{rat[1]} "
                    f"available in the database."
                )
            unc = unc_sym.copy()
            unc.name = unc.name.replace("err", suffix)
            return unc

        ret_uncp = _one_sided(iso_unc_none[1], "err+")
        ret_uncn = _one_sided(iso_unc_none[2], "err-")

        return df[iso_rat], ret_uncp, ret_uncn

    def ratio_xy(
        self,
        rat_x: Tuple[str, str],
        rat_y: Tuple[str, str],
        simplify_unc: bool = False,
    ) -> Tuple[
        pd.Series,
        Union[pd.Series, pd.DataFrame],
        pd.Series,
        Union[pd.Series, pd.DataFrame],
        Union[None, pd.Series],
    ]:
        """Retrieve two isotope ratios and their respective uncertainties.

        This function is similar to the `ratio` function. It drops all NaNs, i.e.,
        all rows that do not contain values for the wanted x and y ratio. This makes
        this function, as the name implies, very useful for plotting.

        :param rat_x: Isotope ratio to get for x-axis of plot. Tuple of two strings.
            Each string represents an isotope. Example: ("29Si", "28Si").
        :param rat_y: Isotope ratio to get for y-axis of plot. Tuple of two strings.
            Each string represents an isotope. Example: ("29Si", "28Si").
        :param simplify_unc: By default, uncertainties are returned as asymmetric
            errors. This would return a dataframe with two columns.
            However, if `simplify_unc` is set to `True` and both columns are identical,
            a Series will be returned with only one column.

        :return: This function returns a tuple with various Series or Dataframes.
            1. The values for `rat_x`
            2. The uncertainties for `rat_x`, either as a dataframe (asymmetric) or
                as a Series (symmetric, see `simplify_unc=True`).
            3. The values for `rat_y`
            4. The uncertainties for `rat_y`, either as a dataframe (asymmetric) or
                as a Series (symmetric, see `simplify_unc=True`).
            5. The correlation coefficient between the `x` and `y` axis, if available,
                as a Series. Otherwise, `None` is returned. If the correlation column
                is available but no values have been reported (i.e., entries are left
                empty), these empties are replaced with 0 (no correlation).
        """
        # keep every row for now; incomplete rows are dropped once, jointly, below
        dat_x = self.ratio(rat_x, dropnan=False)
        dat_y = self.ratio(rat_y, dropnan=False)

        corr_header = self.parent._header(rat_x[0], rat_y[0]).correlation
        corr_ser = self.parent.db[corr_header] if corr_header is not None else None

        # columns: 0 = x, 1 = x err+, 2 = x err-, 3 = y, 4 = y err+, 5 = y err-
        df = pd.DataFrame(dat_x + dat_y).transpose()
        if corr_ser is not None:
            # empty correlations become 0 so that they do not cause a row drop
            df = df.join(corr_ser.fillna(0).to_frame())

        df = df.dropna()

        xdat = df.iloc[:, 0]
        ydat = df.iloc[:, 3]
        xunc = df.iloc[:, [1, 2]]
        yunc = df.iloc[:, [4, 5]]
        corr = None if corr_ser is None else df.iloc[:, 6]

        if simplify_unc:
            if xunc.iloc[:, 0].equals(xunc.iloc[:, 1]):
                xunc = xunc.iloc[:, 0]
                xunc = xunc.rename(xunc.name.replace("err+", "err"))
            if yunc.iloc[:, 0].equals(yunc.iloc[:, 1]):
                # column 0 is the "err+" column, so that is the suffix to strip
                yunc = yunc.iloc[:, 0]
                yunc = yunc.rename(yunc.name.replace("err+", "err"))

        return xdat, xunc, ydat, yunc, corr

notes property

Retrieve the notes from the filtered database.

Note: Values with empty notes will be dropped. To avoid this, use the notes_all property.

Returns:

Type Description

Series with notes. Index is the PGD ID.

notes_all property

Retrieve the notes from the filtered database.

Note: This routine does not drop any values, even if no notes are present.

Returns:

Type Description
Series

Series with notes. Index is the PGD ID.

size property

Retrieve the size data from the filtered database.

This retrieves the grain sizes in µm from the presolar grain database. Two columns are returned, "Size a" and "Size b".

Note: This routine will drop the rows that have no size information. Note that Size a (µm) is always the longer or average reported dimension. Size b (µm) is either the shorter dimension or - if not available in the database - set equal here to Size a.

Returns:

Type Description
DataFrame

Two columns of size information.

size_all property

Retrieve the size data from the filtered database.

This retrieves the grain sizes in µm from the presolar grain database. Two columns are returned, "Size a" and "Size b".

Note: This routine does not drop any values, even if no size information is present. Note that Size a (µm) is always the longer or average reported dimensions. Size b (µm) is either the shorter dimension or - if not available in the database, set equal here to Size a.

Returns:

Type Description
DataFrame

Two columns of size information.

__init__(parent)

Initialize the Data class.

Parameters:

Name Type Description Default
parent PresolarGrains

Parent class, must be of type PresolarGrains.

required

Raises:

Type Description
TypeError

Parent class is not of type PresolarGrains.

Source code in pgdtools/sub_tools/data.py
def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
    """Attach this sub tool to its parent ``PresolarGrains`` instance.

    :param parent: Parent class, must be of type ``PresolarGrains``.

    :raises TypeError: Parent class is not of type ``PresolarGrains``.
    """
    parent_is_valid = isinstance(parent, pgdtools.PresolarGrains)
    if not parent_is_valid:
        raise TypeError("Parent class must be of type PresolarGrains.")

    # all data access of the sub tool goes through this reference
    self.parent = parent

ratio(rat, dropnan=True)

Retrieve a given isotope ratio from the database.

Parameters:

Name Type Description Default
rat Tuple[str, str]

Isotope ratio to retrieve. Tuple of two strings. Each string represents an isotope. Example: ("29Si", "28Si").

required
dropnan bool

Drop rows with NaN values for the given isotope ratio. Defaults to True.

True

Returns:

Type Description
Tuple[Series, Series, Series]

Tuple of three Series: the isotope ratio, its positive uncertainty, and its negative uncertainty.

Source code in pgdtools/sub_tools/data.py
def ratio(
    self, rat: Tuple[str, str], dropnan: bool = True
) -> Tuple[
    pd.Series,
    pd.Series,
    pd.Series,
]:
    """Retrieve a given isotope ratio and its uncertainties from the database.

    The uncertainty headers of the parent are read as
    ``(symmetric, positive, negative)``. Where a one-sided uncertainty is
    missing (no column at all, or an empty entry), the symmetric uncertainty
    is used in its place.

    :param rat: Isotope ratio to retrieve. Tuple of two strings.
        Each string represents an isotope. Example: ("29Si", "28Si").
    :param dropnan: Drop the rows in which the ratio and all of its uncertainty
        columns are empty. Defaults to `True`.

    :return: Tuple of three Series: the isotope ratio, its positive
        uncertainty, and its negative uncertainty.

    :raises ValueError: `rat` is not of length 2, the isotope ratio is not
        available in the database, or no uncertainty column exists from which
        a one-sided uncertainty could be derived.
    """
    if len(rat) != 2:
        raise ValueError("Isotope ratio names must be a tuple of length 2.")

    parent_header = self.parent._header(rat[0], rat[1])
    iso_rat, _ = parent_header.ratio
    iso_unc_none = parent_header.uncertainty
    iso_unc = [v for v in iso_unc_none if v is not None]

    if iso_rat is None:
        raise ValueError(
            f"Isotope ratio {rat[0]}/{rat[1]} not available in the database."
        )

    df = self.parent.db[[iso_rat] + iso_unc]

    if dropnan:
        df = df.dropna(how="all")

    unc_sym = df[iso_unc_none[0]] if iso_unc_none[0] else None

    def _one_sided(header, suffix):
        """Return one asymmetric uncertainty, backed by the symmetric one."""
        if header is not None:
            unc = df[header].copy()
            if unc_sym is not None:
                # both kinds exist: keep asymmetric values, fill gaps with sym
                unc = unc.where(unc.notna(), unc_sym)
            return unc
        if unc_sym is None:
            # previously this path failed with an AttributeError on None
            raise ValueError(
                f"No uncertainty for isotope ratio {rat[0]}/{rat[1]} "
                f"available in the database."
            )
        unc = unc_sym.copy()
        unc.name = unc.name.replace("err", suffix)
        return unc

    ret_uncp = _one_sided(iso_unc_none[1], "err+")
    ret_uncn = _one_sided(iso_unc_none[2], "err-")

    return df[iso_rat], ret_uncp, ret_uncn

ratio_xy(rat_x, rat_y, simplify_unc=False)

Retrieve two isotope ratios and their respective uncertainties.

This function is similar to the ratio function. It drops all NaNs, i.e., all rows that do not contain values for the wanted x and y ratio. This makes this function, as the name implies, very useful for plotting.

Parameters:

Name Type Description Default
rat_x Tuple[str, str]

Isotope ratio to get for x-axis of plot. Tuple of two strings. Each string represents an isotope. Example: ("29Si", "28Si").

required
rat_y Tuple[str, str]

Isotope ratio to get for y-axis of plot. Tuple of two strings. Each string represents an isotope. Example: ("29Si", "28Si").

required
simplify_unc

By default, uncertainties are returned as asymmetric errors. This would return a dataframe with two columns. However, if simplify_unc is set to True and both columns are identical, a Series will be returned with only one column.

False

Returns:

Type Description
Tuple[Series, Union[Series, DataFrame], Series, Union[Series, DataFrame], Union[None, Series]]

This function returns a tuple with various Series or Dataframes. 1. The values for rat_x 2. The uncertainties for rat_x, either as a dataframe (asymmetric) or as a Series (symmetric, see simplify_unc=True). 3. The values for rat_y 4. The uncertainties for rat_y, either as a dataframe (asymmetric) or as a Series (symmetric, see simplify_unc=True). 5. The correlation coefficient between the x and y axis, if available, as a Series. Otherwise, None is returned. If the correlation column is available but no values have been reported (i.e., entries are left empty), these empties are replaced with 0 (no correlation).

Source code in pgdtools/sub_tools/data.py
def ratio_xy(
    self,
    rat_x: Tuple[str, str],
    rat_y: Tuple[str, str],
    simplify_unc: bool = False,
) -> Tuple[
    pd.Series,
    Union[pd.Series, pd.DataFrame],
    pd.Series,
    Union[pd.Series, pd.DataFrame],
    Union[None, pd.Series],
]:
    """Retrieve two isotope ratios and their respective uncertainties.

    This function is similar to the `ratio` function. It drops all NaNs, i.e.,
    all rows that do not contain values for the wanted x and y ratio. This makes
    this function, as the name implies, very useful for plotting.

    :param rat_x: Isotope ratio to get for x-axis of plot. Tuple of two strings.
        Each string represents an isotope. Example: ("29Si", "28Si").
    :param rat_y: Isotope ratio to get for y-axis of plot. Tuple of two strings.
        Each string represents an isotope. Example: ("29Si", "28Si").
    :param simplify_unc: By default, uncertainties are returned as asymmetric
        errors. This would return a dataframe with two columns.
        However, if `simplify_unc` is set to `True` and both columns are identical,
        a Series will be returned with only one column.

    :return: This function returns a tuple with various Series or Dataframes.
        1. The values for `rat_x`
        2. The uncertainties for `rat_x`, either as a dataframe (asymmetric) or
            as a Series (symmetric, see `simplify_unc=True`).
        3. The values for `rat_y`
        4. The uncertainties for `rat_y`, either as a dataframe (asymmetric) or
            as a Series (symmetric, see `simplify_unc=True`).
        5. The correlation coefficient between the `x` and `y` axis, if available,
            as a Series. Otherwise, `None` is returned. If the correlation column
            is available but no values have been reported (i.e., entries are left
            empty), these empties are replaced with 0 (no correlation).
    """
    # keep every row for now; incomplete rows are dropped once, jointly, below
    dat_x = self.ratio(rat_x, dropnan=False)
    dat_y = self.ratio(rat_y, dropnan=False)

    corr_header = self.parent._header(rat_x[0], rat_y[0]).correlation
    corr_ser = self.parent.db[corr_header] if corr_header is not None else None

    # columns: 0 = x, 1 = x err+, 2 = x err-, 3 = y, 4 = y err+, 5 = y err-
    df = pd.DataFrame(dat_x + dat_y).transpose()
    if corr_ser is not None:
        # empty correlations become 0 so that they do not cause a row drop
        df = df.join(corr_ser.fillna(0).to_frame())

    df = df.dropna()

    xdat = df.iloc[:, 0]
    ydat = df.iloc[:, 3]
    xunc = df.iloc[:, [1, 2]]
    yunc = df.iloc[:, [4, 5]]
    corr = None if corr_ser is None else df.iloc[:, 6]

    if simplify_unc:
        if xunc.iloc[:, 0].equals(xunc.iloc[:, 1]):
            xunc = xunc.iloc[:, 0]
            xunc = xunc.rename(xunc.name.replace("err+", "err"))
        if yunc.iloc[:, 0].equals(yunc.iloc[:, 1]):
            # column 0 is the "err+" column, so that is the suffix to strip
            yunc = yunc.iloc[:, 0]
            yunc = yunc.rename(yunc.name.replace("err+", "err"))

    return xdat, xunc, ydat, yunc, corr

Sub tool to add filtering capabilities.

Filters

Filtering class to filter the data set.

Note that this class will filter the dataset in the parent class!

Source code in pgdtools/sub_tools/filters.py
class Filters:
    """Filtering class to filter the data set.

    Note that this class will filter the dataset in the parent class!
    Every filter replaces ``parent.db`` with the filtered frame.
    """

    def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
        """Initialize the Filters class.

        :param parent: Parent class, must be of type ``PresolarGrains``.

        :raises TypeError: Parent class is not of type ``PresolarGrains``.
        """
        if not isinstance(parent, pgdtools.PresolarGrains):
            raise TypeError("Parent class must be of type PresolarGrains.")

        self.parent = parent

    def db(
        self,
        dbs: Union[
            "pgdtools.PresolarGrains.DataBase", List["pgdtools.PresolarGrains.DataBase"]
        ],
        exclude: bool = False,
    ) -> None:
        """Filter out a specific database.

        A grain belongs to a database if its PGD ID (the index of the data set)
        starts with the ``value`` of that database entry.

        :param dbs: Database or databases to filter the data set on.
        :param exclude: Exclude the given databases from the data set.

        :raises TypeError: Database is not of type PresolarGrains.DataBase.
        """
        if not isinstance(dbs, list):
            dbs = [dbs]

        if not all(isinstance(db, pgdtools.PresolarGrains.DataBase) for db in dbs):
            raise TypeError("Database must be of type PresolarGrains.DataBase.")

        # `str.startswith` accepts a tuple of prefixes: one call per ID
        prefixes = tuple(db.value for db in dbs)
        mask = (
            self.parent.db.index.to_series()
            .apply(lambda pgd_id: pgd_id.startswith(prefixes))
            .astype(bool)  # keeps the mask boolean even for an empty data set
        )
        self.parent.db = self.parent.db[~mask if exclude else mask]

    def pgd_id(self, ids: Union[str, List[str]], exclude: bool = False) -> None:
        """Filter the data set based on PGD IDs.

        Note: When including IDs, the rows are selected by label via ``.loc``,
        i.e., they are returned in the order given in `ids`.

        :param ids: PGD ID (single or multiple) to filter the data set on.
        :param exclude: Exclude the given IDs from the data set.

        :raises KeyError: `exclude` is `False` and an ID is not in the data set
            (raised by the ``.loc`` label lookup).
        """
        if isinstance(ids, str):
            ids = [ids]
        if exclude:
            self.parent.db = self.parent.db[~self.parent.db.index.isin(ids)]
        else:
            self.parent.db = self.parent.db.loc[ids]

    def pgd_type(self, tp: Union[str, List[str]], exclude: bool = False) -> None:
        """Filter for a given PGD type or types.

        :param tp: PGD type or types to filter the data set on.
        :param exclude: Exclude the given types from the data set.
        """
        self._filter_column("PGD Type", tp, exclude)

    def pgd_subtype(self, st: Union[str, List[str]], exclude: bool = False) -> None:
        """Filter for a given PGD subtype or subtypes.

        Note: Empty values are not dropped if `exclude` is set to `True`.

        :param st: PGD subtype or subtypes to filter the data set on.
        :param exclude: Exclude the given subtypes from the data set.
        """
        self._filter_column("PGD Subtype", st, exclude)

    def ratio(
        self, rat: Tuple[str, str], cmp: str, value: float, exclude: bool = False
    ) -> None:
        """Filter the data set based on a given isotope ratio.

        Here, a given isotope ratio is filtered based on a comparator and a value.
        Some error checking is done on the comparator to ensure that it is valid.

        Note: rows with NaN values for the given isotope ratio will be dropped
        from the dataset before filtering. This behavior is independent of the value
        of `exclude`.

        :param rat: Isotope ratio to filter the data set on. Tuple of two strings.
            Each string represents an isotope. Example: ("29Si", "28Si").
        :param cmp: Comparison operator to use. Available operators are:
            "<", "<=", ">", ">=", "==", "!=".
        :param value: Value to compare the isotope ratio against.
        :param exclude: Exclude the given isotope ratio value range from the data set.

        :raises ValueError: The comparator is not one of the available operators.
        """
        cmp = _check_comparator(cmp)
        utl.check_iso_rat(rat)
        iso_rat = self.parent._header(rat[0], rat[1]).ratio

        # drop rows with NaN values for the given isotope ratio; reassign instead
        # of dropping in place so that no other holder of the frame is mutated
        filtered = self.parent.db.dropna(subset=[iso_rat[0]])

        mask = self._compare(filtered[iso_rat[0]], cmp, value)
        self.parent.db = filtered[~mask if exclude else mask]

    def reference(self, refs: Union[str, List[str]], exclude: bool = False) -> None:
        """Filter the data set based on (a) given reference(s).

        Note that the references must be exactly what is written in the database.
        If you want to search for references, check out the routine:
        `pgd.reference.search("search string")`.

        :param refs: Reference or references to filter the data set on.
        :param exclude: Exclude the given references from the data set.
        """
        self._filter_column("Reference", refs, exclude=exclude)

    def reset(self) -> None:
        """Reset all the filters and re-instate the original database.

        Alternatively, this can also be done directly from the parent class by using
        the `reset` method.
        """
        self.parent.reset()

    def uncertainty(
        self, rat: Tuple[str, str], cmp: str, value: float, exclude: bool = False
    ) -> None:
        """Filter the data set based on a given uncertainty of an isotope ratio.

        Here, a given uncertainty is filtered based on a comparator and a value.
        Some error checking is done on the comparator to ensure that it is valid.

        A ratio can have up to three uncertainty columns. When including, a row
        is kept if all of its reported uncertainties fulfill the comparison. When
        excluding, a row is dropped if any of its reported uncertainties fulfills
        the comparison. Empty entries never count as fulfilling the comparison.

        Note: rows that have no uncertainty value at all for the given ratio will be
        dropped from the dataset before filtering. This behavior is independent of
        the value of `exclude`.

        :param rat: Isotope ratio to filter the data set on. Tuple of two strings.
            Each string represents an isotope. Example: ("29Si", "28Si").
        :param cmp: Comparison operator to use. Available operators are:
            "<", "<=", ">", ">=", "==", "!=".
        :param value: Value to compare the uncertainty against.
        :param exclude: Exclude the given uncertainty value range from the data set.

        :raises ValueError: Invalid comparator or
            isotope ratio names are not valid, not of length 2, or the chosen
            isotope ratio is not available in the database.
        """
        cmp = _check_comparator(cmp)
        utl.check_iso_rat(rat)
        iso_unc = self.parent._header(rat[0], rat[1]).uncertainty

        iso_unc = [v for v in iso_unc if v is not None]

        # drop rows without any uncertainty; reassign rather than drop in place
        filtered = self.parent.db.dropna(subset=iso_unc, how="all")

        keep = self._uncertainty_keep_mask(filtered[iso_unc], cmp, value, exclude)
        self.parent.db = filtered[keep]

    @staticmethod
    def _compare(data, cmp: str, value: float):
        """Evaluate ``data <cmp> value`` element-wise without using `eval`.

        :param data: Series or DataFrame to compare.
        :param cmp: One of "<", "<=", ">", ">=", "==", "!=".
        :param value: Value to compare against.

        :return: Boolean object of the same shape as `data`.

        :raises ValueError: `cmp` is not one of the supported comparators.
        """
        import operator  # local import: the file-level imports are not in view

        comparators = {
            "<": operator.lt,
            "<=": operator.le,
            ">": operator.gt,
            ">=": operator.ge,
            "==": operator.eq,
            "!=": operator.ne,
        }
        try:
            compare = comparators[cmp]
        except KeyError:
            raise ValueError(f"Invalid comparator: {cmp}") from None
        return compare(data, value)

    @staticmethod
    def _uncertainty_keep_mask(unc, cmp: str, value: float, exclude: bool):
        """Determine the rows to keep when filtering on uncertainty columns.

        :param unc: DataFrame that holds only the uncertainty columns.
        :param cmp: One of "<", "<=", ">", ">=", "==", "!=".
        :param value: Value to compare the uncertainties against.
        :param exclude: Whether matching rows are excluded instead of kept.

        :return: Boolean Series, `True` for every row that stays in the data set.
        """
        available = unc.notna()
        # NaN != value is True, so empty cells must be masked out explicitly
        hits = Filters._compare(unc, cmp, value) & available
        if exclude:
            return ~hits.any(axis=1)
        return (hits | ~available).all(axis=1)

    def _filter_column(
        self, column: str, value: Union[str, List[str]], exclude: bool
    ) -> None:
        """Filter the data set based on a given column.

        :param column: Column to filter the data set on.
        :param value: Value or values to filter the data set on.
        :param exclude: Exclude the given values from the data set.
        """
        if isinstance(value, str):
            value = [value]
        mask = self.parent.db[column].isin(value)
        self.parent.db = self.parent.db[~mask if exclude else mask]

__init__(parent)

Initialize the Filters class.

Parameters:

Name Type Description Default
parent PresolarGrains

Parent class, must be of type PresolarGrains.

required

Raises:

Type Description
TypeError

Parent class is not of type PresolarGrains.

Source code in pgdtools/sub_tools/filters.py
def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
    """Attach the filter sub tool to its parent ``PresolarGrains`` instance.

    :param parent: Parent class, must be of type ``PresolarGrains``.

    :raises TypeError: Parent class is not of type ``PresolarGrains``.
    """
    parent_is_valid = isinstance(parent, pgdtools.PresolarGrains)
    if not parent_is_valid:
        raise TypeError("Parent class must be of type PresolarGrains.")

    # every filter reads and replaces `parent.db` through this reference
    self.parent = parent

db(dbs, exclude=False)

Filter out a specific database.

Parameters:

Name Type Description Default
dbs Union[DataBase, List[DataBase]]

Database or databases to filter the data set on.

required
exclude bool

Exclude the given databases from the data set.

False

Raises:

Type Description
TypeError

Database is not of type PresolarGrains.DataBase.

Source code in pgdtools/sub_tools/filters.py
def db(
    self,
    dbs: Union[
        "pgdtools.PresolarGrains.DataBase", List["pgdtools.PresolarGrains.DataBase"]
    ],
    exclude: bool = False,
) -> None:
    """Filter out a specific database.

    A grain belongs to a database if its PGD ID (the index of the data set)
    starts with the ``value`` of that database entry.

    :param dbs: Database or databases to filter the data set on.
    :param exclude: Exclude the given databases from the data set.

    :raises TypeError: Database is not of type PresolarGrains.DataBase.
    """
    if not isinstance(dbs, list):
        dbs = [dbs]

    if not all(isinstance(db, pgdtools.PresolarGrains.DataBase) for db in dbs):
        raise TypeError("Database must be of type PresolarGrains.DataBase.")

    # `str.startswith` accepts a tuple of prefixes: one call per ID
    prefixes = tuple(db.value for db in dbs)
    mask = (
        self.parent.db.index.to_series()
        .apply(lambda pgd_id: pgd_id.startswith(prefixes))
        .astype(bool)  # keeps the mask boolean even for an empty data set
    )
    self.parent.db = self.parent.db[~mask if exclude else mask]

pgd_id(ids, exclude=False)

Filter the data set based on PGD IDs.

Parameters:

Name Type Description Default
ids Union[str, List[str]]

PGD ID (single or multiple) to filter the data set on.

required
exclude bool

Exclude the given IDs from the data set.

False
Source code in pgdtools/sub_tools/filters.py
def pgd_id(self, ids: Union[str, List[str]], exclude: bool = False) -> None:
    """Restrict the data set to, or remove from it, the given PGD IDs.

    :param ids: PGD ID (single or multiple) to filter the data set on.
    :param exclude: Exclude the given IDs from the data set.
    """
    # a single ID is handled like a one-element list
    id_list = [ids] if isinstance(ids, str) else ids
    current = self.parent.db

    if not exclude:
        # label-based selection of exactly the requested rows
        self.parent.db = current.loc[id_list]
        return

    is_listed = current.index.isin(id_list)
    self.parent.db = current[~is_listed]

pgd_subtype(st, exclude=False)

Filter for a given PGD subtype or subtypes.

Note: Empty values are not dropped if exclude is set to True.

Parameters:

Name Type Description Default
st Union[str, List[str]]

PGD subtype or subtypes to filter the data set on.

required
exclude bool

Exclude the given subtypes from the data set.

False
Source code in pgdtools/sub_tools/filters.py
def pgd_subtype(self, st: Union[str, List[str]], exclude: bool = False) -> None:
    """Filter the data set on one or several PGD subtypes.

    Note: Empty values are not dropped if `exclude` is set to `True`.

    :param st: PGD subtype or subtypes to filter the data set on.
    :param exclude: Exclude the given subtypes from the data set.
    """
    # the generic column filter does the work on the "PGD Subtype" column
    self._filter_column(column="PGD Subtype", value=st, exclude=exclude)

pgd_type(tp, exclude=False)

Filter for a given PGD type or types.

Parameters:

Name Type Description Default
tp Union[str, List[str]]

PGD type or types to filter the data set on.

required
exclude bool

Exclude the given types from the data set.

False
Source code in pgdtools/sub_tools/filters.py
def pgd_type(self, tp: Union[str, List[str]], exclude: bool = False) -> None:
    """Filter the data set on one or several PGD types.

    :param tp: PGD type or types to filter the data set on.
    :param exclude: Exclude the given types from the data set.
    """
    # the generic column filter does the work on the "PGD Type" column
    self._filter_column(column="PGD Type", value=tp, exclude=exclude)

ratio(rat, cmp, value, exclude=False)

Filter the data set based on a given isotope ratio.

Here, a given isotope ratio is filtered based on a comparator and a value. Some error checking is done on the comparator to ensure that it is valid.

Note: rows with NaN values for the given comparator will be dropped from the dataset before filtering. This behavior is independent of the value of exclude.

Parameters:

Name Type Description Default
rat Tuple[str, str]

Isotope ratio to filter the data set on. Tuple of two strings. Each string represents an isotope. Example: ("29Si", "28Si").

required
cmp str

Comparison operator to use. Available operators are: "<", "<=", ">", ">=", "==", "!=".

required
value float

Value to compare the isotope ratio against.

required
exclude bool

Exclude the given isotope ratio value range from the data set.

False
Source code in pgdtools/sub_tools/filters.py
def ratio(
    self, rat: Tuple[str, str], cmp: str, value: float, exclude: bool = False
) -> None:
    """Filter the data set based on a given isotope ratio.

    Here, a given isotope ratio is filtered based on a comparator and a value.
    Some error checking is done on the comparator to ensure that it is valid.

    Note: rows with NaN values for the given isotope ratio will be dropped
    from the dataset before filtering. This behavior is independent of the value
    of `exclude`.

    :param rat: Isotope ratio to filter the data set on. Tuple of two strings.
        Each string represents an isotope. Example: ("29Si", "28Si").
    :param cmp: Comparison operator to use. Available operators are:
        "<", "<=", ">", ">=", "==", "!=".
    :param value: Value to compare the isotope ratio against.
    :param exclude: Exclude the given isotope ratio value range from the data set.

    :raises ValueError: The comparator is not one of the supported operators.
    """
    import operator

    # Dispatch table instead of `eval`: the comparison runs vectorized on the
    # whole column and always yields a boolean mask, also for an empty frame.
    comparators = {
        "<": operator.lt,
        "<=": operator.le,
        ">": operator.gt,
        ">=": operator.ge,
        "==": operator.eq,
        "!=": operator.ne,
    }

    cmp = _check_comparator(cmp)
    utl.check_iso_rat(rat)
    iso_rat = self.parent._header(rat[0], rat[1]).ratio
    column = iso_rat[0]  # first entry of the header info is the column name

    try:
        compare = comparators[cmp.strip()]
    except KeyError:
        raise ValueError(f"Comparator {cmp!r} is not supported.") from None

    # drop rows with NaN values for the given isotope ratio; a new frame is
    # assigned instead of dropping in place, so the previous frame object is
    # left untouched
    db = self.parent.db.dropna(subset=[column])

    mask = compare(db[column], value)
    self.parent.db = db[~mask] if exclude else db[mask]

reference(refs, exclude=False)

Filter the data set based on (a) given reference(s).

Note that the references must be exactly what is written in the database. If you want to search for references, check out the routine: pgd.reference.search("search string").

Parameters:

Name Type Description Default
refs Union[str, List[str]]

Reference or references to filter the data set on.

required
exclude

Exclude the given references from the data set.

False
Source code in pgdtools/sub_tools/filters.py
def reference(
    self, refs: Union[str, List[str]], exclude: bool = False
) -> None:
    """Filter the data set based on (a) given reference(s).

    Note that the references must be exactly what is written in the database.
    If you want to search for references, check out the routine:
    `pgd.reference.search("search string")`.

    The actual filtering is delegated to ``_filter_column`` on the
    "Reference" column.

    :param refs: Reference or references to filter the data set on.
    :param exclude: Exclude the given references from the data set.
    """
    self._filter_column("Reference", refs, exclude=exclude)

reset()

Reset all the filters and re-instate the original database.

Alternatively, this can also be done directly from the parent class by using the reset method.

Source code in pgdtools/sub_tools/filters.py
def reset(self) -> None:
    """Undo every filter by restoring the original database.

    This simply forwards to the `reset` method of the parent class, which can
    also be called directly.
    """
    owner = self.parent
    owner.reset()

uncertainty(rat, cmp, value, exclude=False)

Filter the data set based on a given uncertainty of an isotope ratio.

Here, a given uncertainty is filtered based on a comparator and a value. Some error checking is done on the comparator to ensure that it is valid.

Note: rows in which all uncertainty values of the given isotope ratio are NaN will be dropped from the dataset before filtering. This behavior is independent of the value of exclude.

Parameters:

Name Type Description Default
rat Tuple[str, str]

Isotope ratio to filter the data set on. Tuple of two strings. Each string represents an isotope. Example: ("29Si", "28Si").

required
cmp str

Comparison operator to use. Available operators are: "<", "<=", ">", ">=", "==", "!=".

required
value float

Value to compare the uncertainty of the isotope ratio against.

required
exclude bool

Exclude the given isotope ratio value range from the data set.

False

Raises:

Type Description
ValueError

Invalid comparator or isotope ratio names are not valid, not of length 2, or the chosen isotope ratio is not available in the database.

Source code in pgdtools/sub_tools/filters.py
def uncertainty(
    self, rat: Tuple[str, str], cmp: str, value: float, exclude: bool = False
) -> None:
    """Filter the data set based on a given uncertainty of an isotope ratio.

    Here, a given uncertainty is filtered based on a comparator and a value.
    Some error checking is done on the comparator to ensure that it is valid.

    A ratio can have more than one uncertainty column. Without `exclude`, a row
    is kept only if all of its available (non-NaN) uncertainties fulfill the
    comparison. With `exclude`, a row is removed as soon as any of its available
    uncertainties fulfills the comparison.

    Note: rows in which all uncertainty values of the given isotope ratio are
    NaN will be dropped from the dataset before filtering. This behavior is
    independent of the value of `exclude`.

    :param rat: Isotope ratio to filter the data set on. Tuple of two strings.
        Each string represents an isotope. Example: ("29Si", "28Si").
    :param cmp: Comparison operator to use. Available operators are:
        "<", "<=", ">", ">=", "==", "!=".
    :param value: Value to compare the uncertainty of the isotope ratio against.
    :param exclude: Exclude the given uncertainty value range from the data set.

    :raises ValueError: Invalid comparator or
        isotope ratio names are not valid, not of length 2, or the chosen
        isotope ratio is not available in the database.
    """
    import operator

    # Dispatch table instead of `eval` on a formatted string.
    comparators = {
        "<": operator.lt,
        "<=": operator.le,
        ">": operator.gt,
        ">=": operator.ge,
        "==": operator.eq,
        "!=": operator.ne,
    }

    cmp = _check_comparator(cmp)
    utl.check_iso_rat(rat)
    iso_unc = self.parent._header(rat[0], rat[1]).uncertainty

    # keep only the uncertainty columns that actually exist
    iso_unc = [v for v in iso_unc if v is not None]

    try:
        compare = comparators[cmp.strip()]
    except KeyError:
        raise ValueError(f"Comparator {cmp!r} is not supported.") from None

    # drop rows without any uncertainty value; a new frame is assigned instead
    # of dropping in place, so the previous frame object is left untouched
    db = self.parent.db.dropna(subset=iso_unc, how="all")

    values = db[iso_unc]
    present = values.notna()
    number_of_values = present.sum(axis=1)

    # `NaN != value` evaluates to True, so missing cells must be masked out
    # explicitly; otherwise they would be counted as matches for "!=".
    number_of_matches = (compare(values, value) & present).sum(axis=1)

    if exclude:
        keep = number_of_matches == 0
    else:
        keep = number_of_matches == number_of_values

    self.parent.db = db[keep]

Sub tool to format header information, etc.

Format

Formatting class.

Default formatting for strings are in LaTeX notation. These can directly be used, e.g., with matplotlib.

Source code in pgdtools/sub_tools/format.py
class Format:
    """Formatting helpers for header information.

    Strings are formatted in LaTeX notation by default, so they can be passed
    directly to, e.g., matplotlib.
    """

    def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
        """Attach the formatter to its parent.

        :param parent: Parent class, must be of type ``PresolarGrains``.

        :raises TypeError: Parent class is not of type ``PresolarGrains``.
        """
        parent_is_valid = isinstance(parent, pgdtools.PresolarGrains)
        if not parent_is_valid:
            raise TypeError("Parent class must be of type PresolarGrains.")

        self.parent = parent

    def ratio(self, rat: Tuple[str, str]) -> str:
        """Build a LaTeX label for an isotope ratio.

        The result can, e.g., directly be used as an axis label for a plot.
        Ratios flagged as delta-values in the header are wrapped in a delta
        expression with a permil unit.

        :param rat: Isotope ratio to format. Tuple of two strings.
            Each string represents an isotope. Example: ("29Si", "28Si").

        :return: Formatted isotope ratio header.
        """
        _name, is_delta = self._get_and_check_hdr_ratio(rat)
        iso1 = utl.Isotope(rat[0])
        iso2 = utl.Isotope(rat[1])

        if is_delta:
            return f"$\\delta({iso1.latex}/{iso2.latex})\\quad(‰)$"
        return f"${iso1.latex}/{iso2.latex}$"

    def _get_and_check_hdr_ratio(self, rat: Tuple[str, str]):
        """Look up the header entry of an isotope ratio and make sure it exists.

        :param rat: Isotope ratio to look up. Tuple of two strings.
            Each string represents an isotope. Example: ("29Si", "28Si").

        :return: Header information for the given isotope ratio.

        :raises ValueError: Isotope ratio is not found in the header.
        """
        utl.check_iso_rat(rat)

        iso1, iso2 = rat
        header_entry = self.parent._header(iso1, iso2).ratio
        if header_entry is not None:
            return header_entry
        raise ValueError(f"Isotope ratio {iso1}/{iso2} not found in the header.")

__init__(parent)

Initialize the Format class.

Parameters:

Name Type Description Default
parent PresolarGrains

Parent class, must be of type PresolarGrains.

required

Raises:

Type Description
TypeError

Parent class is not of type PresolarGrains.

Source code in pgdtools/sub_tools/format.py
def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
    """Attach the Format helper to its parent.

    :param parent: Parent class, must be of type ``PresolarGrains``.

    :raises TypeError: Parent class is not of type ``PresolarGrains``.
    """
    parent_is_valid = isinstance(parent, pgdtools.PresolarGrains)
    if not parent_is_valid:
        raise TypeError("Parent class must be of type PresolarGrains.")

    self.parent = parent

ratio(rat)

Format an isotope ratio header in LaTeX style.

This can, e.g., directly be used as an axis label for a plot.

Parameters:

Name Type Description Default
rat Tuple[str, str]

Isotope ratio to filter the data set on. Tuple of two strings. Each string represents an isotope. Example: ("29Si", "28Si").

required

Returns:

Type Description
str

Formatted isotope ratio header.

Source code in pgdtools/sub_tools/format.py
def ratio(self, rat: Tuple[str, str]) -> str:
    """Build a LaTeX label for an isotope ratio.

    The result can, e.g., directly be used as an axis label for a plot.
    Ratios flagged as delta-values in the header are wrapped in a delta
    expression with a permil unit.

    :param rat: Isotope ratio to format. Tuple of two strings.
        Each string represents an isotope. Example: ("29Si", "28Si").

    :return: Formatted isotope ratio header.
    """
    _name, is_delta = self._get_and_check_hdr_ratio(rat)
    iso1 = utl.Isotope(rat[0])
    iso2 = utl.Isotope(rat[1])

    if is_delta:
        return f"$\\delta({iso1.latex}/{iso2.latex})\\quad(‰)$"
    return f"${iso1.latex}/{iso2.latex}$"

Sub tool to add information querying capabilities.

Info

Class to obtain information about the database.

Note: This class will print and return values.

Source code in pgdtools/sub_tools/info.py
class Info:
    """Class to obtain information about the database.

    Note: This class will print and return values.
    """

    def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
        """Initialize the Info class.

        :param parent: Parent class, must be of type ``PresolarGrains``.

        :raises TypeError: Parent class is not of type ``PresolarGrains``.
        """
        if not isinstance(parent, pgdtools.PresolarGrains):
            raise TypeError("Parent class must be of type PresolarGrains.")

        self.parent = parent

    @property
    def dbs(self) -> Tuple["pgdtools.PresolarGrains.DataBase", ...]:
        """Get/print what databases are currently in the selection.

        The database is identified by the part of each index entry in front of
        the first "-".

        :return: Tuple of the databases present, ordered by that prefix.
        """
        index_start = {ind.split("-")[0] for ind in self.parent.db.index}

        # Set iteration order is arbitrary; sort for reproducible output.
        dbs = tuple(
            pgdtools.PresolarGrains.DataBase(x) for x in sorted(index_start)
        )
        print("Currently available databases are:")
        if not dbs:
            print("- None")
        else:
            for db in dbs:
                print(f"- {db.name}")

        return dbs

    @property
    def number_of_grains(self) -> int:
        """Get/print how many presolar grains are in the currently filtered database.

        :return: Number of presolar grains.
        """
        nog = len(self.parent)
        print(f"Number of grains in current selection: {nog}")
        return nog

    @property
    def pgd_types(self) -> Set[str]:
        """Get/print what PGD types of presolar grains are in the current database.

        :return: Set of all PGD grain types available.
        """
        ret_set = set(self.parent.db["PGD Type"].drop_duplicates())

        print("Currently available PGD types in filtered database:")
        if not ret_set:
            print("- None")
        else:
            for entry in ret_set:
                print(f"- {entry}")
        return ret_set

    # METHODS #

    def correlations(self, inp: str) -> Union[None, List[str]]:
        """Get/print available correlations for a given element or isotope.

        Correlation columns are the columns whose name starts with "rho".

        :param inp: Input isotope or element.

        :return: List of matching column names, or None if there is none.
        """
        iso = str(utl.Isotope(inp, allow_element=True))

        entries = [
            x for x in self.parent.db.columns if iso in x and x.startswith("rho")
        ]

        if not entries:
            print(f"No correlations for {iso} found.")
            return None

        print(f"Correlations containing {iso}:")
        for entry in entries:
            print(f"- {entry}")
        return entries

    def ratios(self, inp: str) -> Union[None, List[Tuple[str, bool]]]:
        """Get/print available ratios for a given element or isotope.

        Columns starting with "err" or "rho" are not considered. A column is
        flagged as a delta-value if its name starts with "d".

        :param inp: Input isotope or element.

        :return: A list of tuples, or None if no ratio is found. Each tuple
            consists of an available isotope ratio and a boolean value to
            indicate if this is a delta-value.
        """
        excl_startswith = ("err", "rho")

        iso = str(utl.Isotope(inp, allow_element=True))
        flt_hdr = [
            (x, x.startswith("d"))
            for x in self.parent.db.columns
            # `str.startswith` accepts a tuple of prefixes
            if iso in x and not x.startswith(excl_startswith) and "/" in x
        ]

        if not flt_hdr:
            print(f"No isotope ratios containing {iso} found.")
            return None

        print(f"Isotope ratios containing {iso}:")
        for entry in flt_hdr:
            print(f"- {entry[0]}, delta value: {entry[1]}")
        return flt_hdr

dbs property

Get/print what databases are currently in the selection.

number_of_grains property

Get/print how many presolar grains are in the currently filtered database.

Returns:

Type Description
int

Number of presolar grains.

pgd_types property

Get/print what PGD types of presolar grains are in the current database.

Returns:

Type Description
Set[str]

Set of all PGD grain types available.

__init__(parent)

Initialize the Info class.

Parameters:

Name Type Description Default
parent PresolarGrains

Parent class, must be of type PresolarGrains.

required

Raises:

Type Description
TypeError

Parent class is not of type PresolarGrains.

Source code in pgdtools/sub_tools/info.py
def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
    """Attach the Info helper to its parent.

    :param parent: Parent class, must be of type ``PresolarGrains``.

    :raises TypeError: Parent class is not of type ``PresolarGrains``.
    """
    parent_is_valid = isinstance(parent, pgdtools.PresolarGrains)
    if not parent_is_valid:
        raise TypeError("Parent class must be of type PresolarGrains.")

    self.parent = parent

correlations(inp)

Get/print available correlations for a given element or isotope.

Source code in pgdtools/sub_tools/info.py
def correlations(self, inp: str) -> Union[None, List[str]]:
    """List and print the correlation columns for an element or isotope.

    Correlation columns are the columns whose name starts with "rho".

    :param inp: Input isotope or element.

    :return: List of matching column names, or None if there is none.
    """
    iso = str(utl.Isotope(inp, allow_element=True))

    found = []
    for column in self.parent.db.columns:
        if iso in column and column.startswith("rho"):
            found.append(column)

    if len(found) > 0:
        print(f"Correlations containing {iso}:")
        for column in found:
            print(f"- {column}")
        return found

    print(f"No correlations for {iso} found.")
    return None

ratios(inp)

Get/print available ratios for a given element or isotope.

Parameters:

Name Type Description Default
inp str

Input isotope or element.

required

Returns:

Type Description
Union[None, List[Tuple[str, bool]]]

A list of tuples, or None if no ratio is found. Each tuple consists of an available isotope ratio and a boolean value to indicate if this is a delta-value.

Source code in pgdtools/sub_tools/info.py
def ratios(self, inp: str) -> Union[None, List[Tuple[str, bool]]]:
    """Get/print available ratios for a given element or isotope.

    Columns starting with "err" or "rho" are not considered. A column is
    flagged as a delta-value if its name starts with "d".

    :param inp: Input isotope or element.

    :return: A list of tuples, or None if no ratio is found. Each tuple
        consists of an available isotope ratio and a boolean value to
        indicate if this is a delta-value.
    """
    excl_startswith = ("err", "rho")

    iso = str(utl.Isotope(inp, allow_element=True))
    flt_hdr = [
        (x, x.startswith("d"))
        for x in self.parent.db.columns
        # `str.startswith` accepts a tuple of prefixes
        if iso in x and not x.startswith(excl_startswith) and "/" in x
    ]

    if not flt_hdr:
        print(f"No isotope ratios containing {iso} found.")
        return None

    print(f"Isotope ratios containing {iso}:")
    for entry in flt_hdr:
        print(f"- {entry[0]}, delta value: {entry[1]}")
    return flt_hdr

Sub tool to gather references for data sets and return them.

References

This class handles references for specific data sets.

Source code in pgdtools/sub_tools/references.py
class References:
    """This class handles references for specific data sets."""

    def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
        """Initialize the Reference class.

        :param parent: Parent class, must be of type ``PresolarGrains``.

        :raises TypeError: Parent class is not of type ``PresolarGrains``.
        """
        if not isinstance(parent, pgdtools.PresolarGrains):
            raise TypeError("Parent class must be of type PresolarGrains.")

        self.parent = parent

        self._reference_json = None
        self._get_reference_json()

    def __repr__(self) -> str:
        """Return a string representation of the class.

        In order to keep it pretty, this will return the following:
        - Reference key
        - Short reference
        - DOI (if available) in parentheses

        :return: String representation of the class, one reference per line.
        """
        lines = []
        # `self.dict` is rebuilt from the database index on every access, so it
        # is evaluated only once here.
        for key, value in self.dict.items():
            line = f"{key}: {value['Reference - short']}"
            if value["DOI"]:
                line += f" ({value['DOI']})"
            lines.append(line)

        return "\n".join(lines)

    def __eq__(self, other) -> bool:
        """Check if the references are equal.

        Note: This will only check if the set of references are equal and has nothing
        to do with the number of grains that are associated with each reference.

        :param other: Other reference set to compare against.

        :return: True if the references are equal, otherwise False.
            ``NotImplemented`` if `other` is not a ``References`` instance, which
            makes ``==`` evaluate to False instead of raising.
        """
        if not isinstance(other, References):
            return NotImplemented
        return self.dict == other.dict

    def __len__(self) -> int:
        """Return the number of individual references in the class.

        :return: Number of references.
        """
        return len(self.dict)

    def __iter__(self) -> iter:
        """Iterate over the key, value pairs.

        :return: Iterator for the reference keys and values.
        """
        return iter(self.dict.items())

    def __getitem__(self, key) -> dict:
        """Return the reference for the given item.

        :param key: Reference key.

        :return: Reference details.
        """
        return self.dict[key]

    @property
    def dict(self) -> dict:
        """Return a dictionary representation of the class.

        The keys are the reference IDs and the values are the full references, which
        contain further keys:
        - Number of grains
        - Reference - short
        - Reference - full
        - DOI
        - Comments

        :return: Dictionary representation of the class.
        """
        return {key: self._reference_json[key] for key in self._create_ref_keys_set}

    @property
    def doi(self) -> Set[str]:
        """Return a set of all DOIs for the references of the current database.

        If no DOI is available for a given reference, it will not be included in the
        set.

        :return: Set of DOIs.
        """
        # iterate the values of one snapshot instead of re-evaluating
        # `self.dict` for every key
        return {ref["DOI"] for ref in self.dict.values() if ref["DOI"]}

    @property
    def table_full(self) -> pd.DataFrame:
        """Return a full reference table for every individual grain in the database.

        The row indexes of the table are the PGD IDs. The following columns will be
        present:
        - Number of grains
        - Reference - short
        - Reference - full
        - DOI
        - Comments

        :return: Full reference table for every grain.
        """
        indexes = self.parent.db.index
        # the key list is derived from the same index, so both are aligned
        series = [
            pd.Series(self._reference_json[ref_id], name=pgd_id)
            for pgd_id, ref_id in zip(indexes, self._create_ref_keys_list)
        ]
        return pd.DataFrame(series)

    @property
    def table_set(self) -> pd.DataFrame:
        """Return a set of references for all grains in dataset in table format.

        Duplicate rows of the full table are dropped and the remaining rows are
        indexed by their reference key.

        :return: Reference table without duplicates.
        """
        table_set = self.table_full.drop_duplicates()
        return table_set.set_index([self._create_ref_keys(table_set.index)])

    @property
    def _create_ref_keys_list(self) -> List[str]:
        """Create the reference key as a list (in order).

        :return: List of all the reference IDs.
        """
        return self._create_ref_keys(self.parent.db.index)

    @property
    def _create_ref_keys_set(self) -> Set[str]:
        """Create the reference key as a set.

        :return: Set of all the reference IDs.
        """
        return set(self._create_ref_keys_list)

    def search(self, search_str: str) -> List[str]:
        """Search all references information (except for notes) for keywords.

        If you want to provide multiple keywords to search for, please provide
        them separated by a comma. For example: `"Name Firstname"` would search
        all references for "Name Firstname", while `"Name, Firstname"` would search
        for "Name" and "Firstname" separately. For example, a reference with information
        "Firstname Name" would in this case only match with the latter search.

        All searches are case-insensitive.

        :param search_str: Search string.

        :return: List of strings with all short references that match the search terms.
        """
        search_terms = [x.strip().lower() for x in search_str.split(",")]

        fields_to_add = ("Reference - short", "Reference - full", "DOI")

        ref_search_dict = {}
        for ref_key, ref_item in self.dict.items():
            # the reference key itself is searchable as well
            parts = [ref_key]
            parts.extend(f"{ref_item[add_key]}" for add_key in fields_to_add)
            # lower case to make the search case-insensitive
            ref_search_dict[ref_item["Reference - short"]] = " ".join(parts).lower()

        ret_list = sorted(
            key
            for key, item in ref_search_dict.items()
            if all(x in item for x in search_terms)
        )

        if not ret_list:
            print("No references found.")
        else:
            print("References found:")
            for entry in ret_list:
                print(f"- {entry}")
        return ret_list

    def _get_reference_json(self):
        """Load and store the reference JSON file."""
        # explicit encoding: do not depend on the platform default
        with open(db.LOCAL_REF_JSON, "r", encoding="utf-8") as file:
            self._reference_json = json.load(file)

    @staticmethod
    def _create_ref_keys(pgd_ids: List[str]) -> List[str]:
        """Create reference keys from a PGD IDs.

        The reference key is the PGD ID with its last "-"-separated part cut
        down to the first character of that part.

        :param pgd_ids: List of PGD IDs to create reference keys from.

        :return: List of reference keys, in the order of the input.
        """
        ref_keys = []
        for pgd_id in pgd_ids:
            name_parts = pgd_id.split("-")
            name_parts[-1] = name_parts[-1][0]
            ref_keys.append("-".join(name_parts))
        return ref_keys

dict property

Return a dictionary representation of the class.

The keys are the reference IDs and the values are the full references, which contain further keys: - Number of grains - Reference - short - Reference - full - DOI - Comments

Returns:

Type Description
dict

Dictionary representation of the class.

doi property

Return a set of all DOIs for the references of the current database.

If no DOI is available for a given reference, it will not be included in the set.

table_full property

Return a full reference table for every individual grain in the database.

The row indexes of the table are the PGD IDs. The following columns will be present: - Number of grains - Reference - short - Reference - full - DOI - Comments

Returns:

Type Description
DataFrame

Full reference table for every grain.

table_set property

Return a set of references for all grains in dataset in table format.

__eq__(other)

Check if the references are equal.

Note: This will only check if the set of references are equal and has nothing to do with the number of grains that are associated with each reference.

Parameters:

Name Type Description Default
other

Other reference set to compare against.

required

Returns:

Type Description
bool

True if the references are equal, otherwise False.

Source code in pgdtools/sub_tools/references.py
def __eq__(self, other) -> bool:
    """Check if the references are equal.

    Note: This will only check if the set of references are equal and has nothing
    to do with the number of grains that are associated with each reference.

    :param other: Other reference set to compare against.

    :return: True if the references are equal, otherwise False.
        ``NotImplemented`` if `other` is not of the same class, which makes
        ``==`` evaluate to False instead of raising.
    """
    if not isinstance(other, type(self)):
        return NotImplemented
    return self.dict == other.dict

__getitem__(key)

Return the reference for the given item.

Parameters:

Name Type Description Default
key

Reference key.

required

Returns:

Type Description
dict

Reference details.

Source code in pgdtools/sub_tools/references.py
def __getitem__(self, key) -> dict:
    """Look up the details of a single reference.

    :param key: Reference key.

    :return: Reference details.
    """
    all_references = self.dict
    return all_references[key]

__init__(parent)

Initialize the Reference class.

Parameters:

Name Type Description Default
parent PresolarGrains

Parent class, must be of type PresolarGrains.

required

Raises:

Type Description
TypeError

Parent class is not of type PresolarGrains.

Source code in pgdtools/sub_tools/references.py
def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
    """Attach the References helper to its parent and load the reference data.

    :param parent: Parent class, must be of type ``PresolarGrains``.

    :raises TypeError: Parent class is not of type ``PresolarGrains``.
    """
    parent_is_valid = isinstance(parent, pgdtools.PresolarGrains)
    if not parent_is_valid:
        raise TypeError("Parent class must be of type PresolarGrains.")

    self.parent = parent

    # placeholder, filled by the loader call below
    self._reference_json = None
    self._get_reference_json()

__iter__()

Iterate over the key, value pairs.

Returns:

Type Description
iter

Iterator for the reference keys and values.

Source code in pgdtools/sub_tools/references.py
def __iter__(self) -> iter:
    """Walk over all references as key, value pairs.

    :return: Iterator for the reference keys and values.
    """
    pairs = self.dict.items()
    return iter(pairs)

__len__()

Return the number of individual references in the class.

Returns:

Type Description
int

Number of references.

Source code in pgdtools/sub_tools/references.py
def __len__(self) -> int:
    """Count the individual references in the class.

    :return: Number of references.
    """
    all_references = self.dict
    return len(all_references)

__repr__()

Return a string representation of the class.

In order to keep it pretty, this will return the following: - Reference key - Short reference - DOI (if available) in parentheses

Returns:

Type Description
str

String representation of the class.

Source code in pgdtools/sub_tools/references.py
def __repr__(self) -> str:
    """Return a string representation of the class.

    In order to keep it pretty, this will return the following:
    - Reference key
    - Short reference
    - DOI (if available) in parentheses

    :return: String representation of the class, one reference per line.
    """
    lines = []
    # Evaluate `self.dict` only once instead of once per loop iteration.
    for key, value in self.dict.items():
        line = f"{key}: {value['Reference - short']}"
        if value["DOI"]:
            line += f" ({value['DOI']})"
        lines.append(line)

    return "\n".join(lines)

search(search_str)

Search all references information (except for notes) for keywords.

If you want to provide multiple keywords to search for, please provide them separated by a comma. For example: "Name Firstname" would search all references for "Name Firstname", while "Name, Firstname" would search for "Name" and "Firstname" separately. For example, a reference with information "Firstname Name" would in this case only match with the latter search.

All searches are case-insensitive.

Parameters:

Name Type Description Default
search_str str

Search string.

required

Returns:

Type Description
List[str]

List of strings with all short references that match the search terms.

Source code in pgdtools/sub_tools/references.py
def search(self, search_str: str) -> List[str]:
    """Find references whose information contains all given keywords.

    The reference key, the short reference, the full reference, and the DOI are
    searched; notes are not. Several keywords are given as one comma-separated
    string. For example: `"Name Firstname"` looks for the exact text
    "Name Firstname", while `"Name, Firstname"` looks for "Name" and "Firstname"
    independently, so a reference containing "Firstname Name" only matches the
    latter.

    All searches are case-insensitive. The result is printed as well.

    :param search_str: Search string.

    :return: List of strings with all short references that match the search terms.
    """
    wanted = [term.strip().lower() for term in search_str.split(",")]

    # short reference -> lower-case text that is searched
    haystacks = {}
    for ref_id, details in self.dict.items():
        short = details["Reference - short"]
        text = ref_id  # the reference key is searchable as well
        for field in ("Reference - short", "Reference - full", "DOI"):
            text += f" {details[field]}"
        haystacks[short] = text.lower()

    hits = sorted(
        short
        for short, text in haystacks.items()
        if all(term in text for term in wanted)
    )

    if hits:
        print("References found:")
        for short in hits:
            print(f"- {short}")
    else:
        print("No references found.")
    return hits

Sub tool to gather used techniques for data sets and return them.

Techniques

This class handles techniques for specific data sets.

Source code in pgdtools/sub_tools/techniques.py
class Techniques:
    """This class handles techniques for specific data sets.

    The entries of the ``Technique`` column of the parent's database are split
    into individual PGD technique keys. The details for every key are looked up
    in the techniques JSON file, which is loaded once on initialization.
    """

    def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
        """Initialize the Techniques class.

        :param parent: Parent class, must be of type ``PresolarGrains``.

        :raises TypeError: Parent class is not of type ``PresolarGrains``.
        """
        if not isinstance(parent, pgdtools.PresolarGrains):
            raise TypeError("Parent class must be of type PresolarGrains.")

        # list of separators for splitting techniques
        self._separators = ["&", "and/or"]

        self.parent = parent

        self._techniques_json = None
        self._get_techniques_json()

    def __repr__(self) -> str:
        """Return a string representation of the class.

        Simply print out all the PGD techniques, comma separated.

        :return: String representation of the class.
        """
        return ", ".join(self.dict.keys())

    def __eq__(self, other):
        """Check if the techniques are equal.

        Note: This will only check if the set of techniques are equal and has nothing
        to do with the number of grains that are associated with each technique.

        :param other: Other technique set to compare against.

        :return: True if the techniques are equal, otherwise False. If ``other``
            is not a technique set, ``NotImplemented`` is returned, such that
            ``==`` evaluates to False instead of raising an ``AttributeError``.
        """
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.dict == other.dict

    def __len__(self) -> int:
        """Return the number of individual techniques in the class.

        :return: Number of unique techniques.
        """
        return len(self.dict)

    def __iter__(self):
        """Iterate over the techniques key, value pairs.

        :return: Key, value pairs of the techniques.
        """
        return iter(self.dict.items())

    def __getitem__(self, key: str) -> dict:
        """Get a technique item based on the key.

        :param key: Technique key.

        :return: Technique details.
        """
        return self.dict[key]

    @property
    def dict(self) -> dict:
        """Return a dictionary representation of the techniques.

        The keys are the PGD Techniques and the values are the technique details:

        - Institution
        - Technique
        - Instrument
        - Reference
        - DOI

        :return: Dictionary of techniques.
        """
        return {key: self._techniques_json[key] for key in self._create_ref_keys_set}

    @property
    def table_full(self) -> pd.DataFrame:
        """Return a full techniques table for every individual grain in the database.

        The row indexes of the table are the PGD IDs. A grain that is associated
        with several techniques gets one row per technique, all of which share
        the same index. The following columns will be present:

        - PGD Technique
        - Institution
        - Technique
        - Instrument
        - Reference
        - DOI

        :return: Full techniques table for every grain.
        """
        series = []
        # the key lists are created in database order, so they pair up with the index
        for pgd_id, key_list in zip(self.parent.db.index, self._create_ref_keys_list):
            for key in key_list:
                row = {"PGD Technique": key}
                row.update(self._techniques_json[key])
                series.append(pd.Series(row, name=pgd_id))

        return pd.DataFrame(series)

    @property
    def table_set(self) -> pd.DataFrame:
        """Return a set of techniques for all grains in dataset in table format.

        :return: Table with one row per unique PGD technique. The row indexes are
            the PGD technique keys.
        """
        series = [
            pd.Series(self._techniques_json[key], name=key)
            for key in self._create_ref_keys_set
        ]
        return pd.DataFrame(series)

    @property
    def _create_ref_keys_list(self) -> List[List[str]]:
        """Create the techniques keys as a list (in database order).

        Every entry of the ``Technique`` column is split at the configured
        separators and the resulting parts are stripped of whitespace.

        :return: List with one list of technique keys per grain.
        """
        entries = self.parent.db["Technique"].to_list()
        # escape the separators so that they are always matched literally
        pattern = "|".join(re.escape(sep) for sep in self._separators)
        return [[part.strip() for part in re.split(pattern, entry)] for entry in entries]

    @property
    def _create_ref_keys_set(self) -> Set[str]:
        """Create the techniques keys as a set.

        :return: Set of all unique technique keys.
        """
        return set(itertools.chain.from_iterable(self._create_ref_keys_list))

    def _get_techniques_json(self):
        """Load and store the techniques JSON file."""
        # JSON is UTF-8 encoded; do not depend on the platform's default encoding
        with open(db.LOCAL_TECH_JSON, "r", encoding="utf-8") as file:
            self._techniques_json = json.load(file)

dict property

Return a dictionary representation of the techniques.

The keys are the PGD Techniques and the values are the technique details, which consist of the following entries: Institution, Technique, Instrument, Reference, and DOI.

Returns:

Type Description
dict

Dictionary of techniques.

table_full property

Return a full techniques table for every individual grain in the database.

The row indexes of the table are the PGD IDs. The following columns will be present: PGD Technique, Institution, Technique, Instrument, Reference, and DOI.

Returns:

Type Description
DataFrame

Full techniques table for every grain.

table_set property

Return a set of techniques for all grains in dataset in table format.

__eq__(other)

Check if the techniques are equal.

Note: This will only check if the set of techniques are equal and has nothing to do with the number of grains that are associated with each technique.

Parameters:

Name Type Description Default
other

Other technique set to compare against.

required

Returns:

Type Description

True if the techniques are equal, otherwise False.

Source code in pgdtools/sub_tools/techniques.py
def __eq__(self, other):
    """Check if the techniques are equal.

    Note: This will only check if the set of techniques are equal and has nothing
    to do with the number of grains that are associated with each technique.

    :param other: Other technique set to compare against.

    :return: True if the techniques are equal, otherwise False. If ``other``
        is not a technique set, ``NotImplemented`` is returned, such that
        ``==`` evaluates to False instead of raising an ``AttributeError``.
    """
    if not isinstance(other, type(self)):
        return NotImplemented
    return self.dict == other.dict

__getitem__(key)

Get a technique item based on the key.

Parameters:

Name Type Description Default
key str

Technique key.

required

Returns:

Type Description
dict

Technique details.

Source code in pgdtools/sub_tools/techniques.py
def __getitem__(self, key: str) -> dict:
    """Look up the details of a single technique.

    :param key: Technique key.

    :return: Technique details.
    """
    techniques = self.dict
    return techniques[key]

__init__(parent)

Initialize the Techniques class.

Parameters:

Name Type Description Default
parent PresolarGrains

Parent class, must be of type PresolarGrains.

required

Raises:

Type Description
TypeError

Parent class is not of type PresolarGrains.

Source code in pgdtools/sub_tools/techniques.py
def __init__(self, parent: "pgdtools.PresolarGrains") -> None:
    """Set up the techniques sub tool for a given parent.

    After the parent has been validated and stored, the techniques JSON file
    is loaded.

    :param parent: Parent class, must be of type ``PresolarGrains``.

    :raises TypeError: Parent class is not of type ``PresolarGrains``.
    """
    if not isinstance(parent, pgdtools.PresolarGrains):
        raise TypeError("Parent class must be of type PresolarGrains.")

    self.parent = parent

    # strings at which a combined technique entry is split into individual ones
    self._separators = ["&", "and/or"]

    # placeholder that is filled by the loader call right below
    self._techniques_json = None
    self._get_techniques_json()

__iter__()

Iterate over the techniques key, value pairs.

Returns:

Type Description

Key, value pairs of the techniques.

Source code in pgdtools/sub_tools/techniques.py
def __iter__(self):
    """Provide an iterator over the techniques.

    :return: Key, value pairs of the techniques.
    """
    pairs = self.dict.items()
    return iter(pairs)

__len__()

Return the number of individual techniques in the class.

Returns:

Type Description
int

Number of unique techniques.

Source code in pgdtools/sub_tools/techniques.py
def __len__(self) -> int:
    """Count the individual techniques in the class.

    :return: Number of unique techniques.
    """
    techniques = self.dict
    return len(techniques)

__repr__()

Return a string representation of the class.

Simply print out all the PGD techniques, comma separated.

Returns:

Type Description
str

String representation of the class.

Source code in pgdtools/sub_tools/techniques.py
def __repr__(self) -> str:
    """Represent the class as a string.

    The representation consists of all the PGD techniques, comma separated.

    :return: String representation of the class.
    """
    technique_names = list(self.dict)
    return ", ".join(technique_names)