Source code for dacy.score.score

"""This includes function for scoring models applied to a SpaCy corpus."""

from __future__ import annotations

from collections.abc import Iterable
from copy import copy
from functools import partial
from time import time  # type: ignore
from typing import Callable

import pandas as pd
from spacy.language import Language
from spacy.scorer import Scorer
from spacy.tokens import Doc, Span
from spacy.training import Example
from spacy.training.augment import dont_augment
from spacy.training.corpus import Corpus

from ..utils import flatten_dict


def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]:  # type: ignore
    """Yield the spans stored under ``attr`` on ``doc``, skipping MISC spans.

    A utility getter for scoring entities without including MISC.

    Args:
        doc (Doc): a SpaCy Doc
        attr (str): name of the Doc attribute holding the spans to be
            extracted (e.g. "ents").

    Yields:
        Span: every span whose ``label_`` is not "MISC", in original order.
    """
    for span in getattr(doc, attr):
        if span.label_ != "MISC":
            yield span


def dep_getter(token, attr):  # noqa
    """Return the value of ``attr`` on ``token`` as a lower-cased string.

    The raw attribute value is converted to its string form through
    ``token.vocab.strings.as_string`` before being lower-cased.
    """
    raw_value = getattr(token, attr)
    return token.vocab.strings.as_string(raw_value).lower()


def score(  # noqa
    corpus: Corpus,
    apply_fn: Callable[[Iterable[Example]], Iterable[Example]] | Language,
    score_fn: list[Callable[[Iterable[Example]], dict] | str] = [  # noqa
        "token",
        "pos",
        "ents",
        "dep",
    ],
    augmenters: list[Callable[[Language, Example], Iterable[Example]]] = [],  # noqa
    k: int = 1,
    nlp: Language | None = None,
    **kwargs,  # noqa
) -> pd.DataFrame:
    """Score a model's performance on a given corpus, optionally with
    augmentations applied to it.

    Args:
        corpus (Corpus): A spacy Corpus
        apply_fn (Union[Callable, Language]): A wrapper function for the model you wish
            to score. The model should take in a list of spacy Examples
            (Iterable[Example]) and output a tagged version of it (Iterable[Example]). A
            SpaCy pipeline (Language) can be provided as is.
        score_fn (list[Union[Callable[[Iterable[Example]], dict], str]], optional): A
            list of scoring functions, each of which takes in a list of examples
            (Iterable[Example]) and returns a dictionary of performance scores. A
            single function or string is also accepted. Five strings are valid:
            "ents" for measuring the performance of entity spans. "pos" for
            measuring the performance of fine-grained (tag_acc), and coarse-grained
            (pos_acc) pos-tags. "token" for measuring the performance of tokenization.
            "dep" for measuring the performance of dependency parsing. "nlp" for
            measuring the performance of all components in the pipeline given by the
            ``nlp`` argument. Defaults to ["token", "pos", "ents", "dep"].
        augmenters (list[Callable[[Language, Example], Iterable[Example]]], optional): A
            spaCy style augmenter which should be applied to the corpus or a list
            thereof. Defaults to [], indicating no augmenters.
        k (int, optional): Number of times it should run the augmentation and test the
            performance on the corpus. Defaults to 1.
        nlp (Optional[Language], optional): A spacy processing pipeline used for
            loading (calling) the corpus. If None it will use an empty Danish
            pipeline. Defaults to None.
        **kwargs: Accepted for backward compatibility; not used by this function.

    Returns:
        pandas.DataFrame: returns a pandas dataframe containing the performance
            metrics, with one row per augmenter and run. The column "k" holds the
            run index and "wall_time" the seconds spent loading the corpus and
            applying the model.

    Raises:
        KeyError: if a string in ``score_fn`` is not one of the valid scorer names.

    Example:
        >>> import dacy
        >>> from spacy.training.augment import create_lower_casing_augmenter
        >>> from dacy.datasets import dane
        >>> test = dane(splits=["test"])
        >>> nlp = dacy.load("da_dacy_small_tft-0.0.0")
        >>> scores = score(test, augmenters=[create_lower_casing_augmenter(0.5)],
        ...                apply_fn=nlp)
    """
    # Accept a single augmenter / scorer as well as a list of them. The default
    # lists are only ever rebound, never mutated.
    if callable(augmenters):
        augmenters = [augmenters]
    if not augmenters:
        augmenters = [dont_augment]
    if isinstance(score_fn, str) or callable(score_fn):
        score_fn = [score_fn]

    if isinstance(apply_fn, Language):
        pipeline = apply_fn

        def apply(examples):  # noqa: ANN001
            # Run the pipeline on the raw text of each example and pair every
            # resulting Doc with the example's ``y`` Doc.
            text_ref_pairs = ((eg.x.text, eg.y) for eg in examples)
            return [
                Example(pred, ref)
                for pred, ref in pipeline.pipe(text_ref_pairs, as_tuples=True)
            ]

    else:
        apply = apply_fn

    if nlp is None:
        from spacy.lang.da import Danish

        nlp = Danish()

    scorer = Scorer(nlp)

    def ents_scorer(examples):  # noqa: ANN001
        # Entity scores, plus precision/recall/F1 recomputed without MISC spans.
        scores = Scorer.score_spans(examples, attr="ents")
        scores_no_misc = Scorer.score_spans(
            examples,
            attr="ents",
            getter=no_misc_getter,
        )
        scores["ents_excl_MISC"] = {
            metric: scores_no_misc[metric] for metric in ("ents_p", "ents_r", "ents_f")
        }
        return scores

    def pos_scorer(examples):  # noqa: ANN001
        # Scores for the "pos" attribute merged with those for the "tag" attribute.
        scores = Scorer.score_token_attr(examples, attr="pos")
        scores.update(Scorer.score_token_attr(examples, attr="tag"))
        return scores

    def_scorers = {
        "ents": ents_scorer,
        "pos": pos_scorer,
        "token": Scorer.score_tokenization,
        "nlp": scorer.score,
        "dep": partial(
            Scorer.score_deps,
            attr="dep",
            getter=dep_getter,
            ignore_labels=("p", "punct"),
        ),
    }

    def score_augmenter(augmenter):  # noqa: ANN001
        # Shallow copy so the augmenter is not set on the caller's corpus object.
        corpus_ = copy(corpus)
        corpus_.augmenter = augmenter
        runs = []
        for _ in range(k):
            start = time()
            # Materialise the output: a lazy iterable would be exhausted by the
            # first scorer and would leave the model's work out of the timing.
            examples = list(apply(corpus_(nlp)))
            run_scores = {"wall_time": time() - start}
            for fn in score_fn:
                scorer_fn = def_scorers[fn] if isinstance(fn, str) else fn
                run_scores.update(scorer_fn(examples))
            runs.append(flatten_dict(run_scores))

        # Collapse the list of per-run dicts into a dict of columns. The ordered
        # union of keys is used so that k == 0 and runs with differing keys work.
        columns = dict.fromkeys(key for run in runs for key in run)
        table = {key: [run.get(key) for run in runs] for key in columns}
        table["k"] = list(range(k))
        return pd.DataFrame(table)

    frames = [score_augmenter(aug) for aug in augmenters]
    return frames[0] if len(frames) == 1 else pd.concat(frames)