"""This includes function for scoring models applied to a SpaCy corpus."""
from __future__ import annotations
from collections.abc import Iterable
from copy import copy
from functools import partial
from time import time # type: ignore
from typing import Callable
import pandas as pd
from spacy.language import Language
from spacy.scorer import Scorer
from spacy.tokens import Doc, Span
from spacy.training import Example
from spacy.training.augment import dont_augment
from spacy.training.corpus import Corpus
from ..utils import flatten_dict
def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]:
    """Yield the spans stored under ``attr`` on ``doc``, skipping MISC spans.

    Intended as a ``getter`` for span scoring so that entities labelled
    ``"MISC"`` are left out of the evaluation.

    Args:
        doc (Doc): The object to read the spans from.
        attr (str): Name of the attribute on ``doc`` that holds the spans
            (for example ``"ents"``).

    Yields:
        Span: Every span whose ``label_`` is not ``"MISC"``, in original order.
    """
    # Kept as a generator function so the attribute is only read once the
    # caller starts iterating.
    yield from (span for span in getattr(doc, attr) if span.label_ != "MISC")
def dep_getter(token, attr):  # noqa
    """Return the lower-cased string form of a token attribute.

    The value stored under ``attr`` is read from ``token`` and converted to
    text through the string store of the token's vocabulary.
    """
    raw_label = getattr(token, attr)
    string_store = token.vocab.strings
    return string_store.as_string(raw_label).lower()
def score(  # noqa
    corpus: Corpus,
    apply_fn: Callable[[Iterable[Example]], Iterable[Example]] | Language,
    score_fn: list[Callable[[Iterable[Example]], dict] | str] = [  # noqa
        "token",
        "pos",
        "ents",
        "dep",
    ],
    augmenters: list[Callable[[Language, Example], Iterable[Example]]] = [],  # noqa
    k: int = 1,
    nlp: Language | None = None,
    **kwargs,  # noqa
) -> pd.DataFrame:
    """Score a model's performance on a corpus, optionally with augmentation.

    Args:
        corpus (Corpus): A spaCy Corpus. It is shallow-copied before its
            ``augmenter`` attribute is replaced, so the passed object is not
            modified.
        apply_fn (Union[Callable, Language]): A wrapper function for the model
            you wish to score. It should take an iterable of spaCy Examples
            (Iterable[Example]) and return the tagged examples
            (Iterable[Example]). A spaCy pipeline (Language) can be provided
            as is. The returned iterable is materialised into a list, because
            the scorers iterate over it more than once.
        score_fn (list[Union[Callable[[Iterable[Example]], dict], str]], optional):
            Scoring functions, each taking the examples and returning a
            dictionary of performance scores. A single function or string is
            also accepted. Five strings are valid:
            "ents" for measuring the performance of entity spans (including
            an extra ``ents_excl_MISC`` entry computed without MISC spans),
            "pos" for measuring the performance of fine-grained (tag_acc) and
            coarse-grained (pos_acc) pos-tags,
            "token" for measuring the performance of tokenization,
            "dep" for measuring the performance of dependency parsing, and
            "nlp" for measuring the performance of all components in the
            specified nlp pipeline.
            Defaults to ["token", "pos", "ents", "dep"].
        augmenters (list[Callable[[Language, Example], Iterable[Example]]], optional):
            A spaCy style augmenter which should be applied to the corpus, or
            a list thereof. Defaults to [], indicating no augmentation.
        k (int, optional): Number of times it should run the augmentation and
            test the performance on the corpus. Defaults to 1.
        nlp (Optional[Language], optional): A spaCy processing pipeline used
            when calling the corpus to read its examples and by the "nlp"
            scorer. If None, an empty Danish pipeline is used. Defaults to
            None.
        **kwargs: Accepted but not used by this function.

    Returns:
        pandas.DataFrame: One row per run and augmenter. Columns are the
        flattened score names, plus ``wall_time`` (seconds spent applying the
        model) and ``k`` (the run index). The frames of the individual
        augmenters are concatenated in the order the augmenters were given.

    Raises:
        KeyError: If a string in ``score_fn`` is not one of the valid names.

    Example:
        >>> import dacy
        >>> from dacy.datasets import dane
        >>> from spacy.training.augment import create_lower_casing_augmenter
        >>> test = dane(splits=["test"])
        >>> nlp = dacy.load("da_dacy_small_tft-0.0.0")
        >>> scores = score(
        ...     test,
        ...     apply_fn=nlp,
        ...     augmenters=[create_lower_casing_augmenter(0.5)],
        ... )
    """
    # Accept a bare augmenter / scorer as well as a list of them. The default
    # lists are only ever rebound, never mutated in place.
    if callable(augmenters):
        augmenters = [augmenters]
    if not augmenters:
        augmenters = [dont_augment]
    if isinstance(score_fn, str) or callable(score_fn):
        score_fn = [score_fn]

    if isinstance(apply_fn, Language):
        pipeline = apply_fn

        def _apply_nlp(examples):  # noqa: ANN001
            # Re-run the pipeline on the raw text and pair each prediction
            # with its gold-standard reference document.
            text_gold_pairs = ((e.x.text, e.y) for e in examples)
            doc_tuples = pipeline.pipe(text_gold_pairs, as_tuples=True)
            return [Example(pred, gold) for pred, gold in doc_tuples]

        apply_fn = _apply_nlp

    if nlp is None:
        from spacy.lang.da import Danish

        nlp = Danish()

    scorer = Scorer(nlp)

    def ents_scorer(examples):  # noqa: ANN001
        scores = Scorer.score_spans(examples, attr="ents")
        scores_no_misc = Scorer.score_spans(
            examples,
            attr="ents",
            getter=no_misc_getter,
        )
        scores["ents_excl_MISC"] = {
            metric: scores_no_misc[metric]
            for metric in ["ents_p", "ents_r", "ents_f"]
        }
        return scores

    def pos_scorer(examples):  # noqa: ANN001
        scores = Scorer.score_token_attr(examples, attr="pos")
        scores.update(Scorer.score_token_attr(examples, attr="tag"))
        return scores

    def_scorers = {
        "ents": ents_scorer,
        "pos": pos_scorer,
        "token": Scorer.score_tokenization,
        "nlp": scorer.score,
        "dep": partial(
            Scorer.score_deps,
            attr="dep",
            getter=dep_getter,
            ignore_labels=("p", "punct"),
        ),
    }

    # Resolve scorer names once, up front, so that a misspelled name fails
    # before any (potentially slow) model application has been run.
    resolved_scorers = []
    for fn in score_fn:
        if isinstance(fn, str):
            try:
                fn = def_scorers[fn]  # noqa
            except KeyError:
                raise KeyError(
                    f"Unknown score_fn {fn!r}; "
                    f"valid names are {sorted(def_scorers)}",
                ) from None
        resolved_scorers.append(fn)

    def _score(augmenter):  # noqa: ANN001
        # Work on a shallow copy so the caller's corpus keeps its augmenter.
        corpus_ = copy(corpus)
        corpus_.augmenter = augmenter

        runs = []
        for _ in range(k):
            start = time()
            # Materialise inside the timed region: a lazy apply_fn does its
            # work here, and the scorers below need several passes.
            examples = list(apply_fn(corpus_(nlp)))
            run_scores = {"wall_time": time() - start}
            for fn in resolved_scorers:
                run_scores.update(fn(examples))
            runs.append(flatten_dict(run_scores))

        # Collapse the list of per-run dicts into a dict of columns. The
        # ordered union of keys is used so that a metric missing from some
        # run becomes None instead of being dropped, and k == 0 yields an
        # empty frame rather than an error.
        columns = dict.fromkeys(key for run in runs for key in run)
        table = {key: [run.get(key) for run in runs] for key in columns}
        table["k"] = list(range(k))
        return pd.DataFrame(table)

    frames = [_score(aug) for aug in augmenters]
    # A single frame is returned as is; several are concatenated in one go.
    return frames[0] if len(frames) == 1 else pd.concat(frames)