"""This includes function for scoring models applied to a SpaCy corpus."""
from __future__ import annotations
from copy import copy
from functools import partial
from time import time # type: ignore
from typing import Callable, Iterable
import pandas as pd
from spacy.language import Language
from spacy.scorer import Scorer
from spacy.tokens import Doc, Span
from spacy.training import Example
from spacy.training.augment import dont_augment
from spacy.training.corpus import Corpus
from ..utils import flatten_dict
def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]:  # type: ignore
    """A utility getter for scoring entities without including MISC.

    Args:
        doc (Doc): a SpaCy Doc
        attr (str): attribute to be extracted

    Yields:
        Span: every span found in ``getattr(doc, attr)`` whose ``label_`` is
            not "MISC", in the original order.
    """
    for span in getattr(doc, attr):  # type: ignore
        # MISC spans are exactly the ones this getter exists to leave out.
        if span.label_ != "MISC":
            yield span
def dep_getter(token, attr):  # noqa
    """Return the value stored under ``attr`` on ``token`` as lower-cased text.

    The raw attribute is read with ``getattr`` and turned into a string through
    ``token.vocab.strings.as_string`` before being lower-cased.
    """
    raw_label = getattr(token, attr)  # type: ignore
    return token.vocab.strings.as_string(raw_label).lower()
def score(  # noqa
    corpus: Corpus,
    apply_fn: Callable[[Iterable[Example]], Iterable[Example]] | Language,  # type: ignore
    score_fn: list[Callable[[Iterable[Example]], dict] | str] = [  # noqa
        "token",
        "pos",
        "ents",
        "dep",
    ],
    augmenters: list[Callable[[Language, Example], Iterable[Example]]] = [],  # noqa
    k: int = 1,
    nlp: Language | None = None,
    **kwargs,  # noqa
) -> pd.DataFrame:
    """Score a model's performance on a given corpus, optionally with
    augmentations applied to it.

    Args:
        corpus (Corpus): A spacy Corpus.
        apply_fn (Union[Callable, Language]): A wrapper function for the model you
            wish to score. It should take in spacy Examples (Iterable[Example])
            and output a tagged version of them (Iterable[Example]). A SpaCy
            pipeline (Language) can be provided as is.
        score_fn (List[Union[Callable[[Iterable[Example]], dict], str]], optional):
            Scoring functions which take in a list of examples and return a
            dictionary of performance scores. Five strings are also valid:
            "ents" for measuring the performance of entity spans, "pos" for
            measuring the performance of fine-grained (tag_acc) and
            coarse-grained (pos_acc) pos-tags, "token" for measuring the
            performance of tokenization, "dep" for measuring the performance of
            dependency parsing, and "nlp" for measuring the performance of all
            components in the specified nlp pipeline. A single string or
            callable is accepted and treated as a one-element list.
            Defaults to ["token", "pos", "ents", "dep"].
        augmenters (List[Callable[[Language, Example], Iterable[Example]]], optional):
            A spaCy style augmenter which should be applied to the corpus, or a
            list thereof. Defaults to [], indicating no augmenters.
        k (int, optional): Number of times it should run the augmentation and
            test the performance on the corpus. Must be at least 1.
            Defaults to 1.
        nlp (Optional[Language], optional): A spacy processing pipeline used when
            calling the corpus and for the "nlp" scorer. If None, an empty
            Danish pipeline is used. Defaults to None.
        **kwargs: Accepted but not used by this function.

    Returns:
        pandas.DataFrame: a dataframe containing the performance metrics, with
            one row per run (column "k") for each augmenter.

    Raises:
        ValueError: If ``k`` is smaller than 1.
        KeyError: If a string in ``score_fn`` is not one of the valid names.

    Example:
        >>> import dacy
        >>> from dacy.datasets import dane
        >>> from spacy.training.augment import create_lower_casing_augmenter
        >>> test = dane(splits=["test"])
        >>> nlp = dacy.load("da_dacy_small_tft-0.0.0")
        >>> scores = score(
        ...     test,
        ...     augmenters=[create_lower_casing_augmenter(0.5)],
        ...     apply_fn=nlp,
        ... )
    """
    if k < 1:
        # With no runs there is nothing to tabulate; fail with a clear message.
        raise ValueError(f"k must be at least 1, got {k}")

    # Accept a bare augmenter / scorer as shorthand for a one-element list.
    if callable(augmenters):
        augmenters = [augmenters]
    if not augmenters:
        augmenters = [dont_augment]
    if isinstance(score_fn, str) or callable(score_fn):
        score_fn = [score_fn]

    if isinstance(apply_fn, Language):
        pipeline = apply_fn

        def _apply_pipeline(examples):  # noqa: ANN001
            # Re-run the pipeline on the raw text and pair each prediction
            # with the reference doc it came from.
            text_and_gold = ((e.x.text, e.y) for e in examples)
            return [
                Example(pred, gold)
                for pred, gold in pipeline.pipe(text_and_gold, as_tuples=True)
            ]

        apply_fn = _apply_pipeline  # type: ignore

    if nlp is None:
        from spacy.lang.da import Danish

        nlp = Danish()

    scorer = Scorer(nlp)

    def ents_scorer(examples):  # noqa: ANN001
        scores = Scorer.score_spans(examples, attr="ents")
        # Second pass with MISC spans filtered out on both sides.
        scores_no_misc = Scorer.score_spans(
            examples,
            attr="ents",
            getter=no_misc_getter,
        )
        scores["ents_excl_MISC"] = {
            metric: scores_no_misc[metric]
            for metric in ["ents_p", "ents_r", "ents_f"]
        }
        return scores

    def pos_scorer(examples):  # noqa: ANN001
        scores = Scorer.score_token_attr(examples, attr="pos")
        scores.update(Scorer.score_token_attr(examples, attr="tag"))
        return scores

    def_scorers = {
        "ents": ents_scorer,
        "pos": pos_scorer,
        "token": Scorer.score_tokenization,
        "nlp": scorer.score,
        "dep": partial(
            Scorer.score_deps,
            attr="dep",
            getter=dep_getter,
            ignore_labels=("p", "punct"),
        ),
    }

    # Resolve named scorers once, so an unknown name fails before any model
    # is run instead of in the middle of the first scoring pass.
    scoring_fns = [def_scorers[fn] if isinstance(fn, str) else fn for fn in score_fn]

    def _score_with(augmenter):  # noqa: ANN001
        # Shallow copy so setting the augmenter does not alter the caller's corpus.
        corpus_ = copy(corpus)
        corpus_.augmenter = augmenter

        runs = []
        for _ in range(k):
            start = time()
            # Materialise inside the timed region: a lazy apply_fn would
            # otherwise be under-timed and exhausted by the first scorer.
            examples = list(apply_fn(corpus_(nlp)))  # type: ignore
            run_scores = {"wall_time": time() - start}
            for fn in scoring_fns:
                run_scores.update(fn(examples))  # type: ignore
            runs.append(flatten_dict(run_scores))

        # Collapse the list of per-run dicts into one dict of columns; a metric
        # missing from a run is recorded as None for that run.
        column_names = dict.fromkeys(key for run in runs for key in run)
        columns = {key: [run.get(key) for run in runs] for key in column_names}
        columns["k"] = list(range(k))
        return pd.DataFrame(columns)

    return pd.concat([_score_with(aug) for aug in augmenters])