Source code for dacy.score.score

"""This includes function for scoring models applied to a SpaCy corpus."""

from __future__ import annotations

from collections.abc import Iterable
from copy import copy
from functools import partial
from time import time  # type: ignore
from typing import Callable

import pandas as pd
from spacy.language import Language
from spacy.scorer import Scorer
from spacy.tokens import Doc, Span
from spacy.training import Example
from spacy.training.augment import dont_augment
from spacy.training.corpus import Corpus

from ..utils import flatten_dict


[docs]def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]: # type: ignore """A utility getter for scoring entities without including MISC. Args: doc (Doc): a SpaCy Doc attr (str): attribute to be extracted Returns: Iterable[Span] """ spans = getattr(doc, attr) # type: ignore for span in spans: if span.label_ == "MISC": continue yield span
def dep_getter(token, attr): # noqa dep = getattr(token, attr) # type: ignore dep = token.vocab.strings.as_string(dep).lower() return dep
[docs]def score( # noqa corpus: Corpus, apply_fn: Callable[[Iterable[Example], list[Example]]] | Language, # type: ignore score_fn: list[Callable[[Iterable[Example]], dict] | str] = [ # noqa "token", "pos", "ents", "dep", ], augmenters: list[Callable[[Language, Example], Iterable[Example]]] = [], # noqa k: int = 1, nlp: Language | None = None, **kwargs, # noqa ) -> pd.DataFrame: """scores a models performance on a given corpus with potentially augmentations applied to it. Args: corpus (Corpus): A spacy Corpus apply_fn (Union[Callable, Language]): A wrapper function for the model you wish to score. The model should take in a list of spacy Examples (Iterable[Example]) and output a tagged version of it (Iterable[Example]). A SpaCy pipeline (Language) can be provided as is. score_fn (list[Union[Callable[[Iterable[Example]], dict], str]], optional): A scoring function which takes in a list of examples (Iterable[Example]) and return a dictionary of performance scores. Four potiential strings are valid. "ents" for measuring the performance of entity spans. "pos" for measuring the performance of fine-grained (tag_acc), and coarse-grained (pos_acc) pos-tags. "token" for measuring the performance of tokenization. "dep" for measuring the performance of dependency parsing. "nlp" for measuring the performance of all components in the specified nlp pipeline. Defaults to ["token", "pos", "ents", "dep"]. augmenters (list[Callable[[Language, Example], Iterable[Example]]], optional): A spaCy style augmenters which should be applied to the corpus or a list thereof. defaults to [], indicating no augmenters. k (int, optional): Number of times it should run the augmentation and test the performance on the corpus. Defaults to 1. nlp (Optional[Language], optional): A spacy processing pipeline. If None it will use an empty Danish pipeline. Defaults to None. Used for loading the calling the corpus. Returns: pandas.DataFrame: returns a pandas dataframe containing the performance metrics. Example: >>> from spacy.training.augment import create_lower_casing_augmenter >>> from dacy.datasets import dane >>> test = dane(splits=["test") >>> nlp = dacy.load("da_dacy_small_tft-0.0.0") >>> scores = score(test, augmenter=[create_lower_casing_augmenter(0.5)], >>> apply_fn = nlp) """ if callable(augmenters): augmenters = [augmenters] if len(augmenters) == 0: augmenters = [dont_augment] def __apply_nlp(examples): # noqa: ANN001 examples = ((e.x.text, e.y) for e in examples) doc_tuples = nlp_.pipe(examples, as_tuples=True) return [Example(x, y) for x, y in doc_tuples] if isinstance(apply_fn, Language): nlp_ = apply_fn apply_fn = __apply_nlp # type: ignore if nlp is None: from spacy.lang.da import Danish nlp = Danish() scorer = Scorer(nlp) def ents_scorer(examples): # noqa: ANN001 scores = Scorer.score_spans(examples, attr="ents") scores_no_misc = Scorer.score_spans( examples, attr="ents", getter=no_misc_getter, ) scores["ents_excl_MISC"] = { k: scores_no_misc[k] for k in ["ents_p", "ents_r", "ents_f"] } return scores def pos_scorer(examples): # noqa: ANN001 scores = Scorer.score_token_attr(examples, attr="pos") scores_ = Scorer.score_token_attr(examples, attr="tag") for k in scores_: scores[k] = scores_[k] return scores def_scorers = { "ents": ents_scorer, "pos": pos_scorer, "token": Scorer.score_tokenization, "nlp": scorer.score, "dep": partial( Scorer.score_deps, attr="dep", getter=dep_getter, ignore_labels=("p", "punct"), ), } def __score(augmenter): # noqa: ANN001 corpus_ = copy(corpus) corpus_.augmenter = augmenter scores_ls = [] for _i in range(k): # type: ignore s = time() examples = apply_fn(corpus_(nlp)) # type: ignore speed = time() - s scores = {"wall_time": speed} for fn in score_fn: if isinstance(fn, str): fn = def_scorers[fn] # noqa scores.update(fn(examples)) # type: ignore scores = flatten_dict(scores) scores_ls.append(scores) # and collapse list to dict for key in scores: # type: ignore scores[key] = [s.get(key, None) for s in scores_ls] # type: ignore scores["k"] = list(range(k)) # type: ignore return pd.DataFrame(scores) # type: ignore for i, aug in enumerate(augmenters): scores_ = __score(aug) scores = pd.concat([scores, scores_]) if i != 0 else scores_ # type: ignore # noqa return scores # type: ignore