Source code for dacy.score.score

"""This includes function for scoring models applied to a SpaCy corpus."""
from __future__ import annotations

from copy import copy
from functools import partial
from time import time  # type: ignore
from typing import Callable, Iterable

import pandas as pd
from spacy.language import Language
from spacy.scorer import Scorer
from spacy.tokens import Doc, Span
from spacy.training import Example
from spacy.training.augment import dont_augment
from spacy.training.corpus import Corpus

from ..utils import flatten_dict


[docs]def no_misc_getter(doc: Doc, attr: str) -> Iterable[Span]: # type: ignore """A utility getter for scoring entities without including MISC. Args: doc (Doc): a SpaCy Doc attr (str): attribute to be extracted Returns: Iterable[Span] """ spans = getattr(doc, attr) # type: ignore for span in spans: if span.label_ == "MISC": continue yield span
def dep_getter(token, attr): # noqa dep = getattr(token, attr) # type: ignore dep = token.vocab.strings.as_string(dep).lower() return dep
[docs]def score( # noqa corpus: Corpus, apply_fn: Callable[[Iterable[Example], list[Example]]] | Language, # type: ignore score_fn: list[Callable[[Iterable[Example]], dict] | str] = [ # noqa "token", "pos", "ents", "dep", ], augmenters: list[Callable[[Language, Example], Iterable[Example]]] = [], # noqa k: int = 1, nlp: Language | None = None, **kwargs, # noqa ) -> pd.DataFrame: """scores a models performance on a given corpus with potentially augmentations applied to it. Args: corpus (Corpus): A spacy Corpus apply_fn (Union[Callable, Language]): A wrapper function for the model you wish to score. The model should take in a list of spacy Examples (Iterable[Example]) and output a tagged version of it (Iterable[Example]). A SpaCy pipeline (Language) can be provided as is. score_fn (List[Union[Callable[[Iterable[Example]], dict], str]], optional): A scoring function which takes in a list of examples (Iterable[Example]) and return a dictionary of performance scores. Four potiential strings are valid. "ents" for measuring the performance of entity spans. "pos" for measuring the performance of fine-grained (tag_acc), and coarse-grained (pos_acc) pos-tags. "token" for measuring the performance of tokenization. "dep" for measuring the performance of dependency parsing. "nlp" for measuring the performance of all components in the specified nlp pipeline. Defaults to ["token", "pos", "ents", "dep"]. augmenters (List[Callable[[Language, Example], Iterable[Example]]], optional): A spaCy style augmenters which should be applied to the corpus or a list thereof. defaults to [], indicating no augmenters. k (int, optional): Number of times it should run the augmentation and test the performance on the corpus. Defaults to 1. nlp (Optional[Language], optional): A spacy processing pipeline. If None it will use an empty Danish pipeline. Defaults to None. Used for loading the calling the corpus. Returns: pandas.DataFrame: returns a pandas dataframe containing the performance metrics. Example: >>> from spacy.training.augment import create_lower_casing_augmenter >>> from dacy.datasets import dane >>> test = dane(splits=["test") >>> nlp = dacy.load("da_dacy_small_tft-0.0.0") >>> scores = score(test, augmenter=[create_lower_casing_augmenter(0.5)], >>> apply_fn = nlp) """ if callable(augmenters): augmenters = [augmenters] if len(augmenters) == 0: augmenters = [dont_augment] def __apply_nlp(examples): # noqa: ANN001 examples = ((e.x.text, e.y) for e in examples) doc_tuples = nlp_.pipe(examples, as_tuples=True) return [Example(x, y) for x, y in doc_tuples] if isinstance(apply_fn, Language): nlp_ = apply_fn apply_fn = __apply_nlp # type: ignore if nlp is None: from spacy.lang.da import Danish nlp = Danish() scorer = Scorer(nlp) def ents_scorer(examples): # noqa: ANN001 scores = Scorer.score_spans(examples, attr="ents") scores_no_misc = Scorer.score_spans( examples, attr="ents", getter=no_misc_getter, ) scores["ents_excl_MISC"] = { k: scores_no_misc[k] for k in ["ents_p", "ents_r", "ents_f"] } return scores def pos_scorer(examples): # noqa: ANN001 scores = Scorer.score_token_attr(examples, attr="pos") scores_ = Scorer.score_token_attr(examples, attr="tag") for k in scores_: scores[k] = scores_[k] return scores def_scorers = { "ents": ents_scorer, "pos": pos_scorer, "token": Scorer.score_tokenization, "nlp": scorer.score, "dep": partial( Scorer.score_deps, attr="dep", getter=dep_getter, ignore_labels=("p", "punct"), ), } def __score(augmenter): # noqa: ANN001 corpus_ = copy(corpus) corpus_.augmenter = augmenter scores_ls = [] for _i in range(k): # type: ignore s = time() examples = apply_fn(corpus_(nlp)) # type: ignore speed = time() - s scores = {"wall_time": speed} for fn in score_fn: if isinstance(fn, str): fn = def_scorers[fn] # noqa scores.update(fn(examples)) # type: ignore scores = flatten_dict(scores) scores_ls.append(scores) # and collapse list to dict for key in scores: # type: ignore scores[key] = [s[key] if key in s else None for s in scores_ls] # type: ignore scores["k"] = list(range(k)) # type: ignore return pd.DataFrame(scores) # type: ignore for i, aug in enumerate(augmenters): scores_ = __score(aug) scores = pd.concat([scores, scores_]) if i != 0 else scores_ # type: ignore # noqa return scores # type: ignore