Source code for dacy.score.input_length

"""Contains functions for testing the performance of models on varying input
length."""
from typing import Callable, List, Union

import pandas as pd
from wasabi import msg

from ..datasets import dane
from .score import score


def n_sents_score(
    n_sents: Union[int, List[int]],  # type: ignore
    apply_fn: Callable,  # type: ignore
    dataset: str = "dane",
    split: str = "test",
    score_fn: List[Union[str, Callable]] = ["token", "pos", "ents", "dep"],  # noqa # type: ignore
    verbose: bool = True,
    **kwargs,  # noqa
) -> pd.DataFrame:
    """Score a model on examples made up of a given number of sentences.

    For every requested sentence count the dataset is loaded with that
    ``n_sents`` value, scored with ``score``, and the per-count results are
    stacked into a single dataframe (in the order the counts were given).

    Args:
        n_sents (Union[int, List[int]]): Number of sentences per example which
            the model should be scored on. A single int is treated as a
            one-element list.
        apply_fn (Callable): A wrapper function for the model you wish to
            score. The model should take in a spacy Example and output a
            tagged version of it.
        dataset (str, optional): Which dataset this should be applied to.
            The only supported option is "dane". Defaults to "dane".
        split (str, optional): Which splits of the dataset should be used.
            Possible options include "train", "dev", "test", "all".
            Defaults to "test".
        score_fn (List[Union[str, Callable]], optional): Scoring functions,
            each taking a list of examples and returning a dictionary of the
            form {"score_name": score}. Strings are also accepted: "ents" for
            measuring the performance of entity spans, "pos" for pos-tags,
            "token" for tokenization and "nlp" for all components in the
            specified nlp pipeline. Defaults to
            ["token", "pos", "ents", "dep"].
        verbose (bool, optional): Toggles the verbosity of the function.
            Defaults to True.
        kwargs (dict): Extra keyword arguments. These are forwarded to both
            the dataset loader and the score function.

    Returns:
        pandas.DataFrame: A dataframe containing the performance metrics for
        every requested sentence count. An empty dataframe is returned when
        ``n_sents`` is an empty list.

    Raises:
        KeyError: If ``dataset`` is not a supported dataset name.
    """
    sizes = [n_sents] if isinstance(n_sents, int) else list(n_sents)
    if not sizes:
        # Nothing to score; previously this path crashed with an
        # UnboundLocalError because the result was only bound inside the loop.
        return pd.DataFrame()

    dataset_fn = {"dane": dane}
    load_corpus = dataset_fn[dataset]

    frames = []
    for n in sizes:
        if verbose:
            msg.info(f"Calculating score using {n} sentences")
        # NOTE(review): the same kwargs go to both the loader and ``score``;
        # confirm both accept every key callers are expected to pass.
        corpus = load_corpus(splits=split, n_sents=n, **kwargs)  # type: ignore
        frames.append(
            score(corpus, apply_fn=apply_fn, score_fn=score_fn, **kwargs),  # type: ignore
        )

    # A single result is returned as-is (as before); several results are
    # concatenated once rather than re-copied on every iteration.
    return frames[0] if len(frames) == 1 else pd.concat(frames)