Source code for antu.io.instance

from typing import Dict, MutableMapping, Mapping, TypeVar, List, Set

from . import Vocabulary
from .fields import Field

Indices = TypeVar("Indices", List[int], List[List[int]])


[docs]class Instance(Mapping[str, Field]): """ An ``Instance`` is a collection (list) of multiple data fields. Parameters ---------- fields : ``List[Field]``, optional (default=``None``) A list of multiple data fields. """ def __init__(self, fields: List[Field] = None) -> None: self.fields = fields self._fields_dict = {} for field in fields: self._fields_dict[field.name] = field self.indexed = False # Indicates whether the instance has been indexed def __getitem__(self, key: str) -> Field: return self._fields_dict[key] def __iter__(self): return iter(self.fields) def __len__(self) -> int: return len(self.fields)
[docs] def add_field(self, field: Field) -> None: """ Add the field to the existing ``Instance``. Parameters ---------- field : ``Field`` Which field needs to be added. """ self.fields.append(field) if self.indexed: field.index(vocab)
[docs] def count_vocab_items(self, counter: Dict[str, Dict[str, int]]) -> None: """ Increments counts in the given ``counter`` for all of the vocabulary items in all of the ``Fields`` in this ``Instance``. Parameters ---------- counter : ``Dict[str, Dict[str, int]]`` We count the number of strings if the string needs to be counted to some counters. """ for field in self.fields: field.count_vocab_items(counter)
[docs] def index_fields(self, vocab: Vocabulary) -> Dict[str, Dict[str, Indices]]: """ Indexes all fields in this ``Instance`` using the provided ``Vocabulary``. This `mutates` the current object, it does not return a new ``Instance``. A ``DataIterator`` will call this on each pass through a dataset; we use the ``indexed`` flag to make sure that indexing only happens once. This means that if for some reason you modify your vocabulary after you've indexed your instances, you might get unexpected behavior. Parameters ---------- vocab : ``Vocabulary`` ``vocab`` is used to get the index of each item. Returns ------- res : ``Dict[str, Dict[str, Indices]]`` Returns the Indices corresponding to the instance. The first key is field name and the second key is the vocabulary name. """ if not self.indexed: self.indexed = True for field in self.fields: field.index(vocab) res = {} for field in self.fields: res[field.name] = field.indexes return res
[docs] def dynamic_index_fields(self, vocab: Vocabulary, dynamic_fields: Set[str]) -> Dict[str, Dict[str, Indices]]: """ Indexes all fields in this ``Instance`` using the provided ``Vocabulary``. This `mutates` the current object, it does not return a new ``Instance``. A ``DataIterator`` will call this on each pass through a dataset; we use the ``indexed`` flag to make sure that indexing only happens once. This means that if for some reason you modify your vocabulary after you've indexed your instances, you might get unexpected behavior. Parameters ---------- vocab : ``Vocabulary`` ``vocab`` is used to get the index of each item. Returns ------- res : ``Dict[str, Dict[str, Indices]]`` Returns the Indices corresponding to the instance. The first key is field name and the second key is the vocabulary name. """ if not self.indexed: self.indexed = True for field in self.fields: if field.name not in dynamic_fields: field.index(vocab) res = {} for field in self.fields: if field.name in dynamic_fields: field.index(vocab) res[field.name] = field.indexes return res