Encoders
array_to_string(sequence, _index_encoder, _onehot_encoder)
Convert array-like sequence representations to a string.
:raises IndexError: if the alphabet of the sequence is a superset of the index encoder and one-hot encoder object.
Source code in seqlike/encoders.py
def array_to_string(sequence: Union[list, np.ndarray], _index_encoder, _onehot_encoder) -> str:
    """Decode an array-like sequence representation into a single string.

    Decoding is delegated to ``array_to_symbols``; the symbols it returns are
    concatenated with no separator.

    :param sequence: list or array holding the encoded sequence
    :param _index_encoder: encoder forwarded unchanged to ``array_to_symbols``
    :param _onehot_encoder: encoder forwarded unchanged to ``array_to_symbols``
    :returns: the decoded symbols joined into one string
    :raises IndexError: if the alphabet of the sequence is a superset
        of the index encoder and one-hot encoder object
        (propagated from ``array_to_symbols``).
    """
    symbols = array_to_symbols(sequence, _index_encoder, _onehot_encoder)
    return "".join(symbols)
array_to_symbols(sequence, _index_encoder, _onehot_encoder)
Convert array-like sequence representations to a flat array of symbols.
:raises IndexError: if the alphabet of the sequence is a superset of the index encoder and one-hot encoder object.
Source code in seqlike/encoders.py
def array_to_symbols(sequence: Union[list, np.ndarray], _index_encoder, _onehot_encoder) -> np.ndarray:
    """Convert array-like sequence representations to a flat array of symbols.

    The input is cast to a float array and decoded according to its rank:

    * 1-D input is reshaped to a single column and decoded with
      ``_index_encoder.inverse_transform``.
    * 2-D input is decoded with ``_onehot_encoder.inverse_transform``.
    * Input of any other rank is returned as the float array, undecoded.

    :param sequence: list or array holding the encoded sequence
    :param _index_encoder: object exposing ``inverse_transform``; used for 1-D input
    :param _onehot_encoder: object exposing ``inverse_transform``; used for 2-D input
    :returns: flattened array of decoded symbols
    :raises IndexError: if the index encoder hits an encoding value it cannot
        map to a symbol, i.e. the alphabet of the sequence is a superset of
        the encoder's alphabet. The underlying error is attached as the cause.
    """
    sequence = np.asarray(sequence, dtype=float)
    if sequence.ndim == 1:
        try:
            # The encoder is handed a column vector: one row per position.
            sequence = _index_encoder.inverse_transform(sequence.reshape(-1, 1)).flatten()
        except IndexError as err:
            # Chain the original error so the offending lookup stays visible
            # in the traceback.
            raise IndexError(
                "The encoder encountered a bad encoding value. "
                "Ensure that you're using an alphabet "
                "which contains all needed symbols."
            ) from err
    elif sequence.ndim == 2:
        sequence = _onehot_encoder.inverse_transform(sequence).flatten()
    # NOTE: We do not need to check for other dim sizes
    # because we assume that validate_sequence will take care of it.
    return sequence
index_encoder_from_alphabet(alphabet)
Return an OrdinalEncoder from the tokens in alphabet while preserving order.
:param alphabet: an iterable of unique tokens :returns: OrdinalEncoder
Source code in seqlike/encoders.py
def index_encoder_from_alphabet(alphabet):
    """Return an OrdinalEncoder from the tokens in alphabet while preserving order.

    The tokens are passed both as the encoder's explicit ``categories`` and,
    one token per row, as the data it is fitted on.

    :param alphabet: an iterable of unique tokens
    :returns: OrdinalEncoder
    """
    from sklearn.preprocessing import OrdinalEncoder

    # Materialize once: a one-shot iterable (e.g. a generator) would otherwise
    # be exhausted by the first pass, leaving the fit data empty.
    tokens = list(alphabet)
    categories = [tokens]
    fit_list = [[token] for token in tokens]
    return OrdinalEncoder(dtype=float, categories=categories).fit(fit_list)
onehot_encoder_from_alphabet(alphabet)
Return a OneHotEncoder from the tokens in alphabet while preserving order.
:param alphabet: an iterable of unique tokens :returns: OneHotEncoder
Source code in seqlike/encoders.py
def onehot_encoder_from_alphabet(alphabet):
    """Return a OneHotEncoder from the tokens in alphabet while preserving order.

    The tokens are passed both as the encoder's explicit ``categories`` and,
    one token per row, as the data it is fitted on. The encoder is configured
    with ``sparse_output=False``.

    :param alphabet: an iterable of unique tokens
    :returns: OneHotEncoder
    """
    from sklearn.preprocessing import OneHotEncoder

    # Materialize once: a one-shot iterable (e.g. a generator) would otherwise
    # be exhausted by the first pass, leaving the fit data empty.
    tokens = list(alphabet)
    categories = [tokens]
    fit_list = [[token] for token in tokens]
    return OneHotEncoder(dtype=float, sparse_output=False, categories=categories).fit(fit_list)