Skip to content

Encoders

array_to_string(sequence, _index_encoder, _onehot_encoder)

Convert array-like sequence representations to a string.

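:raises IndexError: if the sequence contains symbols that are not in the alphabet known to the index encoder and one-hot encoder objects (i.e. the sequence's alphabet is a strict superset of the encoders' alphabet).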

Source code in seqlike/encoders.py
def array_to_string(sequence: Union[list, np.ndarray], _index_encoder, _onehot_encoder) -> str:
    """Decode an array-like sequence representation into a single string.

    Decoding is delegated to ``array_to_symbols``; the decoded symbols are
    then concatenated with no separator.

    :raises IndexError: if the alphabet of the sequence is a superset
        of the index encoder and one-hot encoder object.

    <!-- #noqa: DAR101 -->
    <!-- #noqa: DAR201 -->
    """
    symbols = array_to_symbols(sequence, _index_encoder, _onehot_encoder)
    return "".join(symbols)

array_to_symbols(sequence, _index_encoder, _onehot_encoder)

Convert array-like sequence representations to a flat array of symbols.

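:raises IndexError: if the sequence contains symbols that are not in the alphabet known to the index encoder and one-hot encoder objects (i.e. the sequence's alphabet is a strict superset of the encoders' alphabet).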

Source code in seqlike/encoders.py
def array_to_symbols(sequence: Union[list, np.ndarray], _index_encoder, _onehot_encoder) -> np.ndarray:
    """Convert array-like sequence representations to a flat array of symbols.

    The input is first coerced to a float NumPy array. A 1-D array is decoded
    with ``_index_encoder``; a 2-D array is decoded with ``_onehot_encoder``.
    An array of any other dimensionality is returned as the coerced float
    array without decoding.

    :param sequence: index-encoded (1-D) or one-hot encoded (2-D) sequence.
    :param _index_encoder: object exposing ``inverse_transform``; used for
        1-D input, which is passed to it reshaped as a single column.
    :param _onehot_encoder: object exposing ``inverse_transform``; used for
        2-D input, which is passed to it as-is.
    :returns: flattened array of decoded symbols.
    :raises IndexError: if the alphabet of the sequence is a superset
        of the index encoder and one-hot encoder object.

    <!-- #noqa: DAR101 -->
    <!-- #noqa: DAR201 -->
    """
    sequence = np.asarray(sequence, dtype=float)
    if sequence.ndim == 1:
        try:
            # The index encoder is handed one feature column, hence (-1, 1).
            sequence = _index_encoder.inverse_transform(sequence.reshape(-1, 1)).flatten()
        except IndexError as err:
            # Re-raise with a friendlier message, keeping the original error
            # attached as the explicit cause for debugging.
            raise IndexError(
                "The encoder encountered a bad encoding value. "
                "Ensure that you're using an alphabet "
                "which contains all needed symbols."
            ) from err
    elif sequence.ndim == 2:
        sequence = _onehot_encoder.inverse_transform(sequence).flatten()

    # NOTE: We do not need to check for other dim sizes
    # because we assume that validate_sequence will take care of it.
    return sequence

index_encoder_from_alphabet(alphabet)

Return an OrdinalEncoder from the tokens in alphabet while preserving order.

:param alphabet: an iterable of unique tokens :returns: OrdinalEncoder

Source code in seqlike/encoders.py
def index_encoder_from_alphabet(alphabet):
    """Return an OrdinalEncoder from the tokens in alphabet while preserving order.

    The tokens are passed to the encoder as its explicit ``categories`` in the
    order given, and the encoder is fitted on one single-token sample per token.

    :param alphabet: an iterable of unique tokens
    :returns: OrdinalEncoder
    """
    from sklearn.preprocessing import OrdinalEncoder

    # Materialize once: a one-shot iterable (e.g. a generator) would otherwise
    # be exhausted by the first pass and leave nothing to fit on.
    tokens = list(alphabet)
    categories = [tokens]
    fit_list = [[token] for token in tokens]
    return OrdinalEncoder(dtype=float, categories=categories).fit(fit_list)

onehot_encoder_from_alphabet(alphabet)

Return an OneHotEncoder from the tokens in alphabet while preserving order.

:param alphabet: an iterable of unique tokens :returns: OneHotEncoder

Source code in seqlike/encoders.py
def onehot_encoder_from_alphabet(alphabet):
    """Return a OneHotEncoder from the tokens in alphabet while preserving order.

    The tokens are passed to the encoder as its explicit ``categories`` in the
    order given, and the encoder is fitted on one single-token sample per token.
    The encoder is configured to produce dense (non-sparse) float output.

    :param alphabet: an iterable of unique tokens
    :returns: OneHotEncoder
    """
    from sklearn.preprocessing import OneHotEncoder

    # Materialize once: a one-shot iterable (e.g. a generator) would otherwise
    # be exhausted by the first pass and leave nothing to fit on.
    tokens = list(alphabet)
    categories = [tokens]
    fit_list = [[token] for token in tokens]
    return OneHotEncoder(dtype=float, sparse_output=False, categories=categories).fit(fit_list)