Source code for nucleic.sequence

from itertools import combinations, product
from typing import Generator, Iterable, List

from nucleic.constants import DNA_IUPAC_NONDEGENERATE

# Public API: the names exported by a star-import of this module.
__all__ = ['dna_kmers', 'hamming_circle']


def dna_kmers(k: int = 3) -> Generator[str, None, None]:
    """Generate every DNA string of length `k`.

    The strings are built as the cartesian product, repeated `k` times, of
    the sorted symbols in ``DNA_IUPAC_NONDEGENERATE``.

    Args:
        k: Length of each generated DNA substring.

    Yields:
        Each DNA substring of length `k`, one at a time.

    Examples:
        >>> list(dna_kmers(1))
        ['A', 'C', 'G', 'T']
        >>> len(list(dna_kmers(3)))
        64

    """
    # Sort first so the output order does not depend on the container type
    # of the constant.
    bases = sorted(DNA_IUPAC_NONDEGENERATE)
    yield from map(''.join, product(bases, repeat=k))
def hamming_circle(
    string: str, n: int, alphabet: Iterable[str]
) -> Generator[str, None, None]:
    """Find strings, of a given alphabet, with a distance of `n` away from a string.

    Each result is made by choosing `n` positions of `string` and replacing
    the symbol at every chosen position with a *different* symbol taken from
    `alphabet`. All other positions are left untouched.

    Args:
        string: The string at the centre of the circle.
        n: Number of positions that must differ from `string`. When `n` is
            larger than ``len(string)`` nothing is yielded.
        alphabet: Symbols that may be substituted into `string`. Any iterable
            of strings is accepted (``str``, ``list``, ``set``, iterator).
            Repeated symbols are ignored so that no result is yielded twice.

    Yields:
        Each string that differs from `string` at exactly `n` positions.

    Raises:
        ValueError: If `n` is negative. Because this is a generator, the
            error surfaces when iteration starts, not at call time.

    Examples:
        >>> sorted(hamming_circle('abc', n=0, alphabet='abc'))
        ['abc']
        >>> sorted(hamming_circle('abc', n=1, alphabet='abc'))
        ['aac', 'aba', 'abb', 'acc', 'bbc', 'cbc']
        >>> sorted(hamming_circle('aaa', n=2, alphabet='ab'))
        ['abb', 'bab', 'bba']

    """
    # Materialise once: the alphabet is re-used for every choice of positions,
    # so a one-shot iterator would otherwise be exhausted after the first
    # pass. `dict.fromkeys` drops repeated symbols while keeping their order.
    symbols = tuple(dict.fromkeys(alphabet))

    for positions in combinations(range(len(string)), n):
        for replacements in product(symbols, repeat=n):
            # Substituting a symbol for itself would leave that position
            # unchanged and give a distance smaller than `n`, so discard the
            # whole candidate as soon as one such substitution is found.
            if any(string[p] == r for p, r in zip(positions, replacements)):
                continue

            cousin = list(string)
            for position, replacement in zip(positions, replacements):
                cousin[position] = replacement
            yield ''.join(cousin)