Source code for poolparty.region_ops.apply_at_region

"""Apply a transformation to content at a region."""

from numbers import Real

from poolparty.types import Callable, Optional

from ..utils import dna_utils


def apply_at_region(
    pool,
    region_name: str,
    transform_fn: Callable,
    rc: bool = False,
    remove_tags: bool = True,
    iter_order: Optional[Real] = None,
    prefix: Optional[str] = None,
):
    """
    Transform the content of a named region and splice the result back in.

    The work is done in three stages:

    1. The region's content is pulled out with ``extract_region`` (passing
       ``rc`` through, so the content is reverse-complemented when rc=True).
    2. ``transform_fn`` is called on the extracted content Pool.
    3. The region in ``pool`` is replaced by the transformed content, either
       dropping the region tags (``replace_region``) or keeping them
       (``_replace_keeping_tags``). ``rc`` is passed through again so the
       result is reverse-complemented back before insertion.

    Parameters
    ----------
    pool : Pool or str
        Input Pool containing the region. A plain string is first converted
        to a Pool with ``from_seq``.
    region_name : str
        Name of the region whose content is transformed.
    transform_fn : Callable[[Pool], Pool]
        Function that receives the extracted content Pool and returns the
        transformed Pool, e.g. ``pp.rc``, ``pp.shuffle_seq`` or
        ``lambda p: pp.mutagenize(p, ...)``.
    rc : bool, default=False
        If True, the content is reverse-complemented before ``transform_fn``
        sees it and the result is reverse-complemented back before insertion.
    remove_tags : bool, default=True
        If True, the region tags are removed from the result. If False, the
        tags are kept around the transformed content.
    iter_order : Optional[Real], default=None
        Iteration order priority for the Operation; forwarded unchanged to
        the replacement step.
    prefix : Optional[str], default=None
        Forwarded unchanged to the replacement step; its meaning is defined
        by that operation.

    Returns
    -------
    Pool
        A Pool in which the region content has been transformed.

    Examples
    --------
    >>> with pp.Party():
    ...     # Reverse complement a region (tags removed)
    ...     bg = pp.from_seq('ACGT<orf>ATGCCC</orf>TTTT')
    ...     result = pp.apply_at_region(bg, 'orf', pp.rc)
    ...     # Result: 'ACGTGGGCATTTTT'
    ...
    ...     # Keep tags around transformed content
    ...     bg = pp.from_seq('AAA<region>ACGT</region>TTT')
    ...     result = pp.apply_at_region(
    ...         bg, 'region',
    ...         lambda p: pp.mutagenize(p, num_mutations=1),
    ...         remove_tags=False,
    ...     )
    ...     # Result: 'AAA<region>ACCT</region>TTT' (tags preserved)
    """
    # Imported at call time, as in the original implementation.
    from ..fixed_ops.from_seq import from_seq
    from .extract_region import extract_region
    from .replace_region import replace_region

    # Accept a raw sequence string as a convenience.
    if isinstance(pool, str):
        pool = from_seq(pool)

    # Extract, then transform.
    extracted = extract_region(pool, region_name, rc=rc)
    transformed = transform_fn(extracted)

    # Both replacement routines take the same arguments; only the handling
    # of the region tags differs.
    replacer = replace_region if remove_tags else _replace_keeping_tags
    return replacer(
        pool,
        transformed,
        region_name,
        rc=rc,
        iter_order=iter_order,
        prefix=prefix,
    )


def _replace_keeping_tags(
    pool,
    content_pool,
    region_name: str,
    rc: bool = False,
    iter_order: Optional[Real] = None,
    prefix: Optional[str] = None,
):
    """Swap a region's content for new content while keeping its tags.

    Builds a ``fixed_operation`` over ``[pool, content_pool]`` whose output
    sequence is the background sequence with the span located by
    ``validate_single_region`` replaced by the new content wrapped in tags
    from ``build_region_tags``.

    Parameters
    ----------
    pool
        Background Pool containing the region.
    content_pool
        Pool supplying the replacement content.
    region_name : str
        Name of the region to replace.
    rc : bool, default=False
        If True, the replacement content is reverse-complemented before it
        is wrapped and inserted.
    iter_order : Optional[Real], default=None
        Forwarded unchanged to ``fixed_operation``.
    prefix : Optional[str], default=None
        Forwarded unchanged to ``fixed_operation``.

    Returns
    -------
    The Pool returned by ``fixed_operation``.
    """
    from ..fixed_ops.fixed import fixed_operation
    from ..party import get_active_party
    from ..utils.parsing_utils import build_region_tags, validate_single_region

    # Length of the region as registered with the active party, looked up
    # once when the operation is built. Stays None when there is no active
    # party or the region is not registered.
    # NOTE(review): the original comment says this mirrors replace_region's
    # length formula and that tags do not count towards length - confirm.
    active_party = get_active_party()
    if active_party is not None and active_party.has_region(region_name):
        registered_len = active_party.get_region(region_name).seq_length
    else:
        registered_len = None

    def _seq_length_fn(lengths):
        # Output length = background - old region + new content; unknown
        # (None) as soon as any of the three is unknown.
        background_len = lengths[0]
        insert_len = lengths[1] if len(lengths) > 1 else None
        operands = (background_len, registered_len, insert_len)
        if any(value is None for value in operands):
            return None
        return background_len - registered_len + insert_len

    def seq_from_seqs_fn(seqs: list[str]) -> str:
        background = seqs[0]
        replacement = seqs[1]

        # Locate the region inside the background sequence.
        located = validate_single_region(background, region_name)

        # Undo the reverse complement applied at extraction time.
        if rc:
            replacement = dna_utils.reverse_complement(replacement)

        # Re-wrap the new content in the region's tags.
        tagged = build_region_tags(region_name, replacement)

        # Stitch: everything before the region + tagged content + the rest.
        head = background[: located.start]
        tail = background[located.end :]
        return head + tagged + tail

    # No region bookkeeping is needed here: per the original author's note,
    # the region is inherited from `pool`, so there is nothing to add.
    return fixed_operation(
        parent_pools=[pool, content_pool],
        seq_from_seqs_fn=seq_from_seqs_fn,
        seq_length_from_pool_lengths_fn=_seq_length_fn,
        iter_order=iter_order,
        prefix=prefix,
    )