Source code for poolparty.region_ops.extract_region
"""Extract content from a region as a new Pool."""
from numbers import Real
from poolparty.types import Optional
from ..utils import dna_utils
from ..utils.parsing_utils import validate_single_region
[docs]
def extract_region(
pool,
region_name: str,
rc: bool = False,
iter_order: Optional[Real] = None,
prefix: Optional[str] = None,
):
"""
Extract content from a named region as a new Pool.
Creates a Pool that yields the content inside the specified region.
Parameters
----------
pool : Pool or str
Input Pool or sequence string containing the region.
region_name : str
Name of the region to extract content from.
rc : bool, default=False
If True, reverse-complement the extracted content.
iter_order : Optional[Real], default=None
Iteration order priority for the Operation.
prefix : Optional[str], default=None
Prefix for sequence names in the resulting Pool.
Returns
-------
Pool
A Pool yielding the content inside the region.
Examples
--------
>>> with pp.Party():
... bg = pp.from_seq('ACGT<region>TTAA</region>GCGC')
... content = pp.extract_region(bg, 'region')
... # content yields: 'TTAA'
...
... # With rc=True, content is reverse-complemented
... content_rc = pp.extract_region(bg, 'region', rc=True)
... # content_rc yields: 'TTAA' (reverse complement of TTAA)
"""
from ..fixed_ops.fixed import fixed_operation
from ..fixed_ops.from_seq import from_seq
from ..party import get_active_party
# Convert string to pool if needed
pool = from_seq(pool) if isinstance(pool, str) else pool
def seq_from_seqs_fn(seqs: list[str]) -> str:
seq = seqs[0]
region = validate_single_region(seq, region_name)
content = region.content
# If rc=True, reverse complement the content
if rc:
content = dna_utils.reverse_complement(content)
return content
# Get seq_length from the registered region
party = get_active_party()
if party.has_region(region_name):
registered_region = party.get_region_by_name(region_name)
region_seq_length = registered_region.seq_length
else:
# Region not registered - this shouldn't happen in normal usage
# but we handle it gracefully by inferring from content
region_seq_length = None
result_pool = fixed_operation(
parent_pools=[pool],
seq_from_seqs_fn=seq_from_seqs_fn,
seq_length_from_pool_lengths_fn=lambda lengths: region_seq_length, # Use registered region's seq_length
iter_order=iter_order,
prefix=prefix,
)
# The extracted content does not contain any regions
# (we only inherit parent regions minus the extracted one)
result_pool._untrack_region(region_name)
return result_pool