Source code for poolparty.fixed_ops.slice_seq

"""SliceSeq operation - slice SEQUENCES (string slicing)."""

from numbers import Integral, Real

from ..pool import Pool
from ..types import Optional, Pool_type, RegionType, Sequence, Union, beartype


def _slice_length(key: slice, length: int) -> int:
    """Return how many items ``key`` selects from a sequence of ``length`` items."""
    # ``slice.indices`` clamps the slice to the sequence bounds; the length of
    # the matching ``range`` is exactly the number of selected positions, for
    # positive and negative steps alike.
    return len(range(*key.indices(length)))


@beartype
def slice_seq(
    pool: Union[Pool_type, str],
    region: RegionType = None,
    start: Optional[Integral] = None,
    stop: Optional[Integral] = None,
    step: Optional[Integral] = None,
    keep_context: bool = False,
    iter_order: Optional[Real] = None,
    prefix: Optional[str] = None,
    style: Optional[str] = None,
) -> Pool:
    """
    Create a Pool containing sliced sequences from the input pool.

    Extracts a subsequence based on region and/or Python-style slice parameters.

    Parameters
    ----------
    pool : Union[Pool_type, str]
        The Pool (or sequence string) whose sequences will be sliced.
    region : RegionType, default=None
        Region to slice from. Can be:
        - str: Name of an annotated region (e.g., 'orf')
        - Sequence[int]: [start, stop] interval in the sequence
        - None: Use the full sequence
        If only region is specified (no start/stop/step), returns just that region.
    start : Optional[Integral], default=None
        Start position for slicing (0-indexed, Python-style).
        Applied after region extraction if region is specified.
    stop : Optional[Integral], default=None
        Stop position for slicing (exclusive, Python-style).
        Applied after region extraction if region is specified.
    step : Optional[Integral], default=None
        Step for slicing (Python-style).
        Applied after region extraction if region is specified.
    keep_context : bool, default=False
        If True, reassemble the sliced content back into the original sequence
        context (prefix + sliced_content + suffix). If False (default), return
        only the sliced content.
    iter_order : Optional[Real], default=None
        Iteration order priority for the Operation.
    prefix : Optional[str], default=None
        Prefix for sequence naming.
    style : Optional[str], default=None
        Style to apply to the resulting sliced sequences (e.g., 'red', 'blue bold').

    Returns
    -------
    Pool
        A Pool containing sliced sequences.

    Raises
    ------
    ValueError
        If ``keep_context=True`` is given without a ``region``. The callbacks
        built for an interval region also raise ``ValueError`` when the
        interval's stop exceeds the sequence length.

    Notes
    -----
    Whenever a region or a slice parameter is given, markup tags are removed
    before indexing, so positions always refer to sequence characters. With no
    region and no slice parameters the sequence is passed through unchanged.

    Examples
    --------
    >>> with pp.Party():
    ...     # Slice positions 2-6 from the full sequence
    ...     pool = pp.from_seq('ACGTACGT')
    ...     sliced = pp.slice_seq(pool, start=2, stop=6)
    ...     # Result: 'GTAC'
    ...
    ...     # Extract just a named region
    ...     pool = pp.from_seq('AAA<orf>ATGCCC</orf>TTT')
    ...     orf = pp.slice_seq(pool, region='orf')
    ...     # Result: 'ATGCCC'
    ...
    ...     # Slice within a named region
    ...     pool = pp.from_seq('AAA<orf>ATGCCC</orf>TTT')
    ...     sliced = pp.slice_seq(pool, region='orf', start=0, stop=3)
    ...     # Result: 'ATG'
    ...
    ...     # Slice with step (every other character)
    ...     pool = pp.from_seq('ABCDEFGH')
    ...     sliced = pp.slice_seq(pool, step=2)
    ...     # Result: 'ACEG'
    ...
    ...     # Use as a method on Pool objects
    ...     pool = pp.from_seq('ACGTACGT')
    ...     sliced = pool.slice_seq(start=0, stop=4)
    ...     # Result: 'ACGT'
    ...
    ...     # Keep context - reassemble into original sequence
    ...     pool = pp.from_seq('AAA<orf>ATGCCC</orf>TTT')
    ...     sliced = pp.slice_seq(pool, region='orf', start=0, stop=3, keep_context=True)
    ...     # Result: 'AAAATGTTT' (prefix + sliced region + suffix)
    """
    from ..utils.parsing_utils import strip_all_tags, validate_single_region
    from .fixed import fixed_operation

    # Check if we need to apply a slice (any slice parameter is specified)
    has_slice = start is not None or stop is not None or step is not None

    # Build a slice object from the parameters
    slice_start = int(start) if start is not None else None
    slice_stop = int(stop) if stop is not None else None
    slice_step = int(step) if step is not None else None
    key = slice(slice_start, slice_stop, slice_step)

    # keep_context only makes sense when region is specified
    if keep_context and region is None:
        raise ValueError("keep_context=True requires a region to be specified")

    if region is not None:
        # Region specified - extract the region content and optionally slice it
        if isinstance(region, str):
            # Named region - extract content from XML tags
            def seq_from_seqs_fn(seqs: list[str]) -> str:
                seq = seqs[0]
                parsed_region = validate_single_region(seq, region)
                content = strip_all_tags(parsed_region.content)
                sliced_content = content[key] if has_slice else content

                if keep_context:
                    # Reassemble: prefix + sliced_content + suffix
                    prefix_str = strip_all_tags(seq[: parsed_region.start])
                    suffix_str = strip_all_tags(seq[parsed_region.end :])
                    return prefix_str + sliced_content + suffix_str
                return sliced_content

            def seq_length_from_pool_lengths_fn(
                lengths: Sequence[Optional[int]],
            ) -> Optional[int]:
                # Get the registered region's length from Party
                from ..party import get_active_party

                party = get_active_party()
                if party.has_region(region):
                    registered_region = party.get_region_by_name(region)
                    region_len = registered_region.seq_length
                else:
                    region_len = None

                if keep_context:
                    parent_len = lengths[0]
                    if parent_len is None or region_len is None:
                        return None
                    if not has_slice:
                        return parent_len
                    # The region shrinks to the sliced length; the flanks are kept.
                    return parent_len - region_len + _slice_length(key, region_len)

                if region_len is None:
                    return None
                if not has_slice:
                    return region_len
                # Apply slice to region length
                return _slice_length(key, region_len)

        else:
            # Interval region [start, stop]
            region_start, region_stop = int(region[0]), int(region[1])

            def seq_from_seqs_fn(seqs: list[str]) -> str:
                seq = strip_all_tags(seqs[0])
                if region_stop > len(seq):
                    raise ValueError(
                        f"slice_seq region [{region_start}, {region_stop}] exceeds "
                        f"sequence length {len(seq)}"
                    )
                content = seq[region_start:region_stop]
                sliced_content = content[key] if has_slice else content

                if keep_context:
                    # Reassemble: prefix + sliced_content + suffix
                    prefix_str = seq[:region_start]
                    suffix_str = seq[region_stop:]
                    return prefix_str + sliced_content + suffix_str
                return sliced_content

            def seq_length_from_pool_lengths_fn(
                lengths: Sequence[Optional[int]],
            ) -> Optional[int]:
                parent_len = lengths[0]
                region_len = region_stop - region_start
                if parent_len is not None and region_stop > parent_len:
                    raise ValueError(
                        f"slice_seq region [{region_start}, {region_stop}] exceeds "
                        f"sequence length {parent_len}"
                    )

                if keep_context:
                    # Length depends on parent length and slice
                    if parent_len is None:
                        return None
                    # prefix_len + sliced_len + suffix_len
                    prefix_len = region_start
                    suffix_len = parent_len - region_stop
                    if not has_slice:
                        return prefix_len + region_len + suffix_len
                    return prefix_len + _slice_length(key, region_len) + suffix_len

                if not has_slice:
                    return region_len
                # Apply slice to region length
                return _slice_length(key, region_len)

    else:
        # No region - slice the full sequence
        def seq_from_seqs_fn(seqs: list[str]) -> str:
            seq = seqs[0]
            if not has_slice:
                # Nothing to slice: pass the sequence through untouched.
                return seq
            # Strip markup before indexing, as the region branches do, so the
            # slice cannot cut through a tag and its indices address sequence
            # characters. NOTE(review): this assumes pool lengths exclude tags,
            # which the interval branch's bounds checks imply - confirm.
            return strip_all_tags(seq)[key]

        def seq_length_from_pool_lengths_fn(
            lengths: Sequence[Optional[int]],
        ) -> Optional[int]:
            parent_len = lengths[0]
            if parent_len is None:
                return None
            # Compute length from slice params
            return _slice_length(key, parent_len)

    result_pool = fixed_operation(
        parent_pools=[pool],
        seq_from_seqs_fn=seq_from_seqs_fn,
        seq_length_from_pool_lengths_fn=seq_length_from_pool_lengths_fn,
        iter_order=iter_order,
        prefix=prefix,
        _factory_name="slice_seq",
    )

    # Apply style if specified
    if style is not None:
        from .stylize import stylize

        result_pool = stylize(result_pool, style=style)

    return result_pool