Source code for poolparty.scan_ops.shuffle_scan

"""Shuffle scan operation - shuffle characters within a window at scanning positions."""

from numbers import Integral, Real

from ..pool import Pool
from ..types import CardsType, Literal, ModeType, Optional, PositionsType, RegionType, Union, beartype


@beartype
def shuffle_scan(
    pool: Union[Pool, str],
    shuffle_length: Integral,
    positions: PositionsType = None,
    region: RegionType = None,
    shuffle_type: Literal["mono", "dinuc"] = "mono",
    shuffles_per_position: Integral = 1,
    prefix: Optional[str] = None,
    prefix_position: Optional[str] = None,
    prefix_shuffle: Optional[str] = None,
    mode: ModeType = "random",
    num_states: Optional[Integral] = None,
    style: Optional[str] = None,
    iter_order: Optional[Real] = None,
    cards: Optional[tuple[CardsType, CardsType]] = None,
    _factory_name: Optional[str] = "shuffle_scan",
) -> Pool:
    """
    Shuffle characters within a window at specified scanning positions.

    The operation is built from two chained steps: ``region_scan`` tags a
    window of ``shuffle_length`` characters at each scanning position, then
    ``shuffle_seq`` shuffles the tagged window and removes the tags. When any
    naming prefix is given, a final ``passthrough`` step contributes names
    derived from the position and shuffle indices.

    Parameters
    ----------
    pool : Union[Pool, str]
        Parent pool or sequence string. A string is converted to a pool
        with ``from_seq``.
    shuffle_length : Integral
        Length of the region to shuffle at each position.
    positions : PositionsType, default=None
        Positions to consider for the start of the shuffle region (0-based).
    region : RegionType, default=None
        Region to constrain the scan to. Can be a marker name or
        [start, stop] interval.
    shuffle_type : Literal["mono", "dinuc"], default="mono"
        Type of shuffle to perform:

        - ``"mono"``: random permutation preserving mononucleotide
          composition.
        - ``"dinuc"``: Euler-path shuffle preserving dinucleotide
          frequencies. The first and last characters of each window are
          fixed.
    shuffles_per_position : Integral, default=1
        Number of shuffles to perform at each position.
    prefix : Optional[str], default=None
        Prefix for cartesian product index (e.g., 'shuf' produces
        'shuf_0', 'shuf_1', ...).
    prefix_position : Optional[str], default=None
        Prefix for position index (e.g., 'pos' produces 'pos_0',
        'pos_1', ...).
    prefix_shuffle : Optional[str], default=None
        Prefix for shuffle variant index (e.g., 'var' produces 'var_0',
        'var_1', ...).
    mode : ModeType, default='random'
        Selection mode: 'random' or 'sequential'.
    num_states : Optional[Integral], default=None
        Number of states. In sequential mode, overrides the computed count
        (cycling if greater, clipping if less). In random mode, if None
        defaults to 1 (pure random sampling).
    style : Optional[str], default=None
        Style to apply to shuffled characters (e.g., 'purple', 'red bold').
    iter_order : Optional[Real], default=None
        Iteration order priority for the Operation.
    cards : Optional[tuple[CardsType, CardsType]], default=None
        Design card keys as a 2-tuple ``(scan_cards, shuffle_cards)``.
        Scan keys: ``'position_index'``, ``'start'``, ``'end'``,
        ``'name'``, ``'region_seq'``. Shuffle keys: ``'permutation'``.

    Returns
    -------
    Pool
        A Pool yielding sequences where a region of the specified length is
        shuffled at each allowed position.

    Raises
    ------
    ValueError
        If ``pool`` has no defined ``seq_length`` and no ``region`` is
        given, if ``shuffle_length`` is not positive, or if
        ``shuffle_length`` is not smaller than a defined
        ``pool.seq_length``.
    """
    # Imported at call time rather than at module import time.
    from ..base_ops.shuffle_seq import shuffle_seq
    from ..fixed_ops.from_seq import from_seq
    from ..fixed_ops.passthrough import passthrough
    from ..region_ops import region_scan

    # Convert string inputs to pools
    if isinstance(pool, str):
        pool = from_seq(pool, _factory_name=f"{_factory_name}(from_seq)")

    # Validate pool has defined seq_length (only when no region specified)
    bg_length = pool.seq_length
    if bg_length is None and region is None:
        raise ValueError("pool must have a defined seq_length")

    # Validate shuffle_length
    if shuffle_length <= 0:
        raise ValueError(f"shuffle_length must be > 0, got {shuffle_length}")
    if bg_length is not None and shuffle_length >= bg_length:
        raise ValueError(
            f"shuffle_length ({shuffle_length}) must be < pool.seq_length ({bg_length})"
        )

    # Resolve cards: first element goes to the scan step, second to the shuffle step
    cards_scan, cards_shuf = cards if cards else (None, None)

    region_name = "_shuf"
    region_length = int(shuffle_length)

    # 1. Insert tags at scanning positions
    marked = region_scan(
        pool,
        tag_name=region_name,
        region_length=region_length,
        positions=positions,
        region=region,
        remove_tags=False,
        mode=mode,
        num_states=num_states,
        iter_order=iter_order,
        cards=cards_scan,
        _factory_name=f"{_factory_name}(region_scan)",
    )

    # Capture position state
    pos_state = marked.operation.state

    # 2. Shuffle the marked region directly using shuffle_seq with region='_shuf'
    result = shuffle_seq(
        marked,
        region=region_name,
        shuffle_type=shuffle_type,
        _remove_tags=True,  # Remove _shuf tags
        style=style,
        mode="random",
        num_states=shuffles_per_position,
        iter_order=-1,
        cards=cards_shuf,
        _factory_name=f"{_factory_name}(shuffle_seq)",
    )

    # Capture shuffle state
    shuffle_state = result.operation.state

    # 3. Add PassthroughOp for custom naming if any prefix is set
    if prefix or prefix_position or prefix_shuffle:
        num_shuffles = int(shuffles_per_position) if shuffles_per_position else 1

        def compute_names():
            """Return the name contributions for the current position/shuffle states."""
            # Check if this branch is active
            if not pos_state.is_active:
                return []
            if shuffle_state is not None and not shuffle_state.is_active:
                return []

            pos_idx = pos_state.value
            # Use the same explicit None test as the guard above, so a state
            # object that happens to be falsy is not mistaken for "no state".
            shuffle_idx = shuffle_state.value if shuffle_state is not None else 0

            contributions = []
            if prefix:
                # Cartesian product index over (position, shuffle variant)
                combined_idx = pos_idx * num_shuffles + shuffle_idx
                contributions.append(f"{prefix}_{combined_idx}")
            if prefix_position:
                contributions.append(f"{prefix_position}_{pos_idx}")
            if prefix_shuffle:
                contributions.append(f"{prefix_shuffle}_{shuffle_idx}")
            return contributions

        result = passthrough(
            result,
            _name_fn=compute_names,
            iter_order=iter_order,
            _factory_name=f"{_factory_name}(naming)",
        )

    return result