# Source code for poolparty.scan_ops.shuffle_scan

"""Shuffle scan operation - shuffle characters within a window at scanning positions."""

from numbers import Integral, Real

from ..pool import Pool
from ..types import CardsType, Literal, ModeType, Optional, PositionsType, RegionType, Union, beartype



# [docs]
@beartype
def shuffle_scan(
    pool: Union[Pool, str],
    shuffle_length: Integral,
    positions: PositionsType = None,
    region: RegionType = None,
    shuffle_type: Literal["mono", "dinuc"] = "mono",
    shuffles_per_position: Integral = 1,
    prefix: Optional[str] = None,
    prefix_position: Optional[str] = None,
    prefix_shuffle: Optional[str] = None,
    mode: ModeType = "random",
    num_states: Optional[Integral] = None,
    style: Optional[str] = None,
    iter_order: Optional[Real] = None,
    cards: Optional[tuple[CardsType, CardsType]] = None,
    _factory_name: Optional[str] = "shuffle_scan",
) -> Pool:
    """
    Shuffle characters within a window at specified scanning positions.

    The operation is built as a small pipeline: ``region_scan`` tags a window
    of ``shuffle_length`` characters, ``shuffle_seq`` shuffles the tagged
    window and removes the tags, and (only when a prefix is given) a
    ``passthrough`` step contributes custom names.

    Parameters
    ----------
    pool : Union[Pool, str]
        Parent pool or sequence string.
    shuffle_length : Integral
        Length of the region to shuffle at each position.
    positions : PositionsType, default=None
        Positions to consider for the start of the shuffle region (0-based).
    region : RegionType, default=None
        Region to constrain the scan to. Can be a marker name or [start, stop] interval.
    shuffle_type : Literal["mono", "dinuc"], default="mono"
        Type of shuffle to perform:

        - ``"mono"``: random permutation preserving mononucleotide composition.
        - ``"dinuc"``: Euler-path shuffle preserving dinucleotide frequencies.
          The first and last characters of each window are fixed.
    shuffles_per_position : Integral, default=1
        Number of shuffles to perform at each position.
    prefix : Optional[str], default=None
        Prefix for cartesian product index (e.g., 'shuf' produces 'shuf_0', 'shuf_1', ...).
    prefix_position : Optional[str], default=None
        Prefix for position index (e.g., 'pos' produces 'pos_0', 'pos_1', ...).
    prefix_shuffle : Optional[str], default=None
        Prefix for shuffle variant index (e.g., 'var' produces 'var_0', 'var_1', ...).
    mode : ModeType, default='random'
        Selection mode: 'random' or 'sequential'.
    num_states : Optional[Integral], default=None
        Number of states. In sequential mode, overrides the computed count
        (cycling if greater, clipping if less). In random mode, if None
        defaults to 1 (pure random sampling).
    style : Optional[str], default=None
        Style to apply to shuffled characters (e.g., 'purple', 'red bold').
    iter_order : Optional[Real], default=None
        Iteration order priority for the Operation.
    cards : Optional[tuple[CardsType, CardsType]], default=None
        Design card keys as a 2-tuple ``(scan_cards, shuffle_cards)``.
        Scan keys: ``'position_index'``, ``'start'``, ``'end'``, ``'name'``,
        ``'region_seq'``. Shuffle keys: ``'permutation'``.

    Returns
    -------
    Pool
        A Pool yielding sequences where a region of the specified length is shuffled
        at each allowed position.

    Raises
    ------
    ValueError
        If ``pool`` has no defined ``seq_length`` and no ``region`` is given,
        if ``shuffle_length`` is not positive, or if ``shuffle_length`` is not
        strictly smaller than ``pool.seq_length`` (when that length is defined).

    Notes
    -----
    ``mode``, ``num_states`` and ``iter_order`` are forwarded to the position
    scan only. The shuffle step is always created with ``mode="random"``,
    ``num_states=shuffles_per_position`` and ``iter_order=-1``.
    """
    # Imported lazily inside the function, as in the original implementation.
    from ..base_ops.shuffle_seq import shuffle_seq
    from ..fixed_ops.from_seq import from_seq
    from ..fixed_ops.passthrough import passthrough
    from ..region_ops import region_scan

    # Convert string inputs to pools
    if isinstance(pool, str):
        pool = from_seq(pool, _factory_name=f"{_factory_name}(from_seq)")

    # Validate pool has defined seq_length (only when no region specified)
    bg_length = pool.seq_length
    if bg_length is None and region is None:
        raise ValueError("pool must have a defined seq_length")

    # Validate shuffle_length
    if shuffle_length <= 0:
        raise ValueError(f"shuffle_length must be > 0, got {shuffle_length}")
    if bg_length is not None and shuffle_length >= bg_length:
        raise ValueError(
            f"shuffle_length ({shuffle_length}) must be < pool.seq_length ({bg_length})"
        )

    # Resolve cards: one entry for the scan step, one for the shuffle step
    cards_scan, cards_shuf = cards if cards else (None, None)

    # Internal tag name marking the window; removed again by shuffle_seq below
    region_name = "_shuf"
    region_length = int(shuffle_length)

    # 1. Insert tags at scanning positions
    marked = region_scan(
        pool,
        tag_name=region_name,
        region_length=region_length,
        positions=positions,
        region=region,
        remove_tags=False,  # tags must survive so shuffle_seq can target them
        mode=mode,
        num_states=num_states,
        iter_order=iter_order,
        cards=cards_scan,
        _factory_name=f"{_factory_name}(region_scan)",
    )

    # Capture position state
    pos_state = marked.operation.state

    # 2. Shuffle the marked region directly using shuffle_seq with region='_shuf'
    result = shuffle_seq(
        marked,
        region=region_name,
        shuffle_type=shuffle_type,
        _remove_tags=True,  # Remove _shuf tags
        style=style,
        mode="random",
        num_states=shuffles_per_position,
        iter_order=-1,
        cards=cards_shuf,
        _factory_name=f"{_factory_name}(shuffle_seq)",
    )

    # Capture shuffle state
    shuffle_state = result.operation.state

    # 3. Add PassthroughOp for custom naming if any prefix is set
    if prefix or prefix_position or prefix_shuffle:
        num_shuffles = int(shuffles_per_position) if shuffles_per_position else 1

        def compute_names():
            # Contribute no names when this branch is not active
            if not pos_state.is_active:
                return []
            if shuffle_state is not None and not shuffle_state.is_active:
                return []

            pos_idx = pos_state.value
            # Explicit None check (matching the guard above): plain truthiness
            # could misreport a valid state object as "missing" if the state
            # type defines __bool__ or __len__.
            shuffle_idx = shuffle_state.value if shuffle_state is not None else 0

            contributions = []
            if prefix:  # Cartesian product index
                combined_idx = pos_idx * num_shuffles + shuffle_idx
                contributions.append(f"{prefix}_{combined_idx}")
            if prefix_position:
                contributions.append(f"{prefix_position}_{pos_idx}")
            if prefix_shuffle:
                contributions.append(f"{prefix_shuffle}_{shuffle_idx}")
            return contributions

        result = passthrough(
            result,
            _name_fn=compute_names,
            iter_order=iter_order,
            _factory_name=f"{_factory_name}(naming)",
        )

    return result