"""FromSeqs operation - create a pool from a list of sequences."""
from numbers import Real
import numpy as np
from ..dna_pool import DnaPool
from ..operation import Operation
from ..pool import Pool
from ..types import CardsType, Integral, ModeType, Optional, Pool_type, RegionType, Seq, Sequence, Union, beartype
from ..utils import dna_utils
from ..utils.dna_seq import DnaSeq
[docs]
@beartype
def from_seqs(
    seqs: Sequence[str],
    pool: Optional[Union[Pool, str]] = None,
    region: RegionType = None,
    style: Optional[str] = None,
    seq_names: Optional[Sequence[str]] = None,
    prefix: Optional[str] = None,
    mode: ModeType = "random",
    num_states: Optional[Integral] = None,
    iter_order: Optional[Real] = None,
    cards: CardsType = None,
    _factory_name: Optional[str] = None,
) -> DnaPool:
    """
    Build a DnaPool whose operation selects one of the given sequences.

    This is a thin factory: it validates ``seqs``, converts a string ``pool``
    into a pool via ``from_seq``, constructs a ``FromSeqsOp`` with the
    arguments, and wraps that operation in a ``DnaPool``.

    Parameters
    ----------
    seqs : Sequence[str]
        The candidate sequences. Must be a sequence of strings, not a single
        bare string.
    pool : Optional[Union[Pool, str]], default=None
        Background pool. A string is first converted with ``from_seq``; a Pool
        (or None) is passed through unchanged as the operation's parent pool.
    region : RegionType, default=None
        Region of ``pool`` to replace. The operation raises if a pool is
        given without a region.
    style : Optional[str], default=None
        Style applied to the whole output sequence (e.g., 'red', 'blue bold').
    seq_names : Optional[Sequence[str]], default=None
        Explicit per-sequence names. Mutually exclusive with ``prefix``.
    prefix : Optional[str], default=None
        Naming prefix forwarded to the operation. Mutually exclusive with
        ``seq_names``.
    mode : ModeType, default='random'
        Selection mode. 'sequential' and 'random' are handled explicitly by
        the operation; 'fixed' is accepted only with exactly one sequence.
    num_states : Optional[int], default=None
        Number of states. In 'sequential' mode a None value is replaced by
        ``len(seqs)``; in 'random' mode the value is forwarded unchanged; in
        any other mode the operation forces it to 1.
    iter_order : Optional[Real], default=None
        Iteration order priority forwarded to the operation.
    cards : list[str] or dict, optional
        Design card keys to include. Available keys: ``'seq_name'``,
        ``'seq_index'``.
    _factory_name : Optional[str], default=None
        Internal override for the operation's ``factory_name``.

    Returns
    -------
    DnaPool
        A pool wrapping the newly created ``FromSeqsOp``.

    Raises
    ------
    TypeError
        If ``seqs`` is a bare string instead of a list of strings.
    ValueError
        Raised by the operation if ``pool`` is given without ``region``,
        ``seqs`` is empty, ``mode='fixed'`` is used with more than one
        sequence, both ``seq_names`` and ``prefix`` are given, or
        ``seq_names`` and ``seqs`` differ in length.
    RuntimeError
        Raised by the operation if there is no active Party context.
    """
    # A bare string is itself a sequence of one-character strings, so it has
    # to be rejected explicitly rather than relying on the annotation.
    if isinstance(seqs, str):
        message = (
            "from_seqs expects a list of sequences, not a bare string. "
            "Use from_seqs(['...']) for a single sequence, or from_seq('...') for a fixed pool."
        )
        raise TypeError(message)

    # Imported lazily, only after the argument check has passed.
    from ..fixed_ops.from_seq import from_seq

    # Normalise the background: strings become fixed pools, anything else
    # (a Pool or None) is handed to the operation untouched.
    if isinstance(pool, str):
        background = from_seq(pool)
    else:
        background = pool

    operation = FromSeqsOp(
        seqs,
        parent_pool=background,
        region=region,
        style=style,
        seq_names=seq_names,
        prefix=prefix,
        mode=mode,
        num_states=num_states,
        name=None,
        iter_order=iter_order,
        cards=cards,
        _factory_name=_factory_name,
    )
    return DnaPool(operation=operation)
[docs]
class FromSeqsOp(Operation):
    """Create a pool from a list of sequences.

    Each evaluation picks one entry of ``seqs`` (randomly, by state, or always
    the first one, depending on ``mode``) and reports the chosen name and
    index in the design card under ``'seq_name'`` and ``'seq_index'``.
    """

    factory_name = "from_seqs"
    design_card_keys = ["seq_name", "seq_index"]

    def __init__(
        self,
        seqs: Sequence[str],
        parent_pool: Optional[Pool] = None,
        region: RegionType = None,
        style: Optional[str] = None,
        seq_names: Optional[Sequence[str]] = None,
        prefix: Optional[str] = None,
        mode: ModeType = "random",
        num_states: Optional[Integral] = None,
        name: Optional[str] = None,
        iter_order: Optional[Real] = None,
        cards: CardsType = None,
        _factory_name: Optional[str] = None,
    ) -> None:
        """Initialize FromSeqsOp.

        Parameters
        ----------
        seqs : Sequence[str]
            Non-empty collection of candidate sequences.
        parent_pool : Optional[Pool], default=None
            Background pool; requires ``region`` when given.
        region : RegionType, default=None
            Region of ``parent_pool`` to replace.
        style : Optional[str], default=None
            Style applied to every position of the emitted sequence.
        seq_names : Optional[Sequence[str]], default=None
            Explicit names, one per sequence. When None, names default to
            ``'seq_0'``, ``'seq_1'``, ...
        prefix : Optional[str], default=None
            Naming prefix forwarded to the base class. Cannot be combined
            with ``seq_names``.
        mode : ModeType, default='random'
            Selection mode ('sequential', 'random', or 'fixed').
        num_states : Optional[Integral], default=None
            In 'sequential' mode defaults to ``len(seqs)`` when None; in
            'random' mode it is forwarded as given; otherwise forced to 1.
        name : Optional[str], default=None
            Operation name forwarded to the base class.
        iter_order : Optional[Real], default=None
            Iteration order priority forwarded to the base class.
        cards : CardsType, default=None
            Design card selection forwarded to the base class.
        _factory_name : Optional[str], default=None
            Per-instance override of ``factory_name``.

        Raises
        ------
        RuntimeError
            If no Party context is active.
        ValueError
            If ``parent_pool`` is given without ``region``; if ``seqs`` is
            empty; if ``mode='fixed'`` is used with other than one sequence;
            if both ``seq_names`` and ``prefix`` are given; or if
            ``seq_names`` (including an explicit empty list) does not have
            the same length as ``seqs``.
        """
        from ..party import get_active_party

        party = get_active_party()
        if party is None:
            raise RuntimeError(
                "from_seqs requires an active Party context. "
                "Use 'with pp.Party() as party:' to create one."
            )

        # Set factory name if provided (shadows the class attribute on this instance)
        if _factory_name is not None:
            self.factory_name = _factory_name

        # Validate parent_pool/region combination
        if parent_pool is not None and region is None:
            raise ValueError(
                "region is required when parent_pool is provided. "
                "Specify which region of parent_pool to replace with the selected sequence."
            )

        self._style = style

        if len(seqs) == 0:
            raise ValueError("seqs must not be empty")
        if mode == "fixed" and len(seqs) != 1:
            raise ValueError("mode='fixed' requires exactly 1 sequence")
        if seq_names is not None and prefix is not None:
            raise ValueError("Cannot specify both seq_names and prefix")

        self.seqs = list(seqs)

        # Track whether explicit seq_names were provided (for
        # compute_name_contributions and _get_copy_params).
        self._seq_names_explicit = seq_names is not None
        # Test against None rather than truthiness: an explicit empty list
        # must reach the length check below instead of being silently
        # replaced by auto-generated names while still flagged as explicit.
        if seq_names is not None:
            self.seq_names = list(seq_names)
        else:
            self.seq_names = [f"seq_{i}" for i in range(len(self.seqs))]

        # Store current index for name computation
        self._current_idx: int = 0

        if len(self.seq_names) != len(self.seqs):
            raise ValueError("seq_names must have same length as seqs")

        # Determine num_states based on mode
        natural_num_states = None
        if mode == "sequential":
            # Natural count is the number of sequences; a user-provided
            # num_states takes precedence over it.
            natural_num_states = len(self.seqs)
            if num_states is None:
                num_states = natural_num_states
        elif mode != "random":
            # Any mode other than sequential/random gets a single state.
            # In random mode num_states stays as provided (possibly None).
            num_states = 1

        # Lengths as reported by the tag-aware helper; the pool has a fixed
        # seq_length only when every sequence agrees.
        lengths = [dna_utils.get_length_without_tags(s) for s in self.seqs]
        first_length = lengths[0]
        seq_length = first_length if all(n == first_length for n in lengths) else None

        parent_pools_list = [parent_pool] if parent_pool is not None else []
        super().__init__(
            parent_pools=parent_pools_list,
            num_states=num_states,
            mode=mode,
            seq_length=seq_length,
            name=name,
            iter_order=iter_order,
            prefix=prefix,
            region=region,
            _natural_num_states=natural_num_states,
            cards=cards,
        )

    def _compute_core(
        self,
        parents: list[Seq],
        rng: Optional[np.random.Generator] = None,
    ) -> tuple[Seq, dict]:
        """Return the selected Seq and its design card.

        Parameters
        ----------
        parents : list[Seq]
            Parent sequences; not read by this method.
        rng : Optional[np.random.Generator], default=None
            Random generator; required in 'random' mode.

        Returns
        -------
        tuple[Seq, dict]
            The chosen sequence as a ``DnaSeq`` and a dict with keys
            ``'seq_name'`` and ``'seq_index'``.

        Raises
        ------
        RuntimeError
            If the mode is 'random' and no ``rng`` was supplied.
        """
        if self.mode == "random":
            if rng is None:
                raise RuntimeError(
                    f"{self.mode.capitalize()} mode requires RNG - use Party.generate(seed=...)"
                )
            idx = int(rng.integers(0, len(self.seqs)))
        elif self.mode == "fixed":
            # Fixed mode - always use index 0
            idx = 0
        else:
            # Sequential mode - use state value (0 when it is None); the
            # modulo makes the index wrap when the state exceeds len(seqs).
            state = self.state.value
            idx = (0 if state is None else state) % len(self.seqs)

        # Store index for name computation
        self._current_idx = idx
        seq_string = self.seqs[idx]

        # Apply style to all positions if specified
        from ..utils.style_utils import SeqStyle, styles_suppressed

        if styles_suppressed():
            output_seq = DnaSeq(seq_string, None)
        else:
            # NOTE(review): the style spans len(seq_string), i.e. the raw
            # string, while seq_length in __init__ uses the tag-free length -
            # confirm this is intended for sequences that contain tags.
            output_style = SeqStyle.full(len(seq_string), self._style)
            output_seq = DnaSeq(seq_string, output_style)

        return output_seq, {
            "seq_name": self.seq_names[idx],
            "seq_index": idx,
        }

    def compute_name_contributions(self, global_state=None, max_global_state=None) -> list[str]:
        """Compute name contributions - explicit seq_names or prefix pattern.

        Returns an empty list while the state is inactive. With explicit
        ``seq_names`` the name at the index stored by the last
        ``_compute_core`` call is returned; otherwise the base-class
        implementation is used.
        """
        # Check if state is inactive (for branch selection)
        if not self.state.is_active:
            return []
        if self._seq_names_explicit:
            # Use explicit seq_name for current index
            return [self.seq_names[self._current_idx]]
        # Otherwise use default prefix logic from base class
        return super().compute_name_contributions(global_state, max_global_state)

    def _get_copy_params(self) -> dict:
        """Return parameters needed to create a copy of this operation."""
        params = super()._get_copy_params()
        # Only include seq_names if explicitly set by user; auto-generated
        # names are regenerated by __init__ when seq_names is None.
        params["seq_names"] = self.seq_names if self._seq_names_explicit else None
        return params