import pathlib as pl
import numpy as np
from bagel.oracles.base import Oracle, OracleResult
from bagel.chain import Chain
# Simplified per-residue solubility contributions, keyed by single-letter
# amino acid code. SolubilityOracle.predict looks residues up in this table
# and scores any name that is not listed here as 0.0.
SOLUBILITY_SCORES = {
    # Residues with a positive contribution.
    "D": 1.0,
    "E": 1.0,
    "K": 0.9,
    "R": 0.8,
    "N": 0.7,
    "Q": 0.7,
    "S": 0.5,
    "T": 0.5,
    "H": 0.4,
    "G": 0.3,
    "A": 0.2,
    "P": 0.2,
    # Residues with a negative contribution.
    "Y": -0.1,
    "W": -0.3,
    "F": -0.5,
    "M": -0.4,
    "V": -0.6,
    "I": -0.7,
    "L": -0.7,
    "C": -0.2,
}
class SolubilityResult(OracleResult):
    """Output of a solubility prediction: per-residue scores plus an aggregate."""

    # Chains the prediction was computed for.
    input_chains: list[Chain]
    # One score per residue, flattened across all input chains.
    per_residue_scores: list[float]
    # Single aggregate score for the whole input.
    overall_score: float

    def save_attributes(self, filepath: pl.Path) -> None:
        """Write the per-residue scores to ``<filepath>.solubility.txt``.

        The ``.solubility.txt`` suffix is appended to the string form of
        ``filepath`` (no existing suffix is replaced). Only
        ``per_residue_scores`` is written; ``overall_score`` is not saved.

        Args:
            filepath: Base path (without the suffix) for the output file.
        """
        output_path = f"{filepath}.solubility.txt"
        np.savetxt(output_path, self.per_residue_scores)
class SolubilityOracle(Oracle):
    """Predicts sequence solubility from amino acid composition."""

    result_class = SolubilityResult

    def predict(self, chains: list[Chain]) -> SolubilityResult:
        """Score every residue of ``chains`` and average the scores.

        Each residue is looked up by ``residue.name`` in ``SOLUBILITY_SCORES``;
        names missing from the table contribute 0.0.

        Args:
            chains: Chains to score. Residues are visited chain by chain, in
                the order given by each chain's ``residues``.

        Returns:
            A ``SolubilityResult`` holding the input chains, the flattened
            per-residue scores, and their mean as ``overall_score``. When
            there are no residues at all, ``overall_score`` is 0.0.
        """
        per_residue = [
            SOLUBILITY_SCORES.get(residue.name, 0.0)
            for chain in chains
            for residue in chain.residues
        ]
        # np.mean([]) returns nan and emits a RuntimeWarning; fall back to the
        # same neutral 0.0 that unknown residues receive instead of leaking nan.
        overall = float(np.mean(per_residue)) if per_residue else 0.0
        return SolubilityResult(
            input_chains=chains,
            per_residue_scores=per_residue,
            overall_score=overall,
        )