Source code for scgo.param_presets

"""Parameter presets for SCGO campaigns."""

from __future__ import annotations

from typing import Any

from scgo.constants import (
    BOLTZMANN_K_EV_PER_K,
    DEFAULT_COMPARATOR_TOL,
    DEFAULT_ENERGY_TOLERANCE,
    DEFAULT_NEB_TANGENT_METHOD,
    DEFAULT_PAIR_COR_MAX,
)
from scgo.surface.config import SurfaceSystemConfig
from scgo.system_types import SYSTEM_TYPE_POLICIES, SystemType, get_system_policy

# Available MACE model names for use in calculator_kwargs["model_name"]
AVAILABLE_MACE_MODELS = [
    "mace_matpes_0",  # r2scan variant (default in MACE class)
    "mace_mp_small",  # Small MACE-MP
    "mace_mpa_medium",  # Medium MACE-MPA
    "mace_off_small",  # Small MACE-OFF
]

# Common fairchem pretrained names (see fairchem.core.calculate.pretrained_mlip)
AVAILABLE_UMA_MODELS = [
    "uma-s-1p2",
    "uma-s-1p1",
    "uma-m-1p1",
]

__all__ = [
    "AVAILABLE_MACE_MODELS",
    "AVAILABLE_UMA_MODELS",
    "TS_DEFAULTS_BY_SYSTEM_TYPE",
    "get_default_params",
    "get_minimal_ga_params",
    "get_testing_params",
    "get_torchsim_ga_params",
    "get_diversity_params",
    "get_high_energy_params",
    "get_ts_defaults",
    "get_ts_search_params",
    "get_default_uma_params",
    "get_uma_ga_benchmark_params",
]


# Per-system-type NEB defaults consumed by `get_ts_search_params` and
# `coerce_ts_params_to_runner_kwargs`. Keep `neb_align_endpoints` and
# `neb_interpolation_mic` coherent with `SystemPolicy.neb_disable_alignment` /
# `neb_force_mic` (an import-time assertion below guards against drift). Other
# knobs (n_images, fmax, steps, ...) are independent and benchmarked per type.
_GAS_TS_NEB_DEFAULTS: dict[str, Any] = {
    "neb_align_endpoints": True,
    "neb_interpolation_mic": False,
    "neb_surface_cell_remap": False,
    "neb_surface_lattice_rotation": False,
    "neb_surface_max_lattice_shift": 1,
    "neb_n_images": 5,
    "neb_spring_constant": 0.1,
    "neb_fmax": 0.05,
    "neb_steps": "auto",
    "neb_climb": False,
    "neb_perturb_sigma": 0.0,
    "neb_interpolation_method": "idpp",
    "neb_tangent_method": DEFAULT_NEB_TANGENT_METHOD,
    "torchsim_fmax": 0.05,
    "torchsim_max_steps": "auto",
}

_SURFACE_TS_NEB_DEFAULTS: dict[str, Any] = {
    "neb_align_endpoints": True,
    "neb_interpolation_mic": True,
    "neb_surface_cell_remap": True,
    "neb_surface_lattice_rotation": True,
    "neb_surface_max_lattice_shift": 1,
    "neb_n_images": 5,
    "neb_spring_constant": 0.1,
    "neb_fmax": 0.1,
    "neb_steps": 500,
    "neb_climb": False,
    "neb_perturb_sigma": 0.0,
    "neb_interpolation_method": "idpp",
    "neb_tangent_method": DEFAULT_NEB_TANGENT_METHOD,
    "torchsim_fmax": 0.1,
    "torchsim_max_steps": 500,
}

TS_DEFAULTS_BY_SYSTEM_TYPE: dict[SystemType, dict[str, Any]] = {
    "gas_cluster": dict(_GAS_TS_NEB_DEFAULTS),
    "gas_cluster_adsorbate": dict(_GAS_TS_NEB_DEFAULTS),
    "surface_cluster": dict(_SURFACE_TS_NEB_DEFAULTS),
    "surface_cluster_adsorbate": dict(_SURFACE_TS_NEB_DEFAULTS),
}


def _assert_ts_defaults_match_system_policies() -> None:
    """Guard against drift between TS defaults and ``SystemPolicy`` flags."""
    missing = set(SYSTEM_TYPE_POLICIES) - set(TS_DEFAULTS_BY_SYSTEM_TYPE)
    extra = set(TS_DEFAULTS_BY_SYSTEM_TYPE) - set(SYSTEM_TYPE_POLICIES)
    if missing or extra:
        raise RuntimeError(
            "TS_DEFAULTS_BY_SYSTEM_TYPE keys must match SYSTEM_TYPE_POLICIES "
            f"(missing={sorted(missing)!r}, extra={sorted(extra)!r})."
        )
    for st, defaults in TS_DEFAULTS_BY_SYSTEM_TYPE.items():
        policy = SYSTEM_TYPE_POLICIES[st]
        expected_align = not policy.neb_disable_alignment
        if defaults["neb_align_endpoints"] is not expected_align:
            raise RuntimeError(
                f"TS_DEFAULTS_BY_SYSTEM_TYPE[{st!r}]['neb_align_endpoints']="
                f"{defaults['neb_align_endpoints']!r} disagrees with "
                f"SystemPolicy.neb_disable_alignment={policy.neb_disable_alignment!r}."
            )
        if defaults["neb_interpolation_mic"] != policy.neb_force_mic:
            raise RuntimeError(
                f"TS_DEFAULTS_BY_SYSTEM_TYPE[{st!r}]['neb_interpolation_mic']="
                f"{defaults['neb_interpolation_mic']!r} disagrees with "
                f"SystemPolicy.neb_force_mic={policy.neb_force_mic!r}."
            )
        for key in ("neb_surface_cell_remap", "neb_surface_lattice_rotation"):
            if defaults.get(key, False) != getattr(policy, key):
                raise RuntimeError(
                    f"TS_DEFAULTS_BY_SYSTEM_TYPE[{st!r}][{key!r}]="
                    f"{defaults.get(key)!r} disagrees with "
                    f"SystemPolicy.{key}={getattr(policy, key)!r}."
                )


_assert_ts_defaults_match_system_policies()


[docs] def get_ts_defaults(system_type: SystemType) -> dict[str, Any]: """Return a fresh copy of NEB knob defaults for one system type. Single source of truth read by :func:`get_ts_search_params` and :func:`scgo.utils.ts_runner_kwargs.coerce_ts_params_to_runner_kwargs`. """ if system_type not in TS_DEFAULTS_BY_SYSTEM_TYPE: raise ValueError( f"Unsupported system_type={system_type!r}; expected one of " f"{sorted(TS_DEFAULTS_BY_SYSTEM_TYPE)!r}." ) return dict(TS_DEFAULTS_BY_SYSTEM_TYPE[system_type])
[docs] def get_default_params() -> dict[str, Any]: """Return the default SCGO parameter dictionary for global optimization. Suitable for ``run_go`` / ``run_go_ts`` as ``params`` / ``go_params``; pass as-is or override keys (omitted keys are filled via :func:`scgo.utils.run_helpers.initialize_params`). """ return { "validate_with_hessian": False, "calculator": "MACE", "seed": None, # Will be overridden by function parameter "calculator_kwargs": {"model_name": "mace_matpes_0"}, "fmax_threshold": 0.05, "check_hessian": True, "imag_freq_threshold": 50.0, "tag_final_minima": True, "connectivity_factor": 1.4, # Default connectivity factor for cluster validation "allow_cluster_fragmentation": False, "allow_adsorbate_surface_detachment": False, "enforce_adsorbate_subgraph_integrity": True, "freeze_adsorbate_internal_geometry": False, "fitness_strategy": "low_energy", # Default: minimize energy "diversity_reference_db": None, # For diversity strategy "diversity_max_references": 100, # Performance limit "diversity_update_interval": 5, # Update references every N iterations/generations "optimizer_params": { "simple": { "optimizer": "FIRE", "fmax": 0.05, "niter": 1, "niter_local_relaxation": "auto", "system_type": "gas_cluster", }, "bh": { "optimizer": "FIRE", "temperature": 500 * 8.617e-5, # 500K in eV "fmax": 0.05, "niter": "auto", "dr": 0.2, "move_fraction": 0.3, "niter_local_relaxation": "auto", "move_strategy": "random", "deduplicate": True, "energy_tolerance": DEFAULT_ENERGY_TOLERANCE, "comparator_tol": DEFAULT_COMPARATOR_TOL, "comparator_pair_cor_max": DEFAULT_PAIR_COR_MAX, "comparator_n_top": None, "fitness_strategy": None, # None = inherit from top-level "diversity_reference_db": None, # For diversity strategy "diversity_max_references": 100, # Performance limit "diversity_update_interval": 5, # Update references every N iterations "system_type": "gas_cluster", }, "ga": { "optimizer": "FIRE", "population_size": "auto", "niter": "auto", "niter_local_relaxation": "auto", "mutation_probability": 0.4, "offspring_fraction": 0.5, "fmax": 0.05, "vacuum": 10.0, "energy_tolerance": DEFAULT_ENERGY_TOLERANCE, "use_adaptive_mutations": True, "stagnation_trigger": 4, "stagnation_full_trigger": 8, "recovery_window": 2, "aggressive_burst_multiplier": 1.8, "max_mutation_probability": 0.65, "early_stopping_niter": 10, # Stop if no improvement after N generations "n_jobs_population_init": -2, # Parallel batch init: -2 = all CPUs except one "n_jobs_offspring": -2, # Parallel default aligned with n_jobs_population_init "batch_size": None, "relaxer": None, "fitness_strategy": None, # None = inherit from top-level "diversity_reference_db": None, # For diversity strategy "diversity_max_references": 100, # Performance limit "diversity_update_interval": 5, # Update references every N generations "system_type": "gas_cluster", }, }, }
[docs] def get_minimal_ga_params( seed: int | None = None, model_name: str | None = None, ) -> dict[str, Any]: """Return compact GA-focused parameters derived from defaults. Uses sequential population init and offspring work (``n_jobs_*`` set to 1) so runners stay easy to reason about. Pass as-is to ``run_*`` or override keys; omitted keys are filled via :func:`scgo.utils.run_helpers.initialize_params`. """ params = get_default_params() # Override GA-specific settings for faster/leaner runs params["optimizer_params"]["ga"].update( { "niter": "auto", "population_size": "auto", "mutation_probability": 0.4, "energy_tolerance": DEFAULT_ENERGY_TOLERANCE, "niter_local_relaxation": "auto", "n_jobs_population_init": 1, # Sequential for runners (explicit control) "n_jobs_offspring": 1, # Match init: avoid parallel offspring when init is serial } ) # Set model name if provided if model_name is not None: params["calculator_kwargs"]["model_name"] = model_name # Set seed if provided if seed is not None: params["seed"] = seed return params
[docs] def get_testing_params() -> dict[str, Any]: """Return fast, low-cost parameters for tests (EMT, fewer iterations). Complete preset based on :func:`get_default_params`; pass as-is to ``run_*`` or override keys (omitted keys are filled via :func:`scgo.utils.run_helpers.initialize_params`). """ params = get_default_params() params["calculator"] = "EMT" params["calculator_kwargs"] = {} params["optimizer_params"]["simple"].update( { "niter": 1, "niter_local_relaxation": 2, } ) params["optimizer_params"]["bh"].update( { "niter": 5, "niter_local_relaxation": 2, } ) params["optimizer_params"]["ga"].update( { "population_size": 5, "offspring_fraction": 0.5, "niter": 2, "niter_local_relaxation": 2, "n_jobs_population_init": -2, } ) return params
def _get_base_ga_benchmark_params(seed: int) -> dict[str, Any]: """Return GA benchmark parameters derived from defaults.""" params = get_default_params() params["seed"] = seed params["calculator_kwargs"]["default_dtype"] = "float32" # Customize GA parameters for benchmarking params["optimizer_params"]["ga"].update( { "fmax": 0.05, "niter_local_relaxation": 200, "niter": 10, "population_size": 50, "n_jobs_population_init": -2, # Parallel for benchmarks }, ) return params def _attach_fairchem_torchsim_relaxer( ga: dict[str, Any], calculator_kwargs: dict[str, Any], *, max_steps: int, autobatcher: bool | None = None, expected_max_atoms: int | None = None, ) -> None: """Set ``ga['relaxer']`` to a FairChem-backed :class:`TorchSimBatchRelaxer`.""" from scgo.calculators.torchsim_helpers import TorchSimBatchRelaxer fmax_val = float(ga.get("fmax", 0.05)) ga["relaxer"] = TorchSimBatchRelaxer( model_kind="fairchem", fairchem_model_name=calculator_kwargs["model_name"], fairchem_task_name=calculator_kwargs.get("task_name"), force_tol=fmax_val, optimizer_name="fire", max_steps=max_steps, dtype=None, # TorchSim default per model; keep lazy/portable autobatcher=autobatcher, expected_max_atoms=expected_max_atoms, )
[docs] def get_uma_ga_benchmark_params( seed: int, *, model_name: str = "uma-s-1p2", uma_task: str = "oc25", ) -> dict[str, Any]: """GA benchmark parameters matching :func:`_get_base_ga_benchmark_params` but with UMA. Tuned for regression and profiling alongside the MACE TorchSim benchmark preset (:func:`get_torchsim_ga_params`): fixed local relaxation budget from the base preset (200 steps, not ``"auto"``), with autobatching and ``expected_max_atoms=600`` for stable GPU memory behaviour. Pass as-is to ``run_*`` or override keys. For general UMA runs with default GA ``"auto"`` local steps, use :func:`get_default_uma_params` instead. """ params = _get_base_ga_benchmark_params(seed) params["calculator"] = "UMA" params["calculator_kwargs"] = {"model_name": model_name, "task_name": uma_task} ga = params["optimizer_params"]["ga"] niter_local = ga.get("niter_local_relaxation", 200) max_steps = 200 if niter_local == "auto" else int(niter_local) _attach_fairchem_torchsim_relaxer( ga, params["calculator_kwargs"], max_steps=max_steps, autobatcher=True, expected_max_atoms=600, ) return params
[docs] def get_default_uma_params() -> dict[str, Any]: """Default SCGO parameters using the UMA calculator (fairchem-core). Pass as-is to ``run_*`` or override keys. For typical campaigns with default GA settings: ``niter_local_relaxation`` is ``"auto"`` and the TorchSim relaxer uses 250 max steps in that case. Autobatcher and memory-probe defaults follow :class:`TorchSimBatchRelaxer` (``autobatcher`` None: CUDA on, CPU off). Use :func:`get_uma_ga_benchmark_params` when you need the same structure as the MACE benchmark preset (fixed local steps, explicit autobatcher/expected_max_atoms). """ params = get_default_params() params["calculator"] = "UMA" params["calculator_kwargs"] = { "model_name": "uma-s-1p2", "task_name": "oc25", } ga = params.get("optimizer_params", {}).get("ga", {}) niter_local = ga.get("niter_local_relaxation", "auto") max_steps = 250 if niter_local == "auto" else int(niter_local) _attach_fairchem_torchsim_relaxer( ga, params["calculator_kwargs"], max_steps=max_steps, autobatcher=None, expected_max_atoms=None, ) return params
[docs] def get_torchsim_ga_params( *, system_type: SystemType, surface_config: SurfaceSystemConfig | None = None, seed: int | None = None, model_name: str | None = None, ) -> dict[str, Any]: """Return GO params using TorchSim relaxer (requires ``scgo[mace]``). Mirrors :func:`get_ts_search_params` call style by requiring ``system_type`` and accepting ``surface_config`` / ``seed`` explicitly. Pass as-is to ``run_*`` or override keys. When ``model_name`` is set, it is written to ``calculator_kwargs`` and the :class:`~scgo.calculators.torchsim_helpers.TorchSimBatchRelaxer` uses the same MACE model name as the ASE calculator. """ import torch from scgo.calculators.torchsim_helpers import TorchSimBatchRelaxer policy = get_system_policy(system_type) if policy.uses_surface and not isinstance(surface_config, SurfaceSystemConfig): raise ValueError( f"system_type={system_type!r} requires surface_config to be provided " "as a SurfaceSystemConfig when building go_params." ) effective_seed = 0 if seed is None else int(seed) params = _get_base_ga_benchmark_params(effective_seed) if seed is None: params["seed"] = None if model_name is not None: params["calculator_kwargs"]["model_name"] = model_name mace_model = params["calculator_kwargs"].get("model_name", "mace_matpes_0") fmax_val = params["optimizer_params"]["ga"]["fmax"] niter_local = params["optimizer_params"]["ga"]["niter_local_relaxation"] params["optimizer_params"]["ga"].update( { "relaxer": TorchSimBatchRelaxer( force_tol=fmax_val, optimizer_name="fire", mace_model_name=mace_model, seed=seed, max_steps=niter_local, dtype=torch.float32, autobatcher=True, expected_max_atoms=600, ), }, ) for algo in ("simple", "bh", "ga"): params["optimizer_params"][algo]["system_type"] = system_type if policy.uses_surface: params["surface_config"] = surface_config for algo in ("simple", "bh", "ga"): params["optimizer_params"][algo]["surface_config"] = surface_config return params
[docs] def get_diversity_params( reference_db_glob: str = "**/*.db", max_references: int = 100, update_interval: int = 5, ) -> dict[str, Any]: """Return params for diversity-based optimization (reference DB, intervals). Pass as-is to ``run_*`` or override keys. ``reference_db_glob`` must match at least one database with reference structures when you run; there is no runtime check that the glob is non-empty. """ params = get_default_params() params["fitness_strategy"] = "diversity" params["diversity_reference_db"] = reference_db_glob params["diversity_max_references"] = max_references params["diversity_update_interval"] = update_interval # Diversity strategy works better with larger populations # Keep auto settings but note they will scale appropriately return params
[docs] def get_high_energy_params() -> dict[str, Any]: """Return params that bias exploration toward high-energy structures. Pass as-is to ``run_*`` or override keys. Sets top-level ``fitness_strategy`` to ``high_energy`` (used by BH and GA). Basin hopping additionally uses a higher temperature. GA hyperparameters are otherwise unchanged—override ``optimizer_params['ga']`` if you need stronger exploration there. """ params = get_default_params() params["fitness_strategy"] = "high_energy" # Increase temperature for BH to accept high-energy moves # Default is 500K, increase to 1000K for better high-energy exploration params["optimizer_params"]["bh"]["temperature"] = ( 1000 * BOLTZMANN_K_EV_PER_K ) # 1000K return params
[docs] def get_ts_search_params( calculator: str = "MACE", calculator_kwargs: dict[str, Any] | None = None, *, system_type: SystemType, surface_config: SurfaceSystemConfig | None = None, seed: int | None = None, ) -> dict[str, Any]: """TS-only settings (NEB, calculator, pairing). Not merged with GO defaults. Suitable for ``run_ts_search`` / ``run_go_ts`` as ``ts_params``; pass as-is or override keys (omitted keys are filled via :func:`scgo.utils.run_helpers.initialize_ts_params`). For EMT or other non-TorchSim calculators, set ``use_torchsim=False`` on the returned dict before running. `system_type` is used to shape technical defaults. For surface system types, `surface_config` is required and stored in the returned dictionary so TS loading/validation always receives explicit slab context (no guessing). If ``seed`` is set, it is stored in the returned dict; :func:`run_go_ts` / ``run_ts_*`` require it to be consistent with ``go_params['seed']`` and the ``seed=`` run argument. The ``connectivity_factor`` key sets the global connectivity threshold for cluster validation (default 1.4). NEB endpoint alignment is on by default (``neb_align_endpoints=True``). Surface system types also enable ``neb_interpolation_mic``, ``neb_surface_cell_remap``, ``neb_surface_lattice_rotation``, and ``neb_surface_max_lattice_shift`` (default ``1``) so path interpolation starts from lattice-compatible aligned endpoints. """ policy = get_system_policy(system_type) if policy.uses_surface and not isinstance(surface_config, SurfaceSystemConfig): raise ValueError( f"system_type={system_type!r} requires surface_config to be provided " "as a SurfaceSystemConfig when building ts_params." ) if calculator_kwargs is None: calc_u = str(calculator).strip().upper() calculator_kwargs = {"model_name": "mace_matpes_0"} if calc_u == "MACE" else {} params: dict[str, Any] = { "calculator": calculator, "calculator_kwargs": dict(calculator_kwargs), "connectivity_factor": 1.4, "allow_cluster_fragmentation": False, "allow_adsorbate_surface_detachment": False, "enforce_adsorbate_subgraph_integrity": True, "max_pairs": None, "energy_gap_threshold": 2.0, "similarity_tolerance": DEFAULT_COMPARATOR_TOL, "similarity_pair_cor_max": 0.1, "use_torchsim": True, "torchsim_batch_size": 5, "use_parallel_neb": False, "dedupe_minima": True, "minima_energy_tolerance": DEFAULT_ENERGY_TOLERANCE, } params.update(get_ts_defaults(system_type)) if policy.uses_surface: params["surface_config"] = surface_config if seed is not None: params["seed"] = int(seed) return params