Bump hunt example#

This workflow closely follows the three-class softmax example, but here the inference is performed on an unrelated event variable (“mass”), mirroring a bump-hunt workflow such as those used in Higgs-to-γγ analyses. Hence, the events in a category are not all contained in a single bin (as they were in the other workflows) but are spread out over the full mass range, and only partly contribute to the significance obtained in the small signal window.

Generate two resonant signals and five continuum backgrounds (building on the three-class softmax example).
Assign a diphoton mass to every event (Gaussian for signal, exponentials for the continuum).
Technically, we could perform the categorization optimization purely on the events in a small signal window around 125 GeV, but in practice this approach often suffers from low background statistics.
Therefore, we use the full power of the continuum background simulation by including all events in the gradient calculations, but reweighting the yield to match the expectation in the signal window (125 ± σ) during training. For this, we fit the background with exponentials in each category.

Run it with, for instance:

python examples/bumphunt_example/run_example.py \
    --epochs 400 \
    --gato-bins 5 8 \
    --out PlotsBumpHunt

Outputs land under examples/bumphunt_example/<out>/ and contain:

Inclusive diphoton-mass spectra before categorisation (linear/log).
Per-category diphoton spectra for all signals/backgrounds.
Loss, penalty, and bias histories with temperature annotations.
Boundary snapshots + GIFs showing the 2-D category evolution.
Yield vs. statistical-uncertainty bar plots per category.
Saved checkpoints for each trained configuration.

Source#

Diphoton bump-hunt optimisation script#

import argparse
import os

import numpy as np
import tensorflow as tf

from gatohep.losses import high_bkg_uncertainty_penalty, low_bkg_penalty
from gatohep.models import gato_gmm_model
from gatohep.plotting_utils import (
    assign_bins_and_order,
    make_gif,
    plot_bias_history,
    plot_bin_boundaries_2D,
    plot_category_mass_spectra,
    plot_history,
    plot_inclusive_mass,
    plot_significance_comparison,
    plot_yield_vs_uncertainty,
)
from gatohep.utils import (
    LearningRateScheduler,
    TemperatureScheduler,
    asymptotic_significance,
    build_category_mass_maps,
    compute_mass_reweight_factors,
    convert_mass_data_to_tensors,
    generate_resonance_toy_data,
    slice_to_2d_features,
)


class DiphotonSoftmax(gato_gmm_model):
    """
    Two-dimensional GATO GMM model with a mass-window selection for signals.

    The loss is the negative geometric mean of the significances of
    ``signal1`` and ``signal2``. Signal events only contribute when their
    mass lies inside ``[125 - mass_sigma, 125 + mass_sigma]``; the weights of
    all other processes are passed on unchanged.

    Parameters
    ----------
    n_cats : int
        Number of categories, forwarded to ``gato_gmm_model``.
    temperature : float
        Temperature forwarded to ``gato_gmm_model``.
    mass_sigma : float
        Half-width of the signal window centred on 125.
    """

    def __init__(self, n_cats, temperature=0.5, mass_sigma=1.5):
        super().__init__(
            n_cats=n_cats,
            dim=2,
            temperature=temperature,
            mean_norm="softmax",
            cov_offdiag_damping=0.1,
            name="gato_diphoton",
        )
        center = tf.constant(125.0, dtype=tf.float32)
        half_width = tf.constant(float(mass_sigma), dtype=tf.float32)
        self.mass_center = center
        self.mass_sigma = half_width
        # Inclusive edges of the signal window used in ``call``.
        self.mass_sig_low = center - half_width
        self.mass_sig_high = center + half_width

    def call(self, data_dict, reweight=None, reweight_processes=None):
        """
        Evaluate the loss and the background details for one pass.

        Parameters
        ----------
        data_dict : Mapping[str, Mapping[str, tensor]]
            Per-process entries with ``NN_output`` and ``weight``; the
            ``signal1``/``signal2`` entries must also provide ``mass``.
        reweight, reweight_processes
            Forwarded to ``get_differentiable_significance`` as
            ``background_reweight`` and ``reweight_processes``.

        Returns
        -------
        tuple
            ``(loss, bkg_yield, bkg_sum_w2, z1, z2)`` where ``loss`` is
            ``-sqrt(z1 * z2)``.
        """
        signal_names = ("signal1", "signal2")

        def windowed_entry(proc, tensors):
            # Signals get zero weight outside the window; the rest is untouched.
            event_weight = tensors["weight"]
            if proc in signal_names:
                mass = tensors["mass"]
                inside = tf.logical_and(
                    mass >= self.mass_sig_low, mass <= self.mass_sig_high
                )
                event_weight = event_weight * tf.cast(inside, tf.float32)
            return {
                "NN_output": tensors["NN_output"],
                "weight": event_weight,
            }

        windowed = {
            proc: windowed_entry(proc, tensors)
            for proc, tensors in data_dict.items()
        }

        z_by_signal, bkg_yield, bkg_sum_w2 = self.get_differentiable_significance(
            windowed,
            signal_labels=list(signal_names),
            background_reweight=reweight,
            reweight_processes=reweight_processes,
            return_details=True,
        )
        z1 = z_by_signal["signal1"]
        z2 = z_by_signal["signal2"]
        # Negative geometric mean: minimising it raises both significances.
        return -tf.sqrt(z1 * z2), bkg_yield, bkg_sum_w2, z1, z2


def compute_significances_from_assignments(
    assignments, data_dict, n_bins, mass_low, mass_high
):
    """
    Compute both signal significances from hard per-event bin assignments.

    Weights are summed per bin for ``signal1``, ``signal2`` and everything
    else (treated as background), using only events with a non-negative
    assignment and a mass inside ``[mass_low, mass_high]`` (both inclusive).
    For each signal the other signal is added to the background, and the
    per-bin significances are combined in quadrature.

    Parameters
    ----------
    assignments : dict[str, np.ndarray]
        Hard bin indices per process (negative entries ignored).
    data_dict : Mapping[str, pandas.DataFrame]
        Event tables containing ``mass`` and ``weight`` columns.
    n_bins : int
        Number of categories / bins.
    mass_low, mass_high : float
        Higgs-window boundaries.

    Returns
    -------
    tuple[float, float]
        Significances for ``signal1`` and ``signal2``.
    """
    yields = {
        label: np.zeros(n_bins, dtype=np.float64)
        for label in ("signal1", "signal2", "background")
    }

    for proc, assign in assignments.items():
        if assign.size == 0:
            continue
        frame = data_dict[proc]
        masses = frame["mass"].values
        weights = frame["weight"].values
        selected = (assign >= 0) & (masses >= mass_low) & (masses <= mass_high)
        if not np.any(selected):
            continue
        per_bin = np.zeros(n_bins, dtype=np.float64)
        # Unbuffered add so repeated bin indices accumulate correctly.
        np.add.at(per_bin, assign[selected], weights[selected])
        # Any process that is not one of the two signals counts as background.
        target = proc if proc in ("signal1", "signal2") else "background"
        yields[target] += per_bin

    sig1 = tf.constant(yields["signal1"], dtype=tf.float32)
    sig2 = tf.constant(yields["signal2"], dtype=tf.float32)
    background = tf.constant(yields["background"], dtype=tf.float32)

    def combined(per_bin_z):
        # Quadrature sum over bins.
        return float(tf.sqrt(tf.reduce_sum(per_bin_z**2)))

    z1 = combined(asymptotic_significance(sig1, background + sig2))
    z2 = combined(asymptotic_significance(sig2, background + sig1))
    return z1, z2


def build_argmax_assignments(data_dict, nbins, sig_index):
    """
    Bin events equidistantly in one softmax component.

    The component ``sig_index`` of ``NN_output`` is binned into ``nbins``
    equal-width bins between 0.33 and 1.0; values outside that range are
    clipped into the first or last bin. Events whose argmax is not
    ``sig_index`` get the index ``-1``, reproducing the baseline used in the
    three-class example.

    Parameters
    ----------
    data_dict : Mapping[str, pandas.DataFrame]
        Event tables with an ``NN_output`` column holding one score vector
        per event.
    nbins : int
        Number of equidistant bins.
    sig_index : int
        Softmax component used for both the argmax selection and the binning.

    Returns
    -------
    dict[str, np.ndarray]
        ``int32`` bin indices per process (empty array for empty tables).
    """
    bin_edges = np.linspace(0.33, 1.0, nbins + 1, dtype=np.float32)
    last_bin = nbins - 1

    def assign_one(frame):
        if frame.empty:
            return np.array([], dtype=np.int32)
        scores = np.stack(frame["NN_output"].values)
        winner = np.argmax(scores, axis=1)
        component = scores[:, sig_index]
        # digitize is 1-based for in-range values, hence the shift and clip.
        raw_bins = np.digitize(component, bin_edges, right=False) - 1
        clipped = np.clip(raw_bins, 0, last_bin)
        return np.where(winner == sig_index, clipped, -1).astype(np.int32)

    return {proc: assign_one(frame) for proc, frame in data_dict.items()}


def main():
    """
    Run the diphoton bump-hunt example end to end.

    Parses the command-line options, generates the toy data, evaluates the
    argmax-based baseline binning for 2, 5 and 10 bins, trains one
    ``DiphotonSoftmax`` model per entry of ``--gato-bins`` and writes plots
    and checkpoints below ``examples/bumphunt_example/<out>``.
    """
    parser = argparse.ArgumentParser(
        description="Diphoton bump-hunt optimisation with GATO."
    )
    parser.add_argument("--epochs", type=int, default=200)
    parser.add_argument("--gato-bins", nargs="+", type=int, default=[3, 5])
    parser.add_argument("--lam-yield", type=float, default=0.0)
    parser.add_argument("--lam-unc", type=float, default=0.0)
    parser.add_argument("--thr-yield", type=float, default=10)
    parser.add_argument("--thr-unc", type=float, default=0.1)
    parser.add_argument("--n-bkg", type=int, default=1_000_000)
    parser.add_argument("--n-signal", type=int, default=100_000)
    parser.add_argument("--rewt-interval", type=int, default=50)
    parser.add_argument("--mass-sigma", type=float, default=1.5)
    parser.add_argument("--out", type=str, default="PlotsDiphotonBumpHunt")
    args = parser.parse_args()

    # The output directory is relative to the current working directory.
    path_plots = os.path.join("examples", "bumphunt_example", args.out)
    os.makedirs(path_plots, exist_ok=True)

    data_full = generate_resonance_toy_data(
        n_signal1=args.n_signal,
        n_signal2=args.n_signal,
        n_bkg=args.n_bkg,
        mass_sigma=args.mass_sigma,
        background_slopes=(0.05, 0.04, 0.035, 0.03, 0.025),
    )
    data_2d = slice_to_2d_features(data_full)
    tensor_data = convert_mass_data_to_tensors(data_2d)

    # Signal window 125 +/- mass_sigma, used for the hard-assignment
    # significances and for the reweight factors below.
    sig_low = 125.0 - args.mass_sigma
    sig_high = 125.0 + args.mass_sigma
    plot_inclusive_mass(data_2d, path_plots, sig_scales=(50, 250))

    baseline_bins = [2, 5, 10]
    baseline_results = {"signal1": {}, "signal2": {}}
    gato_results = {"signal1": {}, "signal2": {}}

    # Baseline: for each signal, bin in that signal's softmax component and
    # keep only the significance of the signal the binning was built for.
    for nbins in baseline_bins:
        for sig_idx, sig_name in enumerate(("signal1", "signal2")):
            assignments = build_argmax_assignments(data_2d, nbins, sig_idx)
            z1, z2 = compute_significances_from_assignments(
                assignments,
                data_2d,
                nbins,
                sig_low,
                sig_high,
            )
            baseline_results[sig_name][nbins] = z1 if sig_idx == 0 else z2

    # One independent model, optimizer and pair of schedulers per category count.
    for n_cats in args.gato_bins:
        print(f"\n--- Optimising {n_cats} bins ---")
        model = DiphotonSoftmax(
            n_cats=n_cats, temperature=1.0, mass_sigma=args.mass_sigma
        )
        optimizer = tf.keras.optimizers.RMSprop(0.05)
        lr_scheduler = LearningRateScheduler(
            optimizer,
            lr_initial=0.05,
            lr_final=0.001,
            total_epochs=args.epochs,
            mode="cosine",
        )
        temp_scheduler = TemperatureScheduler(
            model,
            t_initial=1.0,
            t_final=0.1,
            total_epochs=args.epochs,
            mode="cosine",
        )

        # Defined inside the loop so each traced step closes over this
        # iteration's model and optimizer.
        @tf.function
        def train_step(tdata, reweight_tensor, lamY, lamU, thrY, thrU):
            with tf.GradientTape() as tape:
                loss, B_sig, B_sig_w2, z1, z2 = model.call(tdata, reweight_tensor)
                penalty_y = low_bkg_penalty(B_sig, threshold=thrY)
                penalty_u = high_bkg_uncertainty_penalty(
                    B_sig_w2, B_sig, rel_threshold=thrU
                )
                # Total objective: loss plus the two weighted penalty terms.
                total = loss + lamY * penalty_y + lamU * penalty_u
            grads = tape.gradient(total, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            return total, loss, penalty_y, penalty_u, z1, z2, B_sig

        # Start with unit reweight factors; they are replaced at epoch 0 below.
        reweight = tf.ones(n_cats, dtype=tf.float32)
        loss_history = []
        penalty_y_hist = []
        penalty_u_hist = []
        continuum_history = []
        bias_history = []
        bias_epochs = []
        temp_history = []
        path_bins = os.path.join(path_plots, f"gato_{n_cats}bins")
        os.makedirs(path_bins, exist_ok=True)
        frames_dir = os.path.join(path_bins, "boundary_frames")
        os.makedirs(frames_dir, exist_ok=True)
        boundary_frames = []

        for epoch in range(args.epochs):
            # Refresh the per-category reweight factors every rewt_interval
            # epochs; max(1, ...) avoids a modulo by zero for an interval of 0.
            if epoch % max(1, args.rewt_interval) == 0:
                factors = compute_mass_reweight_factors(
                    model,
                    data_2d,
                    signal_labels=["signal1", "signal2"],
                    mass_sig_low=sig_low,
                    mass_sig_high=sig_high,
                )
                reweight = tf.constant(factors, dtype=tf.float32)
                print(f"Updated reweight factors: {factors}")

            _, loss, penY, penU, z1, z2, B_bins = train_step(
                tensor_data,
                reweight,
                args.lam_yield,
                args.lam_unc,
                args.thr_yield,
                args.thr_unc,
            )
            # Schedulers are updated after the step of the same epoch.
            lr_scheduler.update(epoch)
            temp_scheduler.update(epoch)

            loss_history.append(float(loss.numpy()))
            penalty_y_hist.append(float(penY.numpy()))
            penalty_u_hist.append(float(penU.numpy()))
            reweight_np = reweight.numpy()
            B_np = B_bins.numpy()
            # Divide the reweight factors (floored at 1e-6) back out of the
            # per-category background yield. Presumably this recovers the
            # full-range continuum yield named in the plot label -- TODO confirm.
            continuum_history.append(
                B_np / np.maximum(reweight_np, 1e-6)
            )

            # Diagnostics every 10 epochs and on the final epoch.
            if epoch % 10 == 0 or epoch == args.epochs - 1:
                # Read the rate from `learning_rate`, falling back to `lr`, and
                # convert to float whether or not the value has `.numpy()`.
                lr_value = getattr(optimizer, "learning_rate", getattr(optimizer, "lr", None))
                lr_value = float(lr_value.numpy()) if hasattr(lr_value, "numpy") else float(lr_value)
                # NOTE(review): if model.temperature is a mutable variable rather
                # than a plain number, this stores a reference, not a snapshot --
                # confirm against gato_gmm_model / TemperatureScheduler.
                temperature = model.temperature
                temp_history.append(temperature)
                bias_input = {
                    p: {
                        "NN_output": tensor_data[p]["NN_output"],
                        "weight": tensor_data[p]["weight"],
                    }
                    for p in tensor_data
                }
                bias_vec = model.get_bias(bias_input)
                # Track the mean absolute bias as a single number per snapshot.
                bias_history.append(float(np.mean(np.abs(bias_vec))))
                bias_epochs.append(epoch)
                boundary_fname = os.path.join(frames_dir, f"boundary_{epoch:04d}.png")
                plot_bin_boundaries_2D(
                    model,
                    list(range(n_cats)),
                    boundary_fname,
                    resolution=600,
                    annotation=f"Epoch {epoch}",
                )
                boundary_frames.append(boundary_fname)
                print(
                    f"[{epoch:04d}] loss={loss.numpy():.4f} "
                    f"Z1={z1.numpy():.3f} Z2={z2.numpy():.3f} lr={lr_value:.5f}"
                )

        ckpt_dir = os.path.join(path_plots, "checkpoints", f"{n_cats}_bins")
        os.makedirs(ckpt_dir, exist_ok=True)
        model.save(ckpt_dir)

        # Final evaluation with the last reweight factors (no gradient step).
        loss_eval = model.call(tensor_data, reweight)
        _, _, _, z1_final, z2_final = loss_eval
        print(
            f"Final significances for {n_cats} bins: "
            f"Z(signal1)={float(z1_final.numpy()):.3f}, "
            f"Z(signal2)={float(z2_final.numpy()):.3f}"
        )

        # NOTE(review): loss_history holds -sqrt(z1 * z2), i.e. the negative of
        # the quantity named in the y-axis label below.
        plot_history(
            np.array(loss_history),
            os.path.join(path_bins, f"loss_{n_cats}.pdf"),
            y_label="Geometric mean significance",
            x_label="Epoch",
        )
        plot_history(
            np.array(penalty_y_hist),
            os.path.join(path_bins, f"penalty_yield_{n_cats}.pdf"),
            y_label="Low background penalty",
            x_label="Epoch",
        )
        plot_history(
            np.array(penalty_u_hist),
            os.path.join(path_bins, f"penalty_unc_{n_cats}.pdf"),
            y_label="High-uncertainty penalty",
            x_label="Epoch",
        )
        # The same continuum history is drawn twice: linear and log scale.
        plot_history(
            np.array(continuum_history),
            os.path.join(path_bins, f"continuum_background_{n_cats}.pdf"),
            y_label="Continuum background (100-180 GeV)",
            x_label="Epoch",
            boundaries=True,
            boundary_labels=[f"Cat. {i}" for i in range(n_cats)],
        )
        plot_history(
            np.array(continuum_history),
            os.path.join(path_bins, f"continuum_background_{n_cats}_log.pdf"),
            y_label="Continuum background (100-180 GeV)",
            x_label="Epoch",
            boundaries=True,
            log_scale=True,
            boundary_labels=[f"Cat. {i}" for i in range(n_cats)],
        )
        plot_bias_history(
            bias_history,
            os.path.join(path_bins, f"bias_history_{n_cats}.pdf"),
            epochs=bias_epochs,
            temp_points=temp_history,
            temp_label="Temperature",
        )

        # Hard category indices per process from the trained model.
        raw_assign = model.get_bin_indices(
            {p: {"NN_output": tensor_data[p]["NN_output"]} for p in tensor_data}
        )
        raw_assign_np = {k: v.numpy() for k, v in raw_assign.items()}

        # NOTE(review): only `order` is used further down; `assign_np` and
        # `inv` are not referenced again in this function.
        assign_dict, order, _, inv = assign_bins_and_order(model, data_2d, reduce=False)
        assign_np = {k: np.asarray(v) for k, v in assign_dict.items()}

        # Significances from hard assignments inside the signal window.
        z1_opt, z2_opt = compute_significances_from_assignments(
            raw_assign_np,
            data_2d,
            n_cats,
            sig_low,
            sig_high,
        )
        gato_results["signal1"][n_cats] = z1_opt
        gato_results["signal2"][n_cats] = z2_opt
        per_cat_hists = build_category_mass_maps(raw_assign_np, data_2d, n_cats)
        plot_category_mass_spectra(
            per_cat_hists,
            os.path.join(path_bins, "mass_spectra"),
            sig_scales=(2, 10),
        )

        plot_bin_boundaries_2D(
            model,
            order,
            os.path.join(path_bins, f"bin_boundaries_{n_cats}_bins.pdf"),
        )

        if boundary_frames:
            gif_path = os.path.join(path_bins, f"boundary_evolution_{n_cats}.gif")
            make_gif(boundary_frames, gif_path, interval=500)
        B_sorted, rel_unc, _ = model.compute_hard_bkg_stats(
            {p: {"NN_output": tensor_data[p]["NN_output"], "weight": tensor_data[p]["weight"]} for p in tensor_data},
            signal_labels=["signal1", "signal2"],
        )
        plot_yield_vs_uncertainty(
            B_sorted,
            rel_unc,
            output_filename=os.path.join(path_bins, f"yield_unc_{n_cats}.pdf"),
        )
        plot_yield_vs_uncertainty(
            B_sorted,
            rel_unc,
            output_filename=os.path.join(path_bins, f"yield_unc_{n_cats}_log.pdf"),
            log=True,
        )

    # Re-key the baseline results from n to 2 * n + 1 for the comparison plot.
    # Presumably this is the total category count (n bins per signal plus one
    # remainder category) -- TODO confirm.
    remapped_baseline = {
        sig: {2 * n + 1: baseline_results[sig][n] for n in baseline_results[sig]}
        for sig in baseline_results
    }
    plot_significance_comparison(
        remapped_baseline,
        gato_results,
        os.path.join(path_plots, "significance_comparison.pdf"),
    )

# Run the example only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Bump hunt example#

Source#

This Page