tests: incorrect number of posterior samples #1473

Open
vagechirkov opened this issue Mar 18, 2025 · 2 comments
Labels
bug (Something isn't working) · improving-tests (Everything to do with the sbi test suite)

Comments

@vagechirkov (Collaborator)

🐛 Bug Description

I was working on PR #1461 (issue #1428). I wanted to run all tests quickly (even if they fail), just to exercise the fixtures, so I set num_samples=10 and steps=5 for posterior sampling in test_npse_iid_inference. Everything seems to work fine for the None and Gaussian priors, but something odd happens with the uniform prior.

🟢 Gaussian prior: [screenshots of test output]
🟢 None prior: [screenshots of test output]
🔴 Uniform prior: [screenshots of test output]

🔄 Steps to Reproduce

Run the tests:

pytest tests/linearGaussian_npse_test.py::test_npse_iid_inference
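
Most parameter combinations are marked slow, so the fast subset can be selected with standard pytest marker filtering (assuming the slow marker is registered in the suite's config):

pytest tests/linearGaussian_npse_test.py::test_npse_iid_inference -m "not slow"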

Full test code:

# Imports (assumed; module paths follow the sbi repo layout).
import pytest
from torch import eye, ones, zeros
from torch.distributions import MultivariateNormal

from sbi.inference import NPSE
from sbi.simulators.linear_gaussian import (
    linear_gaussian,
    samples_true_posterior_linear_gaussian_uniform_prior,
    true_posterior_linear_gaussian_mvn_prior,
)
from sbi.utils import BoxUniform
from tests.test_utils import check_c2st

DEFAULT_USE = [
    pytest.param(("ve", None), id="ve-None"),
    pytest.param(("vp", "gaussian"), id="vp-gaussian"),
]

SLOW_COMBINATIONS = [
    pytest.param(("vp", "uniform"), id="vp-uniform", marks=pytest.mark.slow),
    pytest.param(("vp", None), id="vp-None", marks=pytest.mark.slow),
    pytest.param(("ve", "gaussian"), id="ve-gaussian", marks=pytest.mark.slow),
    pytest.param(("ve", "uniform"), id="ve-uniform", marks=pytest.mark.slow),
    pytest.param(("subvp", "gaussian"), id="subvp-gaussian", marks=pytest.mark.slow),
    pytest.param(("subvp", "uniform"), id="subvp-uniform", marks=pytest.mark.slow),
    pytest.param(("subvp", None), id="subvp-None", marks=pytest.mark.slow),
]


@pytest.fixture(scope="module")
def npse_trained_model(request):
    """Module-scoped fixture that trains a score estimator for NPSE tests."""
    sde_type, prior_type = request.param
    num_dim = 2
    num_simulations = 5

    # likelihood_mean will be likelihood_shift+theta
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    if prior_type == "gaussian" or (prior_type is None):
        prior_mean = zeros(num_dim)
        prior_cov = eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
        prior_npse = prior if prior_type is None else None
    elif prior_type == "uniform":
        prior = BoxUniform(-2 * ones(num_dim), 2 * ones(num_dim))
        prior_npse = prior

    # This checks that our method for handling "general" priors works,
    # i.e., when NPSE does not get a proper prior passed by the user.
    inference = NPSE(prior_npse, show_progress_bars=True, sde_type=sde_type)

    theta = prior.sample((num_simulations,))
    x = linear_gaussian(theta, likelihood_shift, likelihood_cov)

    score_estimator = inference.append_simulations(theta, x).train(
        stop_after_epochs=200
    )

    return {
        "score_estimator": score_estimator,
        "inference": inference,
        "prior": prior,
        "likelihood_shift": likelihood_shift,
        "likelihood_cov": likelihood_cov,
        "prior_mean": prior_mean
        if prior_type == "gaussian" or prior_type is None
        else None,
        "prior_cov": prior_cov
        if prior_type == "gaussian" or prior_type is None
        else None,
        "num_dim": num_dim,
        "x_o": zeros(num_dim),
        "sde_type": sde_type,
        "prior_type": prior_type,
    }



@pytest.mark.parametrize(
    "npse_trained_model",
    DEFAULT_USE + SLOW_COMBINATIONS,
    indirect=True,  # so pytest passes the params to the fixture
)
@pytest.mark.parametrize(
    "iid_method, num_trial",
    [
        pytest.param("fnpe", 3, id="fnpe-2trials", marks=pytest.mark.slow),
        pytest.param("gauss", 3, id="gauss-6trials", marks=pytest.mark.slow),
        pytest.param("auto_gauss", 8, id="auto_gauss-8trials"),
        pytest.param(
            "auto_gauss", 16, id="auto_gauss-16trials", marks=pytest.mark.slow
        ),
        pytest.param("jac_gauss", 8, id="jac_gauss-8trials", marks=pytest.mark.slow),
    ],
)
def test_npse_iid_inference(npse_trained_model, iid_method, num_trial):
    """Test whether NPSE infers well a simple example with available ground truth."""
    num_samples = 10

    # Extract data from fixture
    score_estimator = npse_trained_model["score_estimator"]
    inference = npse_trained_model["inference"]
    prior = npse_trained_model["prior"]
    likelihood_shift = npse_trained_model["likelihood_shift"]
    likelihood_cov = npse_trained_model["likelihood_cov"]
    prior_mean = npse_trained_model["prior_mean"]
    prior_cov = npse_trained_model["prior_cov"]
    num_dim = npse_trained_model["num_dim"]
    sde_type = npse_trained_model["sde_type"]
    prior_type = npse_trained_model["prior_type"]

    x_o = zeros(num_trial, num_dim)
    posterior = inference.build_posterior(score_estimator)
    posterior.set_default_x(x_o)
    samples = posterior.sample((num_samples,), iid_method=iid_method, steps=5)

    if prior_type == "gaussian" or (prior_type is None):
        gt_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = gt_posterior.sample((num_samples,))
    elif prior_type == "uniform":
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o,
            likelihood_shift,
            likelihood_cov,
            prior,  # type: ignore
        )

    # Compute the c2st and assert it is near the chance level of 0.5.
    # Some degradation is expected, in part because the posterior gets
    # tighter, which usually makes the c2st worse.
    check_c2st(
        samples,
        target_samples,
        alg=f"npse-{sde_type}-{prior_type}-{num_dim}-{iid_method}-{num_trial}iid-trials",
        tol=0.05 * min(num_trial, 8),
    )
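
A direct shape assertion right after the sampling call would surface the mismatch explicitly instead of through c2st. A minimal sketch of such a check (hypothetical addition; it assumes sbi's convention that sample((num_samples,)) returns a tensor of shape (num_samples, num_dim)):

# Hypothetical check, placed right after posterior.sample(...) in the test.
assert samples.shape == (num_samples, num_dim), (
    f"posterior.sample returned shape {tuple(samples.shape)}, "
    f"expected {(num_samples, num_dim)}"
)
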
vagechirkov added the bug label on Mar 18, 2025
@vagechirkov (Collaborator, Author)

@manuelgloeckler @janfb

janfb changed the title from "Incorrect number of posterior samples" to "tests: incorrect number of posterior samples" on Mar 18, 2025
janfb added the improving-tests label on Mar 18, 2025
@vagechirkov (Collaborator, Author)

I think this can be reproduced by simply running sampling with a very low number of samples and a uniform prior. It may just be a sampling or logging bug. A minimal sketch is below.
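
A standalone reproduction sketch (untested; it reuses the linear Gaussian setup from the test above, and the short training run is only to get a posterior object quickly):

import torch

from sbi.inference import NPSE
from sbi.simulators.linear_gaussian import linear_gaussian
from sbi.utils import BoxUniform

num_dim = 2
prior = BoxUniform(-2 * torch.ones(num_dim), 2 * torch.ones(num_dim))

# Simulate a small training set from the linear Gaussian simulator.
theta = prior.sample((100,))
x = linear_gaussian(theta, -1.0 * torch.ones(num_dim), 0.3 * torch.eye(num_dim))

# Train very briefly; the point is the returned sample count, not quality.
inference = NPSE(prior, sde_type="vp")
inference.append_simulations(theta, x).train(max_num_epochs=5)

posterior = inference.build_posterior()
posterior.set_default_x(torch.zeros(3, num_dim))  # 3 iid trials

samples = posterior.sample((10,), iid_method="auto_gauss", steps=5)
print(samples.shape)  # expected (10, 2); reportedly differs with a uniform prior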
