tests: incorrect number of posterior samples #1473

Open
vagechirkov opened this issue Mar 18, 2025 · 2 comments
Labels
bug (Something isn't working) · improving-tests (Everything to do with the sbi test suite)

Comments

@vagechirkov (Collaborator)

🐛 Bug Description

I was working on PR #1461 (issue #1428). I wanted to run all tests quickly (even if they fail), just to exercise the fixtures, so I set num_samples=10 and steps=5 for posterior sampling in test_npse_iid_inference. Everything seems to work fine for the None and Gaussian priors, but something odd happens with the uniform prior.

🟢 Gaussian prior: [screenshots of test output]
🟢 None prior: [screenshots of test output]
🔴 Uniform prior: [screenshots of test output]

🔄 Steps to Reproduce

Run the tests:

pytest tests/linearGaussian_npse_test.py::test_npse_iid_inference
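
Most parameter combinations are marked slow, so the fast subset can be selected with standard pytest marker filtering (assuming the slow marker is registered in the suite's config):

pytest tests/linearGaussian_npse_test.py::test_npse_iid_inference -m "not slow"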

Full test code:

# Imports (assumed; module paths follow the sbi repo layout).
import pytest
from torch import eye, ones, zeros
from torch.distributions import MultivariateNormal

from sbi.inference import NPSE
from sbi.simulators.linear_gaussian import (
    linear_gaussian,
    samples_true_posterior_linear_gaussian_uniform_prior,
    true_posterior_linear_gaussian_mvn_prior,
)
from sbi.utils import BoxUniform
from tests.test_utils import check_c2st

DEFAULT_USE = [
    pytest.param(("ve", None), id="ve-None"),
    pytest.param(("vp", "gaussian"), id="vp-gaussian"),
]

SLOW_COMBINATIONS = [
    pytest.param(("vp", "uniform"), id="vp-uniform", marks=pytest.mark.slow),
    pytest.param(("vp", None), id="vp-None", marks=pytest.mark.slow),
    pytest.param(("ve", "gaussian"), id="ve-gaussian", marks=pytest.mark.slow),
    pytest.param(("ve", "uniform"), id="ve-uniform", marks=pytest.mark.slow),
    pytest.param(("subvp", "gaussian"), id="subvp-gaussian", marks=pytest.mark.slow),
    pytest.param(("subvp", "uniform"), id="subvp-uniform", marks=pytest.mark.slow),
    pytest.param(("subvp", None), id="subvp-None", marks=pytest.mark.slow),
]


@pytest.fixture(scope="module")
def npse_trained_model(request):
    """Module-scoped fixture that trains a score estimator for NPSE tests."""
    sde_type, prior_type = request.param
    num_dim = 2
    num_simulations = 5

    # likelihood_mean will be likelihood_shift+theta
    likelihood_shift = -1.0 * ones(num_dim)
    likelihood_cov = 0.3 * eye(num_dim)

    if prior_type == "gaussian" or (prior_type is None):
        prior_mean = zeros(num_dim)
        prior_cov = eye(num_dim)
        prior = MultivariateNormal(loc=prior_mean, covariance_matrix=prior_cov)
        prior_npse = prior if prior_type is None else None
    elif prior_type == "uniform":
        prior = BoxUniform(-2 * ones(num_dim), 2 * ones(num_dim))
        prior_npse = prior

    # This checks that our method for handling "general" priors works,
    # i.e., when NPSE does not get a proper prior passed by the user.
    inference = NPSE(prior_npse, show_progress_bars=True, sde_type=sde_type)

    theta = prior.sample((num_simulations,))
    x = linear_gaussian(theta, likelihood_shift, likelihood_cov)

    score_estimator = inference.append_simulations(theta, x).train(
        stop_after_epochs=200
    )

    return {
        "score_estimator": score_estimator,
        "inference": inference,
        "prior": prior,
        "likelihood_shift": likelihood_shift,
        "likelihood_cov": likelihood_cov,
        "prior_mean": prior_mean
        if prior_type == "gaussian" or prior_type is None
        else None,
        "prior_cov": prior_cov
        if prior_type == "gaussian" or prior_type is None
        else None,
        "num_dim": num_dim,
        "x_o": zeros(num_dim),
        "sde_type": sde_type,
        "prior_type": prior_type,
    }



@pytest.mark.parametrize(
    "npse_trained_model",
    DEFAULT_USE + SLOW_COMBINATIONS,
    indirect=True,  # so pytest passes the params to the fixture
)
@pytest.mark.parametrize(
    "iid_method, num_trial",
    [
        pytest.param("fnpe", 3, id="fnpe-2trials", marks=pytest.mark.slow),
        pytest.param("gauss", 3, id="gauss-6trials", marks=pytest.mark.slow),
        pytest.param("auto_gauss", 8, id="auto_gauss-8trials"),
        pytest.param(
            "auto_gauss", 16, id="auto_gauss-16trials", marks=pytest.mark.slow
        ),
        pytest.param("jac_gauss", 8, id="jac_gauss-8trials", marks=pytest.mark.slow),
    ],
)
def test_npse_iid_inference(npse_trained_model, iid_method, num_trial):
    """Test whether NPSE infers well a simple example with available ground truth."""
    num_samples = 10

    # Extract data from fixture
    score_estimator = npse_trained_model["score_estimator"]
    inference = npse_trained_model["inference"]
    prior = npse_trained_model["prior"]
    likelihood_shift = npse_trained_model["likelihood_shift"]
    likelihood_cov = npse_trained_model["likelihood_cov"]
    prior_mean = npse_trained_model["prior_mean"]
    prior_cov = npse_trained_model["prior_cov"]
    num_dim = npse_trained_model["num_dim"]
    sde_type = npse_trained_model["sde_type"]
    prior_type = npse_trained_model["prior_type"]

    x_o = zeros(num_trial, num_dim)
    posterior = inference.build_posterior(score_estimator)
    posterior.set_default_x(x_o)
    samples = posterior.sample((num_samples,), iid_method=iid_method, steps=5)

    if prior_type == "gaussian" or (prior_type is None):
        gt_posterior = true_posterior_linear_gaussian_mvn_prior(
            x_o, likelihood_shift, likelihood_cov, prior_mean, prior_cov
        )
        target_samples = gt_posterior.sample((num_samples,))
    elif prior_type == "uniform":
        target_samples = samples_true_posterior_linear_gaussian_uniform_prior(
            x_o,
            likelihood_shift,
            likelihood_cov,
            prior,  # type: ignore
        )

    # Compute the c2st and assert it is near the chance level of 0.5.
    # Some degradation is expected, in part because the posterior gets
    # tighter, which usually makes the c2st worse.
    check_c2st(
        samples,
        target_samples,
        alg=f"npse-{sde_type}-{prior_type}-{num_dim}-{iid_method}-{num_trial}iid-trials",
        tol=0.05 * min(num_trial, 8),
    )
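
A direct shape assertion right after the sampling call would surface the mismatch explicitly instead of through c2st. A minimal sketch of such a check (hypothetical addition; it assumes sbi's convention that sample((num_samples,)) returns a tensor of shape (num_samples, num_dim)):

# Hypothetical check, placed right after posterior.sample(...) in the test.
assert samples.shape == (num_samples, num_dim), (
    f"posterior.sample returned shape {tuple(samples.shape)}, "
    f"expected {(num_samples, num_dim)}"
)
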
vagechirkov added the bug label on Mar 18, 2025
@vagechirkov (Collaborator, Author)

@manuelgloeckler @janfb

janfb changed the title from "Incorrect number of posterior samples" to "tests: incorrect number of posterior samples" on Mar 18, 2025
janfb added the improving-tests label on Mar 18, 2025
@vagechirkov (Collaborator, Author)

I think this can be reproduced by simply running sampling with a very low number of samples and a uniform prior. It may just be a sampling or logging bug. A minimal sketch is below.
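
A standalone reproduction sketch (untested; it reuses the linear Gaussian setup from the test above, and the short training run is only to get a posterior object quickly):

import torch

from sbi.inference import NPSE
from sbi.simulators.linear_gaussian import linear_gaussian
from sbi.utils import BoxUniform

num_dim = 2
prior = BoxUniform(-2 * torch.ones(num_dim), 2 * torch.ones(num_dim))

# Simulate a small training set from the linear Gaussian simulator.
theta = prior.sample((100,))
x = linear_gaussian(theta, -1.0 * torch.ones(num_dim), 0.3 * torch.eye(num_dim))

# Train very briefly; the point is the returned sample count, not quality.
inference = NPSE(prior, sde_type="vp")
inference.append_simulations(theta, x).train(max_num_epochs=5)

posterior = inference.build_posterior()
posterior.set_default_x(torch.zeros(3, num_dim))  # 3 iid trials

samples = posterior.sample((10,), iid_method="auto_gauss", steps=5)
print(samples.shape)  # expected (10, 2); reportedly differs with a uniform prior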
