diff --git a/py/kubeflow/kfctl/testing/pytests/kf_is_ready_test.py b/py/kubeflow/kfctl/testing/pytests/kf_is_ready_test.py index 4eec4f60..28a41b96 100644 --- a/py/kubeflow/kfctl/testing/pytests/kf_is_ready_test.py +++ b/py/kubeflow/kfctl/testing/pytests/kf_is_ready_test.py @@ -1,11 +1,8 @@ -import datetime +# TODO(jlewi): This code should probably move to kubeflow/testing repo. +# Might also want to split it up into multiple test files. import logging import os -import subprocess -import tempfile -import uuid import yaml -from retrying import retry import googleapiclient.discovery from oauth2client.client import GoogleCredentials @@ -58,6 +55,7 @@ def check_deployments_ready(record_xml_attribute, namespace, name, deployments): namespace: The namespace Kubeflow is deployed to. """ set_logging() + # TODO(jlewi): Should we do this in the calling function? util.set_pytest_junit(record_xml_attribute, name) # Need to activate account for scopes. @@ -73,6 +71,36 @@ def check_deployments_ready(record_xml_attribute, namespace, name, deployments): logging.info("Verifying that deployment %s started...", deployment_name) util.wait_for_deployment(api_client, namespace, deployment_name, 10) +def check_statefulsets_ready(record_xml_attribute, namespace, name, stateful_sets): + """Test that Kubeflow stateful sets are successfully deployed. + + Args: + namespace: The namespace to check + """ + set_logging() + # TODO(jlewi): Should we do this in the calling function? + util.set_pytest_junit(record_xml_attribute, name) + + # Need to activate account for scopes. 
+ if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"): + util.run(["gcloud", "auth", "activate-service-account", + "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]]) + + api_client = deploy_utils.create_k8s_client() + + util.load_kube_config() + + for set_name in stateful_sets: + logging.info("Verifying that stateful set %s.%s started...", namespace, + set_name) + try: + util.wait_for_statefulset(api_client, namespace, set_name) + except: + # Collect debug information by running describe + util.run(["kubectl", "-n", namespace, "describe", "statefulsets", + set_name]) + raise + def test_katib_is_ready(record_xml_attribute, namespace): deployment_names = [ "katib-controller", @@ -121,114 +149,96 @@ def test_notebook_is_ready(record_xml_attribute, namespace): def test_centraldashboard_is_ready(record_xml_attribute, namespace): check_deployments_ready(record_xml_attribute, namespace, - "test_centraldashboard_is_ready",["centraldashboard"]) + "test_centraldashboard_is_ready", + ["centraldashboard"]) def test_profiles_is_ready(record_xml_attribute, namespace): check_deployments_ready(record_xml_attribute, namespace, - "test_profile_is_ready",["profiles-deployment"]) + "test_profile_is_ready", ["profiles-deployment"]) def test_pytorch_is_ready(record_xml_attribute, namespace): check_deployments_ready(record_xml_attribute, namespace, - "test_pytorch_is_ready",["pytorch-operator"]) + "test_pytorch_is_ready", ["pytorch-operator"]) def test_tf_job_is_ready(record_xml_attribute, namespace): check_deployments_ready(record_xml_attribute, namespace, - "test_tf_job_is_ready",["tf-job-operator"]) - -def test_kf_is_ready(record_xml_attribute, namespace, use_basic_auth, use_istio, - app_path): - """Test that Kubeflow was successfully deployed. - - Args: - namespace: The namespace Kubeflow is deployed to. - """ - set_logging() - util.set_pytest_junit(record_xml_attribute, "test_kf_is_ready") - - # Need to activate account for scopes. 
- if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"): - util.run(["gcloud", "auth", "activate-service-account", - "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]]) - - api_client = deploy_utils.create_k8s_client() - - util.load_kube_config() - - # Verify that components are actually deployed. - # TODO(jlewi): We need to parameterize this list based on whether - # we are using IAP or basic auth. - # TODO(yanniszark): This list is incomplete and missing a lot of components. - deployment_names = [ - "workflow-controller", - ] - - stateful_set_names = [] + "test_tf_job_is_ready", ["tf-job-operator"]) - platform, _ = get_platform_app_name(app_path) - - ingress_related_deployments = [ - "istio-egressgateway", +def test_istio_is_ready(record_xml_attribute): + # Starting with 1.1 on GCP at least istio-egressgateway is no longer + # included by default + istio_deployments = [ "istio-ingressgateway", "istio-pilot", - "istio-policy", "istio-sidecar-injector", "istio-telemetry", "istio-tracing", "prometheus", ] - ingress_related_stateful_sets = [] - knative_namespace = "knative-serving" - knative_related_deployments = [ + namespace = "istio-system" + check_deployments_ready(record_xml_attribute, namespace, + "test_istio_is_ready", istio_deployments) + +def test_knative_is_deployed(record_xml_attribute, app_path): + + namespace = "knative-serving" + deployments = [ "activator", "autoscaler", "controller", ] - if platform == "gcp": - deployment_names.extend(["cloud-endpoints-controller"]) - stateful_set_names.extend(["kfserving-controller-manager"]) - if use_basic_auth: - deployment_names.extend(["basic-auth-login"]) - ingress_related_stateful_sets.extend(["backend-updater"]) - else: - ingress_related_deployments.extend(["iap-enabler"]) - ingress_related_stateful_sets.extend(["backend-updater"]) - elif platform == "existing_arrikto": - deployment_names.extend(["dex"]) - ingress_related_deployments.extend(["authservice"]) - knative_related_deployments = [] - - - # 
TODO(jlewi): Might want to parallelize this. - for deployment_name in deployment_names: - logging.info("Verifying that deployment %s started...", deployment_name) - util.wait_for_deployment(api_client, namespace, deployment_name, 10) + platform, _ = get_platform_app_name(app_path) - ingress_namespace = "istio-system" if use_istio else namespace - for deployment_name in ingress_related_deployments: - logging.info("Verifying that deployment %s started...", deployment_name) - util.wait_for_deployment(api_client, ingress_namespace, deployment_name, 10) + if platform == "existing_arrikto": + pytest.skip("knative tests skipped on existing_arrikto") + return + check_deployments_ready(record_xml_attribute, namespace, + "test_knative_is_deployed", deployments) - all_stateful_sets = [(namespace, name) for name in stateful_set_names] - all_stateful_sets.extend([(ingress_namespace, name) for name in ingress_related_stateful_sets]) + stateful_sets = ["kfserving-controller-manager"] + check_statefulsets_ready(record_xml_attribute, namespace, + "test_knative_is_deployed", stateful_sets) - for ss_namespace, name in all_stateful_sets: - logging.info("Verifying that stateful set %s.%s started...", ss_namespace, name) - try: - util.wait_for_statefulset(api_client, ss_namespace, name) - except: - # Collect debug information by running describe - util.run(["kubectl", "-n", ss_namespace, "describe", "statefulsets", name]) - raise +def test_dex_is_deployed(record_xml_attribute, app_path): + platform, _ = get_platform_app_name(app_path) - # TODO(jlewi): We should verify that the ingress is created and healthy. 
+ namespace = "istio-system" + # dex tests only apply to the existing_arrikto platform + if platform != "existing_arrikto": + pytest.skip("knative tests skipped unless platform=existing_arrikto") + return - for deployment_name in knative_related_deployments: - logging.info("Verifying that deployment %s started...", deployment_name) - util.wait_for_deployment(api_client, knative_namespace, deployment_name, 10) + deployments = ["dex", "authservice"] + + check_deployments_ready(record_xml_attribute, namespace, + "test_dex_is_deployed", deployments) + +def test_gcp_ingress_services(record_xml_attribute, namespace, app_path): + """Test that the GCP ingress deployments and stateful sets are ready. + + Args: + namespace: Overridden below; the services are checked in istio-system. + """ + namespace = "istio-system" + platform, _ = get_platform_app_name(app_path) + if platform != "gcp": + pytest.skip("Not running on GCP") + return + + deployments = ["cloud-endpoints-controller", "iap-enabler"] + stateful_sets = ["backend-updater"] + + name = "test_gcp_ingress_services" + check_deployments_ready(record_xml_attribute, namespace, + name, deployments) + + + check_statefulsets_ready(record_xml_attribute, namespace, + name, stateful_sets) def test_gcp_access(record_xml_attribute, namespace, app_path, project): """Test that Kubeflow gcp was configured with workload_identity and GCP service account credentails. 
@@ -247,33 +257,36 @@ def test_gcp_access(record_xml_attribute, namespace, app_path, project): api_client = deploy_utils.create_k8s_client() platform, app_name = get_platform_app_name(app_path) - if platform == "gcp": - # check secret - util.check_secret(api_client, namespace, "user-gcp-sa") + if platform != "gcp": + + pytest.skip("Not running on GCP") + return - cred = GoogleCredentials.get_application_default() - # Create the Cloud IAM service object - service = googleapiclient.discovery.build('iam', 'v1', credentials=cred) + # check secret + util.check_secret(api_client, namespace, "user-gcp-sa") - userSa = 'projects/%s/serviceAccounts/%s-user@%s.iam.gserviceaccount.com' % (project, app_name, project) - adminSa = 'serviceAccount:%s-admin@%s.iam.gserviceaccount.com' % (app_name, project) + cred = GoogleCredentials.get_application_default() + # Create the Cloud IAM service object + service = googleapiclient.discovery.build('iam', 'v1', credentials=cred) - request = service.projects().serviceAccounts().getIamPolicy(resource=userSa) - response = request.execute() - roleToMembers = {} - for binding in response['bindings']: - roleToMembers[binding['role']] = set(binding['members']) + userSa = 'projects/%s/serviceAccounts/%s-user@%s.iam.gserviceaccount.com' % (project, app_name, project) + adminSa = 'serviceAccount:%s-admin@%s.iam.gserviceaccount.com' % (app_name, project) - if 'roles/owner' not in roleToMembers: - raise Exception("roles/owner missing in iam-policy of %s" % userSa) + request = service.projects().serviceAccounts().getIamPolicy(resource=userSa) + response = request.execute() + roleToMembers = {} + for binding in response['bindings']: + roleToMembers[binding['role']] = set(binding['members']) - if adminSa not in roleToMembers['roles/owner']: - raise Exception("Admin %v should be owner of user %s" % (adminSa, userSa)) + if 'roles/owner' not in roleToMembers: + raise Exception("roles/owner missing in iam-policy of %s" % userSa) - workloadIdentityRole = 
'roles/iam.workloadIdentityUser' - if workloadIdentityRole not in roleToMembers: - raise Exception("roles/iam.workloadIdentityUser missing in iam-policy of %s" % userSa) + if adminSa not in roleToMembers['roles/owner']: + raise Exception("Admin %s should be owner of user %s" % (adminSa, userSa)) + workloadIdentityRole = 'roles/iam.workloadIdentityUser' + if workloadIdentityRole not in roleToMembers: + raise Exception("roles/iam.workloadIdentityUser missing in iam-policy of %s" % userSa) if __name__ == "__main__": logging.basicConfig(level=logging.INFO,