Skip to content

Commit

Permalink
Fix kf_is_ready_test (kubeflow#372)
Browse files Browse the repository at this point in the history
* Fix kf_is_ready_test

* istio_egressgateway is no longer deployed so need to remove it from
  list of deployments we check for; it doesn't seem to be required
  to enable outbound internet access.

* Split up the kf_is_ready_test into more focused tests

* Related to GoogleCloudPlatform/kubeflow-distribution#73

* istio-policy is deprecated and no longer included in the default profile
  https://istio.io/latest/docs/tasks/policy-enforcement/enabling-policy/

  * This appears to be different from security and authorization.
  • Loading branch information
jlewi authored and vpavlin committed Jul 22, 2020
1 parent 780a5f6 commit 4fdf6f1
Showing 1 changed file with 116 additions and 103 deletions.
219 changes: 116 additions & 103 deletions py/kubeflow/kfctl/testing/pytests/kf_is_ready_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import datetime
# TODO(jlewi): This code should probably move to kubeflow/testing repo.
# Might also want to split it up into multiple test files.
import logging
import os
import subprocess
import tempfile
import uuid
import yaml
from retrying import retry

import googleapiclient.discovery
from oauth2client.client import GoogleCredentials
Expand Down Expand Up @@ -58,6 +55,7 @@ def check_deployments_ready(record_xml_attribute, namespace, name, deployments):
namespace: The namespace Kubeflow is deployed to.
"""
set_logging()
# TODO(jlewi): Should we do this in the calling function?
util.set_pytest_junit(record_xml_attribute, name)

# Need to activate account for scopes.
Expand All @@ -73,6 +71,36 @@ def check_deployments_ready(record_xml_attribute, namespace, name, deployments):
logging.info("Verifying that deployment %s started...", deployment_name)
util.wait_for_deployment(api_client, namespace, deployment_name, 10)

def check_statefulsets_ready(record_xml_attribute, namespace, name, stateful_sets):
  """Wait for each of the given stateful sets to become ready.

  Args:
    record_xml_attribute: Passed through to util.set_pytest_junit.
    namespace: The namespace to check.
    name: Test name passed to util.set_pytest_junit.
    stateful_sets: Iterable of stateful set names to wait for.

  Raises:
    Exception: Whatever util.wait_for_statefulset raised for the first
      stateful set that did not become ready. It is re-raised after
      `kubectl describe` has been run to collect debug output.
  """
  set_logging()
  # TODO(jlewi): Should we do this in the calling function?
  util.set_pytest_junit(record_xml_attribute, name)

  # Need to activate account for scopes.
  key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
  if key_file:
    util.run(["gcloud", "auth", "activate-service-account",
              "--key-file=" + key_file])

  api_client = deploy_utils.create_k8s_client()

  util.load_kube_config()

  for set_name in stateful_sets:
    logging.info("Verifying that stateful set %s.%s started...", namespace,
                 set_name)
    try:
      util.wait_for_statefulset(api_client, namespace, set_name)
    except Exception:
      # Collect debug information by running describe. This is best effort:
      # a failure of the describe command must not hide the original error.
      try:
        util.run(["kubectl", "-n", namespace, "describe", "statefulsets",
                  set_name])
      except Exception:  # pylint: disable=broad-except
        logging.exception("Could not describe stateful set %s.%s", namespace,
                          set_name)
      raise

def test_katib_is_ready(record_xml_attribute, namespace):
deployment_names = [
"katib-controller",
Expand Down Expand Up @@ -121,114 +149,96 @@ def test_notebook_is_ready(record_xml_attribute, namespace):

def test_centraldashboard_is_ready(record_xml_attribute, namespace):
check_deployments_ready(record_xml_attribute, namespace,
"test_centraldashboard_is_ready",["centraldashboard"])
"test_centraldashboard_is_ready",
["centraldashboard"])

def test_profiles_is_ready(record_xml_attribute, namespace):
check_deployments_ready(record_xml_attribute, namespace,
"test_profile_is_ready",["profiles-deployment"])
"test_profile_is_ready", ["profiles-deployment"])

def test_pytorch_is_ready(record_xml_attribute, namespace):
check_deployments_ready(record_xml_attribute, namespace,
"test_pytorch_is_ready",["pytorch-operator"])
"test_pytorch_is_ready", ["pytorch-operator"])

def test_tf_job_is_ready(record_xml_attribute, namespace):
check_deployments_ready(record_xml_attribute, namespace,
"test_tf_job_is_ready",["tf-job-operator"])

def test_kf_is_ready(record_xml_attribute, namespace, use_basic_auth, use_istio,
app_path):
"""Test that Kubeflow was successfully deployed.
Args:
namespace: The namespace Kubeflow is deployed to.
"""
set_logging()
util.set_pytest_junit(record_xml_attribute, "test_kf_is_ready")

# Need to activate account for scopes.
if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
util.run(["gcloud", "auth", "activate-service-account",
"--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]])

api_client = deploy_utils.create_k8s_client()

util.load_kube_config()

# Verify that components are actually deployed.
# TODO(jlewi): We need to parameterize this list based on whether
# we are using IAP or basic auth.
# TODO(yanniszark): This list is incomplete and missing a lot of components.
deployment_names = [
"workflow-controller",
]

stateful_set_names = []
"test_tf_job_is_ready", ["tf-job-operator"])

platform, _ = get_platform_app_name(app_path)

ingress_related_deployments = [
"istio-egressgateway",
def test_istio_is_ready(record_xml_attribute):
# Starting with 1.1 on GCP at least istio-egressgateway is no longer
# included by default
istio_deployments = [
"istio-ingressgateway",
"istio-pilot",
"istio-policy",
"istio-sidecar-injector",
"istio-telemetry",
"istio-tracing",
"prometheus",
]
ingress_related_stateful_sets = []

knative_namespace = "knative-serving"
knative_related_deployments = [
namespace = "istio-system"
check_deployments_ready(record_xml_attribute, namespace,
"test_istio_is_ready", istio_deployments)

def test_knative_is_deployed(record_xml_attribute, app_path):

namespace = "knative-serving"
deployments = [
"activator",
"autoscaler",
"controller",
]

if platform == "gcp":
deployment_names.extend(["cloud-endpoints-controller"])
stateful_set_names.extend(["kfserving-controller-manager"])
if use_basic_auth:
deployment_names.extend(["basic-auth-login"])
ingress_related_stateful_sets.extend(["backend-updater"])
else:
ingress_related_deployments.extend(["iap-enabler"])
ingress_related_stateful_sets.extend(["backend-updater"])
elif platform == "existing_arrikto":
deployment_names.extend(["dex"])
ingress_related_deployments.extend(["authservice"])
knative_related_deployments = []


# TODO(jlewi): Might want to parallelize this.
for deployment_name in deployment_names:
logging.info("Verifying that deployment %s started...", deployment_name)
util.wait_for_deployment(api_client, namespace, deployment_name, 10)
platform, _ = get_platform_app_name(app_path)

ingress_namespace = "istio-system" if use_istio else namespace
for deployment_name in ingress_related_deployments:
logging.info("Verifying that deployment %s started...", deployment_name)
util.wait_for_deployment(api_client, ingress_namespace, deployment_name, 10)
if platform == "existing_arrikto":
pytest.skip("knative tests skipped on existing_arrikto")
return

check_deployments_ready(record_xml_attribute, namespace,
"test_knative_is_deployed", deployments)

all_stateful_sets = [(namespace, name) for name in stateful_set_names]
all_stateful_sets.extend([(ingress_namespace, name) for name in ingress_related_stateful_sets])
stateful_sets = ["kfserving-controller-manager"]
check_statefulsets_ready(record_xml_attribute, namespace,
"test_knative_is_deployed", stateful_sets)

for ss_namespace, name in all_stateful_sets:
logging.info("Verifying that stateful set %s.%s started...", ss_namespace, name)
try:
util.wait_for_statefulset(api_client, ss_namespace, name)
except:
# Collect debug information by running describe
util.run(["kubectl", "-n", ss_namespace, "describe", "statefulsets", name])
raise
def test_dex_is_deployed(record_xml_attribute, app_path):
  """Check that the dex and authservice deployments are ready.

  The deployments are looked up in the "istio-system" namespace. The test is
  skipped unless the platform returned by get_platform_app_name is
  "existing_arrikto".

  Args:
    record_xml_attribute: Passed through to check_deployments_ready.
    app_path: Passed to get_platform_app_name to determine the platform.
  """
  platform, _ = get_platform_app_name(app_path)

  namespace = "istio-system"
  # dex tests
  if platform != "existing_arrikto":
    # pytest.skip raises, so nothing after this call runs.
    pytest.skip("dex tests skipped unless platform=existing_arrikto")

  deployments = ["dex", "authservice"]

  check_deployments_ready(record_xml_attribute, namespace,
                          "test_dex_is_deployed", deployments)

def test_gcp_ingress_services(record_xml_attribute, namespace, app_path):
  """Check that the GCP ingress deployments and stateful sets are ready.

  The resources are always looked up in the "istio-system" namespace. The
  test is skipped unless the platform returned by get_platform_app_name is
  "gcp".

  Args:
    record_xml_attribute: Passed through to the check_* helpers.
    namespace: Unused; the checks target "istio-system". Kept so the
      signature stays unchanged.
    app_path: Passed to get_platform_app_name to determine the platform.
  """
  platform, _ = get_platform_app_name(app_path)

  if platform != "gcp":
    # pytest.skip raises, so nothing after this call runs.
    pytest.skip("Not running on GCP")

  # Use a separate local instead of overwriting the `namespace` argument.
  ingress_namespace = "istio-system"
  deployments = ["cloud-endpoints-controller", "iap-enabler"]
  stateful_sets = ["backend-updater"]

  name = "test_gcp_ingress_services"
  check_deployments_ready(record_xml_attribute, ingress_namespace,
                          name, deployments)

  check_statefulsets_ready(record_xml_attribute, ingress_namespace,
                           name, stateful_sets)

def test_gcp_access(record_xml_attribute, namespace, app_path, project):
"""Test that Kubeflow gcp was configured with workload_identity and GCP service account credentials.
Expand All @@ -247,33 +257,36 @@ def test_gcp_access(record_xml_attribute, namespace, app_path, project):
api_client = deploy_utils.create_k8s_client()

platform, app_name = get_platform_app_name(app_path)
if platform == "gcp":
# check secret
util.check_secret(api_client, namespace, "user-gcp-sa")
if platform != "gcp":

pytest.skip("Not running on GCP")
return

cred = GoogleCredentials.get_application_default()
# Create the Cloud IAM service object
service = googleapiclient.discovery.build('iam', 'v1', credentials=cred)
# check secret
util.check_secret(api_client, namespace, "user-gcp-sa")

userSa = 'projects/%s/serviceAccounts/%s-user@%s.iam.gserviceaccount.com' % (project, app_name, project)
adminSa = 'serviceAccount:%s-admin@%s.iam.gserviceaccount.com' % (app_name, project)
cred = GoogleCredentials.get_application_default()
# Create the Cloud IAM service object
service = googleapiclient.discovery.build('iam', 'v1', credentials=cred)

request = service.projects().serviceAccounts().getIamPolicy(resource=userSa)
response = request.execute()
roleToMembers = {}
for binding in response['bindings']:
roleToMembers[binding['role']] = set(binding['members'])
userSa = 'projects/%s/serviceAccounts/%s-user@%s.iam.gserviceaccount.com' % (project, app_name, project)
adminSa = 'serviceAccount:%s-admin@%s.iam.gserviceaccount.com' % (app_name, project)

if 'roles/owner' not in roleToMembers:
raise Exception("roles/owner missing in iam-policy of %s" % userSa)
request = service.projects().serviceAccounts().getIamPolicy(resource=userSa)
response = request.execute()
roleToMembers = {}
for binding in response['bindings']:
roleToMembers[binding['role']] = set(binding['members'])

if adminSa not in roleToMembers['roles/owner']:
raise Exception("Admin %v should be owner of user %s" % (adminSa, userSa))
if 'roles/owner' not in roleToMembers:
raise Exception("roles/owner missing in iam-policy of %s" % userSa)

workloadIdentityRole = 'roles/iam.workloadIdentityUser'
if workloadIdentityRole not in roleToMembers:
raise Exception("roles/iam.workloadIdentityUser missing in iam-policy of %s" % userSa)
if adminSa not in roleToMembers['roles/owner']:
raise Exception("Admin %s should be owner of user %s" % (adminSa, userSa))

workloadIdentityRole = 'roles/iam.workloadIdentityUser'
if workloadIdentityRole not in roleToMembers:
raise Exception("roles/iam.workloadIdentityUser missing in iam-policy of %s" % userSa)

if __name__ == "__main__":
logging.basicConfig(level=logging.INFO,
Expand Down

0 comments on commit 4fdf6f1

Please sign in to comment.