Skip to content

Commit

Permalink
chore: Remove Bigframes installation in remote training custom job
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 568668981
  • Loading branch information
matthew29tang authored and copybara-github committed Sep 26, 2023
1 parent 23dae36 commit 0c1c129
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 20 deletions.
2 changes: 0 additions & 2 deletions vertexai/preview/_workflow/executor/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,8 +506,6 @@ def remote_training(invokable: shared._Invokable, rewrapper: Any):
VERTEX_AI_DEPENDENCY_PATH,
"absl-py==1.4.0",
]
if bf:
vertex_requirements.append("bigframes==0.1.1")

requirements = []

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1124,7 +1124,7 @@ def serialize(
raise ValueError(f"Invalid gcs path: {gcs_path}")

BigframeSerializer._metadata.dependencies = (
supported_frameworks._get_deps_if_bigframe(to_serialize)
supported_frameworks._get_bigframe_deps()
)

# Check if index.name is default and set index.name if not
Expand Down
36 changes: 19 additions & 17 deletions vertexai/preview/_workflow/shared/supported_frameworks.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,13 +267,7 @@ def _get_deps_if_pandas_dataframe(possible_dataframe: Any) -> List[str]:
if _is_pandas_dataframe(possible_dataframe):
dep_version = version.Version(pd.__version__).base_version
deps.append(f"pandas=={dep_version}")
try:
import pyarrow as pa

pyarrow_version = version.Version(pa.__version__).base_version
deps.append(f"pyarrow=={pyarrow_version}")
except ImportError:
deps.append("pyarrow")
deps += _get_pyarrow_deps()
# Note: it's likely that a DataFrame can be changed to other format, and
# therefore needs to be serialized by CloudPickleSerializer. An example
# is sklearn's Transformer.fit_transform() method, whose output is always
Expand All @@ -282,18 +276,26 @@ def _get_deps_if_pandas_dataframe(possible_dataframe: Any) -> List[str]:
return deps


def _get_deps_if_bigframe(possible_dataframe: Any) -> List[str]:
def _get_bigframe_deps() -> List[str]:
deps = []
if _is_bigframe(possible_dataframe):
dep_version = version.Version(bf.__version__).base_version
deps.append(f"bigframes=={dep_version}")

# Note: it's likely that a DataFrame can be changed to other format, and
# therefore needs to be serialized by CloudPickleSerializer. An example
# is sklearn's Transformer.fit_transform() method, whose output is always
# a ndarray.
deps += _get_cloudpickle_deps()
# Note: bigframes serialization can only occur locally, so the bigframes
# package should not be installed remotely. However, pandas and pyarrow
# are still required remotely to deserialize sklearn bigframes.
deps += _get_pandas_deps()
deps += _get_pyarrow_deps()
return deps


def _get_pyarrow_deps() -> List[str]:
    """Return the pyarrow requirement for a remote training job.

    Returns:
        A single-element list: ``["pyarrow==<base version>"]`` pinned to the
        locally installed pyarrow when it can be imported, otherwise the
        unpinned ``["pyarrow"]`` so the remote environment installs whatever
        version is available.
    """
    try:
        # Bind the import at module scope so later callers in this module
        # can reference the already-imported pyarrow.
        global pyarrow
        import pyarrow
    except ImportError:
        # pyarrow is not installed locally; fall back to an unpinned dep.
        return ["pyarrow"]
    pinned = version.Version(pyarrow.__version__).base_version
    return [f"pyarrow=={pinned}"]


Expand Down

0 comments on commit 0c1c129

Please sign in to comment.