Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lower noop wheel install overhead. #2315

Merged
merged 2 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pex/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ def iter_map_parallel(
# least two slots to ensure we process input items in parallel.
pool_size = max(2, min(len(input_items) // min_average_load, _sanitize_max_jobs(max_jobs)))

perform_install = functools.partial(_apply_function, function)
apply_function = functools.partial(_apply_function, function)
Copy link
Member Author

@jsirois jsirois Jan 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was, in the end, an unrelated re-name. I was poking around in the resolve / wheel install machinery looking at timings and narrowing in on the double-hashing going on.


slots = defaultdict(list) # type: DefaultDict[int, List[float]]
with TRACER.timed(
Expand All @@ -741,7 +741,7 @@ def iter_map_parallel(
)
):
with _mp_pool(size=pool_size) as pool:
for pid, result, elapsed_secs in pool.imap_unordered(perform_install, input_items):
for pid, result, elapsed_secs in pool.imap_unordered(apply_function, input_items):
TRACER.log(
"[{pid}] {verbed} {result} in {elapsed_secs:.2f}s".format(
pid=pid,
Expand Down
24 changes: 22 additions & 2 deletions pex/pep_376.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pex.common import is_pyc_dir, is_pyc_file, safe_mkdir, safe_open
from pex.interpreter import PythonInterpreter
from pex.typing import TYPE_CHECKING, cast
from pex.util import CacheHelper
from pex.venv.virtualenv import Virtualenv

if TYPE_CHECKING:
Expand Down Expand Up @@ -169,10 +170,24 @@ def save(
record_relpath, # type: Text
):
# type: (...) -> InstalledWheel
layout = {"stash_dir": stash_dir, "record_relpath": record_relpath}

# We currently need the installed wheel chroot hash for PEX-INFO / boot purposes. It is
# expensive to calculate; so we do it here 1 time when saving the installed wheel.
fingerprint = CacheHelper.dir_hash(prefix_dir, hasher=hashlib.sha256)

layout = {
"stash_dir": stash_dir,
"record_relpath": record_relpath,
"fingerprint": fingerprint,
}
with open(cls.layout_file(prefix_dir), "w") as fp:
json.dump(layout, fp, sort_keys=True)
return cls(prefix_dir=prefix_dir, stash_dir=stash_dir, record_relpath=record_relpath)
return cls(
prefix_dir=prefix_dir,
stash_dir=stash_dir,
record_relpath=record_relpath,
fingerprint=fingerprint,
)

@classmethod
def load(cls, prefix_dir):
Expand Down Expand Up @@ -201,15 +216,20 @@ def load(cls, prefix_dir):
layout_file=layout_file, value=layout
)
)

fingerprint = layout.get("fingerprint")

return cls(
prefix_dir=prefix_dir,
stash_dir=cast(str, stash_dir),
record_relpath=cast(str, record_relpath),
fingerprint=cast("Optional[str]", fingerprint),
)

prefix_dir = attr.ib() # type: str
stash_dir = attr.ib() # type: str
record_relpath = attr.ib() # type: Text
fingerprint = attr.ib() # type: Optional[str]

def stashed_path(self, *components):
# type: (*str) -> str
Expand Down
12 changes: 11 additions & 1 deletion pex/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pex.jobs import Raise, SpawnedJob, execute_parallel, iter_map_parallel
from pex.network_configuration import NetworkConfiguration
from pex.orderedset import OrderedSet
from pex.pep_376 import InstalledWheel, LoadError
from pex.pep_425 import CompatibilityTags
from pex.pep_427 import InstallableType, WheelError, install_wheel_chroot
from pex.pep_503 import ProjectName
Expand Down Expand Up @@ -485,7 +486,16 @@ def finalize_install(self, install_requests):
# pex: * - paths that do not exist or will be imported via zipimport
# pex.pex 2.0.2
#
wheel_dir_hash = fingerprint_path(self.install_chroot)
cached_fingerprint = None # type: Optional[str]
try:
installed_wheel = InstalledWheel.load(self.install_chroot)
except LoadError:
# We support legacy chroots below by calculating the chroot fingerprint just in time.
pass
else:
cached_fingerprint = installed_wheel.fingerprint
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@zmanji so this represents the avoidance of ~3s to hash PyTorch 1 of 2 times. It only works for new caches though (rm -rf ~/.pex/installed_wheels). I wanted to keep the status quo here with the chroot symlinking and just get this easy win for now. I do have some notes drawn up revisiting using a separate chroot hash, so this may all go away later, but with the same net effect of skipping the expensive chroot hashing.

Comment on lines +492 to +496
Copy link
Collaborator

@huonw huonw Jan 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally stylistic (no need to change), but is it possible to calculate wheel_dir_hash in these two branches directly, potentially saving some slight fiddle:

Suggested change
except LoadError:
# We support legacy chroots below by calculating the chroot fingerprint just in time.
pass
else:
cached_fingerprint = installed_wheel.fingerprint
except LoadError:
# We support legacy chroots below by calculating the chroot fingerprint just in time.
wheel_dir_hash = fingerprint_path(self.install_chroot)
else:
wheel_dir_hash = installed_wheel.fingerprint

Copy link
Member Author

@jsirois jsirois Jan 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That doesn't work out nice like you think since installed_wheel.fingerprint can be None, necessitating repeating the except clause calculation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, of course; thanks.


wheel_dir_hash = cached_fingerprint or fingerprint_path(self.install_chroot)
runtime_key_dir = os.path.join(self._installation_root, wheel_dir_hash)
with atomic_directory(runtime_key_dir) as atomic_dir:
if not atomic_dir.is_finalized():
Expand Down
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/attrs/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "attrs-21.5.0.dev0.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "13e0015aa58e8f470b17936f42a5d5f874a20194156c371d2a4c695dc8c16d9e", "record_relpath": "attrs-21.5.0.dev0.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/packaging_20_9/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "packaging-20.9.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "5a3fc5dcd563b4a4474944cd0d73ee4a51fb53132af553abfb203dc1cdf8f7c3", "record_relpath": "packaging-20.9.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/packaging_21_3/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "packaging-21.3.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "2a93da49a5fc8217a0f710ff0ca3cfc56fefa35eddcf6a64786d452bfa284525", "record_relpath": "packaging-21.3.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/packaging_23_1/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "packaging-23.1.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "a98c4a74d7b6a62763df7ad330ac4c7a0779323fc36e961aeb8f20865e21a191", "record_relpath": "packaging-23.1.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/pip/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "pip-20.3.4.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "120267325b80f5c4b4adac019eb6617ab3319395c043d2871eedf70dd6ae2954", "record_relpath": "pip-20.3.4.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/setuptools/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "setuptools-44.0.0+3acb925dd708430aeaf197ea53ac8a752f7c1863.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "ebe3717ba6bad87ca328cbf3d3eb3f5475105bccb51dc09a69d37eff6b2e5210", "record_relpath": "setuptools-44.0.0+3acb925dd708430aeaf197ea53ac8a752f7c1863.dist-info/RECORD", "stash_dir": ".prefix"}
2 changes: 1 addition & 1 deletion pex/vendor/_vendored/toml/.layout.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"record_relpath": "toml-0.10.2.dist-info/RECORD", "stash_dir": ".prefix"}
{"fingerprint": "3d44cdc5911c31b190a0225202daafbb22f7f74e6761fb086742dadb5dff5384", "record_relpath": "toml-0.10.2.dist-info/RECORD", "stash_dir": ".prefix"}