Skip to content

Commit 6282bd3

Browse files
authored
Merge pull request #1101 from projectsyn/feat/parallel-alias-setup
Fix performance regression by parallelizing alias checkout
2 parents a0a8c8f + 4d5f0fc commit 6282bd3

File tree

6 files changed

+80
-42
lines changed

6 files changed

+80
-42
lines changed

commodore/component/__init__.py

+24-19
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class Component:
1919
_version: Optional[str] = None
2020
_dir: P
2121
_sub_path: str
22-
_aliases: dict[str, tuple[str, str]]
22+
_aliases: dict[str, tuple[str, str, MultiDependency]]
2323
_work_dir: Optional[P]
2424

2525
@classmethod
@@ -58,7 +58,13 @@ def __init__(
5858
self.version = version
5959
self._sub_path = sub_path
6060
self._repo = None
61-
self._aliases = {self.name: (self.version or "", self.sub_path or "")}
61+
self._aliases = {
62+
self.name: (
63+
self.version or "",
64+
self.sub_path or "",
65+
self._dependency,
66+
)
67+
}
6268
self._work_dir = work_dir
6369

6470
@property
@@ -137,13 +143,16 @@ def defaults_file(self) -> P:
137143
return self.alias_defaults_file(self.name)
138144

139145
def alias_directory(self, alias: str) -> P:
140-
apath = self._dependency.get_component(alias)
141-
if not apath:
142-
raise ValueError(f"unknown alias {alias} for component {self.name}")
143146
if alias not in self._aliases:
144147
raise ValueError(
145148
f"alias {alias} for component {self.name} has not been registered"
146149
)
150+
adep = self._aliases[alias][2]
151+
apath = adep.get_component(alias)
152+
# Here: if alias is registered in `self._aliases` it must be registered on the
153+
# alias's multi-dependency. The assert makes mypy happy. We disable bandit's
154+
# "assert_used" lint, since we don't rely on this assertion for correctness.
155+
assert apath # nosec B101
147156
return apath / self._aliases[alias][1]
148157

149158
def alias_class_file(self, alias: str) -> P:
@@ -190,9 +199,14 @@ def register_alias(
190199
self,
191200
alias: str,
192201
version: str,
202+
dependency: MultiDependency,
193203
sub_path: str = "",
194204
target_dir: Optional[P] = None,
195205
):
206+
if alias in self._aliases:
207+
raise ValueError(
208+
f"alias {alias} already registered on component {self.name}"
209+
)
196210
alias_target_dir = target_dir
197211
if not alias_target_dir:
198212
if not self._work_dir:
@@ -201,25 +215,16 @@ def register_alias(
201215
+ "which isn't configured with a working directory"
202216
)
203217
alias_target_dir = component_dir(self._work_dir, alias)
204-
if alias in self._aliases:
205-
raise ValueError(
206-
f"alias {alias} already registered on component {self.name}"
207-
)
208-
self._aliases[alias] = (version, sub_path)
209-
self._dependency.register_component(alias, alias_target_dir)
218+
self._aliases[alias] = (version, sub_path, dependency)
219+
dependency.register_component(alias, alias_target_dir)
210220

211-
def checkout_alias(
212-
self, alias: str, alias_dependency: Optional[MultiDependency] = None
213-
):
221+
def checkout_alias(self, alias: str):
214222
if alias not in self._aliases:
215223
raise ValueError(
216224
f"alias {alias} is not registered on component {self.name}"
217225
)
218-
219-
if alias_dependency:
220-
alias_dependency.checkout_component(alias, self._aliases[alias][0])
221-
else:
222-
self._dependency.checkout_component(alias, self._aliases[alias][0])
226+
adep = self._aliases[alias][2]
227+
adep.checkout_component(alias, self._aliases[alias][0])
223228

224229
def is_checked_out(self) -> bool:
225230
return self.target_dir is not None and self.target_dir.is_dir()

commodore/component/compile.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def _setup_component(
169169
config.register_component_aliases({instance_name: component_name})
170170
if instance_name != component_name:
171171
component.register_alias(
172-
instance_name, "", sub_path=sub_path, target_dir=target_dir
172+
instance_name, "", cdep, sub_path=sub_path, target_dir=target_dir
173173
)
174174

175175
# Validate component libraries

commodore/dependency_mgmt/__init__.py

+32-17
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import itertools
44
from concurrent.futures import ThreadPoolExecutor
5+
from typing import Callable, Iterable
56

67
import click
78

@@ -109,29 +110,37 @@ def fetch_components(cfg: Config):
109110
+ "Please specify `--force` to discard them"
110111
)
111112
deps.setdefault(cdep.url, []).append(c)
112-
fetch_parallel(fetch_component, cfg, deps.values())
113+
do_parallel(fetch_component, cfg, deps.values())
113114

114115
components = cfg.get_components()
115116

117+
aliases: dict[str, list] = {}
116118
for alias, component in component_aliases.items():
117119
if alias == component:
118120
# Nothing to setup for identity alias
119121
continue
120122

121123
c = components[component]
122124
aspec = cspecs[alias]
123-
adep = None
125+
adep = c.dependency
124126
if aspec.url != c.repo_url:
125127
adep = cfg.register_dependency_repo(aspec.url)
126-
adep.register_component(alias, component_dir(cfg.work_dir, alias))
127-
128-
c.register_alias(alias, aspec.version, aspec.path)
129-
c.checkout_alias(alias, adep)
130-
131-
create_alias_symlinks(cfg, c, alias)
132-
133-
134-
def fetch_component(cfg, dependencies):
128+
c.register_alias(alias, aspec.version, adep, aspec.path)
129+
if adep.url in deps:
130+
# NOTE(sg): if we already processed the dependency URL in the previous fetch
131+
# stage, we can create all instance worktrees in parallel. We do so by using
132+
# the alias name as the key for our "parallelization" dict.
133+
aliases[alias] = [(alias, c)]
134+
else:
135+
# Otherwise, we use adep.url as the parallelization key to avoid any race
136+
# conditions when creating multiple worktrees from a not-yet-cloned
137+
# dependency URL.
138+
aliases.setdefault(adep.url, []).append((alias, c))
139+
140+
do_parallel(setup_alias, cfg, aliases.values())
141+
142+
143+
def fetch_component(cfg: Config, dependencies: Iterable):
135144
"""
136145
Fetch all components of a MultiDependency object.
137146
"""
@@ -141,7 +150,13 @@ def fetch_component(cfg, dependencies):
141150
create_component_symlinks(cfg, c)
142151

143152

144-
def fetch_parallel(fetch_fun, cfg, to_fetch):
153+
def setup_alias(cfg: Config, aliases: Iterable):
154+
for alias, c in aliases:
155+
c.checkout_alias(alias)
156+
create_alias_symlinks(cfg, c, alias)
157+
158+
159+
def do_parallel(fun: Callable[[Config, Iterable], None], cfg: Config, data: Iterable):
145160
"""
146161
Fetch dependencies in parallel threads with ThreadPoolExecutor.
147162
"""
@@ -150,7 +165,7 @@ def fetch_parallel(fetch_fun, cfg, to_fetch):
150165
# that any exceptions raised in `fetch_fun` are propagated, cf.
151166
# https://docs.python.org/3/library/concurrent.futures.html#executor-objects. We
152167
# do so by simply materializing the iterator into a list.
153-
list(exe.map(fetch_fun, itertools.repeat(cfg), to_fetch))
168+
list(exe.map(fun, itertools.repeat(cfg), data))
154169

155170

156171
def register_components(cfg: Config):
@@ -208,10 +223,10 @@ def register_components(cfg: Config):
208223
c = registered_components[cn]
209224
aspec = cspecs[alias]
210225

226+
adep = c.dependency
211227
if aspec.url != c.repo_url:
212228
adep = cfg.register_dependency_repo(aspec.url)
213-
adep.register_component(alias, component_dir(cfg.work_dir, alias))
214-
c.register_alias(alias, aspec.version, aspec.path)
229+
c.register_alias(alias, aspec.version, adep, aspec.path)
215230

216231
if not component_dir(cfg.work_dir, alias).is_dir():
217232
raise click.ClickException(f"Missing alias checkout for '{alias} as {cn}'")
@@ -249,10 +264,10 @@ def fetch_packages(cfg: Config):
249264
+ "Please specify `--force` to discard them"
250265
)
251266
deps.setdefault(pdep.url, []).append((p, pkg))
252-
fetch_parallel(fetch_package, cfg, deps.values())
267+
do_parallel(fetch_package, cfg, deps.values())
253268

254269

255-
def fetch_package(cfg, dependencies):
270+
def fetch_package(cfg: Config, dependencies: Iterable):
256271
"""
257272
Fetch all package dependencies of a MultiDependency object.
258273
"""

tests/test_component.py

+21-3
Original file line numberDiff line numberDiff line change
@@ -485,14 +485,14 @@ def test_component_register_alias_workdir(tmp_path: P, workdir: bool):
485485

486486
if not workdir:
487487
with pytest.raises(ValueError) as exc:
488-
c.register_alias("test-alias", c.version)
488+
c.register_alias("test-alias", c.version, c.dependency)
489489

490490
assert (
491491
"Can't register alias on component test-component which isn't configured with a working directory"
492492
in str(exc.value)
493493
)
494494
else:
495-
c.register_alias("test-alias", c.version)
495+
c.register_alias("test-alias", c.version, c.dependency)
496496
assert (
497497
c.alias_directory("test-alias") == tmp_path / "dependencies" / "test-alias"
498498
)
@@ -505,6 +505,24 @@ def test_component_register_alias_targetdir(tmp_path: P, workdir: bool):
505505
if workdir:
506506
c._work_dir = tmp_path
507507

508-
c.register_alias("test-alias", c.version, target_dir=tmp_path / "test-alias")
508+
c.register_alias(
509+
"test-alias", c.version, c.dependency, target_dir=tmp_path / "test-alias"
510+
)
509511
# explicit `target_dir` has precedence over the component's _work_dir
510512
assert c.alias_directory("test-alias") == tmp_path / "test-alias"
513+
514+
515+
def test_component_register_alias_multiple_err(tmp_path: P):
516+
c = _setup_component(tmp_path, name="test-component")
517+
c.register_alias(
518+
"test-alias", c.version, c.dependency, target_dir=tmp_path / "test-alias"
519+
)
520+
521+
with pytest.raises(ValueError) as exc:
522+
c.register_alias(
523+
"test-alias", c.version, c.dependency, target_dir=tmp_path / "test-alias"
524+
)
525+
526+
assert "alias test-alias already registered on component test-component" in str(
527+
exc.value
528+
)

tests/test_postprocess.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def _setup(tmp_path, f, alias="test-component"):
145145
cdep = MultiDependency("https://fake.repo.url/", tmp_path / "dependencies")
146146
component = Component("test-component", dependency=cdep, work_dir=tmp_path)
147147
if alias != "test-component":
148-
component.register_alias(alias, "master")
148+
component.register_alias(alias, "master", cdep)
149149
os.symlink(component.target_directory, component.alias_directory(alias))
150150
config.register_component(component)
151151
aliases = {alias: "test-component"}

tests/test_render_inventory.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def _setup(tmp_path: Path):
2828
os.makedirs(tmp_path / "dependencies" / "test")
2929
cdep = MockMultiDependency(git.Repo.init(tmp_path / "repo.git"))
3030
c = Component("test", cdep, work_dir=tmp_path)
31-
c.register_alias("test-1", "master")
31+
c.register_alias("test-1", "master", cdep)
3232
os.makedirs(c.class_file.parent)
3333
# Create alias checkout by symlinking component directory
3434
os.symlink(c.target_directory, c.alias_directory("test-1"))

0 commit comments

Comments
 (0)