pypa · joerick · Jul 11, 2020 · Jun 23, 2020 · Jun 24, 2020 · Jun 24, 2020
diff --git a/cibuildwheel/bashlex_eval.py b/cibuildwheel/bashlex_eval.py
@@ -1,17 +1,26 @@
 import shlex
 import subprocess
 
-from typing import Dict, NamedTuple
+from typing import Dict, NamedTuple, Callable, Optional
 
 import bashlex  # type: ignore
 
 
+# a function that takes a shell command and the environment to run it in, and returns its output
+EnvironmentExecutor = Callable[[str, Dict[str, str]], str]
+
+
+def local_environment_executor(command: str, env: Dict[str, str]) -> str:  # fallback executor used by evaluate()
+    return subprocess.check_output(shlex.split(command), env=env, universal_newlines=True)
+
+
 class NodeExecutionContext(NamedTuple):
     environment: Dict[str, str]
     input: str
+    executor: EnvironmentExecutor  # called by evaluate_command_node to run commands
 
 
-def evaluate(value: str, environment: Dict[str, str]) -> str:
+def evaluate(value: str, environment: Dict[str, str], executor: Optional[EnvironmentExecutor] = None) -> str:
     if not value:
         # empty string evaluates to empty string
         # (but trips up bashlex)
@@ -26,7 +35,7 @@ def evaluate(value: str, environment: Dict[str, str]) -> str:
 
     return evaluate_node(
         value_word_node,
-        context=NodeExecutionContext(environment=environment, input=value)
+        context=NodeExecutionContext(environment=environment, input=value, executor=executor or local_environment_executor)
     )
 
 
@@ -67,7 +76,7 @@ def evaluate_word_node(node: bashlex.ast.node, context: NodeExecutionContext) ->
 def evaluate_command_node(node: bashlex.ast.node, context: NodeExecutionContext) -> str:
     words = [evaluate_node(part, context=context) for part in node.parts]
     command = ' '.join(words)
-    return subprocess.check_output(shlex.split(command), env=context.environment, universal_newlines=True)
+    return context.executor(command, context.environment)  # the executor decides where the command runs
 
 
 def evaluate_parameter_node(node: bashlex.ast.node, context: NodeExecutionContext) -> str:

diff --git a/cibuildwheel/docker_container.py b/cibuildwheel/docker_container.py
@@ -0,0 +1,178 @@
+import io
+import json
+import shlex
+import subprocess
+import sys
+import uuid
+from os import PathLike
+from pathlib import Path, PurePath
+from typing import IO, Dict, List, Optional, Sequence, TextIO, Union
+
+
+class DockerContainer:
+    '''
+    An object that represents a running Docker container.
+
+    Intended for use as a context manager e.g.
+    `with DockerContainer('ubuntu') as docker:`
+
+    A bash shell is running in the remote container. When `call()` is invoked,
+    the command is relayed to the remote shell, and the results are streamed
+    back to cibuildwheel.
+    '''
+    UTILITY_PYTHON = '/opt/python/cp38-cp38/bin/python'
+
+    process: subprocess.Popen
+    bash_stdin: IO[str]
+    bash_stdout: IO[str]
+
+    def __init__(self, docker_image: str, simulate_32_bit=False):
+        self.docker_image = docker_image  # image handed to `docker create` in __enter__
+        self.simulate_32_bit = simulate_32_bit  # if true, the shell is started via `linux32`
+
+    def __enter__(self) -> 'DockerContainer':
+        # Create a uniquely-named container whose command is a bash shell
+        # (wrapped in `linux32` when simulating 32-bit), then attach to it.
+        self.container_name = f'cibuildwheel-{uuid.uuid4()}'
+        shell_args = ['/bin/bash']
+        if self.simulate_32_bit:
+            shell_args = ['linux32', '/bin/bash']
+        create_command = [
+            'docker', 'create',
+            '--env', 'CIBUILDWHEEL',
+            '--name', self.container_name,
+            '-i',
+            '-v', '/:/host',  # ignored on CircleCI
+            self.docker_image,
+            *shell_args
+        ]
+        subprocess.run(create_command, check=True)
+
+        start_command = ['docker', 'start', '--attach', '--interactive', self.container_name]
+        self.process = subprocess.Popen(
+            start_command,
+            encoding='utf8',
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            # 256kB buffer: large enough for the input to carry a lot of
+            # environment variables
+            bufsize=262144,
+        )
+
+        assert self.process.stdin and self.process.stdout
+        self.bash_stdin = self.process.stdin
+        self.bash_stdout = self.process.stdout
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.bash_stdin.close()  # no more commands will be sent
+        self.process.terminate()
+        self.process.wait()
+        self.bash_stdout.close()  # release our end of the output pipe as well
+        subprocess.run(['docker', 'rm', '--force', '-v', self.container_name])  # best effort: no check
+        self.container_name = None
+
+    def copy_into(self, from_path: Path, to_path: PurePath) -> None:
+        # Copy a host file or directory into the container. `docker cp` fails
+        # with 'no space left on device' when the container is running with the
+        # host filesystem mounted (https://github.com/moby/moby/issues/38995),
+        # so stream through `docker exec`. Paths are shell-quoted for safety.
+        if from_path.is_dir():
+            self.call(['mkdir', '-p', to_path])
+            subprocess.run(
+                f'tar cf - . | docker exec -i {self.container_name} tar -xC {shlex.quote(str(to_path))} -f -',
+                shell=True,
+                check=True,
+                cwd=from_path)
+        else:
+            remote_write = shlex.quote(f'cat > {shlex.quote(str(to_path))}')  # quoted for both shells
+            subprocess.run(
+                f'cat {shlex.quote(str(from_path))} | docker exec -i {self.container_name} sh -c {remote_write}',
+                shell=True, check=True)
+
+    def copy_out(self, from_path: PurePath, to_path: Path) -> None:
+        # note: we assume from_path is a dir; it is shell-quoted so spaces survive
+        to_path.mkdir(parents=True, exist_ok=True)
+
+        subprocess.run(
+            f'docker exec -i {self.container_name} tar -cC {shlex.quote(str(from_path))} -f - . | tar -xf -',
+            shell=True,
+            check=True,
+            cwd=to_path
+        )
+
+    def glob(self, pattern: PurePath) -> List[PurePath]:
+        # expand `pattern` inside the container using its utility python;
+        # the matches come back over stdout as a JSON list of strings
+        script = f'import sys, json, glob; json.dump(glob.glob({str(pattern)!r}), sys.stdout)'
+        listing = self.call([self.UTILITY_PYTHON, '-c', script], capture_output=True)
+        matched_paths = json.loads(listing)
+
+        return [PurePath(matched) for matched in matched_paths]
+
+    def call(self, args: Sequence[Union[str, PathLike]], env: Optional[Dict[str, str]] = None,
+             capture_output=False, cwd: Optional[Union[str, PathLike]] = None) -> str:
+        '''Run `args` in the container's shell; return the output if `capture_output`,
+        else stream it to stdout and return an empty string. Raises CalledProcessError
+        on a nonzero exit, RuntimeError if the shell exits before the footer arrives.'''
+        chdir = f'cd {shlex.quote(str(cwd))}' if cwd else ''
+        env_assignments = ' '.join(f'{shlex.quote(k)}={shlex.quote(v)}'
+                                   for k, v in (env or {}).items())
+        command = ' '.join(shlex.quote(str(a)) for a in args)
+        end_of_message = str(uuid.uuid4())
+
+        # log the command we're executing
+        print(f'    + {command}')
+
+        # Write a command to the remote shell: change the cwd if required, then
+        # run `command` via the `env` utility, which copes with spaces and strange
+        # characters in names or values. Finally the shell prints a footer, so we
+        # know when to stop reading; it carries the returncode of `command`.
+        self.bash_stdin.write(f'''(
+            {chdir}
+            env {env_assignments} {command}
+            printf "%04d%s\n" $? {end_of_message}
+        )
+        ''')
+        self.bash_stdin.flush()
+
+        if capture_output:
+            output_io: TextIO = io.StringIO()
+        else:
+            output_io = sys.stdout
+
+        while True:
+            line = self.bash_stdout.readline()
+            if not line:
+                # EOF: the shell went away before writing the footer; don't spin forever
+                raise RuntimeError(f'docker container shell exited while running: {command}')
+            if line.endswith(end_of_message+'\n'):
+                footer_offset = (
+                    len(line)
+                    - 1  # newline character
+                    - len(end_of_message)  # delimiter
+                    - 4  # 4 returncode decimals
+                )
+                returncode = int(line[footer_offset:footer_offset+4])
+                # add the last line to output, without the footer
+                output_io.write(line[0:footer_offset])
+                break
+            output_io.write(line)
+
+        output = output_io.getvalue() if isinstance(output_io, io.StringIO) else None
+
+        if returncode != 0:
+            raise subprocess.CalledProcessError(returncode, args, output)
+
+        return output if output else ''
+
+    def get_environment(self) -> Dict[str, str]:
+        # dump os.environ from the container's utility python as JSON and parse it
+        dump_script = 'import sys, json, os; json.dump(os.environ.copy(), sys.stdout)'
+        dumped = self.call([self.UTILITY_PYTHON, '-c', dump_script], capture_output=True)
+
+        return json.loads(dumped)
+
+    def environment_executor(self, command: str, environment: Dict[str, str]) -> str:
+        # used as an EnvironmentExecutor: the output must be captured, or `call` returns ''
+        return self.call(shlex.split(command), env=environment, capture_output=True)
diff --git a/cibuildwheel/environment.py b/cibuildwheel/environment.py
@@ -1,6 +1,6 @@
 import bashlex  # type: ignore
 
-from typing import Dict, List, Mapping
+from typing import Dict, List, Mapping, Optional
 
 from . import bashlex_eval
 
@@ -46,9 +46,9 @@ def __init__(self, assignment: str):
         self.name = name
         self.value = value
 
-    def evaluated_value(self, environment: Dict[str, str]) -> str:
+    def evaluated_value(self, environment: Dict[str, str], executor: Optional[bashlex_eval.EnvironmentExecutor] = None) -> str:
         '''Returns the value of this assignment, as evaluated in the environment'''
-        return bashlex_eval.evaluate(self.value, environment=environment)
+        return bashlex_eval.evaluate(self.value, environment=environment, executor=executor)  # None: evaluate() uses its local executor
 
     def as_shell_assignment(self) -> str:
         return f'export {self.name}={self.value}'
@@ -61,11 +61,13 @@ class ParsedEnvironment:
     def __init__(self, assignments: List[EnvironmentAssignment]):
         self.assignments = assignments
 
-    def as_dictionary(self, prev_environment: Mapping[str, str]) -> Dict[str, str]:
+    def as_dictionary(self,
+                      prev_environment: Mapping[str, str],
+                      executor: Optional[bashlex_eval.EnvironmentExecutor] = None) -> Dict[str, str]:
         environment = dict(**prev_environment)
 
         for assignment in self.assignments:
-            value = assignment.evaluated_value(environment=environment)
+            value = assignment.evaluated_value(environment=environment, executor=executor)  # sees earlier assignments
             environment[assignment.name] = value
 
         return environment