1
1
"""This module contains functions for cloning a Git repository to a local path."""
2
2
3
- import asyncio
4
3
import os
5
4
from pathlib import Path
6
- from typing import List , Optional , Tuple
5
+ from typing import Optional
7
6
8
- from gitingest .ingestion_schema import CloneConfig
7
+ from gitingest .schemas import CloneConfig
8
+ from gitingest .utils .git_utils import check_repo_exists , ensure_git_installed , run_command
9
9
from gitingest .utils .timeout_wrapper import async_timeout
10
10
11
11
TIMEOUT : int = 60
12
12
13
13
14
14
@async_timeout (TIMEOUT )
15
- async def clone (config : CloneConfig ) -> None :
15
+ async def clone_repo (config : CloneConfig ) -> None :
16
16
"""
17
17
Clone a repository to a local path based on the provided configuration.
18
18
@@ -47,7 +47,7 @@ async def clone(config: CloneConfig) -> None:
47
47
raise OSError (f"Failed to create parent directory { parent_dir } : { exc } " ) from exc
48
48
49
49
# Check if the repository exists
50
- if not await _check_repo_exists (url ):
50
+ if not await check_repo_exists (url ):
51
51
raise ValueError ("Repository not found, make sure it is public" )
52
52
53
53
clone_cmd = ["git" , "clone" , "--single-branch" ]
@@ -64,7 +64,8 @@ async def clone(config: CloneConfig) -> None:
64
64
clone_cmd += [url , local_path ]
65
65
66
66
# Clone the repository
67
- await _run_command (* clone_cmd )
67
+ await ensure_git_installed ()
68
+ await run_command (* clone_cmd )
68
69
69
70
if commit or partial_clone :
70
71
checkout_cmd = ["git" , "-C" , local_path ]
@@ -81,148 +82,4 @@ async def clone(config: CloneConfig) -> None:
81
82
checkout_cmd += ["checkout" , commit ]
82
83
83
84
# Check out the specific commit and/or subpath
84
- await _run_command (* checkout_cmd )
85
-
86
-
87
- async def _check_repo_exists (url : str ) -> bool :
88
- """
89
- Check if a Git repository exists at the provided URL.
90
-
91
- Parameters
92
- ----------
93
- url : str
94
- The URL of the Git repository to check.
95
- Returns
96
- -------
97
- bool
98
- True if the repository exists, False otherwise.
99
-
100
- Raises
101
- ------
102
- RuntimeError
103
- If the curl command returns an unexpected status code.
104
- """
105
- proc = await asyncio .create_subprocess_exec (
106
- "curl" ,
107
- "-I" ,
108
- url ,
109
- stdout = asyncio .subprocess .PIPE ,
110
- stderr = asyncio .subprocess .PIPE ,
111
- )
112
- stdout , _ = await proc .communicate ()
113
-
114
- if proc .returncode != 0 :
115
- return False
116
-
117
- response = stdout .decode ()
118
- status_code = _get_status_code (response )
119
-
120
- if status_code in (200 , 301 ):
121
- return True
122
-
123
- if status_code in (404 , 302 ):
124
- return False
125
-
126
- raise RuntimeError (f"Unexpected status code: { status_code } " )
127
-
128
-
129
- async def fetch_remote_branch_list (url : str ) -> List [str ]:
130
- """
131
- Fetch the list of branches from a remote Git repository.
132
- Parameters
133
- ----------
134
- url : str
135
- The URL of the Git repository to fetch branches from.
136
- Returns
137
- -------
138
- List[str]
139
- A list of branch names available in the remote repository.
140
- """
141
- fetch_branches_command = ["git" , "ls-remote" , "--heads" , url ]
142
- stdout , _ = await _run_command (* fetch_branches_command )
143
- stdout_decoded = stdout .decode ()
144
-
145
- return [
146
- line .split ("refs/heads/" , 1 )[1 ]
147
- for line in stdout_decoded .splitlines ()
148
- if line .strip () and "refs/heads/" in line
149
- ]
150
-
151
-
152
- async def _run_command (* args : str ) -> Tuple [bytes , bytes ]:
153
- """
154
- Execute a command asynchronously and captures its output.
155
-
156
- Parameters
157
- ----------
158
- *args : str
159
- The command and its arguments to execute.
160
-
161
- Returns
162
- -------
163
- Tuple[bytes, bytes]
164
- A tuple containing the stdout and stderr of the command.
165
-
166
- Raises
167
- ------
168
- RuntimeError
169
- If command exits with a non-zero status.
170
- """
171
- await check_git_installed ()
172
-
173
- # Execute the requested command
174
- proc = await asyncio .create_subprocess_exec (
175
- * args ,
176
- stdout = asyncio .subprocess .PIPE ,
177
- stderr = asyncio .subprocess .PIPE ,
178
- )
179
- stdout , stderr = await proc .communicate ()
180
- if proc .returncode != 0 :
181
- error_message = stderr .decode ().strip ()
182
- raise RuntimeError (f"Command failed: { ' ' .join (args )} \n Error: { error_message } " )
183
-
184
- return stdout , stderr
185
-
186
-
187
- async def check_git_installed () -> None :
188
- """
189
- Check if Git is installed and accessible on the system.
190
-
191
- Raises
192
- ------
193
- RuntimeError
194
- If Git is not installed or if the Git command exits with a non-zero status.
195
- """
196
- try :
197
- proc = await asyncio .create_subprocess_exec (
198
- "git" ,
199
- "--version" ,
200
- stdout = asyncio .subprocess .PIPE ,
201
- stderr = asyncio .subprocess .PIPE ,
202
- )
203
- _ , stderr = await proc .communicate ()
204
- if proc .returncode != 0 :
205
- error_message = stderr .decode ().strip () if stderr else "Git command not found"
206
- raise RuntimeError (f"Git is not installed or not accessible: { error_message } " )
207
-
208
- except FileNotFoundError as exc :
209
- raise RuntimeError ("Git is not installed. Please install Git before proceeding." ) from exc
210
-
211
-
212
- def _get_status_code (response : str ) -> int :
213
- """
214
- Extract the status code from an HTTP response.
215
-
216
- Parameters
217
- ----------
218
- response : str
219
- The HTTP response string.
220
-
221
- Returns
222
- -------
223
- int
224
- The status code of the response
225
- """
226
- status_line = response .splitlines ()[0 ].strip ()
227
- status_code = int (status_line .split (" " , 2 )[1 ])
228
- return status_code
85
+ await run_command (* checkout_cmd )
0 commit comments