Skip to content

Commit 9b32619

Browse files
committed
changed import structure, so hopefully import errors are fixed now
1 parent c15eb9d commit 9b32619

20 files changed

Lines changed: 749 additions & 787 deletions

.github/workflows/code_check.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,6 @@ jobs:
3030
pip install -r requirements.txt
3131
pip install pylint
3232
33-
- name: Clean python cache
34-
run: |
35-
find . -type d -name "__pycache__" -exec rm -rf {} +
36-
3733
- name: Lint with pylint
3834
env:
3935
PYTHONPATH: ${{ github.workspace }}
@@ -42,4 +38,4 @@ jobs:
4238
4339
- name: Test with python unittest
4440
run: |
45-
python -m unittest discover -s middleware
41+
python -m unittest discover -s middleware -p "test_*.py"

middleware/git_repo/__init__.py

Lines changed: 262 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,265 @@
11
"""
2-
Currently the git_repo package only contains the git_repo module.
2+
This module deals with storing files in a git repository.
3+
4+
Classes
5+
-------
6+
GitRepo
7+
A wrapper class for git.Repo that is able to initialize a new git
8+
repository or clone an existing one. It respects the configuration
9+
specified in terms of a GitRepoConfig instance.
10+
GitRepoConfig
11+
The configuration for a GitRepo object.
12+
"""
13+
14+
__all__ = [
15+
'GitRepo',
16+
'GitRepoConfig'
17+
]
18+
__version__ = '0.1.0'
19+
__author__ = 'carsten.scharfenberg@zalf.de'
20+
21+
22+
from types import TracebackType
23+
from typing import Annotated, Any, List, NamedTuple, Type, Union
24+
from pathlib import Path, PurePosixPath
25+
import os
26+
import tempfile
27+
28+
import git
29+
30+
31+
class GitRepoConfig(NamedTuple):
32+
"""
33+
A class for the configuration of a GitRepo.
34+
"""
35+
36+
repo_url: Annotated[str, "The ssh URL of the git repository"]
37+
local_path: Annotated[str, "The local path of the git repository"]
38+
user_name: Annotated[str, "The name of git user"]
39+
user_email: Annotated[str, "The email address of git user"]
40+
branch: Annotated[str, "The branch of the git repository"] = "main"
41+
42+
43+
class GitRepo:
44+
"""
45+
A wrapper class for git.Repo that is able to initialize or clone a git repository.
46+
47+
Methods
48+
-------
49+
add_and_commit(files, message)
50+
Add the specified files to the git repo and commits them.
51+
pull()
52+
Performs a pull from origin on the git repo.
53+
push()
54+
Performs a push to origin on the git repo.
55+
56+
Properties
57+
----------
58+
working_dir
59+
The local working directory of the git repo
60+
"""
61+
62+
# So we do not need to turn off host key checking
63+
github_host_keys = """# github.com:22 SSH-2.0-1907b149
64+
github.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk=
65+
# github.com:22 SSH-2.0-1907b149
66+
# github.com:22 SSH-2.0-1907b149
67+
github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
68+
# github.com:22 SSH-2.0-1907b149
69+
github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
70+
# github.com:22 SSH-2.0-1907b149
71+
"""
72+
73+
def __init__(self, config: GitRepoConfig) -> None:
74+
self._config = config
75+
self._ssh_tempdir = tempfile.TemporaryDirectory(dir='/tmp/ssh') # pylint: disable=R1732
76+
self._ssh_key = os.path.abspath(
77+
os.path.join(self._ssh_tempdir.name, "ssh_key"))
78+
self._ssh_authorized_keys = os.path.abspath(
79+
os.path.join(self._ssh_tempdir.name, "authorized_keys"))
80+
self._repo = self._setup()
81+
82+
# Make this class a context manager to reliably delete the temp dir
83+
def __enter__(self) -> "GitRepo":
84+
"""
85+
Make this class a context manager to reliably delete the temp dir.
86+
87+
Returns
88+
-------
89+
GitRepo
90+
The same instance of GitRepo
91+
"""
92+
return self
93+
94+
def __exit__(
95+
self,
96+
exc_type: Type[BaseException],
97+
exc_value: BaseException,
98+
traceback: TracebackType) -> None:
99+
"""
100+
Make this class a context manager to reliably delete the temp dir.
101+
102+
Parameters
103+
----------
104+
exc_type : Type[BaseException]
105+
The type of the exception being handled, if any.
106+
exc_value : BaseException
107+
The exception instance being handled, if any.
108+
traceback : TracebackType
109+
The traceback of the exception being handled, if any.
110+
111+
Returns
112+
-------
113+
None
114+
"""
115+
self._ssh_tempdir.cleanup()
116+
117+
@property
118+
def working_dir(self) -> Union[str, os.PathLike[str]]:
119+
"""
120+
Returns the local working directory of the git repo
121+
122+
Returns
123+
-------
124+
Union[None, str, os.PathLike[str]]
125+
The local working directory of the git repo as path-like object
126+
"""
127+
return self._repo.working_dir
128+
129+
def add_and_commit(self, files: List[Path], message: str) -> git.Commit:
130+
"""
131+
Add the specified files to the git repo and commits them.
132+
133+
Parameters
134+
----------
135+
files : List[Path]
136+
a list of (absolute) file paths within the git working directory
137+
that are to be added to the git repo
138+
message : str
139+
the commit message
140+
141+
Returns
142+
-------
143+
git.Commit
144+
The git commit object as returned by GitPython
145+
"""
146+
self._repo.index.add(files)
147+
return self._repo.index.commit(message)
148+
149+
def pull(self) -> Any:
150+
"""
151+
Performs a pull from origin on the git repo.
152+
153+
Returns
154+
-------
155+
any
156+
The git fetch info list as returned by GitPython
157+
"""
158+
return self._repo.remotes.origin.pull()
159+
160+
def push(self) -> Any:
161+
"""
162+
Performs a push to origin on the git repo.
163+
164+
Returns
165+
-------
166+
any
167+
The git push info list as returned by GitPython.
168+
"""
169+
return self._repo.remotes.origin.push()
170+
171+
@staticmethod
172+
def _make_ssh_key_path(original_path):
173+
# This is some ugly workaround for git on Windows. In this case git is based on MSYS, so the
174+
# ssh command requires POSIX compatible paths, whereas otherwise we deal with Windows paths
175+
# on Windows. Thus we need to convert the Windows path to the ssh key to MSYS-POSIX.
176+
# Be aware: this is brittle as it assumes that we always use MSYS ssh on Windows. Maybe
177+
# there are other ways to setup git.
178+
path = Path(original_path)
179+
parts = path.parts
180+
if parts[0].endswith(':\\'):
181+
parts = ['/', parts[0].rstrip(':\\'), *parts[1:]]
182+
return PurePosixPath(*parts)
183+
184+
def _setup(self):
185+
# find out local repo path
186+
local_path = self._config.local_path
187+
188+
# find the ssh key and use it. We need an absolute path for this so git can find it
189+
# no matter what the current working directory is.
190+
ssh_key = GitRepo._make_ssh_key_path(self._ssh_key)
191+
ssh_authorized_keys = GitRepo._make_ssh_key_path(
192+
self._ssh_authorized_keys)
193+
194+
# Get key from env and write to file
195+
private_key = os.environ.get("SSH_PRIVATE_KEY")
196+
if private_key is None:
197+
raise ValueError(
198+
"SSH_PRIVATE_KEY environment variable is not set.")
199+
with open(ssh_key, "w", encoding="utf-8") as file:
200+
file.write(private_key)
201+
file.write("\n")
202+
os.chmod(ssh_key, 0o600)
203+
204+
# Write GitHub's host keys to the file that is passed to ssh as UserKnownHostsFile
205+
with open(ssh_authorized_keys, "w", encoding="utf-8") as file:
206+
file.write(GitRepo.github_host_keys)
207+
os.chmod(ssh_authorized_keys, 0o644)
208+
209+
os.environ['GIT_SSH_COMMAND'] = (
210+
f'ssh '
211+
f'-F /dev/null '
212+
f'-i {ssh_key} '
213+
f'-o UserKnownHostsFile={ssh_authorized_keys} '
214+
f'-o StrictHostKeyChecking=yes'
215+
)
216+
217+
# Initialize existing repo or clone it, if this hasn't been done yet
218+
try:
219+
repo = git.Repo(local_path)
220+
if repo.remotes.origin.url != self._config.repo_url:
221+
raise RuntimeError("Repository " + local_path + " already exists and is not a" +
222+
" clone of " + self._config.repo_url)
223+
except (git.NoSuchPathError, git.InvalidGitRepositoryError):
224+
repo = git.Repo.clone_from(self._config.repo_url, local_path)
225+
226+
# set git config
227+
config_writer = repo.config_writer()
228+
config_writer.set_value("user", "name", self._config.user_name)
229+
config_writer.set_value("user", "email", self._config.user_email)
230+
# In case we use a persistent volume containing our git repo and someone else is
231+
# running/testing the basic middleware on another machine, we might end up
232+
# lagging behind the remote repo. So we need to specify a pull strategy. Rebase
233+
# seems to be most suitable (tests showed that fast-forward is not possible and
234+
# rebase works without merge commits that we do not want -- hopefully we never
235+
# run into conflicts.)
236+
config_writer.set_value("pull", "rebase", True)
237+
config_writer.release()
238+
239+
# switch into desired branch or create it
240+
branch = self._config.branch
241+
if branch not in repo.branches:
242+
# before we can create a branch we need to have a commit, so try to access it
243+
try:
244+
_ = repo.head.commit
245+
except ValueError:
246+
# create initial commit
247+
readme = """# Purpose of this repository #
248+
249+
This repository is automatically maintained by the FAIRagro
250+
[middleware](https://github.com/fairagro/m4.2_basic_middleware). It stores scraped meta
251+
data from research data repositories in consolidated JSON-LD files.
252+
253+
<mark>Important:</mark> do not change this repo manually.
3254
"""
255+
readme_path = os.path.join(local_path, 'README.md')
256+
with open(readme_path, "w", encoding="utf-8") as file:
257+
file.write(readme)
258+
repo.index.add([readme_path])
259+
repo.index.commit("Initial commit")
260+
# create new branch
261+
repo.create_head(branch)
262+
repo.remotes.origin.push(branch)
263+
repo.git.checkout(branch)
4264

5-
from .git_repo import * # noqa: F403
265+
return repo

0 commit comments

Comments
 (0)