diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 3775bad..dd7388f 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -11,7 +11,7 @@ updates: interval: "weekly" - package-ecosystem: "uv" - directory: "/" + directory: "/repoupdater" schedule: interval: "weekly" day: "wednesday" diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml deleted file mode 100644 index 9b3f97b..0000000 --- a/.github/workflows/tests.yaml +++ /dev/null @@ -1,30 +0,0 @@ -name: Tests -permissions: - contents: read -on: - pull_request: - workflow_dispatch: -jobs: - test_dry_run: - runs-on: ubuntu-22.04 - name: Run dry-run - steps: - - name: Checkout - uses: actions/checkout@v6 - - uses: opensafely-core/setup-action@v1 - with: - install-just: true - install-uv: true - cache: uv - - name: setup environment - run: | - echo "ORG_TOKEN=${{ secrets.ORG_READONLY_TOKEN }}" > .env - - name: Run dry-run - run: | - echo "WARNING: if this step fails randomly, you may need to update the repo secret 'ORG_READONLY_TOKEN'" - echo "This is a Personal Access Token (Classic), that only has 'read:org'." - echo - echo "WARNING: ignore output from command as long as it does not fail." - echo "It is running w/o elevated privileges needed to correctly read branch protection details" - echo - just manage-github --dry-run diff --git a/README.md b/README.md index 3547ce1..f6e6901 100644 --- a/README.md +++ b/README.md @@ -1,112 +1,10 @@ # OpenSAFELY Sysadmin Tools -This repository contains the documentation and scripts used to manage -the OpenSAFELY Github organisation's users, teams, repos and -permissions. +A collection of sysadmin things. -Github's organisation features are somewhat limited. Repositories are -flat, no grouping, so each repo needs explicitly adding to a team, at an -explicit permissions level. This makes managing this via the UI -laborious and and error prone. - -This repo include config and scripts to manage the teams and repos via -the Github API. - -The high level goal to protect against injection of code via github into -any part of the OpenSAFELY systems. To reduce risk, we to separate the -sensitive infrastructure repos out from the ever-growing list of -study repos, and restrict write access to the senstive repos to a -smaller technical team. As there is no repository grouping, this is -done via explicit config stored in this repo. - -There are two teams. Researchers have admin access to all study repos. -Developers have admin access to all protected infrastructure repos, and -are also in Researchers team. - -All master/main branches are protected, even for admins. This disables -force-pushes from anywhere. - -Additionally, protected repos require code review, and signing. This -prevents pushes to master/main without a review. - -# Readonly Classic PATs - -At the time this system was implemented, Github only had classic PATs. And -whilst these supported a readonly scope for public repos, if you wanted private -repo access, you *had* to have write access too. However, it was not acceptable -for job-server or job-runner to have write access. - -So, in order to acheive a readonly opensafely org PAT, we: - -a) lowered the base permissions for the opensafely org to read (they were admin!) -b) added the machinery described above to elevate approved users permissions to be able to *write*. -c) created an opensafely-readonly bot account, that was *not* included in the machinery above -c) use this bot use to create PATs for job-server and job-runner. - -Over time, this readonly user has also been used to create issues in various -private repo, so is also a collaborator on specific repos in ebmdatalab org as well. - -# System prerequisites - -## Just - -We use [`just`](https://github.com/casey/just) as our command runner. It's -a single file binary available for many platforms so should be easy to -install. - -```sh -# macOS -brew install just - -# Linux -# Install from https://github.com/casey/just/releases - -# Add completion for your shell. E.g. for bash: -source <(just --completions bash) - -# Show all available commands -just # shortcut for just --list -``` - -## uv - -Follow installation instructions from the [uv documentation](https://docs.astral.sh/uv/getting-started/installation/) for your OS. - -## Python - -You'll need an appropriate version of Python on your PATH. Check the -`.python-version` file for the required version. - -# Setup - -Create virtual environment, .env file and install requirements. -``` -just devenv -``` - -* Create a GitHub personal access token with admin:org permissions, and update - it the `ORG_TOKEN` variable in the `.env` file. - - -# Run - -Ensure you have a GH PAT with org admin permissions in `.env` - -`just manage-github` will run the command in dryrun mode, printing changes it would have made - -`just manage-github --exec` will actually apply the changes. - - -# Cron Job - -The management script is designed to run periodically. However, it uses a very -privileged secret, so it currently runs from Simon's home machine. - -Run set up a cronjob yourself, you can use `cronjob.sh`. First, edit the `tokenfile` variable to -point a file with a GH PAT that has admin org permissions. - -Then, set it to run every hour at n minutes past the hour via `crontab -e` or similar. -e.g. to run at 17m past each hour: - -`17 * * * * /path/to/cronjob.sh >> /path/to/logfile.log 2>&1` +This contains: +* documentation in `adr/` +* `repoupdater` a utility to help manage multiple repositories in + `repoupdater/` +* configuration for small services in `services/` diff --git a/client.py b/client.py deleted file mode 100644 index e35bc34..0000000 --- a/client.py +++ /dev/null @@ -1,103 +0,0 @@ -import os -import sys - -from github import Github - - -ERROR_MSG = """ -Error: missing environment variable ORG_TOKEN. You need a Personal -Access Token (Classic), with admin:org and all repo permissions. - -https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token -""" - - -def github_client(): - token = os.environ.get("ORG_TOKEN") - if not token: - sys.exit(ERROR_MSG) - return Github(token) - - -def get_org(org): - return github_client().get_organization(org) - - -class Change: - def __init__(self, cmd, msg, *args): - self.cmd = cmd - self.msg = msg - self.args = args - - def __str__(self): - return self.msg.format(*self.args) - - def __call__(self): - return self.cmd() - - -class GithubTeam: - """Represents an organisation or team on Github.""" - - def __init__(self, team): - self.team = team - self._members = None - self._repos = None - - @property - def members(self): - if self._members is None: - # print(' - loading members for {}'.format(self.team.name)) - self._members = {m.login: m for m in self.team.get_members()} - return self._members - - @property - def repos(self): - if self._repos is None: - # print(' - loading repos for {}'.format(self.team.name)) - self._repos = {r.full_name: r for r in self.team.get_repos()} - return self._repos - - def add_member(self, member): - if member.login not in self.members: - yield Change( - lambda: self.team.add_membership(member), - "add {} to {} team", - member.login, - self.team.name, - ) - - def need_to_set_permissions(self, permission, raw): - # this relies on the dict being in permission order, which it seems to be - for name, value in raw.items(): - if value: - # this will be the first value set - if name == permission: - # if it matches the expected permission, we're good - return False - else: - # otherwise, either a higher or lower was set, and we need to change - return True - # no permissions - return True - - def add_repo(self, repo, permission): - if repo.full_name not in self.repos: - yield Change( - lambda: self.team.add_to_repos(repo), - "add {} repo to {} team", - repo.name, - self.team.slug, - ) - - current = self.team.get_repo_permission(repo) - if current is None or self.need_to_set_permissions( - permission, current.raw_data - ): - yield Change( - lambda: self.team.set_repo_permission(repo, permission), - "set {} permission on {} to {}", - permission, - repo.name, - self.team.slug, - ) diff --git a/cronjob.sh b/cronjob.sh deleted file mode 100755 index aef1312..0000000 --- a/cronjob.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -euo pipefail -date -tokenfile=/home/wavy/datalab/opensafely-sysadmin/org-token -checkout=/tmp/opensafely-sysadmin -if test -d $checkout; then - git -C $checkout pull -else - git clone https://github.com/opensafely-core/sysadmin.git $checkout -fi - -cd $checkout -echo "ORG_TOKEN=$(cat $tokenfile)" > .env -code=0 -export PYTHONUNBUFFERED=1 -time just manage-github --exec || code=$? -echo "Exit: $code" -exit $code diff --git a/manage-github.py b/manage-github.py deleted file mode 100755 index ff60f93..0000000 --- a/manage-github.py +++ /dev/null @@ -1,318 +0,0 @@ -import argparse -import itertools -import select -import sys -from datetime import datetime - -from github import GithubException, RateLimitExceededException - -import client - - -# these counts are excluded from the generic researchers team -BOTS = [ - "opensafely-readonly", - "opensafely-interactive-bot", -] - - -# This applies to all repos. Values are from -# https://docs.github.com/en/rest/reference/repos#update-a-repository -REPO_POLICY = {"delete_branch_on_merge": True} - -# This applies to study repo master/main branchs. See convert_protection -# function for values. -STUDY_BRANCH_POLICY = { - "enforce_admins": True, -} - -# This applies to code repo master/main branchs. See convert_protection -# function for values. -CODE_BRANCH_POLICY = { - "enforce_admins": True, - "required_approving_review_count": 1, -} - -EXCLUDED_REPOS = ["opensafely-core/ethelred"] - - -def convert_protection(protection): - """Convert protection read format to the right format. - - Converts results of branch.get_protection() into a dict that can passed to - branch.edit_protection(). That this is necessary is a sad thing. - - Input: https://pygithub.readthedocs.io/en/latest/github_objects/BranchProtection.html - - Output: keyword args as per: - - https://pygithub.readthedocs.io/en/latest/github_objects/Branch.html#github.Branch.Branch.edit_protection - """ - reviews = protection.required_pull_request_reviews - output = dict( - enforce_admins=protection.enforce_admins, - dismissal_users=getattr(reviews, "dismissal_users", None), - dismissal_teams=getattr(reviews, "dismissal_teams", None), - dismiss_stale_reviews=getattr(reviews, "dismiss_stale_reviews", None), - require_code_owner_reviews=getattr(reviews, "require_code_owner_reviews", None), - required_approving_review_count=getattr( - reviews, "required_approving_review_count", None - ), - strict=getattr(protection.required_status_checks, "strict", None), - contexts=getattr(protection.required_status_checks, "contexts", None), - # TODO: user/team push restrictions if we need them - ) - - return output - - -def protect_branch(repo, branch=None, **kwargs): - """Audit and enforce branch protections. - - Keyword args can be used to set additional restrictions, as per: - - https://pygithub.readthedocs.io/en/latest/github_objects/Branch.html#github.Branch.Branch.edit_protection - - We set enforce_admins=True by default - - """ - # our security model requires enforce_admins - kwargs["enforce_admins"] = True - protection = {} - protected_branches = [] - - # cope with master -> main name transition, including possibility that both - # exist - if branch is None: - branches = ["master", "main"] - else: - branches = [branch] - - for branch_name in branches: - try: - b = repo.get_branch(branch_name) - protected_branches.append(b) - except GithubException as e: - if e.status != 404: - raise - - if not protected_branches: - yield client.Change( - lambda: None, - "ERROR: Could not find {} branches in {}", - branches, - repo.full_name, - ) - - for protected_branch in protected_branches: - try: - current_protection = convert_protection(protected_branch.get_protection()) - except GithubException as e: - if e.status == 404: - # new repo no protection set - protection = kwargs - else: - # this occurs when a private repo is forked *into* the opensafely org - # currently just vaccine-eligibility repo, we want to avoid that in future. - yield client.Change( - lambda: None, - "ERROR: exception getting branch protection on {}/{}\n{}", - repo.full_name, - protected_branch.name, - e, - ) - continue - else: - for k, v in kwargs.items(): - if current_protection[k] != v: - protection[k] = v - - if protection: - yield client.Change( - lambda: protected_branch.edit_protection(**protection), - "setting branch protection on {}/{} to:\n{}", - repo.name, - protected_branch.name, - ", ".join(f"{k}={v}" for k, v in protection.items()), - ) - - -def configure_repo(repo, **kwargs): - """Configure a repo according to config.""" - - try: - for user in repo.get_collaborators("direct"): - # a direct user with the admin permission is the repo creator, or someone added by the repo creator - if user.permissions.admin: - msg = ( - f"removing direct admin collaborator {user.login} from {repo.name}" - ) - if repo.archived: - yield client.Change( - lambda: print( - f"Cannot safely remove admin user {user.login} from {repo.name} is the repo is archived.\n" - f"Please manually remove them: {repo.html_url}" - ), - msg, - ) - else: - yield client.Change( - lambda: repo.remove_from_collaborators(user), - msg, - ) - - except GithubException as exc: - if exc.status == 403: - print( - "Token does not have permissions to query repo collaborators (need write access)" - ) - else: - raise - - # if it's archived we can't change policy - if repo.archived: - return - - to_change = {} - for name, value in kwargs.items(): - if getattr(repo, name) != value: - to_change[name] = value - - if to_change: - yield client.Change( - lambda: repo.edit(**to_change), - "setting repo policy:\n{}", - to_change, - ) - - -def input_with_timeout(prompt, timeout=5.0): - print(prompt) - i, _, _ = select.select([sys.stdin], [], [], 5) - if i: - return sys.stdin.readline().strip().lower() - else: - return None - - -def manage_code(org, repo_policy=None, branch_policy=None, excluded_repos=None): - """Ensure that all opensafe-core repos have the correct configuration.""" - code = client.GithubTeam(org) - excluded_repos = set() if excluded_repos is None else set(excluded_repos) - repos = [repo for name, repo in code.repos.items() if name not in excluded_repos] - for repo in repos: - print(repo.full_name) - if repo_policy: - yield from configure_repo(repo, **repo_policy) - if branch_policy: - yield from protect_branch(repo, **branch_policy) - - -def manage_studies(org, repo_policy, branch_policy): - """Ensure all opensafely repos have the correct config. - - This also involves adding non_study repos to the editors team, and all - others to the researchers team. - """ - opensafely = client.GithubTeam(org) - researchers = client.GithubTeam(org.get_team_by_slug("researchers")) - editors = client.GithubTeam(org.get_team_by_slug("editors")) - - # everyone is in researchers group - for member in opensafely.members.values(): - # avoid elevating bot accounts - if member.login not in BOTS: - yield from researchers.add_member(member) - - for repo in opensafely.repos.values(): - print(repo.full_name) - yield from configure_repo(repo, **repo_policy) - yield from protect_branch(repo, **branch_policy) - - # another api request :( - if "non-research" in repo.get_topics(): - yield from editors.add_repo(repo, "maintain") - else: - # researchers have access to all studies - yield from researchers.add_repo(repo, "push") - - -def main(argv=sys.argv[1:]): - parser = argparse.ArgumentParser( - description="Apply policy to OpenSAFELY github org" - ) - parser.add_argument( - "--exec", - action="store_true", - dest="execute", - help="Automatically execute commands", - ) - parser.add_argument( - "--dry-run", - action="store_true", - dest="dry_run", - help="Just print what would change and exit", - ) - - args = parser.parse_args(argv) - # we run in one of three modes: - # --dry-run: analyse changes, but do not apply - # --exec: analyse changes and apply immediately - # default: analyse changes and ask for confirmation before applying them - mode = "default" - if args.dry_run: - mode = "dry-run" - elif args.execute: - mode = "execute" - - if mode == "dry-run": - print("*** DRY RUN - no changes will be made ***") - try: - studies = client.get_org("opensafely") - core = client.get_org("opensafely-core") - - pending_changes = [] - - # analyse changes needed - changes = itertools.chain( - manage_studies(studies, REPO_POLICY, STUDY_BRANCH_POLICY), - manage_code(core, REPO_POLICY, CODE_BRANCH_POLICY, EXCLUDED_REPOS), - ) - - for change in changes: - print(change) - if mode == "execute": - change() - else: - pending_changes.append(change) - - if mode == "dry-run": - print("*** DRY RUN - no changes were made ***") - elif mode == "default": - if pending_changes: - answer = input_with_timeout( - "Do you want to apply the above changes (y/n)?", - 30.0, - ) - if answer == "y": - for change in pending_changes: - print(change) - change() - else: - print("No changes needed") - - except RateLimitExceededException as exc: - print("Github ratelimit hit") - try: - reset = datetime.fromtimestamp(int(exc.headers["x-ratelimit-reset"])) - print(f"Will reset at {reset.isoformat()}") - except Exception: - pass - - for k, v in exc.headers.items(): - if k.lower().startswith("x-ratelimit"): - print(f"{k}: {v}") - - -if __name__ == "__main__": - main() diff --git a/README.repoupdater.md b/repoupdater/README.md similarity index 100% rename from README.repoupdater.md rename to repoupdater/README.md diff --git a/repoupdater/client.py b/repoupdater/client.py new file mode 100644 index 0000000..60a4524 --- /dev/null +++ b/repoupdater/client.py @@ -0,0 +1,23 @@ +import os +import sys + +from github import Github + + +ERROR_MSG = """ +Error: missing environment variable ORG_TOKEN. You need a Personal +Access Token (Classic), with admin:org and all repo permissions. + +https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token +""" + + +def github_client(): + token = os.environ.get("ORG_TOKEN") + if not token: + sys.exit(ERROR_MSG) + return Github(token) + + +def get_org(org): + return github_client().get_organization(org) diff --git a/dotenv-sample b/repoupdater/dotenv-sample similarity index 67% rename from dotenv-sample rename to repoupdater/dotenv-sample index 6793be5..a8a3045 100644 --- a/dotenv-sample +++ b/repoupdater/dotenv-sample @@ -1,4 +1,3 @@ # Classic PAT with org permissions -# - manage-github requires admin:org (all) # - repoupdater requires repo (all) ORG_TOKEN=changeme diff --git a/justfile b/repoupdater/justfile similarity index 97% rename from justfile rename to repoupdater/justfile index c300cdd..223f897 100644 --- a/justfile +++ b/repoupdater/justfile @@ -108,8 +108,5 @@ fix: -uv run ruff format . -just --fmt --unstable -manage-github *ARGS="--dry-run": _checkenv - uv run python manage-github.py {{ ARGS }} - repoupdater *ARGS: _checkenv uv run python repoupdater.py {{ ARGS }} diff --git a/pyproject.toml b/repoupdater/pyproject.toml similarity index 97% rename from pyproject.toml rename to repoupdater/pyproject.toml index 40f1536..21b5141 100644 --- a/pyproject.toml +++ b/repoupdater/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "os-sysadmin" +name = "repoupdater" version = "0.1.0" description = "Add your description here" readme = "README.md" diff --git a/repoupdater.py b/repoupdater/repoupdater.py similarity index 100% rename from repoupdater.py rename to repoupdater/repoupdater.py diff --git a/uv.lock b/repoupdater/uv.lock similarity index 100% rename from uv.lock rename to repoupdater/uv.lock diff --git a/scan-for-icu.sh b/scan-for-icu.sh deleted file mode 100644 index f4029aa..0000000 --- a/scan-for-icu.sh +++ /dev/null @@ -1,12 +0,0 @@ -log=$(mktemp) -echo "Repo url, commit, author, date" -for r in research/* -do - git -C "$r" log --pretty=format:'%h, %an, "%ad"' -S "admitted_to_icu" > "$log" - url=$(git -C "$r" config --get remote.origin.url | sed 's#git@github.com:#https://github.com/#' | sed 's/\.git$//') - while IFS= read -r line - do - echo "$url, $line" - done < "$log" -done -