diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ecd9b7997..67eeb0cd5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -552,12 +552,18 @@ jobs: exit 1 # containerize the package and upload to the GHCR upon new release (whether pre-release or not) - ghcr-build-and-push-on-release: + # Step 1: Build the Docker image and save as tar for scanning + ghcr-build-on-release: needs: deploy runs-on: ubuntu-latest permissions: contents: read packages: write + outputs: + image-tags: ${{ steps.set-tags.outputs.tags }} + image-name: synapsepythonclient-release + env: + TARFILE_NAME: synapsepythonclient-release.tar steps: - name: Check out the repo @@ -565,74 +571,167 @@ jobs: - name: Extract Release Version run: echo "RELEASE_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV shell: bash + - name: Set image tags + id: set-tags + shell: bash + run: | + if [[ "${{ github.event.release.prerelease }}" == "true" ]]; then + echo "tags=ghcr.io/sage-bionetworks/synapsepythonclient:${{ env.RELEASE_VERSION }}-prerelease" >> $GITHUB_OUTPUT + else + echo "tags=ghcr.io/sage-bionetworks/synapsepythonclient:latest,ghcr.io/sage-bionetworks/synapsepythonclient:${{ env.RELEASE_VERSION }}" >> $GITHUB_OUTPUT + fi - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - - name: Log in to GitHub Container Registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Build and push Docker image (official release) - id: docker_build - if: '!github.event.release.prerelease' - uses: docker/build-push-action@v3 + - name: Build Docker image + uses: docker/build-push-action@v5 with: - push: true + context: . + push: false + load: true provenance: false - tags: ghcr.io/sage-bionetworks/synapsepythonclient:latest,ghcr.io/sage-bionetworks/synapsepythonclient:${{ env.RELEASE_VERSION }} + tags: synapsepythonclient-release:local file: ./Dockerfile platforms: linux/amd64 cache-from: type=registry,ref=ghcr.io/sage-bionetworks/synapsepythonclient:build-cache - cache-to: type=registry,mode=max,ref=ghcr.io/sage-bionetworks/synapsepythonclient:build-cache - - name: Build and push Docker image (pre-release) - id: docker_build_prerelease - if: 'github.event.release.prerelease' - uses: docker/build-push-action@v3 + cache-to: type=inline + - name: Save Docker image to tar + run: docker save synapsepythonclient-release:local -o ${{ env.TARFILE_NAME }} + - name: Upload tar artifact + uses: actions/upload-artifact@v4 with: - push: true - provenance: false - tags: ghcr.io/sage-bionetworks/synapsepythonclient:${{ env.RELEASE_VERSION }}-prerelease - file: ./Dockerfile - platforms: linux/amd64 - cache-from: type=registry,ref=ghcr.io/sage-bionetworks/synapsepythonclient:build-cache-prerelease - cache-to: type=registry,mode=max,ref=ghcr.io/sage-bionetworks/synapsepythonclient:build-cache-prerelease - - name: Output image digest (official release) - if: '!github.event.release.prerelease' - run: echo "The image digest for official release is ${{ steps.docker_build.outputs.digest }}" - - name: Output image digest (pre-release) - if: 'github.event.release.prerelease' - run: echo "The image digest for pre-release is ${{ steps.docker_build_prerelease.outputs.digest }}" + name: ${{ env.TARFILE_NAME }} + path: ${{ env.TARFILE_NAME }} + retention-days: 1 + + # Step 2: Scan the built image with Trivy before pushing + trivy-scan-release: + needs: [ghcr-build-on-release] + uses: ./.github/workflows/trivy.yml + with: + SOURCE_TYPE: tar + TARFILE_NAME: synapsepythonclient-release.tar + IMAGE_NAME: synapsepythonclient-release:local + EXIT_CODE: 1 + permissions: + contents: read + security-events: write + actions: read - # containerize the package and upload to the GHCR upon commit in develop - ghcr-build-and-push-on-develop: + # Step 3: Push the image to GHCR only if Trivy scan passes + ghcr-push-on-release: + needs: [ghcr-build-on-release, trivy-scan-release] runs-on: ubuntu-latest - if: github.ref == 'refs/heads/develop' permissions: contents: read packages: write + env: + TARFILE_NAME: synapsepythonclient-release.tar + steps: - - name: Check out the repo - uses: actions/checkout@v4 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + - name: Download scanned tar + uses: actions/download-artifact@v4 + with: + name: ${{ env.TARFILE_NAME }} + path: /tmp + - name: Load Docker image from tar + run: docker load -i /tmp/${{ env.TARFILE_NAME }} - name: Log in to GitHub Container Registry uses: docker/login-action@v2 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Build and push Docker image for develop - id: docker_build + - name: Tag and push Docker image + shell: bash + run: | + IFS=',' read -ra TAGS <<< "${{ needs.ghcr-build-on-release.outputs.image-tags }}" + for TAG in "${TAGS[@]}"; do + docker tag synapsepythonclient-release:local "$TAG" + docker push "$TAG" + done + + # containerize the package and upload to the GHCR upon commit in develop + # Step 1: Build the Docker image and save as tar for scanning + ghcr-build-on-develop: + runs-on: ubuntu-latest + if: github.ref == 'refs/heads/develop' + permissions: + contents: read + packages: write + outputs: + image-tag: ghcr.io/sage-bionetworks/synapsepythonclient:develop-${{ github.sha }} + image-name: synapsepythonclient-develop + env: + TARFILE_NAME: synapsepythonclient-develop.tar + + steps: + - name: Check out the repo + uses: actions/checkout@v4 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Build Docker image uses: docker/build-push-action@v5 with: - push: true + context: . + push: false + load: true provenance: false - tags: ghcr.io/sage-bionetworks/synapsepythonclient:develop-${{ github.sha }} + tags: synapsepythonclient-develop:local file: ./Dockerfile platforms: linux/amd64 cache-from: type=registry,ref=ghcr.io/sage-bionetworks/synapsepythonclient:build-cache cache-to: type=inline - - name: Output image digest - run: echo "The image digest is ${{ steps.docker_build.outputs.digest }}" + - name: Save Docker image to tar + run: docker save synapsepythonclient-develop:local -o ${{ env.TARFILE_NAME }} + - name: Upload tar artifact + uses: actions/upload-artifact@v4 + with: + name: ${{ env.TARFILE_NAME }} + path: ${{ env.TARFILE_NAME }} + retention-days: 1 + + # Step 2: Scan the built image with Trivy before pushing + trivy-scan-develop: + needs: [ghcr-build-on-develop] + uses: ./.github/workflows/trivy.yml + with: + SOURCE_TYPE: tar + TARFILE_NAME: synapsepythonclient-develop.tar + IMAGE_NAME: synapsepythonclient-develop:local + EXIT_CODE: 1 + permissions: + contents: read + security-events: write + actions: read + + # Step 3: Push the image to GHCR only if Trivy scan passes + ghcr-push-on-develop: + needs: [ghcr-build-on-develop, trivy-scan-develop] + runs-on: ubuntu-latest + if: github.ref == 'refs/heads/develop' + permissions: + contents: read + packages: write + + env: + TARFILE_NAME: synapsepythonclient-develop.tar + + steps: + - name: Download scanned tar + uses: actions/download-artifact@v4 + with: + name: ${{ env.TARFILE_NAME }} + path: /tmp + - name: Load Docker image from tar + run: docker load -i /tmp/${{ env.TARFILE_NAME }} + - name: Log in to GitHub Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Tag and push Docker image + run: | + docker tag synapsepythonclient-develop:local "${{ needs.ghcr-build-on-develop.outputs.image-tag }}" + docker push "${{ needs.ghcr-build-on-develop.outputs.image-tag }}" diff --git a/.github/workflows/docker_build.yml b/.github/workflows/docker_build.yml new file mode 100644 index 000000000..59117ec60 --- /dev/null +++ b/.github/workflows/docker_build.yml @@ -0,0 +1,104 @@ +--- +# +# Reusable workflow to build, scan, and push a Docker image. +# Called by the periodic scan workflow to rebuild images +# when new vulnerabilities are found. +# +name: Build and publish a Docker image + +on: + workflow_call: + inputs: + REF_TO_CHECKOUT: + required: false + type: string + description: "Reference to checkout, e.g. a tag like v1.0.1. Defaults to the branch/tag of the current event." + IMAGE_REFERENCES: + required: true + type: string + description: "Comma-separated image references, e.g., ghcr.io/sage-bionetworks/synapsepythonclient:1.0.1" + +env: + TARFILE_NAME: image.tar + LOCAL_IMAGE_TAG: rebuild-image:local + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + ref: ${{ inputs.REF_TO_CHECKOUT }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Build Docker image + uses: docker/build-push-action@v5 + with: + context: . + push: false + load: true + tags: ${{ env.LOCAL_IMAGE_TAG }} + file: ./Dockerfile + platforms: linux/amd64 + + - name: Save Docker image to tar + run: docker save ${{ env.LOCAL_IMAGE_TAG }} -o ${{ env.TARFILE_NAME }} + + - name: Upload tarball for use by Trivy job + uses: actions/upload-artifact@v4 + with: + name: ${{ env.TARFILE_NAME }} + path: ${{ env.TARFILE_NAME }} + retention-days: 1 + + outputs: + tarfile_artifact: ${{ env.TARFILE_NAME }} + + trivy-scan: + needs: build + uses: "./.github/workflows/trivy.yml" + with: + SOURCE_TYPE: tar + IMAGE_NAME: rebuild-image:local + TARFILE_NAME: ${{ needs.build.outputs.tarfile_artifact }} + EXIT_CODE: 1 + ARTIFACT_NAME_SUFFIX: -rebuild + permissions: + contents: read + security-events: write + actions: read + + push-image: + needs: [build, trivy-scan] + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: Download tar artifact + uses: actions/download-artifact@v4 + with: + name: ${{ needs.build.outputs.tarfile_artifact }} + path: /tmp + + - name: Load Docker image from tar + run: docker load -i /tmp/${{ needs.build.outputs.tarfile_artifact }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Tag and push Docker image + shell: bash + run: | + IFS=',' read -ra TAGS <<< "${{ inputs.IMAGE_REFERENCES }}" + for TAG in "${TAGS[@]}"; do + docker tag ${{ env.LOCAL_IMAGE_TAG }} "$TAG" + docker push "$TAG" + done diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml new file mode 100644 index 000000000..00feac32d --- /dev/null +++ b/.github/workflows/trivy.yml @@ -0,0 +1,95 @@ +--- +# +# This workflow runs Trivy on a Docker image +# It can pull the image from a container registry +# or download a tar file. The latter is used +# to check a container image prior to publishing +# to the registry. + +name: Run Trivy on a Docker image and push results to GitHub + +on: + workflow_call: + inputs: + SOURCE_TYPE: # 'tar' or 'image' + required: true + type: string + TARFILE_NAME: # only used if SOURCE_TYPE=='tar' + required: false + type: string + IMAGE_NAME: + required: true + type: string + EXIT_CODE: # return code for failed scan. 0 means OK. Non-zero will fail the build when there are findings. + required: false + type: number + default: 0 + ARTIFACT_NAME_SUFFIX: # optional suffix to disambiguate artifact names when this workflow is called multiple times in the same run + required: false + type: string + default: "" + outputs: + trivy_conclusion: + description: "The pass/fail status from Trivy" + value: ${{ jobs.trivy.outputs.trivy_conclusion }} + +env: + sarif_file_name: trivy-results.sarif + # downloading the trivy-db from its default GitHub location fails because + # the site experiences too many downloads. The fix is to pull from this + # alternate location. + TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2 + TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1 + +jobs: + trivy: + name: Trivy + runs-on: ubuntu-latest + permissions: + contents: read + security-events: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Download tar file + id: tar-download + uses: actions/download-artifact@v4 + if: ${{ inputs.SOURCE_TYPE == 'tar' }} + with: + name: ${{ inputs.TARFILE_NAME }} + path: /tmp + + - name: Load docker image from tar file + if: ${{ inputs.SOURCE_TYPE == 'tar' }} + run: docker load -i ${{ steps.tar-download.outputs.download-path }}/${{ inputs.TARFILE_NAME }} + + - name: Run Trivy vulnerability scanner for any major issues + uses: aquasecurity/trivy-action@57a97c7e7821a5776cebc9bb87c984fa69cba8f1 # v0.35.0 + id: trivy + with: + image-ref: ${{ inputs.IMAGE_NAME }} + ignore-unfixed: true # skip vulnerabilities for which there is no fix + severity: 'CRITICAL,HIGH' + format: 'sarif' + limit-severities-for-sarif: true + output: ${{ env.sarif_file_name }} + exit-code: ${{ inputs.EXIT_CODE }} + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@5c8a8a642e79153f5d047b10ec1cba1d1cc65699 # v3.35.1 + if: ${{ !cancelled() }} + with: + sarif_file: ${{ env.sarif_file_name }} + wait-for-processing: true + + - name: Upload Trivy output + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: ${{ env.sarif_file_name }}${{ inputs.ARTIFACT_NAME_SUFFIX }} + path: ${{ env.sarif_file_name }} + + outputs: + trivy_conclusion: ${{ steps.trivy.conclusion }} diff --git a/.github/workflows/trivy_periodic_scan.yml b/.github/workflows/trivy_periodic_scan.yml new file mode 100644 index 000000000..7eb90157e --- /dev/null +++ b/.github/workflows/trivy_periodic_scan.yml @@ -0,0 +1,156 @@ +--- +# +# This workflow scans the latest published container image +# for new vulnerabilities daily, publishing findings to +# the GitHub Security tab. If vulnerabilities are found, +# it bumps the patch version and triggers a rebuild. +# +name: Trivy Periodic Image Scan + +on: + schedule: + - cron: "0 0 * * *" # run daily + workflow_dispatch: {} + +jobs: + get-image-reference: + runs-on: ubuntu-latest + steps: + - name: Convert repo name to lower case + id: to_lower_case + run: | + # While GitHub repos can be mixed case, + # Docker images can only be lower case + repo_name=$(echo ${{ github.repository }} | tr '[:upper:]' '[:lower:]') + echo "repo_name=$repo_name" >> $GITHUB_OUTPUT + - name: Find current version + id: find_version + uses: mathieudutour/github-tag-action@a22cf08638b34d5badda920f9daf6e72c477b07b # v6.2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + dry_run: true # setting to 'true' means no new version is created + outputs: + image_repo: ghcr.io/${{ steps.to_lower_case.outputs.repo_name }} + image_tag: ${{ steps.find_version.outputs.previous_tag }} + permissions: + contents: read + + periodic-scan: + needs: get-image-reference + uses: "./.github/workflows/trivy.yml" + with: + SOURCE_TYPE: image + IMAGE_NAME: ${{ needs.get-image-reference.outputs.image_repo }}:${{ needs.get-image-reference.outputs.image_tag }} + EXIT_CODE: 1 + permissions: + contents: read + deployments: write + security-events: write + + # If scan failed, compute next version (dry run) and attempt a rebuild. + # The tag is only created after a successful rebuild to prevent + # infinite bump loops when a CVE fix requires a major version upgrade + # that a simple rebuild cannot provide. + compute-next-version: + needs: periodic-scan + runs-on: ubuntu-latest + if: ${{ !cancelled() && needs.periodic-scan.outputs.trivy_conclusion == 'failure' }} + steps: + - name: Compute next version (dry run — no tag created) + id: tag_version + uses: mathieudutour/github-tag-action@a22cf08638b34d5badda920f9daf6e72c477b07b # v6.2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + dry_run: true + - name: Parse new version + id: parsed + uses: booxmedialtd/ws-action-parse-semver@7784200024d6b3fc01253e617ec0168daf603de3 # v1.4.7 + with: + input_string: ${{ steps.tag_version.outputs.new_version }} + outputs: + new_tag: ${{ steps.tag_version.outputs.new_tag }} + new_version: ${{ steps.tag_version.outputs.new_version }} + new_major_minor: ${{ steps.parsed.outputs.major }}.${{ steps.parsed.outputs.minor }} + permissions: + contents: read + + update-image: + needs: [get-image-reference, periodic-scan, compute-next-version] + if: ${{ !cancelled() && needs.periodic-scan.outputs.trivy_conclusion == 'failure' && needs.compute-next-version.result == 'success' }} + uses: "./.github/workflows/docker_build.yml" + with: + REF_TO_CHECKOUT: ${{ needs.get-image-reference.outputs.image_tag }} + IMAGE_REFERENCES: "${{ needs.get-image-reference.outputs.image_repo }}:${{ needs.compute-next-version.outputs.new_tag }},${{ needs.get-image-reference.outputs.image_repo }}:${{ needs.compute-next-version.outputs.new_major_minor }}" + permissions: + contents: read + deployments: write + security-events: write + packages: write + actions: read # Required by docker_build.yml's nested trivy-scan job + + # Only create the git tag after the rebuilt image passes Trivy and is pushed + create-tag: + needs: [periodic-scan, update-image] + runs-on: ubuntu-latest + if: ${{ !cancelled() && needs.periodic-scan.outputs.trivy_conclusion == 'failure' && needs.update-image.result == 'success' }} + steps: + - name: Bump version and push tag + uses: mathieudutour/github-tag-action@a22cf08638b34d5badda920f9daf6e72c477b07b # v6.2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + + # If the rebuild still has vulnerabilities, open a GitHub issue for manual triage + # instead of looping endlessly + alert-on-failure: + needs: [periodic-scan, update-image] + runs-on: ubuntu-latest + if: ${{ !cancelled() && needs.periodic-scan.outputs.trivy_conclusion == 'failure' && needs.update-image.result == 'failure' }} + permissions: + issues: write + steps: + - name: Check for existing open issue + id: check_issue + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + with: + script: | + const issues = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: 'trivy,security', + per_page: 1 + }); + return issues.data.length > 0 ? 'true' : 'false'; + result-encoding: string + + - name: Create GitHub issue for unresolved vulnerabilities + if: steps.check_issue.outputs.result == 'false' + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + with: + script: | + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: 'Trivy: unresolved container vulnerabilities after rebuild', + body: [ + '## Summary', + '', + 'The daily Trivy periodic scan found Critical/High vulnerabilities in the latest published Docker image.', + 'An automated rebuild was attempted but the rebuilt image **still has vulnerabilities**,', + 'indicating the fix requires a manual dependency update rather than a base image refresh.', + '', + '## Next steps', + '', + `- Review findings in the [Security tab](${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/security/code-scanning)`, + '- Update the affected dependencies to a version that includes the fix', + '- Or add the CVE ID(s) to a `.trivyignore` file if the risk is accepted', + '', + '## Details', + '', + `- **Workflow run:** ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`, + `- **Triggered at:** ${new Date().toISOString()}`, + ].join('\n'), + labels: ['trivy', 'security'] + }); diff --git a/.github/workflows/validate-release.yml b/.github/workflows/validate-release.yml new file mode 100644 index 000000000..40fdea282 --- /dev/null +++ b/.github/workflows/validate-release.yml @@ -0,0 +1,61 @@ +# GitHub Action workflow for current release against the Synapse dev backend (running staging code). +name: validate-synapse-release + +on: + workflow_dispatch: + +concurrency: + group: validate-synapse-release-${{ github.ref }} + cancel-in-progress: true + +jobs: + validate-synapse-release: + runs-on: ubuntu-latest + timeout-minutes: 180 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.14' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[boto3,pandas,pysftp,tests,curator]" + - name: Check for Secret availability + id: secret-check + run: | + if [ -z "${{ secrets.encrypted_d17283647768_key }}" ] || [ -z "${{ secrets.encrypted_d17283647768_iv }}" ]; then + echo "secrets_available=false" >> $GITHUB_OUTPUT; + echo "Secrets for integration tests are not available. Cancelling integration tests."; + exit 1; + else + echo "secrets_available=true" >> $GITHUB_OUTPUT; + fi + - name: Run integration tests (if secrets available) + run: | + export SYNAPSE_PROFILE="TestUbuntuMaximumPython" + echo "Using SYNAPSE_PROFILE: $SYNAPSE_PROFILE" + + # decrypt the encrypted test synapse configuration + openssl aes-256-cbc -K ${{ secrets.encrypted_d17283647768_key }} -iv ${{ secrets.encrypted_d17283647768_iv }} -in test.synapseConfig.enc -out test.synapseConfig -d + mv test.synapseConfig ~/.synapseConfig + + # build and run a docker container to serve as an SFTP host for our SFTP tests + docker build -t sftp_tests - < tests/integration/synapseclient/core/upload/Dockerfile_sftp + docker run -d sftp_tests:latest + + # get the internal IP address of the just launched container + export SFTP_HOST=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' $(docker ps -q)) + + printf "[sftp://$SFTP_HOST]\nusername: test\npassword: test\n" >> ~/.synapseConfig + + # add to known_hosts so the ssh connections can be made without any prompting/errors + mkdir -p ~/.ssh + ssh-keyscan -H $SFTP_HOST >> ~/.ssh/known_hosts + + # set env vars used in external bucket tests from secrets + export EXTERNAL_S3_BUCKET_NAME="${{secrets.EXTERNAL_S3_BUCKET_NAME}}" + export EXTERNAL_S3_BUCKET_AWS_ACCESS_KEY_ID="${{secrets.EXTERNAL_S3_BUCKET_AWS_ACCESS_KEY_ID}}" + export EXTERNAL_S3_BUCKET_AWS_SECRET_ACCESS_KEY="${{secrets.EXTERNAL_S3_BUCKET_AWS_SECRET_ACCESS_KEY}}" + + pytest -sv --reruns 3 tests/integration -n 8 --ignore=tests/integration/synapseclient/test_command_line_client.py --dist loadscope diff --git a/.gitignore b/.gitignore index fa4e7f520..19eb11079 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ jenkins/ .idea/* docs/build/doctrees/* docs/build/html/_sources/* +docs_site/* build/* /venv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b505e30d..81d016dac 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,13 +30,13 @@ repos: name: isort (python) - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 26.3.1 hooks: - id: black language_version: python3 - repo: https://github.com/PyCQA/bandit - rev: 1.8.0 + rev: 1.8.6 hooks: - id: bandit args: ["-c", "pyproject.toml"] diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..0a47bd9a6 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,127 @@ + + +## Project + +Synapse Python Client — official Python SDK and CLI for Synapse (synapse.org), a collaborative science platform by Sage Bionetworks. Provides programmatic access to entities (projects, files, folders, tables, views), metadata, permissions, evaluations, and data curation workflows. Published to PyPI as `synapseclient`. + +## Stack + +- Python 3.10–3.14 (`setup.cfg`: `python_requires = >=3.10, <3.15`) +- HTTP: httpx (async), requests (sync/legacy) +- Models: stdlib dataclasses (NOT Pydantic) +- Tests: pytest 8.2, pytest-asyncio, pytest-socket, pytest-xdist +- Docs: MkDocs with Material theme, mkdocstrings +- Linting: ruff, black (line-length 88), isort (profile=black), bandit +- CI: GitHub Actions → SonarCloud, PyPI deploy on release +- Docker: `Dockerfile` at repo root, published to `ghcr.io/sage-bionetworks/synapsepythonclient` + +## Commands + +```bash +# Install for development +pip install -e ".[boto3,pandas,pysftp,tests,curator,dev]" + +# Unit tests +pytest -sv tests/unit + +# Integration tests (requires Synapse credentials, runs in parallel) +pytest -sv --reruns 3 tests/integration -n 8 --dist loadscope + +# Pre-commit checks (ruff, black, isort, bandit) +pre-commit run --all-files + +# Build docs locally +pip install -e ".[docs]" && mkdocs serve +``` + +## Conventions + +### Docstring examples format +Use mkdocstrings admonition syntax. Start with a description line, then wrap code in a fenced python block. Examples should include login boilerplate. + + Example: Short title +   + Description of what the example does. + ```python + + ``` + +### Async-first with generated sync wrappers +All new methods must be async with `_async` suffix. The `@async_to_sync` class decorator (`core/async_utils.py`) auto-generates sync counterparts at class definition time. Never write sync methods manually on model classes — the decorator handles it. + +### `wrap_async_to_sync()` for standalone functions +Use `wrap_async_to_sync()` (not `@async_to_sync`) for free-standing async functions outside of classes — see `operations/` layer for the pattern. The class decorator only works on classes. + +### Protocol classes for sync type hints +Each model in `models/` has a corresponding protocol in `models/protocols/` defining the sync method signatures. When adding a new async method to a model, add its sync signature to the protocol class so IDE type hints work. + +### Dataclass models with `fill_from_dict()` +Models are `@dataclass` classes, NOT Pydantic. REST responses are deserialized via `fill_from_dict()` methods on each model. New models must follow this pattern. + +### Concrete types are Java class names +`core/constants/concrete_types.py` maps Java class names (e.g., `org.sagebionetworks.repo.model.FileEntity`) for polymorphic entity deserialization. When adding new entity types, register the concrete type string here AND in `api/entity_factory.py` AND in `models/mixins/asynchronous_job.py` if it's an async job type. + +### Options dataclass pattern +The `operations/` layer uses dataclass option objects (`StoreFileOptions`, `FileOptions`, `TableOptions`, etc.) to bundle type-specific configuration for CRUD operations. Follow this pattern for new entity-type-specific options. + +### Mixin composition for shared behavior +Shared functionality lives in `models/mixins/` (AccessControllable, StorableContainer, AsynchronousJob, etc.). Prefer adding to existing mixins over duplicating logic across models. + +### `synapse_client` parameter pattern +Most functions accept an optional `synapse_client` parameter. If omitted, `Synapse.get_client()` returns the cached singleton. Never pass `None` explicitly — omit the argument instead. + +### Branch naming +Use `SYNPY-{issue_number}` or `synpy-{issue_number}` prefix for feature branches. PR titles follow `[SYNPY-XXXX] Description` format. + +## Architecture + +``` +synapseclient/ +├── client.py # Synapse class — public entry point, REST methods, auth (9600+ lines) +├── api/ # REST API layer — one file per resource type (21 files) +│ └── entity_factory.py # Polymorphic entity deserialization via concrete type dispatch +├── models/ # Dataclass entities (Project, File, Table, etc.) (28 files) +│ ├── protocols/ # Sync method type signatures for IDE hints (18 files) +│ ├── mixins/ # Shared behavior (ACL, containers, async jobs, tables) (7 files) +│ └── services/ # Model-level business logic (storable_entity, search) +├── operations/ # High-level CRUD: get(), store(), delete() — factory dispatch +├── core/ # Infrastructure: upload/download, retry, cache, creds, OTel +│ ├── upload/ # Multipart upload (sync + async) +│ ├── download/ # File download (sync + async) +│ ├── credentials/ # Auth chain (PAT, env var, config file, AWS SSM) +│ ├── constants/ # Concrete types, config keys, limits, method flags +│ ├── models/ # ACL, Permission, DictObject, custom JSON serialization +│ └── multithread_download/ # Threaded download manager +├── extensions/ +│ └── curator/ # Schema curation (pandas, networkx, rdflib) — optional +├── services/ # JSON schema validation services +└── entity.py, table.py, ... # Legacy classes (pre-OOP rewrite, read-only) + +synapseutils/ # Legacy bulk utilities (copy, sync, migrate, walk) — sync-only +``` + +Data flow: User → `operations/` factory → model async methods → `api/` service functions → `client.py` REST calls → Synapse API. Responses deserialized via `fill_from_dict()` on model instances. + +## Constraints + +- Do not use Pydantic for models — the codebase uses stdlib dataclasses with custom serialization. Mixing would break the `@async_to_sync` decorator and `fill_from_dict()` pattern. +- For new tests, prefer async test modules. Existing synchronous unit tests under `tests/unit/` are retained and maintained; the `@async_to_sync` decorator is covered by a dedicated smoke test, so avoid adding duplicate sync/async test coverage. +- On non-Windows platforms, unit tests must not make external network calls — `pytest-socket` blocks internet-facing sockets while allowing Unix domain sockets. Socket blocking is skipped on Windows. Use `pytest-mock` for HTTP mocking. +- `develop` is the default/main branch, not `main` or `master`. PRs target `develop`. +- Legacy classes in root `synapseclient/` (entity.py, table.py, etc.) are kept for backwards compatibility. New features go in `models/` using the dataclass pattern. +- Avoid adding new methods to `client.py` (9600+ lines) — prefer the `api/` + `models/` layered pattern. +- `synapseutils/` is legacy sync-only (uses `requests`, NOT `httpx`). Do not add async methods there — new async equivalents go in `models/` or `operations/`. + +## Testing + +- `asyncio_mode = auto` in pytest.ini — no need for `@pytest.mark.asyncio` +- `asyncio_default_fixture_loop_scope = session` — all async tests share one event loop +- Unit test client fixture: session-scoped, `skip_checks=True`, `cache_client=False` +- Integration tests use `--reruns 3` for flaky retries and `-n 8 --dist loadscope` for parallelism +- Integration fixtures create per-worker Synapse projects; use `schedule_for_cleanup()` for teardown +- Auth env vars: `SYNAPSE_AUTH_TOKEN` (bearer token), `SYNAPSE_PROFILE` (config file profile, default: `"default"`), `SYNAPSE_TOKEN_AWS_SSM_PARAMETER_NAME` (AWS SSM path) +- CI runs integration tests only on Python 3.10 and 3.14 (oldest + newest) to limit Synapse server load + +## Maintenance + +Each CLAUDE.md file has a `` header. Update this when the file is reviewed or modified. If a code change invalidates guidance in a CLAUDE.md file, update the guidance in the same PR. diff --git a/Pipfile.lock b/Pipfile.lock index 8a6029565..c6bbec4ad 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "e0442132198c73f3b5eaaee9f635609da7860eab9654c678da8100ff4bdc9b40" + "sha256": "c041ba44bdc1f862cf8d17232e8e23a6a478907c3d046dcace249137ad2c3bec" }, "pipfile-spec": 6, "requires": { @@ -18,11 +18,11 @@ "default": { "anyio": { "hashes": [ - "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", - "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c" + "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", + "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc" ], - "markers": "python_version >= '3.9'", - "version": "==4.12.1" + "markers": "python_version >= '3.10'", + "version": "==4.13.0" }, "async-lru": { "hashes": [ @@ -40,132 +40,190 @@ "markers": "python_version >= '3.6'", "version": "==1.0.1" }, + "black": { + "hashes": [ + "sha256:0126ae5b7c09957da2bdbd91a9ba1207453feada9e9fe51992848658c6c8e01c", + "sha256:0f76ff19ec5297dd8e66eb64deda23631e642c9393ab592826fd4bdc97a4bce7", + "sha256:28ef38aee69e4b12fda8dba75e21f9b4f979b490c8ac0baa7cb505369ac9e1ff", + "sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b", + "sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07", + "sha256:2d6bfaf7fd0993b420bed691f20f9492d53ce9a2bcccea4b797d34e947318a78", + "sha256:41cd2012d35b47d589cb8a16faf8a32ef7a336f56356babd9fcf70939ad1897f", + "sha256:474c27574d6d7037c1bc875a81d9be0a9a4f9ee95e62800dab3cfaadbf75acd5", + "sha256:5602bdb96d52d2d0672f24f6ffe5218795736dd34807fd0fd55ccd6bf206168b", + "sha256:5e9d0d86df21f2e1677cc4bd090cd0e446278bcbbe49bf3659c308c3e402843e", + "sha256:5ed0ca58586c8d9a487352a96b15272b7fa55d139fc8496b519e78023a8dab0a", + "sha256:6c54a4a82e291a1fee5137371ab488866b7c86a3305af4026bdd4dc78642e1ac", + "sha256:6e131579c243c98f35bce64a7e08e87fb2d610544754675d4a0e73a070a5aa3a", + "sha256:855822d90f884905362f602880ed8b5df1b7e3ee7d0db2502d4388a954cc8c54", + "sha256:86a8b5035fce64f5dcd1b794cf8ec4d31fe458cf6ce3986a30deb434df82a1d2", + "sha256:8a33d657f3276328ce00e4d37fe70361e1ec7614da5d7b6e78de5426cb56332f", + "sha256:92c0ec1f2cc149551a2b7b47efc32c866406b6891b0ee4625e95967c8f4acfb1", + "sha256:9a5e9f45e5d5e1c5b5c29b3bd4265dcc90e8b92cf4534520896ed77f791f4da5", + "sha256:afc622538b430aa4c8c853f7f63bc582b3b8030fd8c80b70fb5fa5b834e575c2", + "sha256:b07fc0dab849d24a80a29cfab8d8a19187d1c4685d8a5e6385a5ce323c1f015f", + "sha256:b5e6f89631eb88a7302d416594a32faeee9fb8fb848290da9d0a5f2903519fc1", + "sha256:bf9bf162ed91a26f1adba8efda0b573bc6924ec1408a52cc6f82cb73ec2b142c", + "sha256:c7e72339f841b5a237ff14f7d3880ddd0fc7f98a1199e8c4327f9a4f478c1839", + "sha256:ddb113db38838eb9f043623ba274cfaf7d51d5b0c22ecb30afe58b1bb8322983", + "sha256:dfdd51fc3e64ea4f35873d1b3fb25326773d55d2329ff8449139ebaad7357efb", + "sha256:f1cd08e99d2f9317292a311dfe578fd2a24b15dbce97792f9c4d752275c1fa56", + "sha256:f89f2ab047c76a9c03f78d0d66ca519e389519902fa27e7a91117ef7611c0568" + ], + "index": "pypi", + "markers": "python_version >= '3.10'", + "version": "==26.3.1" + }, "certifi": { "hashes": [ - "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", - "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120" + "sha256:3c52e209ba0a4ad7aebe60436a4ab349c39e1e602e8c134221e546902ad25897", + "sha256:69dea482ab64caa7b9f6aba1c6bf48bb6a5448d1c0f1b17ab42ad8c763a5344d" ], "markers": "python_version >= '3.7'", - "version": "==2026.1.4" + "version": "==2026.5.20" }, "charset-normalizer": { "hashes": [ - "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", - "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", - "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", - "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", - "sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc", - "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", - "sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63", - "sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d", - "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", - "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", - "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", - "sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505", - "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", - "sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af", - "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", - "sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318", - "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", - "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", - "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", - "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", - "sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576", - "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", - "sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1", - "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", - "sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1", - "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", - "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", - "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", - "sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88", - "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", - "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", - "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", - "sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a", - "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", - "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", - "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", - "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", - "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", - "sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7", - "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", - "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", - "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", - "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", - "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", - "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", - "sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2", - "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", - "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", - "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", - "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", - "sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf", - "sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6", - "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", - "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", - "sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa", - "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", - "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", - "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", - "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", - "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", - "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", - "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", - "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", - "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", - "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", - "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", - "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", - "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", - "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", - "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", - "sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3", - "sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9", - "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", - "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", - "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", - "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", - "sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50", - "sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf", - "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", - "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", - "sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac", - "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", - "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", - "sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c", - "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", - "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", - "sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e", - "sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4", - "sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84", - "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", - "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", - "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", - "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", - "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", - "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", - "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", - "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", - "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", - "sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074", - "sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3", - "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", - "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", - "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", - "sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d", - "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", - "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", - "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", - "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", - "sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966", - "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", - "sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3", - "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", - "sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608" + "sha256:007d05ec7321d12a40227aae9e2bc6dca73f3cb21058999a1df9e193555a9dcc", + "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", + "sha256:07d9e39b01743c3717745f4c530a6349eadbfa043c7577eef86c502c15df2c67", + "sha256:08e721811161356f97b4059a9ba7bafb23ea5ee2255402c42881c214e173c6b4", + "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", + "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", + "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5", + "sha256:12a6fff75f6bc66711b73a2f0addfc4c8c15a20e805146a02d147a318962c444", + "sha256:12d8baf840cc7889b37c7c770f478adea7adce3dcb3944d02ec87508e2dcf153", + "sha256:14265bfe1f09498b9d8ec91e9ec9fa52775edf90fcbde092b25f4a33d444fea9", + "sha256:16d971e29578a5e97d7117866d15889a4a07befe0e87e703ed63cd90cb348c01", + "sha256:177a0ba5f0211d488e295aaf82707237e331c24788d8d76c96c5a41594723217", + "sha256:1a87ca9d5df6fe460483d9a5bbf2b18f620cbed41b432e2bddb686228282d10b", + "sha256:1c2a768fdd44ee4a9339a9b0b130049139b8ce3c01d2ce09f67f5a68048d477c", + "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", + "sha256:1dc8b0ea451d6e69735094606991f32867807881400f808a106ee1d963c46a83", + "sha256:1efde3cae86c8c273f1eb3b287be7d8499420cf2fe7585c41d370d3e790054a5", + "sha256:202389074300232baeb53ae2569a60901f7efadd4245cf3a3bf0617d60b439d7", + "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb", + "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c", + "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1", + "sha256:2cd4a60d0e2fb04537162c62bbbb4182f53541fe0ede35cdf270a1c1e723cc42", + "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", + "sha256:2fe249cb4651fd12605b7288b24751d8bfd46d35f12a20b1ba33dea122e690df", + "sha256:30b8d1d8c52a48c2c5690e152c169b673487a2a58de1ec7393196753063fcd5e", + "sha256:320ade88cfb846b8cd6b4ddf5ee9e80ee0c1f52401f2456b84ae1ae6a1a5f207", + "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", + "sha256:36836d6ff945a00b88ba1e4572d721e60b5b8c98c155d465f56ad19d68f23734", + "sha256:38c0109396c4cfc574d502df99742a45c72c08eff0a36158b6f04000043dbf38", + "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", + "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", + "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", + "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", + "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", + "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", + "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", + "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d", + "sha256:4e5163c14bffd570ef2affbfdd77bba66383890797df43dc8b4cc7d6f500bf53", + "sha256:511ef87c8aec0783e08ac18565a16d435372bc1ac25a91e6ac7f5ef2b0bff790", + "sha256:532bc9bf33a68613fd7d65e4b1c71a6a38d7d42604ecf239c77392e9b4e8998c", + "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", + "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116", + "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d", + "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", + "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6", + "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2", + "sha256:6370e8686f662e6a3941ee48ed4742317cafbe5707e36406e9df792cdb535776", + "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", + "sha256:65bcd23054beab4d166035cabbc868a09c1a49d1efe458fe8e4361215df40265", + "sha256:66671f93accb62ed07da56613636f3641f1a12c13046ce91ffc923721f23c008", + "sha256:6696b7688f54f5af4462118f0bfa7c1621eeb87154f77fa04b9295ce7a8f2943", + "sha256:6785f414ae0f3c733c437e0f3929197934f526d19dfaa75e18fdb4f94c6fb374", + "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", + "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", + "sha256:6e0d51f618228538a3e8f46bd246f87a6cd030565e015803691603f55e12afb5", + "sha256:6ed74185b2db44f41ef35fd1617c5888e59792da9bbc9190d6c7300617182616", + "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15", + "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", + "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", + "sha256:750e02e074872a3fad7f233b47734166440af3cdea0add3e95163110816d6752", + "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", + "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", + "sha256:7641bb8895e77f921102f72833904dcd9901df5d6d72a2ab8f31d04b7e51e4e7", + "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", + "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", + "sha256:813c0e0132266c08eb87469a642cb30aaff57c5f426255419572aaeceeaa7bf4", + "sha256:82b271f5137d07749f7bf32f70b17ab6eaabedd297e75dce75081a24f76eb545", + "sha256:84c018e49c3bf790f9c2771c45e9313a08c2c2a6342b162cd650258b57817706", + "sha256:8751d2787c9131302398b11e6c8068053dcb55d5a8964e114b6e196cf16cb366", + "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", + "sha256:87fad7d9ba98c86bcb41b2dc8dbb326619be2562af1f8ff50776a39e55721c5a", + "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", + "sha256:8e385e4267ab76874ae30db04c627faaaf0b509e1ccc11a95b3fc3e83f855c00", + "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", + "sha256:94e1885b270625a9a828c9793b4d52a64445299baa1fea5a173bf1d3dd9a1a5a", + "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", + "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", + "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", + "sha256:a6c5863edfbe888d9eff9c8b8087354e27618d9da76425c119293f11712a6319", + "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", + "sha256:adb2597b428735679446b46c8badf467b4ca5f5056aae4d51a19f9570301b1ad", + "sha256:ae196f021b5e7c78e918242d217db021ed2a6ace2bc6ae94c0fc596221c7f58d", + "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", + "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7", + "sha256:aef65cd602a6d0e0ff6f9930fcb1c8fec60dd2cfcb6facaf4bdb0e5873042db0", + "sha256:af21eb4409a119e365397b2adbaca4c9ccab56543a65d5dbd9f920d6ac29f686", + "sha256:b14b2d9dac08e28bb8046a1a0434b1750eb221c8f5b87a68f4fa11a6f97b5e34", + "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49", + "sha256:bb8cc7534f51d9a017b93e3e85b260924f909601c3df002bcdb58ddb4dc41a5c", + "sha256:bc17a677b21b3502a21f66a8cc64f5bfad4df8a0b8434d661666f8ce90ac3af1", + "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", + "sha256:bd9b23791fe793e4968dba0c447e12f78e425c59fc0e3b97f6450f4781f3ee60", + "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", + "sha256:c0f081d69a6e58272819b70288d3221a6ee64b98df852631c80f293514d3b274", + "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", + "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", + "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", + "sha256:c593052c465475e64bbfe5dbd81680f64a67fdc752c56d7a0ae205dc8aeefe0f", + "sha256:cdd68a1fb318e290a2077696b7eb7a21a49163c455979c639bf5a5dcdc46617d", + "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", + "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", + "sha256:cf29836da5119f3c8a8a70667b0ef5fdca3bb12f80fd06487cfa575b3909b393", + "sha256:d4a48e5b3c2a489fae013b7589308a40146ee081f6f509e047e0e096084ceca1", + "sha256:d560742f3c0d62afaccf9f41fe485ed69bd7661a241f86a3ef0f0fb8b1a397af", + "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", + "sha256:d61f00a0869d77422d9b2aba989e2d24afa6ffd552af442e0e58de4f35ea6d00", + "sha256:d635aab80466bc95771bb78d5370e74d36d1fe31467b6b29b8b57b2a3cd7d22c", + "sha256:dca4bbc466a95ba9c0234ef56d7dd9509f63da22274589ebd4ed7f1f4d4c54e3", + "sha256:dd915403e231e6b1809fe9b6d9fc55cf8fb5e02765ac625d9cd623342a7905d7", + "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", + "sha256:e060d01aec0a910bdccb8be71faf34e7799ce36950f8294c8bf612cba65a2c9e", + "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b", + "sha256:e17b8d5d6a8c47c85e68ca8379def1303fd360c3e22093a807cd34a71cd082b8", + "sha256:e5f4d355f0a2b1a31bc3edec6795b46324349c9cb25eed068049e4f472fb4259", + "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", + "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", + "sha256:e80c8378d8f3d83cd3164da1ad2df9e37a666cdde7b1cb2298ed0b558064be30", + "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", + "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46", + "sha256:ed065083d0898c9d5b4bbec7b026fd755ff7454e6e8b73a67f8c744b13986e24", + "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a", + "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", + "sha256:f22dec1690b584cea26fade98b2435c132c1b5f68e39f5a0b7627cd7ae31f1dc", + "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", + "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", + "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", + "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", + "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", + "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464" ], "markers": "python_version >= '3.7'", - "version": "==3.4.4" + "version": "==3.4.7" + }, + "click": { + "hashes": [ + "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", + "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6" + ], + "markers": "python_version >= '3.10'", + "version": "==8.3.1" }, "deprecated": { "hashes": [ @@ -177,11 +235,11 @@ }, "googleapis-common-protos": { "hashes": [ - "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", - "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5" + "sha256:53a062ff3c32552fbd62c11fe23768b78e4ddf0494d5e5fd97d3f4689c75fbbd", + "sha256:961ed60399c457ceb0ee8f285a84c870aabc9c6a832b9d37bb281b5bebde43ed" ], - "markers": "python_version >= '3.7'", - "version": "==1.72.0" + "markers": "python_version >= '3.9'", + "version": "==1.75.0" }, "h11": { "hashes": [ @@ -209,11 +267,11 @@ }, "idna": { "hashes": [ - "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", - "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902" + "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", + "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc" ], "markers": "python_version >= '3.8'", - "version": "==3.11" + "version": "==3.15" }, "importlib-metadata": { "hashes": [ @@ -223,6 +281,14 @@ "markers": "python_version >= '3.9'", "version": "==8.7.1" }, + "mypy-extensions": { + "hashes": [ + "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", + "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558" + ], + "markers": "python_version >= '3.8'", + "version": "==1.1.0" + }, "nest-asyncio": { "hashes": [ "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", @@ -233,123 +299,139 @@ }, "opentelemetry-api": { "hashes": [ - "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", - "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c" + "sha256:51a69edacadbc03a8950ace1c4c21099cacc538820ac2c9e36277e78cebba714", + "sha256:56c63bea9f77b62856be8c47600474acad853b2924b99b1687c4cb6297166716" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-exporter-otlp-proto-common": { "hashes": [ - "sha256:08f8a5862d64cc3435105686d0216c1365dc5701f86844a8cd56597d0c764fde", - "sha256:763370d4737a59741c89a67b50f9e39271639ee4afc999dadfe768541c027464" + "sha256:04f1f01fb597c4249dfcd7f8b861c902c2102369d376d9d346ff38de4469a2ee", + "sha256:f48d395ab815b444da118868977e9798ea354c25737d5cf39578ae894011c140" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-exporter-otlp-proto-http": { "hashes": [ - "sha256:31bdab9745c709ce90a49a0624c2bd445d31a28ba34275951a6a362d16a0b9cb", - "sha256:d9f5207183dd752a412c4cd564ca8875ececba13be6e9c6c370ffb752fd59985" + "sha256:00a16da1b312a1d6c7233d600d557c91df71125af73020f3b9a7765bd699d59d", + "sha256:bf142a21035d7571ac3a09cb2e5639f49886f243972883cfe777ed3bf02b734d" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-instrumentation": { "hashes": [ - "sha256:04480db952b48fb1ed0073f822f0ee26012b7be7c3eac1a3793122737c78632d", - "sha256:57ddc7974c6eb35865af0426d1a17132b88b2ed8586897fee187fd5b8944bd6a" + "sha256:32368d6ae52c8de20aa790a6ad86b10a76f09956092337ae37d675773990e541", + "sha256:f1986716d52cc316ea5f60189098726a9071d8ecc0eee96c9ed110be08bade9c" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-instrumentation-httpx": { "hashes": [ - "sha256:a506ebaf28c60112cbe70ad4f0338f8603f148938cb7b6794ce1051cd2b270ae", - "sha256:f37636dd742ad2af83d896ba69601ed28da51fa4e25d1ab62fde89ce413e275b" + "sha256:14df6e99d81be9a8cd238f6639b6fa52404c4d3ce219058fcb5dc8c0f2211f86", + "sha256:f41ec82f25c3abcdada621052db3e5fd648e3b43d55eec4b9c0c5d3ecb7b4ff4" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-instrumentation-requests": { "hashes": [ - "sha256:9a1063c16c44a3ba6e81870c4fa42a0fac3ecef5a4d60a11d0976eec9046f3d4", - "sha256:eec9fac3fab84737f663a2e08b12cb095b4bd67643b24587a8ecfa3cf4d0ca4c" + "sha256:513fcaa3d93debbdb359c00ce1a137a34a89ee908c51ac43beb7e8c18ac2b3cd", + "sha256:935c980a11e33bfd7ed969c741e4bd7c84077045651469f10e163534368d87f7" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-instrumentation-threading": { "hashes": [ - "sha256:20b18a68abe5801fa9474336b7c27487d4af3e00b66f6a8734e4fdd75c8b0b43", - "sha256:92a52a60fee5e32bc6aa8f5acd749b15691ad0bc4457a310f5736b76a6d9d1de" + "sha256:33059298e68c94b13c38b562ad28799ec16a2fd06182ebfc762bb4e956e55d94", + "sha256:afa8c2cada8ed136f07b04dc8739bc861a15e9a5edea1a65e4c5e1919c62946c" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-instrumentation-urllib": { "hashes": [ - "sha256:7d6c56e45551bdbf21efc11bd463e10862e8fd04ed4a94b5695325a56440b13e", - "sha256:bf36188d684ca6454b7162492a66749181955011e0cc47a2324cbe66e7f13e81" + "sha256:500b959d7933408ef30a6f4bb2a0b6979f71129e62b945fc5615aa63df4ad9b8", + "sha256:538e8c72515b48c69e03c2789a03d245ba6e1bf5c22c2052df1e872bb8274d96" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-proto": { "hashes": [ - "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", - "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8" + "sha256:c6a51e6b4f05ae63565f3a113217f3d2bfaec68f78c02d7a6c85f9010d1cfca6", + "sha256:dedb74cba2886c59c7789b227a7a670613025a07489040050aedff6e5c0fb43c" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-sdk": { "hashes": [ - "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", - "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6" + "sha256:083cd4bbfaa5aa7b5a9e552430d9951219967cfb27aa61feb13a77aba1fc839d", + "sha256:8c834e8f8c9ba4171d4ec843d0cb8a67e4c7394d3f9e9297e582cbd9456ddbf7" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-semantic-conventions": { "hashes": [ - "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", - "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb" + "sha256:3daf963611334b365e98a57438183eb012d3bfb40b2d931a9af613476b8701a9", + "sha256:dfe5ef4dee82586b746f522b818ceb298d00b3d59f660042bd79404bff8d0682" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-util-http": { "hashes": [ - "sha256:0d97152ca8c8a41ced7172d29d3622a219317f74ae6bb3027cfbdcf22c3cc0d6", - "sha256:66381ba28550c91bee14dcba8979ace443444af1ed609226634596b4b0faf199" + "sha256:6284194028c59cd439f8acfe388145069a6127f11dc077e1344a2094adacc3f8", + "sha256:ba1268f00922ee522dba2ae38458060f99486e7385a8056985901ca9685adfff" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "packaging": { "hashes": [ - "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", - "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529" + "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", + "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661" ], "markers": "python_version >= '3.8'", - "version": "==26.0" + "version": "==26.2" + }, + "pathspec": { + "hashes": [ + "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", + "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723" + ], + "markers": "python_version >= '3.9'", + "version": "==1.0.4" + }, + "platformdirs": { + "hashes": [ + "sha256:1ec356301b7dc906d83f371c8f487070e99d3ccf9e501686456394622a01a934", + "sha256:68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868" + ], + "markers": "python_version >= '3.10'", + "version": "==4.9.4" }, "protobuf": { "hashes": [ - "sha256:3093804752167bcab3998bec9f1048baae6e29505adaf1afd14a37bddede533c", - "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", - "sha256:6ddcac2a081f8b7b9642c09406bc6a4290128fce5f471cddd165960bb9119e5c", - "sha256:8afa18e1d6d20af15b417e728e9f60f3aa108ee76f23c3b2c07a2c3b546d3afd", - "sha256:8f04fa32763dcdb4973d537d6b54e615cc61108c7cb38fe59310c3192d29510a", - "sha256:9b71e0281f36f179d00cbcb119cb19dec4d14a81393e5ea220f64b286173e190", - "sha256:a3157e62729aafb8df6da2c03aa5c0937c7266c626ce11a278b6eb7963c4e37c", - "sha256:a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5", - "sha256:cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0", - "sha256:d71b040839446bac0f4d162e758bea99c8251161dae9d0983a3b88dee345153b" + "sha256:0cd27b587afca21b7cfa59a74dcbd48a50f0a6400cfb59391340ad729d91d326", + "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", + "sha256:7d29d9b65f8afef196f8334e80d6bc1d5d4adedb449971fefd3723824e6e77d3", + "sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a", + "sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135", + "sha256:bd56799fb262994b2c2faa1799693c95cc2e22c62f56fb43af311cae45d26f0e", + "sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3", + "sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2", + "sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593", + "sha256:f443a394af5ed23672bc6c486be138628fbe5c651ccbc536873d7da23d1868cf" ], "markers": "python_version >= '3.9'", - "version": "==6.33.5" + "version": "==6.33.6" }, "psutil": { "hashes": [ @@ -378,21 +460,70 @@ "markers": "python_version >= '3.6'", "version": "==7.2.2" }, + "pytokens": { + "hashes": [ + "sha256:0fc71786e629cef478cbf29d7ea1923299181d0699dbe7c3c0f4a583811d9fc1", + "sha256:11edda0942da80ff58c4408407616a310adecae1ddd22eef8c692fe266fa5009", + "sha256:140709331e846b728475786df8aeb27d24f48cbcf7bcd449f8de75cae7a45083", + "sha256:24afde1f53d95348b5a0eb19488661147285ca4dd7ed752bbc3e1c6242a304d1", + "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de", + "sha256:27b83ad28825978742beef057bfe406ad6ed524b2d28c252c5de7b4a6dd48fa2", + "sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a", + "sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1", + "sha256:2a44ed93ea23415c54f3face3b65ef2b844d96aeb3455b8a69b3df6beab6acc5", + "sha256:30f51edd9bb7f85c748979384165601d028b84f7bd13fe14d3e065304093916a", + "sha256:34bcc734bd2f2d5fe3b34e7b3c0116bfb2397f2d9666139988e7a3eb5f7400e3", + "sha256:3ad72b851e781478366288743198101e5eb34a414f1d5627cdd585ca3b25f1db", + "sha256:3f901fe783e06e48e8cbdc82d631fca8f118333798193e026a50ce1b3757ea68", + "sha256:42f144f3aafa5d92bad964d471a581651e28b24434d184871bd02e3a0d956037", + "sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321", + "sha256:4a58d057208cb9075c144950d789511220b07636dd2e4708d5645d24de666bdc", + "sha256:4e691d7f5186bd2842c14813f79f8884bb03f5995f0575272009982c5ac6c0f7", + "sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f", + "sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918", + "sha256:5ad948d085ed6c16413eb5fec6b3e02fa00dc29a2534f088d3302c47eb59adf9", + "sha256:670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c", + "sha256:682fa37ff4d8e95f7df6fe6fe6a431e8ed8e788023c6bcc0f0880a12eab80ad1", + "sha256:6d6c4268598f762bc8e91f5dbf2ab2f61f7b95bdc07953b602db879b3c8c18e1", + "sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3", + "sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b", + "sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb", + "sha256:941d4343bf27b605e9213b26bfa1c4bf197c9c599a9627eb7305b0defcfe40c1", + "sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a", + "sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4", + "sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa", + "sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78", + "sha256:add8bf86b71a5d9fb5b89f023a80b791e04fba57960aa790cc6125f7f1d39dfe", + "sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9", + "sha256:b49750419d300e2b5a3813cf229d4e5a4c728dae470bcc89867a9ad6f25a722d", + "sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975", + "sha256:d70e77c55ae8380c91c0c18dea05951482e263982911fc7410b1ffd1dadd3440", + "sha256:d9907d61f15bf7261d7e775bd5d7ee4d2930e04424bab1972591918497623a16", + "sha256:da5baeaf7116dced9c6bb76dc31ba04a2dc3695f3d9f74741d7910122b456edc", + "sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d", + "sha256:dcafc12c30dbaf1e2af0490978352e0c4041a7cde31f4f81435c2a5e8b9cabb6", + "sha256:ee44d0f85b803321710f9239f335aafe16553b39106384cef8e6de40cb4ef2f6", + "sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324" + ], + "markers": "python_version >= '3.8'", + "version": "==0.4.1" + }, "requests": { "hashes": [ - "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", - "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf" + "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0", + "sha256:f288924cae4e29463698d6d60bc6a4da69c89185ad1e0bcc4104f584e960b9ed" ], - "markers": "python_version >= '3.9'", - "version": "==2.32.5" + "index": "pypi", + "markers": "python_version >= '3.10'", + "version": "==2.34.2" }, "setuptools": { "hashes": [ - "sha256:22e0a2d69474c6ae4feb01951cb69d515ed23728cf96d05513d36e42b62b37cb", - "sha256:70b18734b607bd1da571d097d236cfcfacaf01de45717d59e6e04b96877532e0" + "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", + "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb" ], "markers": "python_version >= '3.9'", - "version": "==82.0.0" + "version": "==82.0.1" }, "synapseclient": { "markers": "python_version >= '3.10' and python_version < '3.15'", @@ -416,98 +547,107 @@ }, "urllib3": { "hashes": [ - "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", - "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4" + "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", + "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897" ], - "markers": "python_version >= '3.9'", - "version": "==2.6.3" + "markers": "python_version >= '3.10'", + "version": "==2.7.0" }, "wrapt": { "hashes": [ - "sha256:02b551d101f31694fc785e58e0720ef7d9a10c4e62c1c9358ce6f63f23e30a56", - "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", - "sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f", - "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", - "sha256:0b1831115c97f0663cb77aa27d381237e73ad4f721391a9bfb2fe8bc25fa6e77", - "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", - "sha256:0f5f51a6466667a5a356e6381d362d259125b57f059103dd9fdc8c0cf1d14139", - "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", - "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", - "sha256:1f23fa283f51c890eda8e34e4937079114c74b4c81d2b2f1f1d94948f5cc3d7f", - "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f", - "sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067", - "sha256:24c2ed34dc222ed754247a2702b1e1e89fdbaa4016f324b4b8f1a802d4ffe87f", - "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7", - "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b", - "sha256:30ce38e66630599e1193798285706903110d4f057aab3168a34b7fdc85569afc", - "sha256:33486899acd2d7d3066156b03465b949da3fd41a5da6e394ec49d271baefcf05", - "sha256:343e44b2a8e60e06a7e0d29c1671a0d9951f59174f3709962b5143f60a2a98bd", - "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7", - "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", - "sha256:3e62d15d3cfa26e3d0788094de7b64efa75f3a53875cdbccdf78547aed547a81", - "sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977", - "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", - "sha256:46acc57b331e0b3bcb3e1ca3b421d65637915cfcd65eb783cb2f78a511193f9b", - "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", - "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58", - "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", - "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", - "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85", - "sha256:55cbbc356c2842f39bcc553cf695932e8b30e30e797f961860afb308e6b1bb7c", - "sha256:59923aa12d0157f6b82d686c3fd8e1166fa8cdfb3e17b42ce3b6147ff81528df", - "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454", - "sha256:5a7b3c1ee8265eb4c8f1b7d29943f195c00673f5ab60c192eba2d4a7eae5f46a", - "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e", - "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c", - "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", - "sha256:656873859b3b50eeebe6db8b1455e99d90c26ab058db8e427046dbc35c3140a5", - "sha256:65d1d00fbfb3ea5f20add88bbc0f815150dbbde3b026e6c24759466c8b5a9ef9", - "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", - "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", - "sha256:70d86fa5197b8947a2fa70260b48e400bf2ccacdcab97bb7de47e3d1e6312225", - "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", - "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116", - "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", - "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", - "sha256:758895b01d546812d1f42204bd443b8c433c44d090248bf22689df673ccafe00", - "sha256:79573c24a46ce11aab457b472efd8d125e5a51da2d1d24387666cd85f54c05b2", - "sha256:7e18f01b0c3e4a07fe6dfdb00e29049ba17eadbc5e7609a2a3a4af83ab7d710a", - "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804", - "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04", - "sha256:8cccf4f81371f257440c88faed6b74f1053eef90807b77e31ca057b2db74edb1", - "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", - "sha256:a36692b8491d30a8c75f1dfee65bef119d6f39ea84ee04d9f9311f83c5ad9390", - "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", - "sha256:a7c06742645f914f26c7f1fa47b8bc4c91d222f76ee20116c43d5ef0912bba2d", - "sha256:a9a2203361a6e6404f80b99234fe7fb37d1fc73487b5a78dc1aa5b97201e0f22", - "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", - "sha256:ad85e269fe54d506b240d2d7b9f5f2057c2aa9a2ea5b32c66f8902f768117ed2", - "sha256:af338aa93554be859173c39c85243970dc6a289fa907402289eeae7543e1ae18", - "sha256:afd964fd43b10c12213574db492cb8f73b2f0826c8df07a68288f8f19af2ebe6", - "sha256:b32888aad8b6e68f83a8fdccbf3165f5469702a7544472bdf41f582970ed3311", - "sha256:c31eebe420a9a5d2887b13000b043ff6ca27c452a9a22fa71f35f118e8d4bf89", - "sha256:caea3e9c79d5f0d2c6d9ab96111601797ea5da8e6d0723f77eabb0d4068d2b2f", - "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39", - "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4", - "sha256:d8a210b158a34164de8bb68b0e7780041a903d7b00c87e906fb69928bf7890d5", - "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa", - "sha256:df7d30371a2accfe4013e90445f6388c570f103d61019b6b7c57e0265250072a", - "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", - "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6", - "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235", - "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056", - "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2", - "sha256:e6f40a8aa5a92f150bdb3e1c44b7e98fb7113955b2e5394122fa5532fec4b418", - "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", - "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a", - "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6", - "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", - "sha256:f9b2601381be482f70e5d1051a5965c25fb3625455a2bf520b5a077b22afb775", - "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10", - "sha256:fd341868a4b6714a5962c1af0bd44f7c404ef78720c7de4892901e540417111c" + "sha256:03b77d3ecab6c38e5da7a5709cee6899083d08fc1bcd648b4fa78b346fc66282", + "sha256:0680304db389599691bac06a2f9fb3f0ed06af59f132d35801a38cf6c321ab59", + "sha256:07dd562ebb774cad070eeedb93c7a29647979e30f0cfd1f5c9b9f803f687b6f4", + "sha256:10e8f78948d13369b770fc17bf72272aac98b4b92d49a38f479abf718f6b615b", + "sha256:115ff1501c11ac0e267c4afd6f6b3dd24b48afcc77b029e6062f71b12bce1d79", + "sha256:12331011cbf76b782d0beec7c7ed880f51454c127ab12012cfaecf56de01a80c", + "sha256:195db5b92deba6feb818732694ad478abb8a529d97a113cc256e5e49ee2dd80d", + "sha256:199abadf7dcceab4bdc5bfe356275a56b1cb429296e283da2fe90c20b09f8d07", + "sha256:1bf3ea62734b24c0241442d8b7684ef53a8de6cad0c2eba1e99fd2297b4a92e4", + "sha256:1f663528d6ea1804d279462671b2bf98a4c0d8a4a8dd319bb3ee0629b743387f", + "sha256:22c7ee3a3737d9656ddf2c9cc1f1548ec963d966251e899561da142697d33a9d", + "sha256:231e2728ba04536821d2327ad2b3cb2c20cc79197fe5c30ddf71b12d95febe10", + "sha256:298cfa8de891b9aae945b47323a012fe3f1cac5e6b2f69b150961b9ed0df1fc8", + "sha256:29c0b2c075f8854b3345be584ab3d84f8968c45605d1914be1c94939cef5d702", + "sha256:2b3946f0ff079623dc4f117363040433be390bfebce3719de50dfecbf31efdf0", + "sha256:2f0d4a79d9af893d80caa5b709e024dd2d387f3f047008286036143f118d7010", + "sha256:2ff803b3607cd76cb9b853b03d15279c7ffc8ba69e69f76304cd23d2722f2b65", + "sha256:319720847afa6c58c32f84f9743bdcf34448ae56908c00f409764c627ff2c1fe", + "sha256:33ff34dc349320dc16ebe0cdf70dddf5ae9328f4a448823a00f37976d0cc2234", + "sha256:370b2c36e8fee503c275e39b4588d74412cd0a7792f7f3a7b54c44c4d33d4884", + "sha256:3f1dc1d1a2f0b081d8c1eef2203e61717b537a1bcb0d8e4d1405aeb15aa85c34", + "sha256:3fab0258114702859bb9d410e6a886e79477e677ac92580f81b876e7c55590cc", + "sha256:4297b7338cfa48b5cfefc7416d2ae52b0aad89e9b24da479ec010717b987c07f", + "sha256:43c36019a690b2cb089665eab01a50c92d814553c6e57ff03d2c68e63ce8f00b", + "sha256:45d4156fd35d0bdab58eac4a6854fbd053a59544fc57eb66e977b3c13c087a1c", + "sha256:484015d345548472c54c97a318c6eba92db583d9d5a966dde7cf3ae0c1461cf4", + "sha256:49c7ad697d6b13f322a1c3bb22a1c66827d5c0d303a4479e327210ee4d4ad179", + "sha256:4b0aa81f4a3d0203ae8450eae5e794540afbf00a97dd0b81accbe5b4a5362cbb", + "sha256:4d5b485a6f617825fa7449f5025ebcdad9355acb328cb6d198ba225762219bc0", + "sha256:5248171d3cd33f12c144e7aa1222983cb6ab42651e985ce51fec400a876afbfd", + "sha256:57bc3691043b158605c5ceee6b06b3720caf8ac43bd4195d1bfe12457e7014f6", + "sha256:5b7f10aa09d1f5abfe3ccd022dec566a5010465b98b3755cc0705a762547101f", + "sha256:5b865e611c186d15366964e3d9500af504920ce7b92a211d61a83d2d3c42a508", + "sha256:5b9733ef187cf05e774484ed2f703992a44429050f1cfea2e94dac543da78292", + "sha256:5b9f9d351eb8e5798066b505c705ec25e19a793367edaa3280a3f171b6950fc3", + "sha256:5c17982ccfece323bb297a195c9602ef407819199d8dbf99b8041770513fd68f", + "sha256:60bef9dc4348a76e9c2981ec4b06b779bac02556af4479030e6f62b18545b3cc", + "sha256:615be1d2b21450748e759bed7bf9ba8bc28307e91cb96b6e968f54f39e938ee5", + "sha256:628fbd908649611c8b9293e2e050231f1e230be152e7d38140e3b818ec6aade0", + "sha256:63a09b40bba3b2482983e2aeba6e45e20e1f567821ac89c8922229ecc1de7f65", + "sha256:686f1798727bf4a708df015ca782b20abe99b3664e1ee9786b7712b0e2310586", + "sha256:74b7949da2ffcd79869ac1e90946c14ce61a714269403a879ea9ed85a993c81f", + "sha256:76b8111f8f5b8553c066caa26193921dea4185efecf1f9b38473054205137800", + "sha256:778aa2f59615973f2637d9025a708b69196c4814f38d905647fa1a56d7ff6b79", + "sha256:7c5ffaf6e2d35e80bea210e6969910e2ae10c1166831651c22a315425db4f831", + "sha256:7e291fa9129d9998ed5035390d4bb9cf429c489f40e5ddaa06a1e83ed52048a7", + "sha256:8062689c0e6faf0c2532f566a492fb48ba60923c2cd6effda7cac9639dbdc1f3", + "sha256:852bbcc75eab1771d4f294fb6abcc23cd38813e34fa3c71e6d579799493c4db2", + "sha256:885638ab4f8765c5deaab41d1e4452b6d212d231091b84172e3e13df2cb280fb", + "sha256:8a094508b7cd6e583378f3cf50f125814961660225bad88f4ecaa691e30b09e1", + "sha256:8a76b27fe0d600f8a34313e1a528309aa807a16aa3a72000619bc56339020125", + "sha256:8d40f1fb34d600b3eaf812941d6bcf313075728868cad1dafb7021e6a4e77983", + "sha256:9040b15216e07ed68762e44ff231a460036e4bf3543f83988f669e7078847b2c", + "sha256:914fdca0ee2a29ede32c61c28abdaf9c57b0d8c5de9dc1e28ce7e4f0400df877", + "sha256:952ec99e71d584a0e451795dbd468909c8794727ecddd9ebb4fe9803e2803f1e", + "sha256:97fbe7a0df35afe37e7e2f053dee6300a3eed00055cfd907fa51161e22c40236", + "sha256:9ad894d5dc5960ebd546a87a78160a8c645b99899e7e45a538436919bc9be5a6", + "sha256:9b58e2cdbcfe2278a031a12a7d73836d66bc1e9e65f97c63ea0a022f2f9f351b", + "sha256:9c95f72d212e1f178f9619b77fd7ee3533e82ded6a5ad119dd88134e185ee3b0", + "sha256:a3848854af260eb4cc33602c685524fff7c8816f033325f750c7fc75c6deccf9", + "sha256:a4482d1d4108052827b354850bd6e3d1ed56262cbe4b0e8051876c298fb99280", + "sha256:a50822bbbefb90b132a780c17356062a2452cd5525bfa4b5b596fd6474cceaa6", + "sha256:a8ce59cad2ee5a4d58ee647c4ed4d9adc4282ffdc31e98cba7f831536776a0f9", + "sha256:af17d3ce1e2cc5d22ae8fe8921d7801c980ea3f5d6da4ecbd0f85c4f9e030181", + "sha256:b208a5dd6f9da3d4b17aa2e4f8ca9c5dc6b9a2ed571fdef9ed465102487b445c", + "sha256:b4ce4240a3f095e77cfcc5aed6001bd63af13ea53c35ef496af1a5a972e7eaa9", + "sha256:b55f1fcbf83637f42eaf19c553ed69864ff25ac38c653ab024fccfaec8bd2e68", + "sha256:b62f40eb24ccf05246d203461c8920889fd38dce76978df16fe28e6f0128447d", + "sha256:b70a0b75b0a5a58d04aad06b3f167d49e729381d3417413656220c0cd7617847", + "sha256:b93e1ccddbdf59cec4f7683dc84bc56eb61628eb01b22bdefc15f04cd09f8fae", + "sha256:bb7c060c3faa78fe066b6b1c65de285d8d61fb6e01ee8195625b9636c3cd9775", + "sha256:c7af243871699358ebf34a770205bf2b61ccb17a0b003e8726d2028cc36ce364", + "sha256:c990d58100f9ebb8e7a20bd2e7bd3c60838be38c5bbccdd35041bc9f36dc0cea", + "sha256:cb9336f2dc99de00c9e58487cae5541ee4d79e859377b6312d98973d4661c584", + "sha256:cccce5c70a209eb385c82d063f332ed97fc02d1cf7bffb95b2e6995b5a9b8388", + "sha256:cf93c441b11c1f3ae2ccf1e8d876939b301b3234ec19f311ab0e7543a9d4427e", + "sha256:d23ea5a8e4ae99640d027d2fd05c9d03f8d24d561fc26c0462e96affa31bf408", + "sha256:d2aab40474b6adae53d14d1f6a7785f4346a93c072adf1e69ca11a1b6afc789e", + "sha256:d8f6cf451ec4aab0cdbad128d9be1219e95ceaa9940566d71570b2d820ee50b3", + "sha256:d98bf0078736df226e36875aa58a78f9d3b0888bcf585144fb30edbbf7145238", + "sha256:db48e2623a8aca63dfcfa7e574a5f3a9f760be1c464ee23f6387f70cc9112aa2", + "sha256:db93eebcf951f9ee41d75dc0423378fa918fc6706db59bc20c02f6563b6b210d", + "sha256:e8ae3f4b50a3befa56da0f09d2b71a192454ce48e8887823dbc9228cdbb610f3", + "sha256:eb9d0c3f416e2c7c37498d1716fe323379da8b4e860da3d3818a6ec8fff7b7e5", + "sha256:ec257eedd8c3988cf76e351e949e3a56a61d90f4bb4e060de2ebfa6603df2a42", + "sha256:f0318a47d23c9407f4f94c06824662499e889ab8c192c1162e4f542a118fd700", + "sha256:f58e1aa46c204171a2faa49b1ef2953edebb3913d270bb3bae7e970f254c9293", + "sha256:f86e46490908a0ae2b2d633020c12e5283c85332d7ae0846f8a351a8a2da0b82", + "sha256:f990f1b5c8ee4ff980bdef3f73f50728fd911b9ab8de8c43144e8019dcd845ff", + "sha256:fb240700f3b597c1d40d0932bfed2f4130fec2f02b8c2cb0bcdae45d321cb691" ], - "markers": "python_version >= '3.8'", - "version": "==1.17.3" + "markers": "python_version >= '3.9'", + "version": "==2.2.0" }, "zipp": { "hashes": [ @@ -521,11 +661,11 @@ "develop": { "anyio": { "hashes": [ - "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", - "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c" + "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", + "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc" ], - "markers": "python_version >= '3.9'", - "version": "==4.12.1" + "markers": "python_version >= '3.10'", + "version": "==4.13.0" }, "async-lru": { "hashes": [ @@ -545,11 +685,11 @@ }, "attrs": { "hashes": [ - "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", - "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373" + "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", + "sha256:d03ceb89cb322a8fd706d4fb91940737b6642aa36998fe130a9bc96c985eff32" ], "markers": "python_version >= '3.9'", - "version": "==25.4.0" + "version": "==26.1.0" }, "babel": { "hashes": [ @@ -561,16 +701,15 @@ }, "backrefs": { "hashes": [ - "sha256:08aa7fae530c6b2361d7bdcbda1a7c454e330cc9dbcd03f5c23205e430e5c3be", - "sha256:0fdc7b012420b6b144410342caeb8adc54c6866cf12064abc9bb211302e496f8", - "sha256:12df81596ab511f783b7d87c043ce26bc5b0288cf3bb03610fe76b8189282b2b", - "sha256:664e33cd88c6840b7625b826ecf2555f32d491800900f5a541f772c485f7cda7", - "sha256:c3f4b9cb2af8cda0d87ab4f57800b57b95428488477be164dd2b47be54db0c90", - "sha256:e5f805ae09819caa1aa0623b4a83790e7028604aa2b8c73ba602c4454e665de7", - "sha256:f44ff4d48808b243b6c0cdc6231e22195c32f77046018141556c66f8bab72a49" + "sha256:4989bb9e1e99eb23647c7160ed51fb21d0b41b5d200f2d3017da41e023097e82", + "sha256:a0fa7360c63509e9e077e174ef4e6d3c21c8db94189b9d957289ae6d794b9475", + "sha256:a6448b28180e3ca01134c9cf09dcebafad8531072e09903c5451748a05f24bc9", + "sha256:b57cd227ea556b0aed3dc9b8da4628db4eabc0402c6d7fcfc69283a93955f7e9", + "sha256:ca42ce6a49ace3d75684dfa9937f3373902a63284ecb385ce36d15e5dcb41c12", + "sha256:f2c52955d631b9e1ac4cd56209f0a3a946d592b98e7790e77699339ae01c102a" ], - "markers": "python_version >= '3.9'", - "version": "==6.2" + "markers": "python_version >= '3.10'", + "version": "==7.0" }, "bcrypt": { "hashes": [ @@ -643,60 +782,60 @@ }, "black": { "hashes": [ - "sha256:101540cb2a77c680f4f80e628ae98bd2bd8812fb9d72ade4f8995c5ff019e82c", - "sha256:1054e8e47ebd686e078c0bb0eaf31e6ce69c966058d122f2c0c950311f9f3ede", - "sha256:1de0f7d01cc894066a1153b738145b194414cc6eeaad8ef4397ac9abacf40f6b", - "sha256:2b807c240b64609cb0e80d2200a35b23c7df82259f80bef1b2c96eb422b4aac9", - "sha256:3cee1487a9e4c640dc7467aaa543d6c0097c391dc8ac74eb313f2fbf9d7a7cb5", - "sha256:53c62883b3f999f14e5d30b5a79bd437236658ad45b2f853906c7cbe79de00af", - "sha256:5e8e75dabb6eb83d064b0db46392b25cabb6e784ea624219736e8985a6b3675d", - "sha256:643d27fb5facc167c0b1b59d0315f2674a6e950341aed0fc05cf307d22bf4954", - "sha256:66912475200b67ef5a0ab665011964bf924745103f51977a78b4fb92a9fc1bf0", - "sha256:6eeca41e70b5f5c84f2f913af857cf2ce17410847e1d54642e658e078da6544f", - "sha256:6f3977a16e347f1b115662be07daa93137259c711e526402aa444d7a88fdc9d4", - "sha256:7ed300200918147c963c87700ccf9966dceaefbbb7277450a8d646fc5646bf24", - "sha256:91a68ae46bf07868963671e4d05611b179c2313301bd756a89ad4e3b3db2325b", - "sha256:9459ad0d6cd483eacad4c6566b0f8e42af5e8b583cee917d90ffaa3778420a0a", - "sha256:9dc8c71656a79ca49b8d3e2ce8103210c9481c57798b48deeb3a8bb02db5f115", - "sha256:a19915ec61f3a8746e8b10adbac4a577c6ba9851fa4a9e9fbfbcf319887a5791", - "sha256:b22b3810451abe359a964cc88121d57f7bce482b53a066de0f1584988ca36e79", - "sha256:ba1d768fbfb6930fc93b0ecc32a43d8861ded16f47a40f14afa9bb04ab93d304", - "sha256:be5e2fe860b9bd9edbf676d5b60a9282994c03fbbd40fe8f5e75d194f96064ca", - "sha256:c5b7713daea9bf943f79f8c3b46f361cc5229e0e604dcef6a8bb6d1c37d9df89", - "sha256:ca699710dece84e3ebf6e92ee15f5b8f72870ef984bf944a57a777a48357c168", - "sha256:d294ac3340eef9c9eb5d29288e96dc719ff269a88e27b396340459dd85da4c58", - "sha256:d62d14ca31c92adf561ebb2e5f2741bf8dea28aef6deb400d49cca011d186c68", - "sha256:dd39eef053e58e60204f2cdf059e2442e2eb08f15989eefe259870f89614c8b6", - "sha256:eb07665d9a907a1a645ee41a0df8a25ffac8ad9c26cdb557b7b88eeeeec934e0", - "sha256:f016baaadc423dc960cdddf9acae679e71ee02c4c341f78f3179d7e4819c095f", - "sha256:fb1dafbbaa3b1ee8b4550a84425aac8874e5f390200f5502cf3aee4a2acb2f14" + "sha256:0e48b87e03bf109288e55cfceadcfa15ff5470aca2851a851950ed2926f450d7", + "sha256:1037d5ac7b7b310b2632ad867ec8d0e4c4819dcdb0b820f63135da746a24e418", + "sha256:1ef92b76f7733f282fd096ea406200b5a286c42947412b0eaff3a74e3616cefe", + "sha256:1f7ea64ebfa01b50f693508fc39f875e264446d3b097088f84f203b9d09618a0", + "sha256:22f2cd76d069cc54c71f10360744ba8983fbb616903b4304a85b734915c8e1b4", + "sha256:2b36cf2ddf5566e205f6535f782a62194a184d33e175b64ae8c40b1737522be3", + "sha256:30d3c14661f2792e9142cce3eeeb1cbc175b3eb5f733be0c8eeb99651e52b0c3", + "sha256:32d5ea7f6c8bdfa6e648326ebca1f02b0764e2a029edc6f8dce2627e19d468c3", + "sha256:3915f256e75a2d7cf88d8953d37f780455dc586cc72dee059c528fe77f581217", + "sha256:4ad6fa01f941920f54f2bbb35f3df7673428a0ef98a0b0840c2eaef3b110efa8", + "sha256:4ed7f7da04046d2e488437170797d3b4a4ad83906683bcb7dfc68b673bbce5e2", + "sha256:5119fa92ae61f786e8c3662fd60aece1d0a2dd5cca5d0c79417a95e7a4272a59", + "sha256:577f21094ea469ef92ec1adaf2c9441a226d2144d01a5be2fa823cecf6543e50", + "sha256:58b4bd92cf88aacf83d88479c8f9caee044b1ec55f2451a337354a7ea2590a22", + "sha256:5c34b25da232ead53a6f335b76dbea124f4d152ad568b9080d6f944bc2b34b52", + "sha256:87ed5c6f450580a2f6790bc7cbfb016dfc73bc750249762268a3695361315eef", + "sha256:89c93167a74d3a75dfaa38a5c7cca015537d5820dd7f17d63267d674a61cae90", + "sha256:96ae2c733b2aabdd9986e2c5df628ff3473676cd1c5faded1ff496cf6d74083c", + "sha256:9942db8888e06943c5dde66ca0037dcff82a2a4ec1ad0ada9e0d2ee9d9823893", + "sha256:9d98d4137277c75dfb898ec8d846c4fd68ba1e9cf77f95e2865c203dc18f4c3d", + "sha256:a1dca32d9f1784af512a13410ec204c6f7f0aa9797a111c42e1c03449821c264", + "sha256:dd321f668053961824bcc1be1cc1df748b2d7e4fa28086b08331e577b0100a73", + "sha256:e1a26503279b6b310669fb0b219c39e4820b77e8189fe80f522bb511f247db0a", + "sha256:e88976690a64b0af98312ca958415849cb42423423c5f2ee74af4b49a97a2168", + "sha256:ea8d16dc41655aa113cd64665e7219446cd7e4ff2248d7178eaa905190c86b18", + "sha256:ecb3e624844c798144e9bd986954e0adc81d8911a1f30f375e1252fe26e8c294", + "sha256:ed1a20af114c301a0269bf01163d51dbef72737fd65f850001e7cbe7f3c7abae" ], "markers": "python_version >= '3.10'", - "version": "==26.1.0" + "version": "==26.5.1" }, "boto3": { "hashes": [ - "sha256:7b3e0c4bfd8815a3df64fbe98fc9f87dfb12bd7a783cf63dfc2f166c66798c9d", - "sha256:ff4a4afb832f63a1358e11fe6eb321da0f4767979c6721dd32fb02e6eabcebf5" + "sha256:bd909b509c459e784dcfcafb3e130cf2891ab26d2d323003bcddaf15a948c9e8", + "sha256:c156ba7b35687379c28f6b7216f06b477b033eab318ac70697128e99d4bfd7b7" ], - "markers": "python_version >= '3.9'", - "version": "==1.42.52" + "markers": "python_version >= '3.10'", + "version": "==1.43.13" }, "botocore": { "hashes": [ - "sha256:3bdef10aee4cee13ff019b6a1423a2ce3ca17352328d9918157a1829e5cc9be1", - "sha256:c3a0b7138a4c5a534da0eb2444c19763b4d03ba2190c0602c49315e54efd7252" + "sha256:10df003c71847b4f1501b98b1c03e1cb6399583b6cc5136ca7ff849e00c4797f", + "sha256:c0fe4ba2d4ee35751f539ae8164da73218e1b8cf3114affd3a5312ba66b9df2e" ], - "markers": "python_version >= '3.9'", - "version": "==1.42.52" + "markers": "python_version >= '3.10'", + "version": "==1.43.13" }, "certifi": { "hashes": [ - "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", - "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120" + "sha256:3c52e209ba0a4ad7aebe60436a4ab349c39e1e602e8c134221e546902ad25897", + "sha256:69dea482ab64caa7b9f6aba1c6bf48bb6a5448d1c0f1b17ab42ad8c763a5344d" ], "markers": "python_version >= '3.7'", - "version": "==2026.1.4" + "version": "==2026.5.20" }, "cffi": { "hashes": [ @@ -798,130 +937,146 @@ }, "charset-normalizer": { "hashes": [ - "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", - "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", - "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", - "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", - "sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc", - "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", - "sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63", - "sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d", - "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", - "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", - "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", - "sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505", - "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", - "sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af", - "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", - "sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318", - "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", - "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", - "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", - "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", - "sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576", - "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", - "sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1", - "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", - "sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1", - "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", - "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", - "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", - "sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88", - "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", - "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", - "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", - "sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a", - "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", - "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", - "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", - "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", - "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", - "sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7", - "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", - "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", - "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", - "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", - "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", - "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", - "sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2", - "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", - "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", - "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", - "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", - "sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf", - "sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6", - "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", - "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", - "sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa", - "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", - "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", - "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", - "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", - "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", - "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", - "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", - "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", - "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", - "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", - "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", - "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", - "sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", - "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", - "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", - "sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3", - "sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9", - "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", - "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", - "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", - "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", - "sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50", - "sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf", - "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", - "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", - "sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac", - "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", - "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", - "sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c", - "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", - "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", - "sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e", - "sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4", - "sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84", - "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", - "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", - "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", - "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", - "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", - "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", - "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", - "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", - "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", - "sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074", - "sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3", - "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", - "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", - "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", - "sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d", - "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", - "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", - "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", - "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", - "sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966", - "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", - "sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3", - "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", - "sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608" + "sha256:007d05ec7321d12a40227aae9e2bc6dca73f3cb21058999a1df9e193555a9dcc", + "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", + "sha256:07d9e39b01743c3717745f4c530a6349eadbfa043c7577eef86c502c15df2c67", + "sha256:08e721811161356f97b4059a9ba7bafb23ea5ee2255402c42881c214e173c6b4", + "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", + "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", + "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5", + "sha256:12a6fff75f6bc66711b73a2f0addfc4c8c15a20e805146a02d147a318962c444", + "sha256:12d8baf840cc7889b37c7c770f478adea7adce3dcb3944d02ec87508e2dcf153", + "sha256:14265bfe1f09498b9d8ec91e9ec9fa52775edf90fcbde092b25f4a33d444fea9", + "sha256:16d971e29578a5e97d7117866d15889a4a07befe0e87e703ed63cd90cb348c01", + "sha256:177a0ba5f0211d488e295aaf82707237e331c24788d8d76c96c5a41594723217", + "sha256:1a87ca9d5df6fe460483d9a5bbf2b18f620cbed41b432e2bddb686228282d10b", + "sha256:1c2a768fdd44ee4a9339a9b0b130049139b8ce3c01d2ce09f67f5a68048d477c", + "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", + "sha256:1dc8b0ea451d6e69735094606991f32867807881400f808a106ee1d963c46a83", + "sha256:1efde3cae86c8c273f1eb3b287be7d8499420cf2fe7585c41d370d3e790054a5", + "sha256:202389074300232baeb53ae2569a60901f7efadd4245cf3a3bf0617d60b439d7", + "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb", + "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c", + "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1", + "sha256:2cd4a60d0e2fb04537162c62bbbb4182f53541fe0ede35cdf270a1c1e723cc42", + "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", + "sha256:2fe249cb4651fd12605b7288b24751d8bfd46d35f12a20b1ba33dea122e690df", + "sha256:30b8d1d8c52a48c2c5690e152c169b673487a2a58de1ec7393196753063fcd5e", + "sha256:320ade88cfb846b8cd6b4ddf5ee9e80ee0c1f52401f2456b84ae1ae6a1a5f207", + "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", + "sha256:36836d6ff945a00b88ba1e4572d721e60b5b8c98c155d465f56ad19d68f23734", + "sha256:38c0109396c4cfc574d502df99742a45c72c08eff0a36158b6f04000043dbf38", + "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", + "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", + "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", + "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", + "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", + "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", + "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", + "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d", + "sha256:4e5163c14bffd570ef2affbfdd77bba66383890797df43dc8b4cc7d6f500bf53", + "sha256:511ef87c8aec0783e08ac18565a16d435372bc1ac25a91e6ac7f5ef2b0bff790", + "sha256:532bc9bf33a68613fd7d65e4b1c71a6a38d7d42604ecf239c77392e9b4e8998c", + "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", + "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116", + "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d", + "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", + "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6", + "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2", + "sha256:6370e8686f662e6a3941ee48ed4742317cafbe5707e36406e9df792cdb535776", + "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", + "sha256:65bcd23054beab4d166035cabbc868a09c1a49d1efe458fe8e4361215df40265", + "sha256:66671f93accb62ed07da56613636f3641f1a12c13046ce91ffc923721f23c008", + "sha256:6696b7688f54f5af4462118f0bfa7c1621eeb87154f77fa04b9295ce7a8f2943", + "sha256:6785f414ae0f3c733c437e0f3929197934f526d19dfaa75e18fdb4f94c6fb374", + "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", + "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", + "sha256:6e0d51f618228538a3e8f46bd246f87a6cd030565e015803691603f55e12afb5", + "sha256:6ed74185b2db44f41ef35fd1617c5888e59792da9bbc9190d6c7300617182616", + "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15", + "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", + "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", + "sha256:750e02e074872a3fad7f233b47734166440af3cdea0add3e95163110816d6752", + "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", + "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", + "sha256:7641bb8895e77f921102f72833904dcd9901df5d6d72a2ab8f31d04b7e51e4e7", + "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", + "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", + "sha256:813c0e0132266c08eb87469a642cb30aaff57c5f426255419572aaeceeaa7bf4", + "sha256:82b271f5137d07749f7bf32f70b17ab6eaabedd297e75dce75081a24f76eb545", + "sha256:84c018e49c3bf790f9c2771c45e9313a08c2c2a6342b162cd650258b57817706", + "sha256:8751d2787c9131302398b11e6c8068053dcb55d5a8964e114b6e196cf16cb366", + "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", + "sha256:87fad7d9ba98c86bcb41b2dc8dbb326619be2562af1f8ff50776a39e55721c5a", + "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", + "sha256:8e385e4267ab76874ae30db04c627faaaf0b509e1ccc11a95b3fc3e83f855c00", + "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", + "sha256:94e1885b270625a9a828c9793b4d52a64445299baa1fea5a173bf1d3dd9a1a5a", + "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", + "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", + "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", + "sha256:a6c5863edfbe888d9eff9c8b8087354e27618d9da76425c119293f11712a6319", + "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", + "sha256:adb2597b428735679446b46c8badf467b4ca5f5056aae4d51a19f9570301b1ad", + "sha256:ae196f021b5e7c78e918242d217db021ed2a6ace2bc6ae94c0fc596221c7f58d", + "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", + "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7", + "sha256:aef65cd602a6d0e0ff6f9930fcb1c8fec60dd2cfcb6facaf4bdb0e5873042db0", + "sha256:af21eb4409a119e365397b2adbaca4c9ccab56543a65d5dbd9f920d6ac29f686", + "sha256:b14b2d9dac08e28bb8046a1a0434b1750eb221c8f5b87a68f4fa11a6f97b5e34", + "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49", + "sha256:bb8cc7534f51d9a017b93e3e85b260924f909601c3df002bcdb58ddb4dc41a5c", + "sha256:bc17a677b21b3502a21f66a8cc64f5bfad4df8a0b8434d661666f8ce90ac3af1", + "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", + "sha256:bd9b23791fe793e4968dba0c447e12f78e425c59fc0e3b97f6450f4781f3ee60", + "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", + "sha256:c0f081d69a6e58272819b70288d3221a6ee64b98df852631c80f293514d3b274", + "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", + "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", + "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", + "sha256:c593052c465475e64bbfe5dbd81680f64a67fdc752c56d7a0ae205dc8aeefe0f", + "sha256:cdd68a1fb318e290a2077696b7eb7a21a49163c455979c639bf5a5dcdc46617d", + "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", + "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", + "sha256:cf29836da5119f3c8a8a70667b0ef5fdca3bb12f80fd06487cfa575b3909b393", + "sha256:d4a48e5b3c2a489fae013b7589308a40146ee081f6f509e047e0e096084ceca1", + "sha256:d560742f3c0d62afaccf9f41fe485ed69bd7661a241f86a3ef0f0fb8b1a397af", + "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", + "sha256:d61f00a0869d77422d9b2aba989e2d24afa6ffd552af442e0e58de4f35ea6d00", + "sha256:d635aab80466bc95771bb78d5370e74d36d1fe31467b6b29b8b57b2a3cd7d22c", + "sha256:dca4bbc466a95ba9c0234ef56d7dd9509f63da22274589ebd4ed7f1f4d4c54e3", + "sha256:dd915403e231e6b1809fe9b6d9fc55cf8fb5e02765ac625d9cd623342a7905d7", + "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", + "sha256:e060d01aec0a910bdccb8be71faf34e7799ce36950f8294c8bf612cba65a2c9e", + "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b", + "sha256:e17b8d5d6a8c47c85e68ca8379def1303fd360c3e22093a807cd34a71cd082b8", + "sha256:e5f4d355f0a2b1a31bc3edec6795b46324349c9cb25eed068049e4f472fb4259", + "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", + "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", + "sha256:e80c8378d8f3d83cd3164da1ad2df9e37a666cdde7b1cb2298ed0b558064be30", + "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", + "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46", + "sha256:ed065083d0898c9d5b4bbec7b026fd755ff7454e6e8b73a67f8c744b13986e24", + "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a", + "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", + "sha256:f22dec1690b584cea26fade98b2435c132c1b5f68e39f5a0b7627cd7ae31f1dc", + "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", + "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", + "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", + "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", + "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", + "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464" ], "markers": "python_version >= '3.7'", - "version": "==3.4.4" + "version": "==3.4.7" }, "click": { "hashes": [ - "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", - "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6" + "sha256:40c50b7c6c6adac2823d411041ec84f3f103f1b280d5e9ce0d7f998995832f81", + "sha256:638f1338fe1235c8f4e008e4a8a254fb5c5fbdcbb40ece3c9142ebb78e792973" ], "markers": "python_version >= '3.10'", - "version": "==8.3.1" + "version": "==8.4.0" }, "colorama": { "hashes": [ @@ -936,170 +1091,170 @@ "toml" ], "hashes": [ - "sha256:01d4cbc3c283a17fc1e42d614a119f7f438eabb593391283adca8dc86eff1246", - "sha256:02231499b08dabbe2b96612993e5fc34217cdae907a51b906ac7fca8027a4459", - "sha256:0dd7ab8278f0d58a0128ba2fca25824321f05d059c1441800e934ff2efa52129", - "sha256:0e086334e8537ddd17e5f16a344777c1ab8194986ec533711cbe6c41cde841b6", - "sha256:0fc31c787a84f8cd6027eba44010517020e0d18487064cd3d8968941856d1415", - "sha256:14375934243ee05f56c45393fe2ce81fe5cc503c07cee2bdf1725fb8bef3ffaf", - "sha256:1731dc33dc276dafc410a885cbf5992f1ff171393e48a21453b78727d090de80", - "sha256:19bc3c88078789f8ef36acb014d7241961dbf883fd2533d18cb1e7a5b4e28b11", - "sha256:1af1641e57cf7ba1bd67d677c9abdbcd6cc2ab7da3bca7fa1e2b7e50e65f2ad0", - "sha256:1d4be36a5114c499f9f1f9195e95ebf979460dbe2d88e6816ea202010ba1c34b", - "sha256:200dea7d1e8095cc6e98cdabe3fd1d21ab17d3cee6dab00cadbb2fe35d9c15b9", - "sha256:23e3f687cf945070d1c90f85db66d11e3025665d8dafa831301a0e0038f3db9b", - "sha256:2421d591f8ca05b308cf0092807308b2facbefe54af7c02ac22548b88b95c98f", - "sha256:245e37f664d89861cf2329c9afa2c1fe9e6d4e1a09d872c947e70718aeeac505", - "sha256:25381386e80ae727608e662474db537d4df1ecd42379b5ba33c84633a2b36d47", - "sha256:25a41c3104d08edb094d9db0d905ca54d0cd41c928bb6be3c4c799a54753af55", - "sha256:296f8b0af861d3970c2a4d8c91d48eb4dd4771bcef9baedec6a9b515d7de3def", - "sha256:29e3220258d682b6226a9b0925bc563ed9a1ebcff3cad30f043eceea7eaf2689", - "sha256:2a09cfa6a5862bc2fc6ca7c3def5b2926194a56b8ab78ffcf617d28911123012", - "sha256:2b0f6ccf3dbe577170bebfce1318707d0e8c3650003cb4b3a9dd744575daa8b5", - "sha256:2c048ea43875fbf8b45d476ad79f179809c590ec7b79e2035c662e7afa3192e3", - "sha256:2cb0f1e000ebc419632bbe04366a8990b6e32c4e0b51543a6484ffe15eaeda95", - "sha256:2fa8d5f8de70688a28240de9e139fa16b153cc3cbb01c5f16d88d6505ebdadf9", - "sha256:300deaee342f90696ed186e3a00c71b5b3d27bffe9e827677954f4ee56969601", - "sha256:30b8d0512f2dc8c8747557e8fb459d6176a2c9e5731e2b74d311c03b78451997", - "sha256:33901f604424145c6e9c2398684b92e176c0b12df77d52db81c20abd48c3794c", - "sha256:3599eb3992d814d23b35c536c28df1a882caa950f8f507cef23d1cbf334995ac", - "sha256:391ee8f19bef69210978363ca930f7328081c6a0152f1166c91f0b5fdd2a773c", - "sha256:3998e5a32e62fdf410c0dbd3115df86297995d6e3429af80b8798aad894ca7aa", - "sha256:3c06f0f1337c667b971ca2f975523347e63ec5e500b9aa5882d91931cd3ef750", - "sha256:40aa8808140e55dc022b15d8aa7f651b6b3d68b365ea0398f1441e0b04d859c3", - "sha256:40d74da8e6c4b9ac18b15331c4b5ebc35a17069410cad462ad4f40dcd2d50c0d", - "sha256:4223b4230a376138939a9173f1bdd6521994f2aff8047fae100d6d94d50c5a12", - "sha256:48685fee12c2eb3b27c62f2658e7ea21e9c3239cba5a8a242801a0a3f6a8c62a", - "sha256:4c7d3cc01e7350f2f0f6f7036caaf5673fb56b6998889ccfe9e1c1fe75a9c932", - "sha256:4e83efc079eb39480e6346a15a1bcb3e9b04759c5202d157e1dd4303cd619356", - "sha256:4fc7fa81bbaf5a02801b65346c8b3e657f1d93763e58c0abdf7c992addd81a92", - "sha256:53d133df809c743eb8bce33b24bcababb371f4441340578cd406e084d94a6148", - "sha256:590c0ed4bf8e85f745e6b805b2e1c457b2e33d5255dd9729743165253bc9ad39", - "sha256:5b856a8ccf749480024ff3bd7310adaef57bf31fd17e1bfc404b7940b6986634", - "sha256:65dfcbe305c3dfe658492df2d85259e0d79ead4177f9ae724b6fb245198f55d6", - "sha256:6f01afcff62bf9a08fb32b2c1d6e924236c0383c02c790732b6537269e466a72", - "sha256:6fdef321fdfbb30a197efa02d48fcd9981f0d8ad2ae8903ac318adc653f5df98", - "sha256:71ca20079dd8f27fcf808817e281e90220475cd75115162218d0e27549f95fef", - "sha256:725d985c5ab621268b2edb8e50dfe57633dc69bda071abc470fed55a14935fd3", - "sha256:75eab1ebe4f2f64d9509b984f9314d4aa788540368218b858dad56dc8f3e5eb9", - "sha256:75fcd519f2a5765db3f0e391eb3b7d150cce1a771bf4c9f861aeab86c767a3c0", - "sha256:76451d1978b95ba6507a039090ba076105c87cc76fc3efd5d35d72093964d49a", - "sha256:784fc3cf8be001197b652d51d3fd259b1e2262888693a4636e18879f613a62a9", - "sha256:78cdf0d578b15148b009ccf18c686aa4f719d887e76e6b40c38ffb61d264a552", - "sha256:79be69cf7f3bf9b0deeeb062eab7ac7f36cd4cc4c4dd694bd28921ba4d8596cc", - "sha256:79e73a76b854d9c6088fe5d8b2ebe745f8681c55f7397c3c0a016192d681045f", - "sha256:7b322db1284a2ed3aa28ffd8ebe3db91c929b7a333c0820abec3d838ef5b3525", - "sha256:7d41eead3cc673cbd38a4417deb7fd0b4ca26954ff7dc6078e33f6ff97bed940", - "sha256:7eda778067ad7ffccd23ecffce537dface96212576a07924cbf0d8799d2ded5a", - "sha256:7f57b33491e281e962021de110b451ab8a24182589be17e12a22c79047935e23", - "sha256:8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f", - "sha256:8248977c2e33aecb2ced42fef99f2d319e9904a36e55a8a68b69207fb7e43edc", - "sha256:845f352911777a8e722bfce168958214951e07e47e5d5d9744109fa5fe77f79b", - "sha256:85480adfb35ffc32d40918aad81b89c69c9cc5661a9b8a81476d3e645321a056", - "sha256:8e264226ec98e01a8e1054314af91ee6cde0eacac4f465cc93b03dbe0bce2fd7", - "sha256:8e798c266c378da2bd819b0677df41ab46d78065fb2a399558f3f6cae78b2fbb", - "sha256:9181a3ccead280b828fae232df12b16652702b49d41e99d657f46cc7b1f6ec7a", - "sha256:9351229c8c8407645840edcc277f4a2d44814d1bc34a2128c11c2a031d45a5dd", - "sha256:93550784d9281e374fb5a12bf1324cc8a963fd63b2d2f223503ef0fd4aa339ea", - "sha256:9401ebc7ef522f01d01d45532c68c5ac40fb27113019b6b7d8b208f6e9baa126", - "sha256:94eb63f9b363180aff17de3e7c8760c3ba94664ea2695c52f10111244d16a299", - "sha256:9d107aff57a83222ddbd8d9ee705ede2af2cc926608b57abed8ef96b50b7e8f9", - "sha256:a32ebc02a1805adf637fc8dec324b5cdacd2e493515424f70ee33799573d661b", - "sha256:a3aa4e7b9e416774b21797365b358a6e827ffadaaca81b69ee02946852449f00", - "sha256:a6f94a7d00eb18f1b6d403c91a88fd58cfc92d4b16080dfdb774afc8294469bf", - "sha256:aa3feb8db2e87ff5e6d00d7e1480ae241876286691265657b500886c98f38bda", - "sha256:ad27098a189e5838900ce4c2a99f2fe42a0bf0c2093c17c69b45a71579e8d4a2", - "sha256:ae4578f8528569d3cf303fef2ea569c7f4c4059a38c8667ccef15c6e1f118aa5", - "sha256:b1ec7b6b6e93255f952e27ab58fbc68dcc468844b16ecbee881aeb29b6ab4d8d", - "sha256:b507778ae8a4c915436ed5c2e05b4a6cecfa70f734e19c22a005152a11c7b6a9", - "sha256:b66a2da594b6068b48b2692f043f35d4d3693fb639d5ea8b39533c2ad9ac3ab9", - "sha256:b720ce6a88a2755f7c697c23268ddc47a571b88052e6b155224347389fdf6a3b", - "sha256:b7b38448866e83176e28086674fe7368ab8590e4610fb662b44e345b86d63ffa", - "sha256:b8eb931ee8e6d8243e253e5ed7336deea6904369d2fd8ae6e43f68abbf167092", - "sha256:bb28c0f2cf2782508a40cec377935829d5fcc3ad9a3681375af4e84eb34b6b58", - "sha256:bd60d4fe2f6fa7dff9223ca1bbc9f05d2b6697bc5961072e5d3b952d46e1b1ea", - "sha256:c35eb28c1d085eb7d8c9b3296567a1bebe03ce72962e932431b9a61f28facf26", - "sha256:c4240e7eded42d131a2d2c4dec70374b781b043ddc79a9de4d55ca71f8e98aea", - "sha256:caa421e2684e382c5d8973ac55e4f36bed6821a9bad5c953494de960c74595c9", - "sha256:d490ba50c3f35dd7c17953c68f3270e7ccd1c6642e2d2afe2d8e720b98f5a053", - "sha256:d65b2d373032411e86960604dc4edac91fdfb5dca539461cf2cbe78327d1e64f", - "sha256:dae88bc0fc77edaa65c14be099bd57ee140cf507e6bfdeea7938457ab387efb0", - "sha256:de6defc1c9badbf8b9e67ae90fd00519186d6ab64e5cc5f3d21359c2a9b2c1d3", - "sha256:e101609bcbbfb04605ea1027b10dc3735c094d12d40826a60f897b98b1c30256", - "sha256:e24f9156097ff9dc286f2f913df3a7f63c0e333dcafa3c196f2c18b4175ca09a", - "sha256:e2f25215f1a359ab17320b47bcdaca3e6e6356652e8256f2441e4ef972052903", - "sha256:e5c8f6ed1e61a8b2dcdf31eb0b9bbf0130750ca79c1c49eb898e2ad86f5ccc91", - "sha256:e6f70dec1cc557e52df5306d051ef56003f74d56e9c4dd7ddb07e07ef32a84dd", - "sha256:e856bf6616714c3a9fbc270ab54103f4e685ba236fa98c054e8f87f266c93505", - "sha256:e87f6c587c3f34356c3759f0420693e35e7eb0e2e41e4c011cb6ec6ecbbf1db7", - "sha256:eb30bf180de3f632cd043322dad5751390e5385108b2807368997d1a92a509d0", - "sha256:eb88b316ec33760714a4720feb2816a3a59180fd58c1985012054fa7aebee4c2", - "sha256:eb9078108fbf0bcdde37c3f4779303673c2fa1fe8f7956e68d447d0dd426d38a", - "sha256:ecae9737b72408d6a950f7e525f30aca12d4bd8dd95e37342e5beb3a2a8c4f71", - "sha256:ee756f00726693e5ba94d6df2bdfd64d4852d23b09bb0bc700e3b30e6f333985", - "sha256:f4594c67d8a7c89cf922d9df0438c7c7bb022ad506eddb0fdb2863359ff78242", - "sha256:f53d492307962561ac7de4cd1de3e363589b000ab69617c6156a16ba7237998d", - "sha256:fb07dc5da7e849e2ad31a5d74e9bece81f30ecf5a42909d0a695f8bd1874d6af", - "sha256:fb26a934946a6afe0e326aebe0730cdff393a8bc0bbb65a2f41e30feddca399c", - "sha256:fdfc1e28e7c7cdce44985b3043bc13bbd9c747520f94a4d7164af8260b3d91f0" + "sha256:057a6af2f160a85384cde4ab36f0d2777bae1057bae255f95413cdd382aa5c74", + "sha256:0773d8329cf32b6fd222e4b52622c61fe8d503eb966cfc8d3c3c10c96266d50e", + "sha256:0a951308cde22cf77f953955a754d04dccb57fe3bb8e345d685778ed9fc1632a", + "sha256:0c451757d3fa2603354fdc789b5e58a0e327a117c370a40e3476ba4eabab228c", + "sha256:0f162bc9a15b82d947b02651b0c7e1609d6f7a8735ca330cfadec8481dd97d5a", + "sha256:15228a6800ce7bdf1b74800595e56db7138cecb338fdbf044806e10dcf182dfe", + "sha256:1733198802d71ec4c524f322e2867ee05c62e9e75df86bdca545407a221827d1", + "sha256:1a0abc7342ea9711c469dd8b821c6c311e6bc6aac1442e5fbd6b27fae0a8f3db", + "sha256:1b23b0c6f0b1db6ad769b7050c8b641c0bf215ded26c1816955b17b7f26edfa9", + "sha256:1c9ed6ef99f88fb8c14aa8e2bf8eb0fe55fa2edfea68f8675d78741df1a5ac0e", + "sha256:22a7e06a5f11a757cdfe79018e9095f9f69ae283c5cd8123774c788deec8717b", + "sha256:23b81107f46d3f21d0cbce30664fcec0f5d9f585638a67081750f99738f6bf66", + "sha256:29943e552fdc08e082eb51400fb2f58e118a83b5542bd06531214e084399b644", + "sha256:29fe3da551dface75deb2ccbf87b6b66e2e7ef38f6d89050b428be94afff3490", + "sha256:2fb73254ff43c911c967a899e1359bc5049b4b115d6e8fbdde4937d0a2246cd5", + "sha256:3485a836550b303d006d57cc06e3d5afaabc642c77050b7c985a97b13e3776b8", + "sha256:362cb78e01a5dc82009d88004cf60f2e6b6d6fcbfdec05b05af73b0abf40118f", + "sha256:3a5d8e876dfa2f102e970b183863d6dedd023d3c0eeca1fe7a9787bc5f28b212", + "sha256:3e7e88110bae996d199d1693ca8ec3fd52441d426401ae963437598667b4c5eb", + "sha256:3f5549365af25d770e06b1f8f5682d9a5637d06eb494db91c6fa75d3950cc917", + "sha256:3fd43f0616e765ab78d069cf8358def7363957a45cee446d65c502dcfeea7893", + "sha256:454a380af72c6adada298ed270d38c7a391288198dbfb8467f786f588751a90c", + "sha256:45899ec2138a4346ed34d601dedf5076fb74edf2d1dd9dc76a78e82397edee90", + "sha256:45e0f79d8351fa76e256716df91eab12890d32678b9590df7ae1042e4bd4cf5d", + "sha256:49c005cba1e2f9677fb2845dcdf9a2e72a52a17d63e8231aaaae35d9f50215ef", + "sha256:4b899594a8b2d81e5cc064a0d7f9cac2081fed91049456cae7676787e41549c9", + "sha256:55d3089079ce181a4566b1065ab28d2575eb76d8ac8f81f4fcda2bf037fee087", + "sha256:5904abf7e18cddc463219b17552229650c6b79e061d31a1059283051169cf7d5", + "sha256:5ac83957a80d0701310e96d8bec68cdcf4f90a7674b7d13f15a344315b41ab27", + "sha256:5d4a51aad8ba8bdcd2b8bd8f03d4aca19693fa2327a3470e4718a25b03481020", + "sha256:5ebb8f4614a3787d567e610bbfdf96a4798dd69a1afb1bd8ad228d4111fe6ff3", + "sha256:63df0fe568e698e1045792399f8ab6da3a6c2dce3182813fb92afa2641087b47", + "sha256:65c86fb646d2bd2972e96bd1a8b45817ed907cee68655d6295fe7ec031d04cca", + "sha256:65f267ca1370726ec2c1aa38bbe4df9a71a740f22878d2d4bf59d71a4cd8d323", + "sha256:664123feb0929d7affc135717dbd70d61d98688a08ab1e5ba464739620c6252d", + "sha256:668b92e6958c4db7cf92e81caac328dfbbdbb215db2850ad28f0cbe1eea0bfbd", + "sha256:68af363c07ecd8d4b7d4043d85cb376d7d227eceb54e5323ee45da73dbd3e426", + "sha256:6a6516b02a6101398e19a3f44820f69bab2590697f7def4331f668b14adaf828", + "sha256:6a78e2a9d9c5e3b8d4ab9b9d28c985ea66fced0a7d7c2aec1f216e03a2011480", + "sha256:6b9bf47223dd8db3d4c4b2e443b02bace480d428f0822c3f991600448a176c97", + "sha256:6d160217ec6fe890f16ad3a9531761589443749e448f91986c972714fad361c8", + "sha256:6e57054a583da8ac55edf24117ea4c9133032cfc4cf72aa2d48c1e5d4b52f899", + "sha256:70390b0da32cb90b501953716302906e8bcce087cb283e70d8c97729f22e92b2", + "sha256:72a305291fa8ee01332f1aaf38b348ca34097f6aa0b0ef627eef2837e57bbba5", + "sha256:731dc15b385ac52289743d476245b61e1a2927e803bef655b52bc3b2a75a21f3", + "sha256:731e535b1498b27d13594a0527a79b0510867b0ad891532be41cb883f2128e20", + "sha256:7333cd944ee4393b9b3d3c1b598c936d4fc8d70573a4c7dacfec5590dd50e436", + "sha256:741f57cddc9004a8c81b084660215f33a6b597dbe62c31386b983ee26310e327", + "sha256:742a73ea621953b012f2c4c2219b512180dd84489acf5b1596b0aafc55b9100b", + "sha256:7b2bb6c9d7e769360d0f20a0f219603fd64f0c8f97de17ab25853261602be0fb", + "sha256:7b79d646cf46d5cf9a9f40281d4441df5849e445726e369006d2b117710b33fe", + "sha256:7bf43e000d24012599b879791cff41589af90674722421ef11b11a5431920bab", + "sha256:7c843572c605ab51cfdb5c6b5f2586e2a8467c0d28eca4bdef4ec70c5fecbd82", + "sha256:7ebb1c6df9f78046a1b1e0a89674cd4bf73b7c648914eebcf976a57fd99a5627", + "sha256:7ffd19fc8aed057fd686a17a4935eef5f9859d69208f96310e893e64b9b6ccf5", + "sha256:8231ade007f37959fbf58acc677f26b922c02eda6f0428ea307da0fd39681bf3", + "sha256:827d6397dbd95144939b18f89edf31f63e1f99633e8d5f32f22ba8bdda567477", + "sha256:829994cfe1aeb773ca27bf246d4badc1e764893e3bfb98fff820fcecd1ca4662", + "sha256:84c32d90bf4537f0e7b4dec9aaa9a938fb8205136b9d2ecf4d7629d5262dc075", + "sha256:8767486808c436f05b23ab98eb963fb29185e32a9357a166971685cb3459900f", + "sha256:8de5b61163aee3d05c8a2beab6f47913df7981dad1baf82c414d99158c286ab1", + "sha256:90c1a51bcfddf645b3bb7ec333d9e94393a8e94f55642380fa8a9a5a9e636cb7", + "sha256:9117377b823daa28aa8635fbb08cda1cd6be3d7143257345459559aeef852d52", + "sha256:91b993743d959b8be85b4abf9d5478216a69329c321efe5be0433c1a841d691d", + "sha256:92af52828e7f29d827346b0294e5a0853fa206db77db0395b282918d41e28db9", + "sha256:9336e23e8bb3a3925398261385e2a1533957d3e760e91070dcb0e98bfa514eed", + "sha256:953f521ca9445300397e65fda3dca58b2dbd68fee983777420b57ac3c77e9f90", + "sha256:98af83fd65ae24b1fdd03aaead967a9f523bcd2f1aab2d4f3ffda65bb568a6f1", + "sha256:9aed9fa983514ca032790f3fe0d1c0e42ca7e16b42432af1706b50a9a46bef5d", + "sha256:9cd1169b2230f9cbe9c638ba38022ed7a2b1e641cc07f7cea0365e4be2a74980", + "sha256:9d1aa57a1dc8e05bdc42e81c5d671d849577aeedf279f4c449d6d286f9ed88ca", + "sha256:9d26ac7f5398bafc5b57421ad994e8a4749e8a7a0e62d05ec7d53014d5963bfa", + "sha256:9f323af3e1e4f68b60b7b247e37b8515563a61375518fa59de1af48ba28a3db6", + "sha256:9fbd898551762dea00d3fef2b1c4f99afd2c6a3ff952ea07d60a9bd5ed4f34bc", + "sha256:a1816c505187592dcd1c5a5f226601a549f70365fbd00930ac88b0c225b76bb4", + "sha256:a2bd259c442cd43c49b30fbafc51776eb19ea396faf159d26a83e6a0a5f13b0c", + "sha256:a3b5ddfd6aa7ddad53ee3edb231e88a2151507a43229b7d71b953916deca127d", + "sha256:a706b908dfa85538863504c624b237a3cc34232bf403c057414ebfdb3b4d9f84", + "sha256:a841fae2fadcae4f438d43b6ccc4aac2ad609f47cdb6cfdce60cbb3fe5ca7bc2", + "sha256:a93bac2cb577ef60074999ed56d8a1535894398e2ed920d4185c3ec0c8864742", + "sha256:a9f864ef57b7172e2db87a096642dd51e179e085ab6b2c371c29e885f65c8fb2", + "sha256:acebd068fca5512c3a6fde9c045f901613478781a73f0e82b307b214daef23fb", + "sha256:b34ece8065914f938ed7f2c5872bb865336977a52919149846eac3744327267a", + "sha256:b4cc4fce8672fffcb09b0eafc167b396b3ba53c4a7230f54b7aaffbf6c835fa9", + "sha256:b4e26a0f1b696faf283bffe5b8569e44e336c582439df5d53281ab89ee0cba96", + "sha256:b4f07cf7edcb7ec39431a5074d7ea83b29a9f71fcfc494f0f40af4e65180420f", + "sha256:b812eb847b19876ebf33fb6c4f11819af05ab6050b0bfa1bc53412ae81779adb", + "sha256:ba3b8390db29296dbbf49e91b6fe08f990743a90c8f447ba4c2ffc29670dfa63", + "sha256:bcb2e855b87321259a037429288ae85216d191c74de3e79bf57cd2bc0761992c", + "sha256:bfb0ed8ec5d25e93face268115d7964db9df8b9aae8edcde9ec6b16c726a7cc1", + "sha256:c7492f2d493b976941c7ca050f273cbda2f43c381124f7586a3e3c16d1804fec", + "sha256:c79d2319cabef1fe8e86df73371126931550804738f78ad7d31e3aad85a67367", + "sha256:c83d2399a51bbec8429266905d33616f04bc5726b1138c35844d5fcd896b2e20", + "sha256:ca3d9cf2c32b521bd9518385608787fa86f38daf993695307531822c3430ed67", + "sha256:cc3499459bbcdd51a65b64c35ab7ed2764eaf3cba826e0df3f1d7fe2e102b70b", + "sha256:d128b1bba9361fbaaf6a19e179e6cfd6a9103ce0c0555876f72780acc93efd85", + "sha256:d1bb3543b58fea74d2cd1abc4054cc927e4724687cb4560cd2ed88d2c7d820c0", + "sha256:d8b013632cc1ce1d09dbe4f32667b4d320ec2f54fc326ebeffcd0b0bcc2bb6c4", + "sha256:d8e1762f0e9cbc26ec315471e7b47855218e833cd5a032d706fbf43845d878c7", + "sha256:d9c8ef6ed820c433de075657d72dda1f89a2984955e58b8a75feb3f184250218", + "sha256:dc38367eaa2abb1b766ac333142bce7655335a73537f5c8b75aaa89c2b987757", + "sha256:f2bbb8254370eb4c628ff3d6fa8a7f74ddc40565394d4f7ab791d1fe568e37ef", + "sha256:f580f8c80acd94ac72e863efe2cab791d8c38d153e0b463b92dfa000d5c84cd1", + "sha256:fab3877e4ebb06bd9d4d4d00ee53309ee5478e66873c66a382272e3ee33eb7ea", + "sha256:fb609b3658479e33f9516d46f1a89dbb9b6c261366e3a11844a96ec487533dae", + "sha256:fcaba850dd317c65423a9d63d88f9573c53b00354d6dd95724576cc98a131595" ], "markers": "python_version >= '3.10'", - "version": "==7.13.4" + "version": "==7.14.0" }, "cryptography": { "hashes": [ - "sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72", - "sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235", - "sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9", - "sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356", - "sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257", - "sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad", - "sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4", - "sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c", - "sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614", - "sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed", - "sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31", - "sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229", - "sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0", - "sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731", - "sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b", - "sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4", - "sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4", - "sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263", - "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", - "sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1", - "sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678", - "sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48", - "sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76", - "sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0", - "sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18", - "sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d", - "sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d", - "sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1", - "sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981", - "sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7", - "sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82", - "sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2", - "sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4", - "sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663", - "sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c", - "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", - "sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a", - "sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a", - "sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d", - "sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b", - "sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a", - "sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826", - "sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee", - "sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9", - "sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648", - "sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da", - "sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2", - "sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2", - "sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87" - ], - "markers": "python_version >= '3.8' and python_full_version not in '3.9.0, 3.9.1'", - "version": "==46.0.5" + "sha256:0890f502ddf7d9c6426129c3f49f5c0a39278ed7cd6322c8755ffca6ee675a13", + "sha256:0c558d2cdffd8f4bbb30fc7134c74d2ca9a476f830bb053074498fbc86f41ed6", + "sha256:16cd65b9330583e4619939b3a3843eec1e6e789744bb01e7c7e2e62e33c239c8", + "sha256:18349bbc56f4743c8b12dc32e2bccb2cf83ee8b69a3bba74ef8ae857e26b3d25", + "sha256:1e2d54c8be6152856a36f0882ab231e70f8ec7f14e93cf87db8a2ed056bf160c", + "sha256:22a5cb272895dce158b2cacdfdc3debd299019659f42947dbdac6f32d68fe832", + "sha256:27241b1dc9962e056062a8eef1991d02c3a24569c95975bd2322a8a52c6e5e12", + "sha256:2b4d59804e8408e2fea7d1fbaf218e5ec984325221db76e6a241a9abd6cdd95c", + "sha256:2eb992bbd4661238c5a397594c83f5b4dc2bc5b848c365c8f991b6780efcc5c7", + "sha256:369a6348999f94bbd53435c894377b20ab95f25a9065c283570e70150d8abc3c", + "sha256:3cb07a3ed6431663cd321ea8a000a1314c74211f823e4177fefa2255e057d1ec", + "sha256:40ba1f85eaa6959837b1d51c9767e230e14612eea4ef110ee8854ada22da1bf5", + "sha256:4defde8685ae324a9eb9d818717e93b4638ef67070ac9bc15b8ca85f63048355", + "sha256:55b7718303bf06a5753dcdccf2f3945cf18ad7bffde41b61226e4db31ab89a9c", + "sha256:561215ea3879cb1cbbf272867e2efda62476f240fb58c64de6b393ae19246741", + "sha256:58d00498e8933e4a194f3076aee1b4a97dfec1a6da444535755822fe5d8b0b86", + "sha256:59baa2cb386c4f0b9905bd6eb4c2a79a69a128408fd31d32ca4d7102d4156321", + "sha256:5a5ed8fde7a1d09376ca0b40e68cd59c69fe23b1f9768bd5824f54681626032a", + "sha256:5b012212e08b8dd5edc78ef54da83dd9892fd9105323b3993eff6bea65dc21d7", + "sha256:5c3932f4436d1cccb036cb0eaef46e6e2db91035166f1ad6505c3c9d5a635920", + "sha256:614d0949f4790582d2cc25553abd09dd723025f0c0e7c67376a1d77196743d6e", + "sha256:76341972e1eff8b4bea859f09c0d3e64b96ce931b084f9b9b7db8ef364c30eff", + "sha256:77a2ccbbe917f6710e05ba9adaa25fb5075620bf3ea6fb751997875aff4ae4bd", + "sha256:7995ef305d7165c3f11ae07f2517e5a4f1d5c18da1376a0a9ed496336b69e5f3", + "sha256:7ce4bfae76319a532a2dc68f82cc32f5676ee792a983187dac07183690e5c66f", + "sha256:7e8eac43dfca5c4cccc6dad9a80504436fca53bb9bc3100a2386d730fbe6b602", + "sha256:84cf79f0dc8b36ac5da873481716e87aef31fcfa0444f9e1d8b4b2cece142855", + "sha256:8c7378637d7d88016fa6791c159f698b3d3eed28ebf844ac36b9dc04a14dae18", + "sha256:8cd666227ef7af430aa5914a9910e0ddd703e75f039cef0825cd0da71b6b711a", + "sha256:906cbf0670286c6e0044156bc7d4af9cbb0ef6db9f73e52c3ec56ba6bdde5336", + "sha256:9071196d81abc88b3516ac8cdfad32e2b66dd4a5393a8e68a961e9161ddc6239", + "sha256:9249e3cd978541d665967ac2cb2787fd6a62bddf1e75b3e347a594d7dacf4f74", + "sha256:984a20b0f62a26f48a3396c72e4bc34c66e356d356bf370053066b3b6d54634a", + "sha256:9be5aafa5736574f8f15f262adc81b2a9869e2cfe9014d52a44633905b40d52c", + "sha256:9c459db21422be75e2809370b829a87eb37f74cd785fc4aa9ea1e5f43b47cda4", + "sha256:9ccdac7d40688ecb5a3b4a604b8a88c8002e3442d6c60aead1db2a89a041560c", + "sha256:a0e692c683f4df67815a2d258b324e66f4738bd7a96a218c826dce4f4bd05d8f", + "sha256:a5da777e32ffed6f85a7b2b3f7c5cbc88c146bfcd0a1d7baf5fcc6c52ee35dd4", + "sha256:a64697c641c7b1b2178e573cbc31c7c6684cd56883a478d75143dbb7118036db", + "sha256:ad64688338ed4bc1a6618076ba75fd7194a5f1797ac60b47afe926285adb3166", + "sha256:bd72e68b06bb1e96913f97dd4901119bc17f39d4586a5adf2d3e47bc2b9d58b5", + "sha256:c17dfe85494deaeddc5ce251aebd1d60bbe6afc8b62071bb0b469431a000124f", + "sha256:c18684a7f0cc9a3cb60328f496b8e3372def7c5d2df39ac267878b05565aaaae", + "sha256:cc90c0b39b2e3c65ef52c804b72e3c58f8a04ab2a1871272798e5f9572c17d20", + "sha256:db63bf618e5dea46c07de12e900fe1cdd2541e6dc9dbae772a70b7d4d4765f6a", + "sha256:ea8990436d914540a40ab24b6a77c0969695ed52f4a4874c5137ccf7045a7057", + "sha256:ecde28a596bead48b0cfd2a1b4416c3d43074c2d785e3a398d7ec1fc4d0f7fbb", + "sha256:f5333311663ea94f75dd408665686aaf426563556bb5283554a3539177e03b8c", + "sha256:fdfef35d751d510fcef5252703621574364fec16418c4a1e5e1055248401054b" + ], + "markers": "python_version >= '3.9' and python_full_version not in '3.9.0, 3.9.1'", + "version": "==48.0.0" }, "dataclasses-json": { "hashes": [ @@ -1142,11 +1297,11 @@ }, "filelock": { "hashes": [ - "sha256:011a5644dc937c22699943ebbfc46e969cdde3e171470a6e40b9533e5a72affa", - "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d" + "sha256:69974355e960702e789734cb4871f884ea6fe50bd8404051a3530bc07809cf90", + "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258" ], "markers": "python_version >= '3.10'", - "version": "==3.24.3" + "version": "==3.29.0" }, "flake8": { "hashes": [ @@ -1171,32 +1326,19 @@ }, "googleapis-common-protos": { "hashes": [ - "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", - "sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5" + "sha256:53a062ff3c32552fbd62c11fe23768b78e4ddf0494d5e5fd97d3f4689c75fbbd", + "sha256:961ed60399c457ceb0ee8f285a84c870aabc9c6a832b9d37bb281b5bebde43ed" ], - "markers": "python_version >= '3.7'", - "version": "==1.72.0" - }, - "griffe": { - "hashes": [ - "sha256:5418081135a391c3e6e757a7f3f156f1a1a746cc7b4023868ff7d5e2f9a980aa" - ], - "markers": "python_version >= '3.10'", - "version": "==2.0.0" - }, - "griffecli": { - "hashes": [ - "sha256:9f7cd9ee9b21d55e91689358978d2385ae65c22f307a63fb3269acf3f21e643d" - ], - "markers": "python_version >= '3.10'", - "version": "==2.0.0" + "markers": "python_version >= '3.9'", + "version": "==1.75.0" }, "griffelib": { "hashes": [ - "sha256:01284878c966508b6d6f1dbff9b6fa607bc062d8261c5c7253cb285b06422a7f" + "sha256:3cf20b3bc470e83763ffbf236e0076b1211bac1bc67de13daf494640f2de707e", + "sha256:925c857658fb1ba40c0772c37acbc2ab650bd794d9c1b9726922e36ea4117ea1" ], "markers": "python_version >= '3.10'", - "version": "==2.0.0" + "version": "==2.0.2" }, "h11": { "hashes": [ @@ -1224,19 +1366,19 @@ }, "identify": { "hashes": [ - "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0", - "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980" + "sha256:20e6a87f786f768c092a721ad107fc9df0eb89347be9396cadf3f4abbd1fb78a", + "sha256:6be5020c38fcb07da56c53733538a3081ea5aa70d36a156f83044bfbf9173842" ], "markers": "python_version >= '3.10'", - "version": "==2.6.16" + "version": "==2.6.19" }, "idna": { "hashes": [ - "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", - "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902" + "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", + "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc" ], "markers": "python_version >= '3.8'", - "version": "==3.11" + "version": "==3.15" }, "importlib-metadata": { "hashes": [ @@ -1446,19 +1588,19 @@ }, "mkdocs-get-deps": { "hashes": [ - "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c", - "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134" + "sha256:8ee8d5f316cdbbb2834bc1df6e69c08fe769a83e040060de26d3c19fad3599a1", + "sha256:e7878cbeac04860b8b5e0ca31d3abad3df9411a75a32cde82f8e44b6c16ff650" ], - "markers": "python_version >= '3.8'", - "version": "==0.2.0" + "markers": "python_version >= '3.9'", + "version": "==0.2.2" }, "mkdocs-material": { "hashes": [ - "sha256:6776256552290b9b7a7aa002780e25b1e04bc9c3a8516b6b153e82e16b8384bd", - "sha256:9bf6f53452d4a4d527eac3cef3f92b7b6fc4931c55d57766a7d87890d47e1b92" + "sha256:00bdde50574f776d328b1862fe65daeaf581ec309bd150f7bff345a098c64a69", + "sha256:71b84353921b8ea1ba84fe11c50912cc512da8fe0881038fcc9a0761c0e635ba" ], "markers": "python_version >= '3.8'", - "version": "==9.7.2" + "version": "==9.7.6" }, "mkdocs-material-extensions": { "hashes": [ @@ -1478,19 +1620,19 @@ }, "mkdocstrings": { "hashes": [ - "sha256:0d66d18430c2201dc7fe85134277382baaa15e6b30979f3f3bdbabd6dbdb6046", - "sha256:ab670f55040722b49bb45865b2e93b824450fb4aef638b00d7acb493a9020434" + "sha256:3969a6515b77db65fd097b53c1b7aa4ae840bd71a2ee62a6a3e89503446d7172", + "sha256:63464b4b29053514f32a1dbbf604e52876d5e638111b0c295ab7ed3cac73ca9b" ], "markers": "python_version >= '3.10'", - "version": "==1.0.3" + "version": "==1.0.4" }, "mkdocstrings-python": { "hashes": [ - "sha256:31241c0f43d85a69306d704d5725786015510ea3f3c4bdfdb5a5731d83cdc2b0", - "sha256:4a32ccfc4b8d29639864698e81cfeb04137bce76bb9f3c251040f55d4b6e1ad8" + "sha256:0b83513478bdfd803ff05aa43e9b1fca9dd22bcd9471f09ca6257f009bc5ee12", + "sha256:c518632751cc869439b31c9d3177678ad2bfa5c21b79b863956ad68fc92c13b8" ], "markers": "python_version >= '3.10'", - "version": "==2.0.2" + "version": "==2.0.3" }, "mypy-extensions": { "hashes": [ @@ -1526,185 +1668,185 @@ }, "numpy": { "hashes": [ - "sha256:00ab83c56211a1d7c07c25e3217ea6695e50a3e2f255053686b081dc0b091a82", - "sha256:068cdb2d0d644cdb45670810894f6a0600797a69c05f1ac478e8d31670b8ee75", - "sha256:0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257", - "sha256:0fece1d1f0a89c16b03442eae5c56dc0be0c7883b5d388e0c03f53019a4bfd71", - "sha256:12e26134a0331d8dbd9351620f037ec470b7c75929cb8a1537f6bfe411152a1a", - "sha256:1ae241bbfc6ae276f94a170b14785e561cb5e7f626b6688cf076af4110887413", - "sha256:1f92f53998a17265194018d1cc321b2e96e900ca52d54c7c77837b71b9465181", - "sha256:209fae046e62d0ce6435fcfe3b1a10537e858249b3d9b05829e2a05218296a85", - "sha256:20abd069b9cda45874498b245c8015b18ace6de8546bf50dfa8cea1696ed06ef", - "sha256:21982668592194c609de53ba4933a7471880ccbaadcc52352694a59ecc860b3a", - "sha256:25f2059807faea4b077a2b6837391b5d830864b3543627f381821c646f31a63c", - "sha256:2653de5c24910e49c2b106499803124dde62a5a1fe0eedeaecf4309a5f639390", - "sha256:2b8f157c8a6f20eb657e240f8985cc135598b2b46985c5bccbde7616dc9c6b1e", - "sha256:2fb882da679409066b4603579619341c6d6898fc83a8995199d5249f986e8e8f", - "sha256:40397bda92382fcec844066efb11f13e1c9a3e2a8e8f318fb72ed8b6db9f60f1", - "sha256:444be170853f1f9d528428eceb55f12918e4fda5d8805480f36a002f1415e09b", - "sha256:47c5a6ed21d9452b10227e5e8a0e1c22979811cad7dcc19d8e3e2fb8fa03f1a3", - "sha256:4f069069931240b3fc703f1e23df63443dbd6390614c8c44a87d96cd0ec81eb1", - "sha256:52b913ec40ff7ae845687b0b34d8d93b60cb66dcee06996dd5c99f2fc9328657", - "sha256:5633c0da313330fd20c484c78cdd3f9b175b55e1a766c4a174230c6b70ad8262", - "sha256:5daf6f3914a733336dab21a05cdec343144600e964d2fcdabaac0c0269874b2a", - "sha256:5eea80d908b2c1f91486eb95b3fb6fab187e569ec9752ab7d9333d2e66bf2d6b", - "sha256:602f65afdef699cda27ec0b9224ae5dc43e328f4c24c689deaf77133dbee74d0", - "sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae", - "sha256:66cb9422236317f9d44b67b4d18f44efe6e9c7f8794ac0462978513359461554", - "sha256:6d82351358ffbcdcd7b686b90742a9b86632d6c1c051016484fa0b326a0a1548", - "sha256:6e9f61981ace1360e42737e2bae58b27bf28a1b27e781721047d84bd754d32e7", - "sha256:6ed0be1ee58eef41231a5c943d7d1375f093142702d5723ca2eb07db9b934b05", - "sha256:7cdde6de52fb6664b00b056341265441192d1291c130e99183ec0d4b110ff8b1", - "sha256:7df2de1e4fba69a51c06c28f5a3de36731eb9639feb8e1cf7e4a7b0daf4cf622", - "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1", - "sha256:7f54844851cdb630ceb623dcec4db3240d1ac13d4990532446761baede94996a", - "sha256:805cc8de9fd6e7a22da5aed858e0ab16be5a4db6c873dde1d7451c541553aa27", - "sha256:8906e71fd8afcb76580404e2a950caef2685df3d2a57fe82a86ac8d33cc007ba", - "sha256:89f7268c009bc492f506abd6f5265defa7cb3f7487dc21d357c3d290add45082", - "sha256:8c50dd1fc8826f5b26a5ee4d77ca55d88a895f4e4819c7ecc2a9f5905047a443", - "sha256:8e4549f8a3c6d13d55041925e912bfd834285ef1dd64d6bc7d542583355e2e98", - "sha256:8e9afaeb0beff068b4d9cd20d322ba0ee1cecfb0b08db145e4ab4dd44a6b5110", - "sha256:98f16a80e917003a12c0580f97b5f875853ebc33e2eaa4bccfc8201ac6869308", - "sha256:9e35d3e0144137d9fdae62912e869136164534d64a169f86438bc9561b6ad49f", - "sha256:9e4424677ce4b47fe73c8b5556d876571f7c6945d264201180db2dc34f676ab5", - "sha256:adb6ed2ad29b9e15321d167d152ee909ec73395901b70936f029c3bc6d7f4460", - "sha256:aea4f66ff44dfddf8c2cffd66ba6538c5ec67d389285292fe428cb2c738c8aef", - "sha256:b21041e8cb6a1eb5312dd1d2f80a94d91efffb7a06b70597d44f1bd2dfc315ab", - "sha256:b2f0073ed0868db1dcd86e052d37279eef185b9c8db5bf61f30f46adac63c909", - "sha256:b3a24467af63c67829bfaa61eecf18d5432d4f11992688537be59ecd6ad32f5e", - "sha256:b9c618d56a29c9cb1c4da979e9899be7578d2e0b3c24d52079c166324c9e8695", - "sha256:bba37bc29d4d85761deed3954a1bc62be7cf462b9510b51d367b769a8c8df325", - "sha256:bd3a7a9f5847d2fb8c2c6d1c862fa109c31a9abeca1a3c2bd5a64572955b2979", - "sha256:be71bf1edb48ebbbf7f6337b5bfd2f895d1902f6335a5830b20141fc126ffba0", - "sha256:c02ef4401a506fb60b411467ad501e1429a3487abca4664871d9ae0b46c8ba32", - "sha256:c3cd545784805de05aafe1dde61752ea49a359ccba9760c1e5d1c88a93bbf2b7", - "sha256:c7ac672d699bf36275c035e16b65539931347d68b70667d28984c9fb34e07fa7", - "sha256:cb7bbb88aa74908950d979eeaa24dbdf1a865e3c7e45ff0121d8f70387b55f73", - "sha256:cd2bd2bbed13e213d6b55dc1d035a4f91748a7d3edc9480c13898b0353708920", - "sha256:cda077c2e5b780200b6b3e09d0b42205a3d1c68f30c6dceb90401c13bff8fe74", - "sha256:cf28c0c1d4c4bf00f509fa7eb02c58d7caf221b50b467bcb0d9bbf1584d5c821", - "sha256:d0d9b7c93578baafcbc5f0b83eaf17b79d345c6f36917ba0c67f45226911d499", - "sha256:d1240d50adff70c2a88217698ca844723068533f3f5c5fa6ee2e3220e3bdb000", - "sha256:d30291931c915b2ab5717c2974bb95ee891a1cf22ebc16a8006bd59cd210d40a", - "sha256:d9f64d786b3b1dd742c946c42d15b07497ed14af1a1f3ce840cce27daa0ce913", - "sha256:da6cad4e82cb893db4b69105c604d805e0c3ce11501a55b5e9f9083b47d2ffe8", - "sha256:df1b10187212b198dd45fa943d8985a3c8cf854aed4923796e0e019e113a1bda", - "sha256:e04ae107ac591763a47398bb45b568fc38f02dbc4aa44c063f67a131f99346cb", - "sha256:e6dee3bb76aa4009d5a912180bf5b2de012532998d094acee25d9cb8dee3e44a", - "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825", - "sha256:e98c97502435b53741540a5717a6749ac2ada901056c7db951d33e11c885cc7d", - "sha256:ec055f6dae239a6299cace477b479cca2fc125c5675482daf1dd886933a1076f", - "sha256:f74f0f7779cc7ae07d1810aab8ac6b1464c3eafb9e283a40da7309d5e6e48fbb", - "sha256:fbde1b0c6e81d56f5dccd95dd4a711d9b95df1ae4009a60887e56b27e8d903fa", - "sha256:fcf92bee92742edd401ba41135185866f7026c502617f422eb432cfeca4fe236", - "sha256:fd49860271d52127d61197bb50b64f58454e9f578cb4b2c001a6de8b1f50b0b1" + "sha256:001fbb8e08d942dd57599e781f2472269ee7f2755fae407b4f67b2f0b17da3f1", + "sha256:0280e0356c0829a18d9de1cb7eee50ec22ca639878d7240307ca0943d73cd2c4", + "sha256:043191bfa8eab18c776647b62723ac9dddece59743b13f49b2016094129c2b3f", + "sha256:06ca2f61ec4385a07a6977c55ba998a4466c123642b4a32694d3128fce18c079", + "sha256:0a041d3d761dc3c35cc56ce0351506a02bcbc25f7b169f652435141a17db9096", + "sha256:0ab0a9c4ffb1a6d95ef519fe4247dba8eb6b18ad93999f76b7f657039acabd47", + "sha256:0c9136e14ed34a9e343a31c533d78a9813a69a3148332bce5e9821cb2f996e66", + "sha256:110f8b71aacb688ec69062bb7f6938a0f8acb01b7c1c4beb453c65b6d234584d", + "sha256:112b06a867b235ef466ed3508ddf0238050df9c727cafb5301ac385b899189a1", + "sha256:17f9ade344e7d9b464a084d69bcf18fc691cb1db67c62ed80820bf4926d78f0e", + "sha256:1e254a00cdf42b1e4d5b3d68d33af63268d41340d8885df2ab6470f2e1500147", + "sha256:1e978ec1e8bd0e0e4de6bb75de9d30cbb74db6b6a2bb727618613703ca0167dd", + "sha256:25c692919ac5a01f170a3bfcd62d745b24fd095c353d50812637d6fcab442e75", + "sha256:260a5d70215b61ab4fadf5c7baacd64821842975eea312125ed3c39a6391b063", + "sha256:2803abfebfc990042cd494d8ce2d5f82e9d847af6d35ec486923aa19dbad5e73", + "sha256:29a287e0cf63ff528da061de6b9f64a4618da591ca1046aafc54062e40ca7eab", + "sha256:29cb7f67d10b479ff07c17d33e39f78c07f71c40ef30d63c153d340e96cd3fb4", + "sha256:3213d622a0283a39a93d188f3cf72b26862df52fbb4ca3697f51705016523d41", + "sha256:33111801a01c12a8a1e3721f0a9232f8cfc8ae2c6b7098167e6f623c6073f402", + "sha256:357cc07a6d7b0b182ff02249616a03742827ebb1277546b5c7cd7f7620a45698", + "sha256:38efbc8de75c7a0fc1ac190162d892787f3f47b57cc291231aafee36b80982b7", + "sha256:4081eb135ac24158bd51cdfbef16f1c64df7063b1143f24731387137c092bec8", + "sha256:40fdc1ae7125e518ea98e53e69a4ebc27e1fd50510c47b7ea130cf21e5e1d42b", + "sha256:4cfe66903cc32a9921a6733d96b19bb6abf310397581bbad89c228f5abaf0ee8", + "sha256:511dbaf848decaaaf4b4ca48032619fb3138710c4bf7da7617765edad1ef96b0", + "sha256:55cced7c52e981362f708ad635198e97a752dfba412cc03c23bbf3bd8d5cd662", + "sha256:56b39e5e0622a09a25bf5baf62f4bcf0cb8a41ae6e2819cf49bbc5a74c083f91", + "sha256:5dbbdb29840ca3d91ee0fece42fc29278886d908280bfec0a5846c6f901a3eb0", + "sha256:5f9fb9157b4ce2971008323afe46053787b526ef624fea915b261468a8421a0f", + "sha256:6180d8b35af935aed8ece3a85e0a43f87393ae0ac87c8d2c8bd2c993f7270ef3", + "sha256:68a5124b13fa6cc2086764a20005d30bc0548146f7f5322f02fce212ca14317f", + "sha256:68bb27509ac1b9a3443094260f6326150663b06abe40b73a2f81160623da5b67", + "sha256:6f41ae150c4e32db4f3310cdaf64b1593a03dbabe29eec77fc9b50fe64061df6", + "sha256:7265a2f3d436e54ef9f2b52b5c937e6be778781bd97a590319d7348f1c1ca997", + "sha256:72fbe16c6fac95aedf5937fa873445cec2110be35d8a4e9433d7501fd98dae6b", + "sha256:7d92c3819208a60205a12a245c91ad70cb0a85336659b19b834205573ac8456e", + "sha256:8155154c7c691289fe18f510b5d4657c68c67989f293f0535a91360392ff6538", + "sha256:81a1cca95ed5bb92aa8b10dd2cdc9a0d3853a50fad926c28b5d7e8ea54389627", + "sha256:89cd468399cfd2504718f0ba50e410dca55a170b61a02ad92bb18c8a65186e93", + "sha256:8ad03c0965fb3c692200e74d458ca28c1dbb4ce96f9a479a8aa041ad5fabca02", + "sha256:90f9849678c75fe7afa2d348ac842c168b0a4d3d61919687216dfc547976d853", + "sha256:948424b06129ce883307e8cff868c31396d8dc7630a59c61d70d98dbe70f222c", + "sha256:9cd5ffd25db4e7ba6a375693b3fc0fc1791ec636c17db3720da19bde7180ec43", + "sha256:a0df0043bdb289bde1f62da130d20df23d58b45429f752bc7a8fc5325a225ecd", + "sha256:a2c306dea656c12c68f51f4cea133cbe78ca7435eb28c735eac1d3ebe73be6e8", + "sha256:a7830bab239b79cda9c08c2da014761cafb48da6150e1da17ac06283f43b6089", + "sha256:a7c711e21628b52034bb5ab8d1bce291f752fcc5e92accc615778acee1ff4778", + "sha256:aaf159caa35993cb1f56fb9b8e4610d35758e7ca005412eb1daa856a78c9c4b1", + "sha256:ae506e6902902557576a26ff33eda8695e7ecb3cb36c3b573a0765dee114ebdb", + "sha256:b507f5c4c1d508876d1819b6bf9a49d365b96320b5d4993426b33a23ca4b8261", + "sha256:bf162abab1c1a736333192707cef898e735a5ca00f38f27eeedf44b39d9e85eb", + "sha256:c1a2af6c6ef86344a6b0db6b97834208bf598db514f2b155042439b62605601a", + "sha256:c2d37ab77531417474168eb79d6d80b14f821a966818505d03013d0833edb7a8", + "sha256:c4fc99836233ea196540b17ab0983aff60ed07941751930f5f4d05bc3b3b7359", + "sha256:d581b735e177fdcdce6fed8e7e8880a3fb6ee4e3653a3ac6af01c6f4c03effc5", + "sha256:d6da64deb6b8ed903e7560180a92f2d804ee1ba5eeb849ac2748b8c1aba1f6d7", + "sha256:d8e8286dd7cea7895157318d1b91cdacac64c479f3cbc8dce548331728484751", + "sha256:ddea102b48f9e339f3948bf22040944184627a30fdf7f858667673b9c5f033c8", + "sha256:dfa20cc6ca228e6b155b11da03825975ce66aea520985dbbddf0f2a5a495c605", + "sha256:e3e5193ef5a3dc73bceee50f7fdc2c90dbb76c42df8d8fae3d1067a583df579e", + "sha256:e3eeb0aabd6bd5ce64faae67e9935203a6991b4bc2a485a767fbafb2c5125f45", + "sha256:e5805d5a22fd19c8ccff10a9561f9df94436b0545619ea579db2d3c35294bce2", + "sha256:e85b752a1e912b70eaad4fafbd4d1238007ab221de2009b9a2f5ae7461239895", + "sha256:eaf7fa2de5c0be8ae6ff8e9bea2ccd725e980541244521d8d4b5f3354a27babe", + "sha256:ebfb099f8dcf083deef3ac1ca4c1503f387cf76296fcb3816b66f5ecb5f54fdb", + "sha256:ece3d2cfe132e7d51f44a832b303895e6f2d499c5e74dfbdb06ee246147a304a", + "sha256:ed9749eef4cbd126da3dc1d6bcb3a57f5eb7ac6a6484146bdbf743f552dfc577", + "sha256:ede83e07a75dd06bc501566c1eca2afc0d61677c1472ac9ad93fdee6e638a48d", + "sha256:ef4aea96ce4d3b074422cb4f2f64e216bf9e213004bb58ecfdf50ea02ea8eb9a", + "sha256:f3a3570c4a2a16746ac2c31a7c7c7b0c186b95ce902e33db6f28094ed7387dda", + "sha256:f407cb6b8e9d6d8c626bc73c945db1706035af8fd632295547bf1c9e46d092d6", + "sha256:f74a575920ab21fe304421a3fc28793d82e299cae9eccb37084e9fc7f3617c20" ], "markers": "python_version >= '3.11'", - "version": "==2.4.2" + "version": "==2.4.6" }, "opentelemetry-api": { "hashes": [ - "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", - "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c" + "sha256:51a69edacadbc03a8950ace1c4c21099cacc538820ac2c9e36277e78cebba714", + "sha256:56c63bea9f77b62856be8c47600474acad853b2924b99b1687c4cb6297166716" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-exporter-otlp-proto-common": { "hashes": [ - "sha256:08f8a5862d64cc3435105686d0216c1365dc5701f86844a8cd56597d0c764fde", - "sha256:763370d4737a59741c89a67b50f9e39271639ee4afc999dadfe768541c027464" + "sha256:04f1f01fb597c4249dfcd7f8b861c902c2102369d376d9d346ff38de4469a2ee", + "sha256:f48d395ab815b444da118868977e9798ea354c25737d5cf39578ae894011c140" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-exporter-otlp-proto-http": { "hashes": [ - "sha256:31bdab9745c709ce90a49a0624c2bd445d31a28ba34275951a6a362d16a0b9cb", - "sha256:d9f5207183dd752a412c4cd564ca8875ececba13be6e9c6c370ffb752fd59985" + "sha256:00a16da1b312a1d6c7233d600d557c91df71125af73020f3b9a7765bd699d59d", + "sha256:bf142a21035d7571ac3a09cb2e5639f49886f243972883cfe777ed3bf02b734d" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-instrumentation": { "hashes": [ - "sha256:04480db952b48fb1ed0073f822f0ee26012b7be7c3eac1a3793122737c78632d", - "sha256:57ddc7974c6eb35865af0426d1a17132b88b2ed8586897fee187fd5b8944bd6a" + "sha256:32368d6ae52c8de20aa790a6ad86b10a76f09956092337ae37d675773990e541", + "sha256:f1986716d52cc316ea5f60189098726a9071d8ecc0eee96c9ed110be08bade9c" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-instrumentation-httpx": { "hashes": [ - "sha256:a506ebaf28c60112cbe70ad4f0338f8603f148938cb7b6794ce1051cd2b270ae", - "sha256:f37636dd742ad2af83d896ba69601ed28da51fa4e25d1ab62fde89ce413e275b" + "sha256:14df6e99d81be9a8cd238f6639b6fa52404c4d3ce219058fcb5dc8c0f2211f86", + "sha256:f41ec82f25c3abcdada621052db3e5fd648e3b43d55eec4b9c0c5d3ecb7b4ff4" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-instrumentation-requests": { "hashes": [ - "sha256:9a1063c16c44a3ba6e81870c4fa42a0fac3ecef5a4d60a11d0976eec9046f3d4", - "sha256:eec9fac3fab84737f663a2e08b12cb095b4bd67643b24587a8ecfa3cf4d0ca4c" + "sha256:513fcaa3d93debbdb359c00ce1a137a34a89ee908c51ac43beb7e8c18ac2b3cd", + "sha256:935c980a11e33bfd7ed969c741e4bd7c84077045651469f10e163534368d87f7" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-instrumentation-threading": { "hashes": [ - "sha256:20b18a68abe5801fa9474336b7c27487d4af3e00b66f6a8734e4fdd75c8b0b43", - "sha256:92a52a60fee5e32bc6aa8f5acd749b15691ad0bc4457a310f5736b76a6d9d1de" + "sha256:33059298e68c94b13c38b562ad28799ec16a2fd06182ebfc762bb4e956e55d94", + "sha256:afa8c2cada8ed136f07b04dc8739bc861a15e9a5edea1a65e4c5e1919c62946c" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-instrumentation-urllib": { "hashes": [ - "sha256:7d6c56e45551bdbf21efc11bd463e10862e8fd04ed4a94b5695325a56440b13e", - "sha256:bf36188d684ca6454b7162492a66749181955011e0cc47a2324cbe66e7f13e81" + "sha256:500b959d7933408ef30a6f4bb2a0b6979f71129e62b945fc5615aa63df4ad9b8", + "sha256:538e8c72515b48c69e03c2789a03d245ba6e1bf5c22c2052df1e872bb8274d96" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-proto": { "hashes": [ - "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", - "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8" + "sha256:c6a51e6b4f05ae63565f3a113217f3d2bfaec68f78c02d7a6c85f9010d1cfca6", + "sha256:dedb74cba2886c59c7789b227a7a670613025a07489040050aedff6e5c0fb43c" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-sdk": { "hashes": [ - "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", - "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6" + "sha256:083cd4bbfaa5aa7b5a9e552430d9951219967cfb27aa61feb13a77aba1fc839d", + "sha256:8c834e8f8c9ba4171d4ec843d0cb8a67e4c7394d3f9e9297e582cbd9456ddbf7" ], - "markers": "python_version >= '3.9'", - "version": "==1.39.1" + "markers": "python_version >= '3.10'", + "version": "==1.42.1" }, "opentelemetry-semantic-conventions": { "hashes": [ - "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", - "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb" + "sha256:3daf963611334b365e98a57438183eb012d3bfb40b2d931a9af613476b8701a9", + "sha256:dfe5ef4dee82586b746f522b818ceb298d00b3d59f660042bd79404bff8d0682" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "opentelemetry-util-http": { "hashes": [ - "sha256:0d97152ca8c8a41ced7172d29d3622a219317f74ae6bb3027cfbdcf22c3cc0d6", - "sha256:66381ba28550c91bee14dcba8979ace443444af1ed609226634596b4b0faf199" + "sha256:6284194028c59cd439f8acfe388145069a6127f11dc077e1344a2094adacc3f8", + "sha256:ba1268f00922ee522dba2ae38458060f99486e7385a8056985901ca9685adfff" ], - "markers": "python_version >= '3.9'", - "version": "==0.60b1" + "markers": "python_version >= '3.10'", + "version": "==0.63b1" }, "packaging": { "hashes": [ - "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", - "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529" + "sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e", + "sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661" ], "markers": "python_version >= '3.8'", - "version": "==26.0" + "version": "==26.2" }, "paginate": { "hashes": [ @@ -1791,19 +1933,19 @@ }, "pathspec": { "hashes": [ - "sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645", - "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723" + "sha256:17db5ecd524104a120e173814c90367a96a98d07c45b2e10c2f3919fff91bf5a", + "sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189" ], "markers": "python_version >= '3.9'", - "version": "==1.0.4" + "version": "==1.1.1" }, "platformdirs": { "hashes": [ - "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd", - "sha256:9a33809944b9db043ad67ca0db94b14bf452cc6aeaac46a88ea55b26e2e9d291" + "sha256:3bfa75b0ad0db84096ae777218481852c0ebc6c727b3168c1b9e0118e458cf0a", + "sha256:e61adb1d5e5cb3441b4b7710bea7e4c12250ca49439228cc1021c00dcfac0917" ], "markers": "python_version >= '3.10'", - "version": "==4.9.2" + "version": "==4.9.6" }, "pluggy": { "hashes": [ @@ -1815,27 +1957,27 @@ }, "pre-commit": { "hashes": [ - "sha256:3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77", - "sha256:eb545fcff725875197837263e977ea257a402056661f09dae08e4b149b030a61" + "sha256:718d2208cef53fdc38206e40524a6d4d9576d103eb16f0fec11c875e7716e9d9", + "sha256:e2cf246f7299edcabcf15f9b0571fdce06058527f0a06535068a86d38089f29b" ], "markers": "python_version >= '3.10'", - "version": "==4.5.1" + "version": "==4.6.0" }, "protobuf": { "hashes": [ - "sha256:3093804752167bcab3998bec9f1048baae6e29505adaf1afd14a37bddede533c", - "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", - "sha256:6ddcac2a081f8b7b9642c09406bc6a4290128fce5f471cddd165960bb9119e5c", - "sha256:8afa18e1d6d20af15b417e728e9f60f3aa108ee76f23c3b2c07a2c3b546d3afd", - "sha256:8f04fa32763dcdb4973d537d6b54e615cc61108c7cb38fe59310c3192d29510a", - "sha256:9b71e0281f36f179d00cbcb119cb19dec4d14a81393e5ea220f64b286173e190", - "sha256:a3157e62729aafb8df6da2c03aa5c0937c7266c626ce11a278b6eb7963c4e37c", - "sha256:a5cb85982d95d906df1e2210e58f8e4f1e3cdc088e52c921a041f9c9a0386de5", - "sha256:cbf16ba3350fb7b889fca858fb215967792dc125b35c7976ca4818bee3521cf0", - "sha256:d71b040839446bac0f4d162e758bea99c8251161dae9d0983a3b88dee345153b" + "sha256:0cd27b587afca21b7cfa59a74dcbd48a50f0a6400cfb59391340ad729d91d326", + "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", + "sha256:7d29d9b65f8afef196f8334e80d6bc1d5d4adedb449971fefd3723824e6e77d3", + "sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a", + "sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135", + "sha256:bd56799fb262994b2c2faa1799693c95cc2e22c62f56fb43af311cae45d26f0e", + "sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3", + "sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2", + "sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593", + "sha256:f443a394af5ed23672bc6c486be138628fbe5c651ccbc536873d7da23d1868cf" ], "markers": "python_version >= '3.9'", - "version": "==6.33.5" + "version": "==6.33.6" }, "psutil": { "hashes": [ @@ -1898,19 +2040,19 @@ }, "pygments": { "hashes": [ - "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", - "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b" + "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", + "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176" ], - "markers": "python_version >= '3.8'", - "version": "==2.19.2" + "markers": "python_version >= '3.9'", + "version": "==2.20.0" }, "pymdown-extensions": { "hashes": [ - "sha256:39f4a020f40773f6b2ff31d2cd2546c2c04d0a6498c31d9c688d2be07e1767d5", - "sha256:91b879f9f864d49794c2d9534372b10150e6141096c3908a455e45ca72ad9d3f" + "sha256:72cfcf55f07aea0d4af2c4f11dd4e52466ddfb1bb819673146398e0bd3a77354", + "sha256:d7a5d08014fc571e80ca21dd6f854e31f94c489800350564d55d15b3c41e76b6" ], "markers": "python_version >= '3.9'", - "version": "==10.21" + "version": "==10.21.3" }, "pynacl": { "hashes": [ @@ -1989,6 +2131,22 @@ "markers": "python_version >= '3.7'", "version": "==1.6.0" }, + "pytest-html": { + "hashes": [ + "sha256:70a01e8ae5800f4a074b56a4cb1025c8f4f9b038bba5fe31e3c98eb996686f07", + "sha256:c8152cea03bd4e9bee6d525573b67bbc6622967b72b9628dda0ea3e2a0b5dd71" + ], + "markers": "python_version >= '3.8'", + "version": "==4.1.1" + }, + "pytest-metadata": { + "hashes": [ + "sha256:c8e0844db684ee1c798cfa38908d20d67d0463ecb6137c72e91f418558dd5f4b", + "sha256:d2a29b0355fbc03f168aa96d41ff88b1a3b44a3b02acbe491801c98a048017c8" + ], + "markers": "python_version >= '3.8'", + "version": "==3.1.1" + }, "pytest-mock": { "hashes": [ "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", @@ -2032,6 +2190,14 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", "version": "==2.9.0.post0" }, + "python-discovery": { + "hashes": [ + "sha256:62f6db28064c9613e7ca76cb3f00c38c839a07c31c00dfe7ed0986493d2150a6", + "sha256:ed188687ebb3b82c01a17cd5ac62fc94d9f6487a7f1a0f9dfe89753fec91039c" + ], + "markers": "python_version >= '3.8'", + "version": "==1.3.1" + }, "pytokens": { "hashes": [ "sha256:0fc71786e629cef478cbf29d7ea1923299181d0699dbe7c3c0f4a583811d9fc1", @@ -2082,10 +2248,10 @@ }, "pytz": { "hashes": [ - "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", - "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00" + "sha256:04156e608bee23d3792fd45c94ae47fae1036688e75032eea2e3bf0323d1f126", + "sha256:0e60b47b29f21574376f218fe21abc009894a2321ea16c6754f3cad6eb7cdd6a" ], - "version": "==2025.2" + "version": "==2026.2" }, "pyyaml": { "hashes": [ @@ -2192,11 +2358,12 @@ }, "requests": { "hashes": [ - "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", - "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf" + "sha256:2a0d60c172f83ac6ab31e4554906c0f3b3588d37b5cb939b1c061f4907e278e0", + "sha256:f288924cae4e29463698d6d60bc6a4da69c89185ad1e0bcc4104f584e960b9ed" ], - "markers": "python_version >= '3.9'", - "version": "==2.32.5" + "index": "pypi", + "markers": "python_version >= '3.10'", + "version": "==2.34.2" }, "rpds-py": { "hashes": [ @@ -2321,19 +2488,19 @@ }, "s3transfer": { "hashes": [ - "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", - "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920" + "sha256:9edeb6d1c3c2f89d6050348548834ad8289610d886e5bf7b7207728bd43ce33a", + "sha256:ce3801712acf4ad3e89fb9990df97b4972e93f4b3b0004d214be5bce12814c20" ], - "markers": "python_version >= '3.9'", - "version": "==0.16.0" + "markers": "python_version >= '3.10'", + "version": "==0.17.0" }, "setuptools": { "hashes": [ - "sha256:22e0a2d69474c6ae4feb01951cb69d515ed23728cf96d05513d36e42b62b37cb", - "sha256:70b18734b607bd1da571d097d236cfcfacaf01de45717d59e6e04b96877532e0" + "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", + "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb" ], "markers": "python_version >= '3.9'", - "version": "==82.0.0" + "version": "==82.0.1" }, "six": { "hashes": [ @@ -2344,16 +2511,25 @@ "version": "==1.17.0" }, "synapseclient": { - "markers": "python_version >= '3.10' and python_version < '3.15'", - "path": "." + "editable": true, + "extras": [ + "boto3", + "curator", + "dev", + "docs", + "pandas", + "pysftp", + "tests" + ], + "markers": "python_version >= '3.10' and python_version < '3.15'" }, "termynal": { "hashes": [ - "sha256:7a1605a4d4ed38bf851bf25f842aafa52ef29786d52762aecee59622ed030592", - "sha256:c7abecfdbda3ccdeee2723049a70f0e2e7f63e1973bb9b8f0b3fe6c1bfb391cd" + "sha256:2918a1a6cf468497fbb3a72dc3726743988829eef1136d0c78a81df70ec869f6", + "sha256:b5147545e29256d35148251f5c2d3d23dec1d2fb0b4ba464ff330a7de8b56fc6" ], "markers": "python_version >= '3.9'", - "version": "==0.13.1" + "version": "==0.14.0" }, "tqdm": { "hashes": [ @@ -2380,27 +2556,27 @@ }, "tzdata": { "hashes": [ - "sha256:06a47e5700f3081aab02b2e513160914ff0694bce9947d6b76ebd6bf57cfc5d1", - "sha256:de39c2ca5dc7b0344f2eba86f49d614019d29f060fc4ebc8a417896a620b56a7" + "sha256:9173fde7d80d9018e02a662e168e5a2d04f87c41ea174b139fbef642eda62d10", + "sha256:bbe9af844f658da81a5f95019480da3a89415801f6cc966806612cc7169bffe7" ], "markers": "python_version >= '2'", - "version": "==2025.3" + "version": "==2026.2" }, "urllib3": { "hashes": [ - "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", - "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4" + "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", + "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897" ], - "markers": "python_version >= '3.9'", - "version": "==2.6.3" + "markers": "python_version >= '3.10'", + "version": "==2.7.0" }, "virtualenv": { "hashes": [ - "sha256:94f39b1abaea5185bf7ea5a46702b56f1d0c9aa2f41a6c2b8b0af4ddc74c10a7", - "sha256:d6e78e5889de3a4742df2d3d44e779366325a90cf356f15621fddace82431794" + "sha256:7d5987d8369e098e41406efb780a3d4ca79280097293899e351a6407ee153ab3", + "sha256:f5bda277e553b1c2b3c1a8debfc30496e1288cc93ce6b7b71b3280047e317328" ], "markers": "python_version >= '3.8'", - "version": "==20.38.0" + "version": "==21.3.3" }, "watchdog": { "hashes": [ @@ -2440,90 +2616,99 @@ }, "wrapt": { "hashes": [ - "sha256:02b551d101f31694fc785e58e0720ef7d9a10c4e62c1c9358ce6f63f23e30a56", - "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", - "sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f", - "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", - "sha256:0b1831115c97f0663cb77aa27d381237e73ad4f721391a9bfb2fe8bc25fa6e77", - "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", - "sha256:0f5f51a6466667a5a356e6381d362d259125b57f059103dd9fdc8c0cf1d14139", - "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", - "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", - "sha256:1f23fa283f51c890eda8e34e4937079114c74b4c81d2b2f1f1d94948f5cc3d7f", - "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f", - "sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067", - "sha256:24c2ed34dc222ed754247a2702b1e1e89fdbaa4016f324b4b8f1a802d4ffe87f", - "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7", - "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b", - "sha256:30ce38e66630599e1193798285706903110d4f057aab3168a34b7fdc85569afc", - "sha256:33486899acd2d7d3066156b03465b949da3fd41a5da6e394ec49d271baefcf05", - "sha256:343e44b2a8e60e06a7e0d29c1671a0d9951f59174f3709962b5143f60a2a98bd", - "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7", - "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", - "sha256:3e62d15d3cfa26e3d0788094de7b64efa75f3a53875cdbccdf78547aed547a81", - "sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977", - "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", - "sha256:46acc57b331e0b3bcb3e1ca3b421d65637915cfcd65eb783cb2f78a511193f9b", - "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", - "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58", - "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", - "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", - "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85", - "sha256:55cbbc356c2842f39bcc553cf695932e8b30e30e797f961860afb308e6b1bb7c", - "sha256:59923aa12d0157f6b82d686c3fd8e1166fa8cdfb3e17b42ce3b6147ff81528df", - "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454", - "sha256:5a7b3c1ee8265eb4c8f1b7d29943f195c00673f5ab60c192eba2d4a7eae5f46a", - "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e", - "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c", - "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", - "sha256:656873859b3b50eeebe6db8b1455e99d90c26ab058db8e427046dbc35c3140a5", - "sha256:65d1d00fbfb3ea5f20add88bbc0f815150dbbde3b026e6c24759466c8b5a9ef9", - "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", - "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", - "sha256:70d86fa5197b8947a2fa70260b48e400bf2ccacdcab97bb7de47e3d1e6312225", - "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", - "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116", - "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", - "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", - "sha256:758895b01d546812d1f42204bd443b8c433c44d090248bf22689df673ccafe00", - "sha256:79573c24a46ce11aab457b472efd8d125e5a51da2d1d24387666cd85f54c05b2", - "sha256:7e18f01b0c3e4a07fe6dfdb00e29049ba17eadbc5e7609a2a3a4af83ab7d710a", - "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804", - "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04", - "sha256:8cccf4f81371f257440c88faed6b74f1053eef90807b77e31ca057b2db74edb1", - "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", - "sha256:a36692b8491d30a8c75f1dfee65bef119d6f39ea84ee04d9f9311f83c5ad9390", - "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", - "sha256:a7c06742645f914f26c7f1fa47b8bc4c91d222f76ee20116c43d5ef0912bba2d", - "sha256:a9a2203361a6e6404f80b99234fe7fb37d1fc73487b5a78dc1aa5b97201e0f22", - "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", - "sha256:ad85e269fe54d506b240d2d7b9f5f2057c2aa9a2ea5b32c66f8902f768117ed2", - "sha256:af338aa93554be859173c39c85243970dc6a289fa907402289eeae7543e1ae18", - "sha256:afd964fd43b10c12213574db492cb8f73b2f0826c8df07a68288f8f19af2ebe6", - "sha256:b32888aad8b6e68f83a8fdccbf3165f5469702a7544472bdf41f582970ed3311", - "sha256:c31eebe420a9a5d2887b13000b043ff6ca27c452a9a22fa71f35f118e8d4bf89", - "sha256:caea3e9c79d5f0d2c6d9ab96111601797ea5da8e6d0723f77eabb0d4068d2b2f", - "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39", - "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4", - "sha256:d8a210b158a34164de8bb68b0e7780041a903d7b00c87e906fb69928bf7890d5", - "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa", - "sha256:df7d30371a2accfe4013e90445f6388c570f103d61019b6b7c57e0265250072a", - "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", - "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6", - "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235", - "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056", - "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2", - "sha256:e6f40a8aa5a92f150bdb3e1c44b7e98fb7113955b2e5394122fa5532fec4b418", - "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", - "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a", - "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6", - "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", - "sha256:f9b2601381be482f70e5d1051a5965c25fb3625455a2bf520b5a077b22afb775", - "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10", - "sha256:fd341868a4b6714a5962c1af0bd44f7c404ef78720c7de4892901e540417111c" + "sha256:03b77d3ecab6c38e5da7a5709cee6899083d08fc1bcd648b4fa78b346fc66282", + "sha256:0680304db389599691bac06a2f9fb3f0ed06af59f132d35801a38cf6c321ab59", + "sha256:07dd562ebb774cad070eeedb93c7a29647979e30f0cfd1f5c9b9f803f687b6f4", + "sha256:10e8f78948d13369b770fc17bf72272aac98b4b92d49a38f479abf718f6b615b", + "sha256:115ff1501c11ac0e267c4afd6f6b3dd24b48afcc77b029e6062f71b12bce1d79", + "sha256:12331011cbf76b782d0beec7c7ed880f51454c127ab12012cfaecf56de01a80c", + "sha256:195db5b92deba6feb818732694ad478abb8a529d97a113cc256e5e49ee2dd80d", + "sha256:199abadf7dcceab4bdc5bfe356275a56b1cb429296e283da2fe90c20b09f8d07", + "sha256:1bf3ea62734b24c0241442d8b7684ef53a8de6cad0c2eba1e99fd2297b4a92e4", + "sha256:1f663528d6ea1804d279462671b2bf98a4c0d8a4a8dd319bb3ee0629b743387f", + "sha256:22c7ee3a3737d9656ddf2c9cc1f1548ec963d966251e899561da142697d33a9d", + "sha256:231e2728ba04536821d2327ad2b3cb2c20cc79197fe5c30ddf71b12d95febe10", + "sha256:298cfa8de891b9aae945b47323a012fe3f1cac5e6b2f69b150961b9ed0df1fc8", + "sha256:29c0b2c075f8854b3345be584ab3d84f8968c45605d1914be1c94939cef5d702", + "sha256:2b3946f0ff079623dc4f117363040433be390bfebce3719de50dfecbf31efdf0", + "sha256:2f0d4a79d9af893d80caa5b709e024dd2d387f3f047008286036143f118d7010", + "sha256:2ff803b3607cd76cb9b853b03d15279c7ffc8ba69e69f76304cd23d2722f2b65", + "sha256:319720847afa6c58c32f84f9743bdcf34448ae56908c00f409764c627ff2c1fe", + "sha256:33ff34dc349320dc16ebe0cdf70dddf5ae9328f4a448823a00f37976d0cc2234", + "sha256:370b2c36e8fee503c275e39b4588d74412cd0a7792f7f3a7b54c44c4d33d4884", + "sha256:3f1dc1d1a2f0b081d8c1eef2203e61717b537a1bcb0d8e4d1405aeb15aa85c34", + "sha256:3fab0258114702859bb9d410e6a886e79477e677ac92580f81b876e7c55590cc", + "sha256:4297b7338cfa48b5cfefc7416d2ae52b0aad89e9b24da479ec010717b987c07f", + "sha256:43c36019a690b2cb089665eab01a50c92d814553c6e57ff03d2c68e63ce8f00b", + "sha256:45d4156fd35d0bdab58eac4a6854fbd053a59544fc57eb66e977b3c13c087a1c", + "sha256:484015d345548472c54c97a318c6eba92db583d9d5a966dde7cf3ae0c1461cf4", + "sha256:49c7ad697d6b13f322a1c3bb22a1c66827d5c0d303a4479e327210ee4d4ad179", + "sha256:4b0aa81f4a3d0203ae8450eae5e794540afbf00a97dd0b81accbe5b4a5362cbb", + "sha256:4d5b485a6f617825fa7449f5025ebcdad9355acb328cb6d198ba225762219bc0", + "sha256:5248171d3cd33f12c144e7aa1222983cb6ab42651e985ce51fec400a876afbfd", + "sha256:57bc3691043b158605c5ceee6b06b3720caf8ac43bd4195d1bfe12457e7014f6", + "sha256:5b7f10aa09d1f5abfe3ccd022dec566a5010465b98b3755cc0705a762547101f", + "sha256:5b865e611c186d15366964e3d9500af504920ce7b92a211d61a83d2d3c42a508", + "sha256:5b9733ef187cf05e774484ed2f703992a44429050f1cfea2e94dac543da78292", + "sha256:5b9f9d351eb8e5798066b505c705ec25e19a793367edaa3280a3f171b6950fc3", + "sha256:5c17982ccfece323bb297a195c9602ef407819199d8dbf99b8041770513fd68f", + "sha256:60bef9dc4348a76e9c2981ec4b06b779bac02556af4479030e6f62b18545b3cc", + "sha256:615be1d2b21450748e759bed7bf9ba8bc28307e91cb96b6e968f54f39e938ee5", + "sha256:628fbd908649611c8b9293e2e050231f1e230be152e7d38140e3b818ec6aade0", + "sha256:63a09b40bba3b2482983e2aeba6e45e20e1f567821ac89c8922229ecc1de7f65", + "sha256:686f1798727bf4a708df015ca782b20abe99b3664e1ee9786b7712b0e2310586", + "sha256:74b7949da2ffcd79869ac1e90946c14ce61a714269403a879ea9ed85a993c81f", + "sha256:76b8111f8f5b8553c066caa26193921dea4185efecf1f9b38473054205137800", + "sha256:778aa2f59615973f2637d9025a708b69196c4814f38d905647fa1a56d7ff6b79", + "sha256:7c5ffaf6e2d35e80bea210e6969910e2ae10c1166831651c22a315425db4f831", + "sha256:7e291fa9129d9998ed5035390d4bb9cf429c489f40e5ddaa06a1e83ed52048a7", + "sha256:8062689c0e6faf0c2532f566a492fb48ba60923c2cd6effda7cac9639dbdc1f3", + "sha256:852bbcc75eab1771d4f294fb6abcc23cd38813e34fa3c71e6d579799493c4db2", + "sha256:885638ab4f8765c5deaab41d1e4452b6d212d231091b84172e3e13df2cb280fb", + "sha256:8a094508b7cd6e583378f3cf50f125814961660225bad88f4ecaa691e30b09e1", + "sha256:8a76b27fe0d600f8a34313e1a528309aa807a16aa3a72000619bc56339020125", + "sha256:8d40f1fb34d600b3eaf812941d6bcf313075728868cad1dafb7021e6a4e77983", + "sha256:9040b15216e07ed68762e44ff231a460036e4bf3543f83988f669e7078847b2c", + "sha256:914fdca0ee2a29ede32c61c28abdaf9c57b0d8c5de9dc1e28ce7e4f0400df877", + "sha256:952ec99e71d584a0e451795dbd468909c8794727ecddd9ebb4fe9803e2803f1e", + "sha256:97fbe7a0df35afe37e7e2f053dee6300a3eed00055cfd907fa51161e22c40236", + "sha256:9ad894d5dc5960ebd546a87a78160a8c645b99899e7e45a538436919bc9be5a6", + "sha256:9b58e2cdbcfe2278a031a12a7d73836d66bc1e9e65f97c63ea0a022f2f9f351b", + "sha256:9c95f72d212e1f178f9619b77fd7ee3533e82ded6a5ad119dd88134e185ee3b0", + "sha256:a3848854af260eb4cc33602c685524fff7c8816f033325f750c7fc75c6deccf9", + "sha256:a4482d1d4108052827b354850bd6e3d1ed56262cbe4b0e8051876c298fb99280", + "sha256:a50822bbbefb90b132a780c17356062a2452cd5525bfa4b5b596fd6474cceaa6", + "sha256:a8ce59cad2ee5a4d58ee647c4ed4d9adc4282ffdc31e98cba7f831536776a0f9", + "sha256:af17d3ce1e2cc5d22ae8fe8921d7801c980ea3f5d6da4ecbd0f85c4f9e030181", + "sha256:b208a5dd6f9da3d4b17aa2e4f8ca9c5dc6b9a2ed571fdef9ed465102487b445c", + "sha256:b4ce4240a3f095e77cfcc5aed6001bd63af13ea53c35ef496af1a5a972e7eaa9", + "sha256:b55f1fcbf83637f42eaf19c553ed69864ff25ac38c653ab024fccfaec8bd2e68", + "sha256:b62f40eb24ccf05246d203461c8920889fd38dce76978df16fe28e6f0128447d", + "sha256:b70a0b75b0a5a58d04aad06b3f167d49e729381d3417413656220c0cd7617847", + "sha256:b93e1ccddbdf59cec4f7683dc84bc56eb61628eb01b22bdefc15f04cd09f8fae", + "sha256:bb7c060c3faa78fe066b6b1c65de285d8d61fb6e01ee8195625b9636c3cd9775", + "sha256:c7af243871699358ebf34a770205bf2b61ccb17a0b003e8726d2028cc36ce364", + "sha256:c990d58100f9ebb8e7a20bd2e7bd3c60838be38c5bbccdd35041bc9f36dc0cea", + "sha256:cb9336f2dc99de00c9e58487cae5541ee4d79e859377b6312d98973d4661c584", + "sha256:cccce5c70a209eb385c82d063f332ed97fc02d1cf7bffb95b2e6995b5a9b8388", + "sha256:cf93c441b11c1f3ae2ccf1e8d876939b301b3234ec19f311ab0e7543a9d4427e", + "sha256:d23ea5a8e4ae99640d027d2fd05c9d03f8d24d561fc26c0462e96affa31bf408", + "sha256:d2aab40474b6adae53d14d1f6a7785f4346a93c072adf1e69ca11a1b6afc789e", + "sha256:d8f6cf451ec4aab0cdbad128d9be1219e95ceaa9940566d71570b2d820ee50b3", + "sha256:d98bf0078736df226e36875aa58a78f9d3b0888bcf585144fb30edbbf7145238", + "sha256:db48e2623a8aca63dfcfa7e574a5f3a9f760be1c464ee23f6387f70cc9112aa2", + "sha256:db93eebcf951f9ee41d75dc0423378fa918fc6706db59bc20c02f6563b6b210d", + "sha256:e8ae3f4b50a3befa56da0f09d2b71a192454ce48e8887823dbc9228cdbb610f3", + "sha256:eb9d0c3f416e2c7c37498d1716fe323379da8b4e860da3d3818a6ec8fff7b7e5", + "sha256:ec257eedd8c3988cf76e351e949e3a56a61d90f4bb4e060de2ebfa6603df2a42", + "sha256:f0318a47d23c9407f4f94c06824662499e889ab8c192c1162e4f542a118fd700", + "sha256:f58e1aa46c204171a2faa49b1ef2953edebb3913d270bb3bae7e970f254c9293", + "sha256:f86e46490908a0ae2b2d633020c12e5283c85332d7ae0846f8a351a8a2da0b82", + "sha256:f990f1b5c8ee4ff980bdef3f73f50728fd911b9ab8de8c43144e8019dcd845ff", + "sha256:fb240700f3b597c1d40d0932bfed2f4130fec2f02b8c2cb0bcdae45d321cb691" ], - "markers": "python_version >= '3.8'", - "version": "==1.17.3" + "markers": "python_version >= '3.9'", + "version": "==2.2.0" }, "zipp": { "hashes": [ diff --git a/docs/CLAUDE.md b/docs/CLAUDE.md new file mode 100644 index 000000000..175bf3414 --- /dev/null +++ b/docs/CLAUDE.md @@ -0,0 +1,68 @@ + + +## Project + +User-facing documentation for the Synapse Python Client. Built with MkDocs + Material theme, deployed via GitHub Pages. Follows the Diataxis documentation framework with four content types: tutorials, guides, reference, and explanations. + +## Stack + +MkDocs with Material theme, mkdocstrings (Google-style docstrings), termynal (CLI animations), pymdownx.snippets (named-tag embeddings). + +### Python style +- Use built-in generics (`list`, `dict`, `tuple`, `set`) instead of `typing.List`, `typing.Dict`, etc. (Python 3.9+) + +## Conventions + +### Content types (Diataxis framework) +- **tutorials/** — Step-by-step learning (competence-building). Themed around a biomedical researcher working with Alzheimer's Disease data. Progressive build-up: Project → Folder → File → Annotations → etc. +- **guides/** — How-to guides for specific use cases (problem-solution oriented). Includes extension-specific guides (curator). +- **reference/** — API reference auto-generated from docstrings via mkdocstrings. Split into `experimental/sync/` and `experimental/async/` for new OOP API. +- **explanations/** — Deep conceptual content ("why" not just "how"). Design decisions, internal machinery. + +### File inclusion pattern (pymdownx.snippets) +Tutorial code lives in `tutorials/python/tutorial_scripts/*.py` and is embedded in markdown via named-tag includes (the `--8<--` directive must be the only content on its line): +```markdown +--8<-- "docs/tutorials/python/tutorial_scripts/annotation.py:retrieve_synapse_ids" +``` +In the paired `.py` file, wrap the region with matching comment markers: +```python +# --8<-- [start:retrieve_synapse_ids] + +# --8<-- [end:retrieve_synapse_ids] +``` +Single source of truth — edit the `.py` file, not the markdown. Use descriptive snake_case tag names (e.g. `setup`, `create_dataset`) rather than `step_1`. To include the whole file, omit the tag: `--8<-- "docs/tutorials/python/tutorial_scripts/annotation.py"`. + +### mkdocstrings reference generation +Reference markdown files use `::: synapseclient.ClassName` syntax to trigger auto-generation from docstrings. Key configuration: +- `docstring_style: google` — parse Google-style docstrings +- `members_order: source` — preserve source code order +- `filters: ["!^_", "!to_synapse_request", "!fill_from_dict"]` — private members, `to_synapse_request()`, and `fill_from_dict()` are excluded from docs +- `inherited_members: true` — shows mixin methods on inheriting classes +- Member lists are explicit — each reference page specifies which methods to document +- When adding a new public method to a model class, add it to the `members:` list in the corresponding reference pages (`docs/reference/experimental/sync/` and `docs/reference/experimental/async/`). Without this, mkdocstrings won't generate an anchor and cross-references like `[synapseclient.models.ClassName.method]` will break. + +### Anchor links for cross-referencing +Pattern: `[](){ #reference-anchor }` in reference pages. Tutorials link to reference via `[API Reference][project-reference-sync]`. Explicit type hints use: `[syn.login][synapseclient.Synapse.login]`. + +### termynal CLI animations +Terminal animation blocks marked with `` HTML comment. Prompts configured as `$` or `>`. Used in authentication.md and installation docs. + +### Custom CSS (`css/custom.css`) +- API reference indentation: `doc-contents` has 25px left padding with border +- Smaller table font (0.7rem) for API docs +- Wide layout: `max-width: 1700px` for complex content + +### Navigation structure +Defined in `mkdocs.yml` nav section. 5 main sections: Home, Tutorials, How-To Guides, API Reference, Further Reading, News. API Reference has ~85 markdown files (~40 legacy, ~45 experimental). + +## Constraints + +- Do not edit tutorial code inline in markdown — edit the `.py` script file in `tutorial_scripts/` and update line ranges if needed. +- Reference docs auto-generate from source docstrings — to change method documentation, edit the docstring in the Python source, not the markdown. +- `mkdocs.yml` is at the repo root, not in `docs/` — it configures the entire doc build. +- Docs deploy to Read the Docs (configured via `.readthedocs.yaml` at repo root). +- Local build output goes to `docs_site/` (via `site_dir` in `mkdocs.yml`) — gitignored. +- Cross-referencing uses the `autorefs` plugin: `[display text][synapseclient.ClassName.method]` auto-resolves to mkdocstrings anchors. + +### news.md +Release notes live in `docs/news.md`. Each release gets a heading with the version number and date, followed by bullet points describing changes. Group entries by category (Features, Bug Fixes, etc.). Reference Jira ticket numbers (SYNPY-XXXX) in each entry. diff --git a/docs/explanations/manifest_csv.md b/docs/explanations/manifest_csv.md new file mode 100644 index 000000000..e4b9dee45 --- /dev/null +++ b/docs/explanations/manifest_csv.md @@ -0,0 +1,120 @@ +# Manifest CSV + +The manifest is a CSV file with file locations and metadata used to bulk upload and download files in Synapse. It is the standard manifest format used by `Project.sync_from_synapse`, `Project.sync_to_synapse`, `Folder.sync_from_synapse`, `Folder.sync_to_synapse`, `Project.generate_sync_manifest`, `Folder.generate_sync_manifest`, the Synapse UI download cart, and the `synapse get-download-list` CLI command. + +!!! note + This CSV manifest replaces the legacy TSV manifest produced by `synapseutils.syncFromSynapse`. The `syncFromSynapse` and `syncToSynapse` utility functions are deprecated and will be removed in v5.0.0. Use `Project.sync_from_synapse` / `Folder.sync_from_synapse` and `Project.sync_to_synapse` / `Folder.sync_to_synapse` instead. See the [legacy TSV manifest documentation](manifest_tsv.md) for details on the old format. + +## Manifest file format + +The format of the manifest file is a comma-separated value (CSV) file with one row per file and columns describing the file. The minimum required columns for uploading are **path** and **parentId**, where `path` is the local file path and `parentId` is the Synapse ID of the project or folder where the file is uploaded to. Values that contain commas are automatically quoted (e.g., `"hello, world"`). + +### Required fields for upload + +| Field | Meaning | Example | +|----------|----------------------------|-------------------------| +| path | local file path or URL | /path/to/local/file.txt | +| parentId | Synapse ID of parent | syn1235 | + +!!! note + The legacy TSV manifest used the columns `parent` and `id`, while the CSV manifest uses `parentId` and `ID` to align with Synapse REST API field names. If you’re migrating a TSV manifest to CSV, you’ll need to rename `parent` to `parentId` and `id` to `ID`. + +### Standard fields + +These columns are recognized by `sync_to_synapse` and have specific meaning. Any of these columns may be present in the manifest but only `path` and `parentId` are required for upload. +Each of these are individual examples and is what you would find in a row in each of these columns. To clarify, "syn1235;/path/to_local/file.txt" below states that you would like both "syn1235" and "/path/to_local/file.txt" added as items used to generate a file. You can also specify one item by specifying "syn1234" + +| Field | Meaning | Example | +|---------------------|--------------------------------------------|----------------------------------------------| +| path | local file path or URL | /path/to/local/file.txt | +| parentId | Synapse ID of parent container | syn1235 | +| ID | Synapse entity ID | syn2345 | +| name | name of file in Synapse | Example_file | +| synapseStore | whether to upload the file | True | +| contentType | content type of file to overwrite defaults | text/html | +| forceVersion | whether to update version | False | +| activityName | name of activity in provenance | Ran normalization | +| activityDescription | text description of what was done | Ran algorithm xyz with parameters... | +| used | list of items used to generate file | syn1235;/path/to_local/file.txt | +| executed | list of items executed | https://github.org/;/path/to_local/code.py | + +### Metadata fields (ignored during upload) + +These columns are present in manifests generated by the Synapse UI download cart and `synapse get-download-list` CLI. They are ignored by `sync_to_synapse` and are **not** treated as annotations. + +| Field | Meaning | +|-------------------|-------------------------------| +| error | any error in downloading file | +| versionNumber | version of the file | +| dataFileSizeBytes | size of the file in bytes | +| createdBy | user who created the file | +| createdOn | date the file was created | +| modifiedBy | user who last modified | +| modifiedOn | date last modified | +| synapseURL | URL to the file in Synapse | +| dataFileMD5Hex | MD5 hash of the file | + +### Annotations + +Any columns that are not in the standard or metadata fields described above will be interpreted as annotations of the file. + +Adding annotations to each row: + +| path | parentId | annot1 | annot2 | annot3 | annot4 | annot5 | annot6 | +| --- | --- | --- | --- | --- | --- | --- | --- | +| /path/file1.txt | syn1243 | bar | 3.1415 | "aaaa, bbbb" | "[14,27,30]" | "Annotation, with a comma" | "True" | +| /path/file2.txt | syn12433 | baz | 2.71 | value_1 | "[1,2,3]" | string without commas | "[True,False]" | +| /path/file3.txt | syn12455 | zzz | 3.52 | value_3 | "[42,56,77]" | a_single_string | | + +#### Multiple values of annotations per key + +Using multiple values for a single annotation should be used sparingly as it makes it more +difficult for you to manage the data. However, it is supported. + +**Annotations can be comma `,` separated lists surrounded by brackets `[]`.** + +Because the manifest is a CSV file, multi-value annotations that contain commas are automatically quoted. For example, `[a,b,c]` will appear in the CSV as `"[a,b,c]"`. + +This is an annotation with 3 values: + +| path | parentId | annot1 | +|-----------------|----------|--------------| +| /path/file1.txt | syn1243 | "[a,b,c]" | + +### Dates in the manifest file + +Dates within the manifest file will always be written as [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format in UTC without milliseconds. For example: `2023-12-20T16:55:08Z`. + +Dates can be written in other formats specified in ISO 8601 and they will be recognized. However, `sync_from_synapse` will always write dates in the UTC format specified above. For example, you may want to specify a datetime at a specific timezone like `2023-12-20 23:55:08-07:00` and this will be recognized as a valid datetime. + +## Manifest sources + +The CSV manifest format is shared across multiple tools: + +| Source | Filename | +|----------------------------------------------------------------------|---------------------------------| +| `Project.sync_from_synapse` / `Folder.sync_from_synapse` | manifest.csv | +| `Project.generate_sync_manifest` / `Folder.generate_sync_manifest` | user-specified `manifest_path` | +| Synapse UI download cart | manifest.csv | +| CLI `synapse get-download-list` | `manifest_.csv` | + +A manifest generated by any of these sources can be used as input to `sync_to_synapse`, provided the `path` column is present with valid local file paths. Manifests from the Synapse UI do not include a `path` column by default, so users must add it before uploading. + +### Example manifest file + +| path | parentId | ID | name | annot1 | annot2 | collection_date | used | executed | +|-----------------|----------|---------|-----------|--------|--------|---------------------------|--------------------------|------------------------------| +| /path/file1.txt | syn1243 | syn5001 | file1.txt | bar | 3.1415 | 2023-12-04T07:00:00Z | syn124;/path/file2.txt | https://github.org/foo/bar | +| /path/file2.txt | syn12433 | syn5002 | file2.txt | baz | 2.71 | 2001-01-01T08:00:00Z | | https://github.org/foo/baz | +| /path/file3.txt | syn12455 | syn5003 | file3.txt | zzz | 3.52 | 2023-12-04T07:00:00Z | | https://github.org/foo/zzz | + +## References + +- [Project.sync_from_synapse][synapseclient.models.Project.sync_from_synapse] +- [Project.sync_to_synapse][synapseclient.models.Project.sync_to_synapse] +- [Project.generate_sync_manifest][synapseclient.models.Project.generate_sync_manifest] +- [Folder.sync_from_synapse][synapseclient.models.Folder.sync_from_synapse] +- [Folder.sync_to_synapse][synapseclient.models.Folder.sync_to_synapse] +- [Folder.generate_sync_manifest][synapseclient.models.Folder.generate_sync_manifest] +- [Manifest TSV (legacy)](manifest_tsv.md) +- [Managing custom metadata at scale](https://help.synapse.org/docs/Managing-Custom-Metadata-at-Scale.2004254976.html#ManagingCustomMetadataatScale-BatchUploadFileswithAnnotations) diff --git a/docs/explanations/manifest_tsv.md b/docs/explanations/manifest_tsv.md index 3e4964db3..c176ed922 100644 --- a/docs/explanations/manifest_tsv.md +++ b/docs/explanations/manifest_tsv.md @@ -1,6 +1,9 @@ -# Manifest +# Manifest TSV (Legacy) The manifest is a tsv file with file locations and metadata to be pushed to Synapse. The purpose is to allow bulk actions through a TSV without the need to manually execute commands for every requested action. +!!! warning "Deprecated" + This TSV manifest format is produced by [synapseutils.syncFromSynapse][] and consumed by [synapseutils.syncToSynapse][], both of which are deprecated and will be removed in v5.0.0. Use `Project.sync_from_synapse` / `Folder.sync_from_synapse` and `Project.sync_to_synapse` / `Folder.sync_to_synapse` instead, which use the [CSV manifest format](manifest_csv.md). + ## Manifest file format The format of the manifest file is a tab delimited file with one row per file to upload and columns describing the file. The minimum required columns are **path** and **parent** where path is the local file path and parent is the Synapse Id of the project or folder where the file is uploaded to. @@ -20,6 +23,9 @@ Any additional columns will be added as annotations. | path | local file path or URL | /path/to/local/file.txt | | parent | synapse id | syn1235 | +!!! note "Column renamed in CSV format" + The CSV manifest format uses `parentId` instead of `parent`. If you are migrating to the new [CSV manifest format](manifest_csv.md), rename the `parent` column to `parentId`. + ### Common fields: | Field | Meaning | Example | diff --git a/docs/explanations/storage_location_architecture.md b/docs/explanations/storage_location_architecture.md new file mode 100644 index 000000000..fda4db01d --- /dev/null +++ b/docs/explanations/storage_location_architecture.md @@ -0,0 +1,754 @@ +# Storage Location Architecture + +This document provides an in-depth architectural overview of the StorageLocation +system in the Synapse Python Client. It explains the design decisions, class +relationships, and data flows that enable flexible storage configuration. + +--- + +## On This Page + +- **[Domain Model](#domain-model)** + + Core classes, enums, and their relationships + +- **[Storage Types](#storage-type-mapping)** + + How storage types map to REST API types and choosing the right one + +- **[Entity Inheritance](#entity-inheritance-hierarchy)** + + How Projects and Folders gain storage capabilities + +- **[Operation Flows](#operation-flows)** + + Sequence diagrams for store, setup, and STS operations + +- **[Settings & API](#project-setting-lifecycle)** + + Project settings lifecycle and REST API architecture + +- **[Migration](#migration-flow)** + + Two-phase file migration process + + +--- + +## Overview + +The StorageLocation setting enables Synapse users to configure a location where files are uploaded to and downloaded from via Synapse. +By default, Synapse stores files in its internal S3 storage, but +users can configure projects and folders to use external storage backends such as +AWS S3 buckets, Google Cloud Storage, SFTP servers, or a local file server using a proxy server. + +### Key Concepts +- [**StorageLocationSetting**](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/project/StorageLocationSetting.html): A configuration specifying file storage and download locations. +- [**ProjectSetting**](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/project/ProjectSetting.html): A configuration applied to projects that allows customization of file storage locations. +- [**UploadType**](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/file/UploadType.html): An enumeration that defines the types of file upload destinations that Synapse supports. +- **STS Credentials**: Temporary AWS credentials for direct S3 access. +- **StorageLocation Migration**: The process of transferring the files associated with Synapse entities between storage locations while preserving the entities’ structure and identifiers. + +--- + +
+ +# Part 1: Data Model + +This section covers the core classes, enumerations, and type mappings. + +
+ +## Domain Model + +The following class diagram shows the core classes and their relationships in the +StorageLocation system. + +```mermaid +classDiagram + direction TB + + class StorageLocation { + +int storage_location_id + +StorageLocationType storage_type + +UploadType upload_type + +str bucket + +str base_key + +bool sts_enabled + +str banner + +str description + +str etag + +str created_on + +int created_by + +str url + +bool supports_subfolders + +str endpoint_url + +str proxy_url + +str secret_key + +str benefactor_id + +store() StorageLocation + +get() StorageLocation + +fill_from_dict(dict) StorageLocation + } + + class StorageLocationType { + <> + SYNAPSE_S3 + EXTERNAL_S3 + EXTERNAL_GOOGLE_CLOUD + EXTERNAL_SFTP + EXTERNAL_HTTPS + EXTERNAL_OBJECT_STORE + PROXY + } + + class UploadType { + <> + S3 + GOOGLE_CLOUD_STORAGE + SFTP + HTTPS + PROXYLOCAL + NONE + } + + class StorageLocationConfigurable { + <> + +get_sts_storage_token(permission, output_format) dict + +index_files_for_migration(dest_storage_location_id, db_path) MigrationResult + +migrate_indexed_files(db_path) MigrationResult + } + + class ProjectSettingsMixin { + <> + +set_storage_location(storage_location_id) ProjectSetting + +get_project_setting(setting_type) ProjectSetting + +delete_project_setting(setting_id) + } + + class Project { + +str id + +str name + +str description + } + + class Folder { + +str id + +str name + +str parent_id + } + + class ProjectSetting { + <> + +str id + +str project_id + +str settings_type + +List~int~ locations + +str concrete_type + +str etag + +store() ProjectSetting + +get() ProjectSetting + +delete() + } + + StorageLocation --> StorageLocationType : storage_type + StorageLocation --> UploadType : upload_type + StorageLocationConfigurable <|-- ProjectSettingsMixin : extends + ProjectSettingsMixin <|-- Project : implements + ProjectSettingsMixin <|-- Folder : implements + ProjectSettingsMixin ..> ProjectSetting : returns + +``` + +
+ + +### Key Components + +| Component | Description | +|-----------|-------------| +| [synapseclient.models.StorageLocation] | The model representing a storage location setting in Synapse | +| [synapseclient.models.StorageLocationType] | Enumeration defining the supported storage backend types | +| [synapseclient.models.UploadType] | Enumeration defining the upload protocol for each storage type | +| [synapseclient.models.mixins.StorageLocationConfigurable] | Mixin providing STS token and file migration methods | +| [synapseclient.models.mixins.ProjectSettingsMixin] | Mixin extending `StorageLocationConfigurable` with storage location and project settings management | +| [synapseclient.models.ProjectSetting] | Dataclass representing a project's upload destination configuration, backed by `UploadDestinationListSetting` in the REST API | + +--- + +
+ +## Storage Type Mapping + +Each `StorageLocationType` maps to a specific REST API `concreteType` and has a +default `UploadType`. This mapping allows the system to parse +responses from the API and construct requests. + +```mermaid +flowchart LR + subgraph StorageLocationType + SYNAPSE_S3["SYNAPSE_S3"] + EXTERNAL_S3["EXTERNAL_S3"] + EXTERNAL_GOOGLE_CLOUD["EXTERNAL_GOOGLE_CLOUD"] + EXTERNAL_SFTP["EXTERNAL_SFTP"] + EXTERNAL_HTTPS["EXTERNAL_HTTPS"] + EXTERNAL_OBJECT_STORE["EXTERNAL_OBJECT_STORE"] + PROXY["PROXY"] + end + + subgraph concreteType + S3SLS["S3StorageLocationSetting"] + ExtS3SLS["ExternalS3StorageLocationSetting"] + ExtGCSSLS["ExternalGoogleCloudStorageLocationSetting"] + ExtSLS["ExternalStorageLocationSetting"] + ExtObjSLS["ExternalObjectStorageLocationSetting"] + ProxySLS["ProxyStorageLocationSettings"] + end + + subgraph UploadType + S3["S3"] + GCS["GOOGLECLOUDSTORAGE"] + SFTP["SFTP"] + HTTPS["HTTPS"] + PROXYLOCAL["PROXYLOCAL"] + end + + SYNAPSE_S3 --> S3SLS --> S3 + EXTERNAL_S3 --> ExtS3SLS --> S3 + EXTERNAL_GOOGLE_CLOUD --> ExtGCSSLS --> GCS + EXTERNAL_SFTP --> ExtSLS --> SFTP + EXTERNAL_HTTPS --> ExtSLS --> HTTPS + EXTERNAL_OBJECT_STORE --> ExtObjSLS --> S3 + PROXY --> ProxySLS --> HTTPS +``` + +
+ +### Storage Type Attributes + +Different storage types support different configuration attributes: + +| Attribute | Type | S3StorageLocationSetting | ExternalS3StorageLocationSetting | ExternalObjectStorageLocationSetting | ExternalStorageLocationSetting | ExternalGoogleCloudStorageLocationSetting | ProxyStorageLocationSettings | +|-----------|------|--------------------------|----------------------------------|--------------------------------------|--------------------------------|-------------------------------------------|------------------------------| +| **Common (all types)** | +| `concreteType` | string (enum) | ✓ (required) | ✓ (required) | ✓ (required) | ✓ (required) | ✓ (required) | ✓ (required) | +| `storageLocationId` | integer (int32) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| `uploadType` | string | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| `banner` | string | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| `description` | string | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| `etag` | string | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| `createdOn` | string | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| `createdBy` | integer (int32) | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| **Type-specific** | +| `baseKey` | string | ✓ | ✓ | — | — | ✓ | — | +| `stsEnabled` | boolean | ✓ | ✓ | — | — | — | — | +| `bucket` | string | — | ✓ (required) | ✓ (required) | — | ✓ (required) | — | +| `endpointUrl` | string | — | ✓ | ✓ (required) | — | — | — | +| `url` | string | — | — | — | ✓ (required) | — | — | +| `supportsSubfolders` | boolean | — | — | — | ✓ | — | — | +| `proxyUrl` | string | — | — | — | — | — | ✓ (required) | +| `secretKey` | string | — | — | — | — | — | ✓ (required) | +| `benefactorId` | string | — | — | — | — | — | ✓ (required) | + +## Summary by type + +| Setting type | Description | Type-specific attributes | +|--------------|-------------|---------------------------| +| **S3StorageLocationSetting** | Default Synapse storage on Amazon S3. | `baseKey`, `stsEnabled` | +| **ExternalS3StorageLocationSetting** | External S3 bucket connected with Synapse (Synapse-accessed). | `bucket` (required), `baseKey`, `stsEnabled`, `endpointUrl` | +| **ExternalObjectStorageLocationSetting** | S3-compatible object storage **not** accessed by Synapse. | `bucket` (required), `endpointUrl` (required) | +| **ExternalStorageLocationSetting** | SFTP or HTTPS upload destination. | `url` (required), `supportsSubfolders` | +| **ExternalGoogleCloudStorageLocationSetting** | External Google Cloud Storage bucket connected with Synapse. | `bucket` (required), `baseKey` | +| **ProxyStorageLocationSettings** | HTTPS proxy for all upload/download operations. | `proxyUrl` (required), `secretKey` (required), `benefactorId` (required) | + + +
+ +### Choosing a Storage Type + +Use this decision tree to select the appropriate storage type for your use case: + +```mermaid +flowchart TB + Start{Need custom storage?} + Start -->|No| DEFAULT[Use default Synapse storage] + Start -->|Yes| Q1{Want Synapse to
manage storage?} + + Q1 -->|Yes| DEFAULT[Use default Synapse storage] + Q1 -->|No| Q2{What storage
backend?} + + Q2 -->|AWS S3| Q3{Synapse accesses
bucket directly?} + Q2 -->|Google Cloud| EXTERNAL_GOOGLE_CLOUD[Use EXTERNAL_GOOGLE_CLOUD] + Q2 -->|SFTP Server| EXTERNAL_SFTP[Use EXTERNAL_SFTP] + Q2 -->|Proxy Server| PROXY[Use PROXY] + Q2 -->|AWS S3 | EXTERNAL_OBJECT_STORE[Use EXTERNAL_OBJECT_STORE] + + Q3 -->|Yes| Q4{Need STS
credentials?} + Q3 -->|No| EXTERNAL_OBJECT_STORE + + Q4 -->|Yes| EXTERNAL_S3_STS[Use EXTERNAL_S3
with sts_enabled=True] + Q4 -->|No| EXTERNAL_S3[Use EXTERNAL_S3] + + SYNAPSE_S3 --> Benefits1[Benefits:
- Zero configuration
- Managed by Synapse
- STS available] + EXTERNAL_S3 --> Benefits2[Benefits:
- Use your own bucket
- Control access & costs
- Optional STS] + EXTERNAL_S3_STS --> Benefits2 + EXTERNAL_GOOGLE_CLOUD --> Benefits3[Benefits:
- GCP native
- Use existing GCS buckets] + EXTERNAL_SFTP --> Benefits4[Benefits:
- Legacy systems
- Synapse never touches data] + EXTERNAL_OBJECT_STORE --> Benefits5[Benefits:
- OpenStack, MinIO, etc
- Synapse never touches data] + PROXY --> Benefits6[Benefits:
- Custom access control
- Data transformation] + DEFAULT --> Benefits0[Benefits:
- No configuration needed
- Synapse-managed S3] +``` + +--- + +
+ +## Entity Inheritance Hierarchy + +Projects and Folders inherit storage configuration capabilities through two +cooperating mixins: `StorageLocationConfigurable` (STS tokens and file migration) +and `ProjectSettingsMixin` (storage location and project settings management). +This pattern allows consistent storage management across container entities. + +```mermaid +classDiagram + direction TB + + class StorageLocationConfigurable { + <> + +get_sts_storage_token() + +index_files_for_migration() + +migrate_indexed_files() + } + + class ProjectSettingsMixin { + <> + +set_storage_location() + +get_project_setting() + +delete_project_setting() + } + + class Project { + +str id + +str name + +str description + +str etag + } + + class Folder { + +str id + +str name + +str parent_id + +str etag + } + + StorageLocationConfigurable <|-- ProjectSettingsMixin + ProjectSettingsMixin <|-- Project + ProjectSettingsMixin <|-- Folder +``` + +The mixin pattern allows `Project` and `Folder` to share storage location +functionality without code duplication. Both classes inherit all methods +from `ProjectSettingsMixin`, which itself extends `StorageLocationConfigurable`. + +--- + +
+
+ +# Part 2: Operation Flows + +This section contains sequence diagrams for key operations. + +
+ +## Operation Flows + +### Store Operation + +The `store()` method creates a new storage location in Synapse. Creating a storage location is idempotent per user. Repeating a creation request with the same properties will return the previously created storage location rather than creating a new one. + +```mermaid +sequenceDiagram + participant User + participant StorageLocation + participant _to_synapse_request as _to_synapse_request() + participant API as storage_location_services + participant Synapse as Synapse REST API + + User->>StorageLocation: store() + activate StorageLocation + + StorageLocation->>_to_synapse_request: Build request body + activate _to_synapse_request + + Note over _to_synapse_request: Validate storage_type is set + Note over _to_synapse_request: Build concreteType from storage_type + Note over _to_synapse_request: Determine uploadType + Note over _to_synapse_request: Add type-specific fields + + _to_synapse_request-->>StorageLocation: Request body dict + deactivate _to_synapse_request + + StorageLocation->>API: create_storage_location_setting(body) + activate API + + API->>Synapse: POST /storageLocation + activate Synapse + + Synapse-->>API: Response with storageLocationId + deactivate Synapse + + API-->>StorageLocation: Response dict + deactivate API + + StorageLocation->>StorageLocation: fill_from_dict(response) + Note over StorageLocation: Parse storageLocationId + Note over StorageLocation: Parse concreteType → storage_type + Note over StorageLocation: Parse uploadType → upload_type + Note over StorageLocation: Extract type-specific fields + + StorageLocation-->>User: StorageLocation (populated) + deactivate StorageLocation +``` + +
+ +### STS Token Retrieval + +STS (AWS Security Token Service) enables direct S3 access using temporary credentials. + +When a Synapse client is constructed (`Synapse.__init__`), it creates an in-memory token cache: + +- `self._sts_token_store = sts_transfer.StsTokenStore()` (see `synapseclient/client.py`) + +```mermaid +sequenceDiagram + participant User + participant Entity as Folder/Project + participant Mixin as StorageLocation + participant STS as sts_transfer module + participant Client as Synapse Client + participant TokenStore as _sts_token_store (StsTokenStore) + participant Synapse as Synapse REST API + + Note over Client,TokenStore: Client.__init__ creates self._sts_token_store = sts_transfer.StsTokenStore() + + User->>Entity: get_sts_storage_token(permission, output_format) + activate Entity + + Entity->>Mixin: get_sts_storage_token_async() + activate Mixin + + Mixin->>Client: Synapse.get_client() + Client-->>Mixin: Synapse client instance + + Mixin->>STS: sts_transfer.get_sts_credentials() + activate STS + + STS->>Client: syn._sts_token_store.get_token(...) + activate Client + Client->>TokenStore: get_token(entity_id, permission, min_remaining_life) + activate TokenStore + + alt token cached and not expired + TokenStore-->>Client: Cached token + else cache miss or token expired + TokenStore->>Synapse: GET /entity/{id}/sts?permission={permission} + activate Synapse + Synapse-->>TokenStore: STS credentials response + deactivate Synapse + TokenStore-->>Client: New token (cached) + end + deactivate TokenStore + Client-->>STS: Token + deactivate Client + + Note over STS: Parse credentials + + alt output_format == "boto" + Note over STS: Format for boto3 client kwargs + STS-->>Mixin: {aws_access_key_id, aws_secret_access_key, aws_session_token} + else output_format == "json" + Note over STS: Return JSON string + STS-->>Mixin: JSON credentials string + else output_format == "shell" / "bash" + Note over STS: Format as export commands + STS-->>Mixin: Shell export commands + end + deactivate STS + + Mixin-->>Entity: Formatted credentials + deactivate Mixin + + Entity-->>User: Credentials + deactivate Entity +``` + +
+ +#### Credential Output Formats + +| Format | Description | Use Case | +|--------|-------------|----------| +| `boto` | Dict with `aws_access_key_id`, `aws_secret_access_key`, `aws_session_token` | Pass directly to `boto3.client('s3', **creds)` | +| `json` | JSON string | Store or pass to external tools | +| `shell` / `bash` | `export AWS_ACCESS_KEY_ID=...` format | Execute in shell | +| `cmd` | Windows SET commands | Windows command prompt | +| `powershell` | PowerShell variable assignments | PowerShell scripts | + +--- + +
+
+ +# Part 3: Settings & Infrastructure + +This section covers project settings, API architecture, and the async/sync pattern. + +
+ +## Project Setting Lifecycle + +Project settings control which storage location(s) are used for uploads to an +entity. The following state diagram shows the lifecycle of a project setting. + +```mermaid +stateDiagram-v2 + [*] --> NoSetting: Entity created + + NoSetting --> Created: set_storage_location() + Note right of NoSetting: Inherits from parent or uses Synapse default + + Created --> Updated: set_storage_location() updates existing setting + Updated --> Updated: set_storage_location() updates existing setting + + Created --> Deleted: delete_project_setting(project_setting_id) + Updated --> Deleted: delete_project_setting(project_setting_id) + + Deleted --> NoSetting: Returns to default (inherits from parent) + + state NoSetting { + [*] --> Inherited + Inherited: No project setting exists + Inherited: Uses parent or Synapse default (ID=1) + } + + state Created { + [*] --> Active + Active: concreteType = UploadDestinationListSetting + Active: locations = [storage_location_id] + Active: settingsType = "upload" + Active: projectId = entity.id + Active: Has id and etag + } + + state Updated { + [*] --> Modified + Modified: concreteType = UploadDestinationListSetting + Modified: locations = [new_id, ...] (max 10) + Modified: settingsType = "upload" + Modified: etag updated (OCC) + } +``` + +
+ +### Setting Types + +| Type | Purpose | Status | +|------|---------|--------| +| `upload` | Configures upload destination storage location(s) | **Supported** | + +Other setting types may be added in the future. + +--- + +
+ +## API Layer Architecture + +The storage location services module provides async functions that wrap the +Synapse REST API endpoints. This layer handles serialization and error handling. + +```mermaid +flowchart TB + subgraph "Model Layer" + SL[StorageLocation] + SLCM[StorageLocation Mixin] + end + + subgraph "API Layer" + create_sls[create_storage_location_setting] + get_sls[get_storage_location_setting] + get_ps[get_project_setting] + create_ps[create_project_setting] + update_ps[update_project_setting] + delete_ps[delete_project_setting] + end + + subgraph "REST Endpoints" + POST_SL["POST /storageLocation"] + GET_SL["GET /storageLocation/{id}"] + GET_PS["GET /projectSettings/{id}/type/{type}"] + POST_PS["POST /projectSettings"] + PUT_PS["PUT /projectSettings"] + DELETE_PS["DELETE /projectSettings/{id}"] + end + + SL --> create_sls --> POST_SL + SL --> get_sls --> GET_SL + + SLCM --> get_ps --> GET_PS + SLCM --> create_ps --> POST_PS + SLCM --> update_ps --> PUT_PS + SLCM --> delete_ps --> DELETE_PS +``` + +
+ +### REST API Reference + +| Method | Endpoint | Description | +|--------|----------|-------------| +| POST | `/storageLocation` | Create a new storage location setting | +| GET | `/storageLocation/{id}` | Retrieve a storage location by ID | +| GET | `/projectSettings/{projectId}/type/{type}` | Get project settings for an entity | +| POST | `/projectSettings` | Create a new project setting | +| PUT | `/projectSettings` | Update an existing project setting | +| DELETE | `/projectSettings/{id}` | Delete a project setting | + +--- + +
+ +## Async/Sync Pattern + +The StorageLocation system follows the Python client's `@async_to_sync` pattern, +providing both async and sync versions of all methods. + +```mermaid +flowchart LR + subgraph "User Code" + SyncCall["folder.set_storage_location()"] + AsyncCall["await folder.set_storage_location_async()"] + end + + subgraph "@async_to_sync Decorator" + Wrapper["Sync wrapper"] + AsyncMethod["Async implementation"] + end + + subgraph "Event Loop" + RunSync["wrap_async_to_sync()"] + AsyncIO["asyncio"] + end + + SyncCall --> Wrapper + Wrapper --> RunSync + RunSync --> AsyncIO + AsyncIO --> AsyncMethod + + AsyncCall --> AsyncMethod +``` + +
+ +### Method Pairs + +| Sync Method | Async Method | +|-------------|--------------| +| `StorageLocation.store()` | `StorageLocation.store_async()` | +| `StorageLocation.get()` | `StorageLocation.get_async()` | +| `folder.set_storage_location()` | `folder.set_storage_location_async()` | +| `folder.get_project_setting()` | `folder.get_project_setting_async()` | +| `folder.delete_project_setting()` | `folder.delete_project_setting_async()` | +| `folder.get_sts_storage_token()` | `folder.get_sts_storage_token_async()` | +| `folder.index_files_for_migration()` | `folder.index_files_for_migration_async()` | +| `folder.migrate_indexed_files()` | `folder.migrate_indexed_files_async()` | + +--- + +
+
+ +# Part 4: Migration + +This section covers the file migration system. + +
+ +## Migration Flow + +File migration is a two-phase process that first indexes all candidate files and then performs an asynchronous, batched migration that reuses copied file handles where possible, respects concurrency limits, snapshots affected tables when needed, and updates entities and table cells via transactional table operations while recording per-item status in a SQLite database. + +```mermaid +sequenceDiagram + participant User + participant IndexFn as index_files_for_migration + participant DB as SQLite Database + participant MigrateFn as migrate_indexed_files + participant Synapse as Synapse REST API + + Note over User,Synapse: Phase 1: Index Files + User->>IndexFn: index_files_for_migration_async(dest_id, source_ids, ...) + IndexFn->>Synapse: Verify ownership of destination storage location + IndexFn->>DB: Initialize DB and store migration settings + + alt Project/Folder + IndexFn->>Synapse: get_children() → recurse into each child + IndexFn->>DB: Insert FILE / TABLE_ATTACHED_FILE rows per entity + else File + IndexFn->>Synapse: Get file handle(s) per version strategy + IndexFn->>DB: Insert FILE migration rows + else Table (include_table_files=true) + IndexFn->>Synapse: Query FILEHANDLEID columns + fetch handles + IndexFn->>DB: Insert TABLE_ATTACHED_FILE rows + end + + IndexFn-->>User: MigrationResult (db_path) + + Note over User,Synapse: Phase 2: Migrate Files + User->>MigrateFn: migrate_indexed_files_async(db_path) + MigrateFn->>User: Confirm migration (skipped if force=True) + + loop Batches of indexed items + MigrateFn->>DB: Check for existing destination file handle + alt Not already copied + MigrateFn->>Synapse: Copy file to new storage location + end + alt FILE entity + MigrateFn->>Synapse: Create new version or update existing file handle + else TABLE_ATTACHED_FILE + MigrateFn->>Synapse: Snapshot table (if enabled) + update cell via PartialRowSet + end + MigrateFn->>DB: Mark row MIGRATED / ERRORED + end + + MigrateFn-->>User: MigrationResult (counts) +``` + +
+ +### Migration Strategies + +| Strategy | Description | +|----------|-------------| +| `new` | Create new file versions in destination (default) | +| `all` | Migrate all versions of each file | +| `latest` | Only migrate the latest version | +| `skip` | Skip if file already exists in destination | + +--- + +
+
+ +# Learn More + +| Resource | Description | +|----------|-------------| +| [Storage Location Tutorial](../tutorials/python/storage_location.md) | Step-by-step guide to using storage locations | +| [StorageLocation API Reference][synapseclient.models.StorageLocation] | Complete API documentation | +| [ProjectSettingsMixin][synapseclient.models.mixins.ProjectSettingsMixin] | Mixin methods for Projects and Folders | +| [Custom Storage Locations (Synapse Docs)](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html) | Official Synapse documentation | diff --git a/docs/guides/extensions/curator/metadata_contribution.md b/docs/guides/extensions/curator/metadata_contribution.md new file mode 100644 index 000000000..67d933a94 --- /dev/null +++ b/docs/guides/extensions/curator/metadata_contribution.md @@ -0,0 +1,293 @@ +# How to Enter and Update Metadata for a Record-Based Curation Task + +This guide shows how to programmatically complete a record-based metadata curation task, including adding, editing, validating, and submitting metadata. + +## Overview + +By following this guide, you will: + +- List curation tasks in a Synapse project +- Create a Grid session for a record-based curation task +- Download metadata from the Grid to a local CSV +- Edit the metadata locally +- Upload the metadata back into the Grid +- Export the Grid to the RecordSet to trigger schema validation +- Review the validation report +- Mark the curation task as COMPLETED to signal the administrator that you're done + +## Requirements + +- A Synapse account +- Completion of the certification quiz +- A minimum of **view** access on the Synapse project +- A minimum of **edit** access on the folder containing the RecordSet entity +- Python environment with synapseclient and the `curator` extension installed (`pip install --upgrade "synapseclient[curator]"`) +- The Synapse ID of the project where the administrator created the curation tasks +- (Optional) The `task_id` of a specific `CurationTask` you've been pointed at + +### Step 1: Authenticate + +```python +from synapseclient import Synapse + +syn = Synapse() +syn.login() +``` + +### Step 2: Find a curation task + +Each `CurationTask` carries the information you need. For record-based tasks, `task_properties` will contain a `record_set_id`. + +Choose whichever approach fits your situation. Whichever you pick, the goal is the same: end up with a single `curation_task` object to use in Step 3. `CurationTask.list()` returns fully-populated tasks — each one already carries its `task_properties`, so there is no need to call `.get()` again on a task you got from a list. + +#### Option A: List all tasks in the project + +Use this when you don't know the task ID yet and want to browse what's available. List the tasks, inspect them, and pick the one you want. + +```python +from pprint import pprint +from synapseclient.models import CurationTask + +PROJECT_ID = "syn123456789" # The Synapse ID of the project to list tasks from + +all_tasks = list(CurationTask.list(project_id=PROJECT_ID)) +for task in all_tasks: + pprint(task) + +# Select the task you want to work on (here, the first one as an example) +curation_task = all_tasks[0] +``` + +#### Option B: Filter the list by assignee, state, or name + +Use this when you want to find tasks assigned to you, tasks in a specific state, or locate a task by name. Each filter still returns a list — pick the one you want from it. + +```python +from synapseclient.models import CurationTask + +PROJECT_ID = "syn123456789" + +# Find all tasks assigned to the currently logged-in user +my_tasks = list(CurationTask.list(project_id=PROJECT_ID, assigned_to_me=True)) + +# Find tasks assigned to specific users or teams (by principal ID) +team_tasks = list(CurationTask.list(project_id=PROJECT_ID, assignee_ids=["1234567", "7654321"])) + +# Find all tasks that are currently in progress +in_progress_tasks = list( + CurationTask.list(project_id=PROJECT_ID, state_filter=["IN_PROGRESS"]) +) + +# Find a task by name (list() does not support name filtering directly — filter after listing) +target_name = "AnimalMetadata_Curation" +named_tasks = [ + task + for task in CurationTask.list(project_id=PROJECT_ID) + if task.name == target_name +] + +# Select the task you want from whichever list you built above +curation_task = my_tasks[0] +``` + +#### Option C: Fetch a task directly by ID + +Use this when the administrator has given you a specific task ID. + +```python +from synapseclient.models import CurationTask + +curation_task = CurationTask(task_id=12345).get() +``` + +### Step 3: Create a Grid session for the task + +Each option in Step 2 leaves you with a single `curation_task`. Start a new Grid session on it — it picks the `record_set_id` from the task properties automatically and links the session back to the task. If the task already has an active Grid session linked, calling this replaces the link with the new session. + +```python +latest_grid = curation_task.create_grid_session() +``` + +### Step 4: Download record-based metadata as a local CSV + +Download the current grid contents so you can edit them locally — in pandas, Excel, or any tool that reads CSV. + +```python +csv_path = latest_grid.download_csv(destination=".", file_name="grid_export.csv") +print(f"Grid downloaded to: {csv_path}") +``` + +Open the CSV, make your edits, and save it back to a local path. For example, with pandas: + +```python +import pandas as pd +import numpy as np + +df = pd.read_csv(csv_path) +print(df) + +# Smoke-test stand-in: fills 4 rows with random integers regardless of column type. +# Replace this with real edits that match your task's schema before importing — +# schema validation runs in Step 6 and will reject values that don't fit. +df = pd.DataFrame( + np.random.randint(0, 100, size=(4, len(df.columns))), + columns=df.columns, +) + +edited_path = "./grid_edited.csv" +df.to_csv(edited_path, index=False) +``` + +### Step 5: Import edited record-based metadata to Synapse + +`import_csv` upserts rows into the grid based on the `upsert_keys` the administrator configured when setting up the `RecordSet`. Existing rows matching on those keys are updated; new rows are inserted. + +```python +latest_grid = latest_grid.import_csv(path=edited_path) +print(f"Upserted edits into grid session: https://www.synapse.org/Grid:default?sessionId={latest_grid.session_id}") +``` + +### Step 6: Export the grid back to the RecordSet + +> **Important:** Until you call `export_to_record_set()`, your edits live only inside the Grid session — they aren't visible on the RecordSet and won't be validated. Apply changes whenever you reach a logical checkpoint. + +Exporting triggers schema validation and makes your edits visible to administrators and other contributors. It creates a new version of the RecordSet and generates the validation report. + +```python +latest_grid.export_to_record_set() +print(f"Exported to RecordSet version: {latest_grid.record_set_version_number}") +``` + +### Step 7: Review your validation results + +When you exported the grid in Step 6, Synapse validated each row against the JSON schema bound to the RecordSet and generated a row-level report. Reviewing this report before handing the task back to the administrator lets you catch and fix problems in your own data first — saving a round trip. + +#### Prerequisites for validation results + +A validation report is only generated when **all** of the following are true: + +1. A JSON schema has been bound to the RecordSet by the administrator who set up the task +2. You have entered data through a Grid session +3. The Grid session has been exported back to the RecordSet — this is the step that triggers validation and populates the RecordSet's validation_file_handle_id + +If the Grid was never exported (Step 6), there is nothing to review yet. + +#### Retrieve and inspect the results + +Validation results live on the RecordSet itself, so you can retrieve them whether or not the Grid session is still open. Use the record_set_id from your CurationTask, re-fetch the RecordSet to pick up the latest validation_file_handle_id, and pull the detailed report as a pandas DataFrame: + +```python +from synapseclient.models import RecordBasedMetadataTaskProperties, RecordSet + +if isinstance(curation_task.task_properties, RecordBasedMetadataTaskProperties): + record_set = RecordSet(id=curation_task.task_properties.record_set_id).get() + + validation_df = record_set.get_detailed_validation_results() + + if validation_df is None: + print("No validation results yet — make sure the Grid was exported in Step 6.") + else: + total = len(validation_df) + valid = validation_df["is_valid"].sum() + invalid = (~validation_df["is_valid"]).sum() + + print(f"Total records: {total}") + print(f"Valid records: {valid}") + print(f"Invalid records: {invalid}") + + invalid_rows = validation_df[~validation_df["is_valid"]] + for _, row in invalid_rows.iterrows(): + print(f"\nRow {row['row_index']}:") + print(f" Error: {row['validation_error_message']}") + print(f" All messages: {row['all_validation_messages']}") +``` + +!!! note "Older CurationTasks without task properties" + CurationTasks created before task properties were introduced will not have a + `taskProperties` field in the Synapse response. Attempting to retrieve such a task + via `get()`, `store()`, or `list()` will raise a `ValueError`. If you encounter + this error, delete the task with `task.delete(delete_source=False)` and recreate + it with the appropriate task properties. + +Each row of the report carries: + +- row_index — the row in the RecordSet that was validated +- is_valid — boolean indicating whether the row passes the schema +- validation_error_message — the primary schema violation for that row (if any) +- all_validation_messages — every schema violation for that row; a row may fail on multiple fields + +Sample output for a submission with errors looks like: + +```text +Row 1: + Error: expected type: String, found: Null + All messages: ["#/genotype: expected type: String, found: Null"] + +Row 2: + Error: other is not a valid enum value + All messages: ["#/sex: other is not a valid enum value"] +``` + +#### Fix and re-export + +If any rows are invalid, recreate a Grid session against the RecordSet (see Step 3), correct the offending rows, and re-run Steps 4–6 to re-export. The validation report is regenerated on each export, so iterate until the report is clean before letting the administrator know your task is ready. + +> **If get_detailed_validation_results returns None after exporting:** check that record_set.validation_file_handle_id is set after the re-fetch. If it isn't, the export did not complete — re-run export_to_record_set() on an active Grid session against the same RecordSet. + +### Step 8: Mark the curation task as COMPLETED + +Once your validation report is clean and you've cleaned up the Grid session, transition the curation task to COMPLETED. This signals the administrator that the task is ready for their review — they can list tasks in the project and pick up the ones whose status is COMPLETED. + +```python +curation_task.set_task_state(state="COMPLETED") +``` + +## File-Based Curation Tasks + +File-based tasks follow the same overall flow as record-based tasks (Steps 1–8 above), with three key differences: + +**No CSV import.** `import_csv` is not currently supported for file-based grids. Instead, you can either: + +- Download the CSV (Step 4) as a local reference, make your edits locally, then copy-paste the values back into the Grid UI +- Make edits directly in the Synapse Grid UI — Step 3 prints the session URL (`https://www.synapse.org/Grid:default?sessionId=...`) after creating the session + +**Use `synchronize()` instead of `export_to_record_set()`.** After editing in the Grid UI, push your changes back to the underlying files: + +```python +latest_grid.synchronize() +``` + +This writes the Grid annotation values back to each file as Synapse annotations. There is no versioned RecordSet — the files themselves are updated in place. + +**No per-row validation report.** Validation is enforced by the JSON schema bound to the folder containing the files, not by a row-level export report. After you call `synchronize()`, the administrator verifies schema compliance on their end — there is nothing to retrieve from the contributor side. If the administrator reports violations, correct the flagged annotations in the Grid UI and re-synchronize. + +## Appendix + +### Cleaning up a Grid session + +```python +latest_grid.delete() +``` + +Deleting is permanent — you can no longer re-export from this session. If you spot more issues later, create a new Grid session via Step 3. + +## References + +### API Documentation + + +- [CurationTask.list][synapseclient.models.CurationTask.list] - List curation tasks in a project +- [CurationTask.get][synapseclient.models.CurationTask.get] - Fetch a CurationTask by id +- [CurationTask.create_grid_session][synapseclient.models.CurationTask.create_grid_session] - Create a Grid session for a CurationTask and link it to the task status +- [CurationTask.set_task_state][synapseclient.models.CurationTask.set_task_state] - Set the state on a CurationTask's status +- [Grid.download_csv][synapseclient.models.Grid.download_csv] - Download Grid contents as a local CSV +- [Grid.import_csv][synapseclient.models.Grid.import_csv] - Upsert CSV edits back into a Grid session (record-based grids only) +- [Grid.export_to_record_set][synapseclient.models.Grid.export_to_record_set] - Export Grid data back to RecordSet and generate validation results +- [Grid.synchronize][synapseclient.models.Grid.synchronize] - Synchronize a file-based Grid against its source file view +- [Grid.delete][synapseclient.models.Grid.delete] - Delete a Grid session +- [RecordSet.get_detailed_validation_results][synapseclient.models.RecordSet.get_detailed_validation_results] - Retrieve the row-level validation report for a RecordSet + + +### Related Documentation + +- [How to Set Up Metadata Curation Workflows](metadata_curation.md) - The administrator-facing companion to this guide diff --git a/docs/guides/extensions/curator/metadata_curation.md b/docs/guides/extensions/curator/metadata_curation.md index ea13b93f2..321b9411a 100644 --- a/docs/guides/extensions/curator/metadata_curation.md +++ b/docs/guides/extensions/curator/metadata_curation.md @@ -1,24 +1,24 @@ -# How to Create Metadata Curation Workflows +# How to Set Up Metadata Curation Workflows -This guide shows you how to set up a metadata curation workflow in Synapse using the curator extension. You'll learn to find appropriate schemas, create curation tasks for your research data. +This guide is for **curation administrators** — the person responsible for designing a curation workflow: choosing a JSON schema, deciding whether metadata is record-based or file-based, creating the `CurationTask`, and reviewing the validation results contributors submit. + +If you're a data contributor opening a task an administrator has already created, see [How to Enter and Update Metadata for a Curation Task](metadata_contribution.md) instead. ## What you'll accomplish By following this guide, you will: - Find and select the right JSON schema for your data type -- Create a metadata curation workflow with automatic validation -- Set up either file-based or record-based metadata collection -- Configure curation tasks that guide collaborators through metadata entry -- Retrieve and analyze detailed validation results to identify data quality issues +- Create a record-based or file-based metadata curation workflow +- Configure curation tasks that guide contributors through metadata entry ## Prerequisites - A Synapse account with project creation permissions -- Python environment with synapseclient and the `curator` extension installed (ie. `pip install --upgrade "synapseclient[curator]"`) +- Python environment with synapseclient and the `curator` extension installed (`pip install --upgrade "synapseclient[curator]"`) - An existing Synapse project and folder where you want to manage metadata - A JSON Schema registered in Synapse (many schemas are already available for Sage-affiliated projects, or you can register your own by following the [JSON Schema tutorial](../../../tutorials/python/json_schema.md)) - - If you are leveraging the [Curator CSV data model](../../../explanations/curator_data_model.md), you can create JSON schemas by following this [tutorial](../../extensions/curator/schema_operations.md) + - If you are using the [Curator CSV data model](../../../explanations/curator_data_model.md), you can create JSON schemas by following this [guide](schema_operations.md) - (Optional) An existing Synapse team if you want multiple users to collaborate on the same Grid session. Pass the team's ID as `assignee_principal_id` when creating the curation task. ## Step 1: Authenticate and import required functions @@ -29,7 +29,12 @@ from synapseclient.extensions.curator import ( create_file_based_metadata_task, query_schema_registry ) +from synapseclient.models import ( + ViewTypeMask, +) from synapseclient import Synapse +from synapseclient.models import Grid +from synapseclient.models.table_components import Query syn = Synapse() syn.login() @@ -55,6 +60,7 @@ print("Latest schema URI:", schema_uri) **When to use this approach:** You know your DCC and data type, you want the most current schema version, and it has already been registered into . **Alternative - browse available schemas:** + ```python # Get all versions to see what's available all_schemas = query_schema_registry( @@ -69,12 +75,11 @@ all_schemas = query_schema_registry( ### Option A: Record-based metadata -Use this when metadata describes individual data files and is stored as annotations directly on each file. +Use this when metadata is normalized in structured records to eliminate duplication and ensure consistency. ```python -record_set, curation_task, data_grid = create_record_based_metadata_task( +record_set, curation_task = create_record_based_metadata_task( synapse_client=syn, - project_id="syn123456789", # Your project ID folder_id="syn987654321", # Folder where RecordSet Entity will be stored record_set_name="AnimalMetadata_Records", record_set_description="Centralized metadata for animal study data", @@ -83,7 +88,8 @@ record_set, curation_task, data_grid = create_record_based_metadata_task( instructions="Complete all required fields according to the schema. Use StudyKey to link records to your data files.", schema_uri=schema_uri, # Schema found in Step 2 bind_schema_to_record_set=True, - assignee_principal_id="123456" # Optional: Assign to a user or team + create_grid=False, + assignee_principal_id=123456 # Optional: Assign to a user or team ) print(f"Created RecordSet: {record_set.id}") @@ -95,11 +101,10 @@ print(f"Created CurationTask: {curation_task.task_id}") - A RecordSet where metadata is stored as structured records (like a spreadsheet) - A CurationTask that guides users through completing the metadata - Automatic schema binding for validation -- A data grid interface for easy metadata entry ### Option B: File-based metadata (for unique per-file metadata) -Use this when metadata is normalized in structured records to eliminate duplication and ensure consistency. +Use this when metadata describes individual data files and is stored as annotations directly on each file. ```python entity_view_id, task_id = create_file_based_metadata_task( @@ -110,11 +115,13 @@ entity_view_id, task_id = create_file_based_metadata_task( attach_wiki=False, # Creates a wiki in the folder with the entity view (Defaults to False) entity_view_name="Animal Study Files View", schema_uri=schema_uri, # Schema found in Step 2 - assignee_principal_id="123456" # Optional: Assign to a user or team + assignee_principal_id=123456 # Optional: Assign to a user or team + view_type_mask=ViewTypeMask.FILE # Optional: include additional entity types in the view (ViewTypeMask.FILE | ViewTypeMask.DOCKER). (Defaults to ViewTypeMask.FILE) ) print(f"Created EntityView: {entity_view_id}") print(f"Created CurationTask: {task_id}") + ``` **What this creates:** @@ -150,9 +157,8 @@ schema_uri = query_schema_registry( print("Using schema:", schema_uri) # Step 3A: Create record-based workflow -record_set, curation_task, data_grid = create_record_based_metadata_task( +record_set, curation_task = create_record_based_metadata_task( synapse_client=syn, - project_id="syn123456789", folder_id="syn987654321", record_set_name="AnimalMetadata_Records", record_set_description="Centralized animal study metadata", @@ -161,10 +167,11 @@ record_set, curation_task, data_grid = create_record_based_metadata_task( instructions="Complete metadata for all study animals using StudyKey to link records to data files.", schema_uri=schema_uri, bind_schema_to_record_set=True, - assignee_principal_id="123456" # Optional: Assign to a user or team + create_grid=False, + assignee_principal_id=123456 # Optional: Assign to a user or team ) -print(f"Record-based workflow created:") +print("Record-based workflow created:") print(f" RecordSet: {record_set.id}") print(f" CurationTask: {curation_task.task_id}") @@ -177,10 +184,11 @@ entity_view_id, task_id = create_file_based_metadata_task( attach_wiki=True, entity_view_name="Animal Study Files View", schema_uri=schema_uri, - assignee_principal_id="123456" # Optional: Assign to a user or team + assignee_principal_id=123456 # Optional: Assign to a user or team + view_type_mask=ViewTypeMask.FILE # Optional: include additional entity types in the view (ViewTypeMask.FILE | ViewTypeMask.DOCKER). (Defaults to ViewTypeMask.FILE) ) -print(f"File-based workflow created:") +print("File-based workflow created:") print(f" EntityView: {entity_view_id}") print(f" CurationTask: {task_id}") ``` @@ -317,9 +325,8 @@ os.close(temp_fd) test_data.to_csv(temp_csv, index=False) # Step 2: Create the curation task (this creates an empty template RecordSet) -record_set, curation_task, data_grid = create_record_based_metadata_task( +record_set, curation_task = create_record_based_metadata_task( synapse_client=syn, - project_id="syn123456789", folder_id="syn987654321", record_set_name="AnimalMetadata_Records", record_set_description="Animal study metadata with validation", @@ -328,6 +335,7 @@ record_set, curation_task, data_grid = create_record_based_metadata_task( instructions="Enter metadata for each animal. All required fields must be completed.", schema_uri=schema_uri, bind_schema_to_record_set=True, + create_grid=False, ) time.sleep(10) @@ -480,6 +488,28 @@ for curation_task in CurationTask.list( pprint(curation_task) ``` +### Update the state of a curation task + +Use this script to change a task's lifecycle state. Valid states are +NOT_STARTED, IN_PROGRESS, COMPLETED, and CANCELED. + +```python +from synapseclient import Synapse +from synapseclient.models import CurationTask + +TASK_ID = 123456 # The numeric ID of the CurationTask to update + +syn = Synapse() +syn.login() + +status = CurationTask(task_id=TASK_ID).set_task_state( + state="COMPLETED", + synapse_client=syn, +) + +print(f"Task {TASK_ID} state is now: {status.state}") +``` + ## References ### API Documentation @@ -488,13 +518,15 @@ for curation_task in CurationTask.list( - [create_record_based_metadata_task][synapseclient.extensions.curator.create_record_based_metadata_task] - Create RecordSet-based curation workflows - [create_file_based_metadata_task][synapseclient.extensions.curator.create_file_based_metadata_task] - Create EntityView-based curation workflows - [RecordSet.get_detailed_validation_results][synapseclient.models.RecordSet.get_detailed_validation_results] - Get detailed validation results for RecordSet data -- [Grid.create][synapseclient.models.curation.Grid.create] - Create a Grid session from a RecordSet +- [Grid.create][synapseclient.models.curation.Grid.create] - Create a Grid session from a RecordSet or EntityView - [Grid.export_to_record_set][synapseclient.models.curation.Grid.export_to_record_set] - Export Grid data back to RecordSet and generate validation results - [Folder.bind_schema][synapseclient.models.Folder.bind_schema] - Bind schemas to folders - [Folder.validate_schema][synapseclient.models.Folder.validate_schema] - Validate folder schema compliance - [CurationTask.list][synapseclient.models.CurationTask.list] - List curation tasks in a project +- [CurationTask.set_task_state][synapseclient.models.CurationTask.set_task_state] - Update the lifecycle state of a curation task ### Related Documentation +- [How to Enter and Update Metadata for a Curation Task](metadata_contribution.md) - The contributor-facing companion to this guide - [JSON Schema Tutorial](../../../tutorials/python/json_schema.md) - Learn how to register schemas - [Schema Registry](https://synapse.org/Synapse:syn69735275/tables/) - Browse available schemas diff --git a/docs/guides/synapse_mcp.md b/docs/guides/synapse_mcp.md new file mode 100644 index 000000000..2ae6ceab2 --- /dev/null +++ b/docs/guides/synapse_mcp.md @@ -0,0 +1,118 @@ +# Using the Synapse MCP Server + +The [Synapse MCP server](https://github.com/Sage-Bionetworks/synapse-mcp) implements the [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) and lets AI assistants (Claude, GitHub Copilot, Cursor, and others) directly query Synapse — search for datasets, inspect entity metadata, explore project hierarchies, and trace provenance — without you writing any code. + +The server is implemented in Python and built on top of this `synapseclient` package, so its behavior and capabilities mirror what you can do programmatically through the Python client. + +!!! warning "Terms of Service" + Using the Synapse MCP server with consumer AI services that store conversation data may violate the Synapse Terms of Service prohibition on data redistribution. Prefer enterprise deployments with data-residency guarantees or self-hosted models when working with sensitive or restricted datasets. + +--- + +## Installation + +### Remote server (recommended) + +The hosted MCP server at `https://mcp.synapse.org/mcp` authenticates via OAuth2 — no token management required. + +#### "Claude Code (CLI)" + + ```bash + claude mcp add --transport http synapse -- https://mcp.synapse.org/mcp + ``` + + On first use, Claude Code will open a browser window to complete the OAuth2 login. + +#### "Claude Desktop" + + 1. Open **Settings → Connectors → Add custom connector** + 2. Enter the URL: `https://mcp.synapse.org/mcp` + 3. Save and restart Claude Desktop + +#### "VS Code / GitHub Copilot" + + Add to your `settings.json` or `.vscode/mcp.json`: + + ```json + { + "mcp": { + "servers": { + "synapse": { + "type": "http", + "url": "https://mcp.synapse.org/mcp" + } + } + } + } + ``` + +### Local installation + +For air-gapped environments or development, you can run the server locally using a [Personal Access Token (PAT)](https://www.synapse.org/#!PersonalAccessTokens:0). + +```bash +git clone https://github.com/Sage-Bionetworks/synapse-mcp.git +cd synapse-mcp +pip install -e . +export SYNAPSE_PAT="your_personal_access_token" +synapse-mcp +``` + +Configure your MCP client to point to `http://localhost:8000/mcp` (or the port shown in the startup output). + +--- + +## Available tools + +For the full and up-to-date list of tools, see the [synapse-mcp repository](https://github.com/Sage-Bionetworks/synapse-mcp). At the time of writing, the server exposes tools including: + +- `search_synapse` — full-text search across public and private entities +- `get_entity` — fetch core metadata for any entity by Synapse ID +- `get_entity_annotations` — retrieve custom annotation key/value pairs +- `get_entity_children` — list children within a project or folder +- `get_entity_provenance` — inspect the activity log and inputs/outputs for an entity version + +--- + +## Example prompts + +Once the MCP server is connected, you can interact with Synapse in natural language. Here are some useful prompts to try: + +**Discover data** + +``` +Search Synapse for RNA-seq datasets related to Alzheimer's Disease. +``` + +``` +What files are in the project syn12345678? +``` + +**Inspect metadata** + +``` +What are the annotations on syn9876543? +``` + +``` +Show me the provenance for the latest version of syn11223344. +``` + +**Explore a project** + +``` +List all folders and files in syn5678901 and summarize what the project contains. +``` + +**Combine with code generation** + +``` +Find the Synapse ID for the ROSMAP bulk RNA-seq dataset, then write Python code +using synapseclient to download it and load it into a pandas DataFrame. +``` + +--- + +## Feature requests and feedback + +Have an idea for a new MCP tool or want to report a bug? [Open a support ticket](https://sagebionetworks.jira.com/servicedesk/customer/portal/9/group/16/create/206) via the Sage Bionetworks service desk. diff --git a/docs/index.md b/docs/index.md index 4efc2df19..c87ad13aa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -28,6 +28,10 @@ Installing this package will install `synapseclient`, `synapseutils` and the com * [Further Reading](./explanations/home.md) to gain a deeper understanding of best practices and advanced use cases * Our [release notes](./news.md) +## Use Synapse with AI assistants + +The [Synapse MCP server](./guides/synapse_mcp.md) lets you query Synapse directly from AI tools like Claude, GitHub Copilot, and Cursor using natural language — search datasets, inspect metadata, explore project hierarchies, and generate `synapseclient` code, all without leaving your AI assistant. See the [how-to guide](./guides/synapse_mcp.md) to get started in minutes. + ## Additional Background * Read [about Synapse](https://help.synapse.org/docs/About-Synapse.2058846607.html)—how it got started and how it fits into the bigger data-sharing picture diff --git a/docs/reference/experimental/async/curator.md b/docs/reference/experimental/async/curator.md index bf292948b..1b1654e2a 100644 --- a/docs/reference/experimental/async/curator.md +++ b/docs/reference/experimental/async/curator.md @@ -12,9 +12,14 @@ at your own risk. inherited_members: true members: - get_async + - get_status_async + - update_status_async + - set_active_grid_session_async - delete_async - store_async - list_async + - create_grid_session_async + - set_task_state_async --- [](){ #RecordSet-reference-async } @@ -56,6 +61,11 @@ at your own risk. members: - create_async - export_to_record_set_async + - synchronize_async + - download_csv_async + - import_csv_async + - delete_async + - list_async --- [](){ #query-reference-async } ::: synapseclient.models.Query diff --git a/docs/reference/experimental/async/download_list.md b/docs/reference/experimental/async/download_list.md new file mode 100644 index 000000000..990ac3d2d --- /dev/null +++ b/docs/reference/experimental/async/download_list.md @@ -0,0 +1,30 @@ +# Download List + +The Synapse Download List (cart) lets you queue files for bulk download via the Synapse +web UI or API. Files are downloaded individually rather than packaged into a zip because +download lists can exceed 100 GB. Successfully downloaded files are removed from the cart +automatically, so interrupted runs are safely resumable. + +## API Reference + +[](){ #download-list-reference-async } + +::: synapseclient.operations.download_list_files_async + +::: synapseclient.operations.download_list_manifest_async + +::: synapseclient.operations.download_list_add_async + +::: synapseclient.operations.download_list_remove_async + +::: synapseclient.operations.download_list_clear_async + +--- + +[](){ #download-list-item-reference-async } +## DownloadListItem + +Identifies a specific file version in the download list. Used as input to +download_list_add_async and download_list_remove_async. + +::: synapseclient.operations.DownloadListItem diff --git a/docs/reference/experimental/async/folder.md b/docs/reference/experimental/async/folder.md index 7b29f84ea..d864d3cdc 100644 --- a/docs/reference/experimental/async/folder.md +++ b/docs/reference/experimental/async/folder.md @@ -16,6 +16,8 @@ at your own risk. - copy_async - walk_async - sync_from_synapse_async + - sync_to_synapse_async + - generate_sync_manifest_async - flatten_file_list - map_directory_to_all_contained_files - get_permissions_async @@ -30,3 +32,9 @@ at your own risk. - get_schema_derived_keys_async - get_schema_validation_statistics_async - get_invalid_validation_async + - set_storage_location_async + - get_project_setting_async + - delete_project_setting_async + - get_sts_storage_token_async + - index_files_for_migration_async + - migrate_indexed_files_async diff --git a/docs/reference/experimental/async/project.md b/docs/reference/experimental/async/project.md index e3adfa9fc..6c948268b 100644 --- a/docs/reference/experimental/async/project.md +++ b/docs/reference/experimental/async/project.md @@ -15,6 +15,8 @@ at your own risk. - delete_async - walk_async - sync_from_synapse_async + - sync_to_synapse_async + - generate_sync_manifest_async - flatten_file_list - map_directory_to_all_contained_files - get_permissions_async @@ -29,3 +31,9 @@ at your own risk. - get_schema_derived_keys_async - get_schema_validation_statistics_async - get_invalid_validation_async + - set_storage_location_async + - get_project_setting_async + - delete_project_setting_async + - get_sts_storage_token_async + - index_files_for_migration_async + - migrate_indexed_files_async diff --git a/docs/reference/experimental/async/storage_location.md b/docs/reference/experimental/async/storage_location.md new file mode 100644 index 000000000..cf9630de2 --- /dev/null +++ b/docs/reference/experimental/async/storage_location.md @@ -0,0 +1,22 @@ +# StorageLocation + +Contained within this file are experimental interfaces for working with the Synapse Python +Client. Unless otherwise noted these interfaces are subject to change at any time. Use +at your own risk. + +## API Reference + +::: synapseclient.models.StorageLocation + options: + inherited_members: true + members: + - store_async + - get_async + +--- + +::: synapseclient.models.StorageLocationType + +--- + +::: synapseclient.models.UploadType diff --git a/docs/reference/experimental/mixins/storage_location.md b/docs/reference/experimental/mixins/storage_location.md new file mode 100644 index 000000000..a32bfd351 --- /dev/null +++ b/docs/reference/experimental/mixins/storage_location.md @@ -0,0 +1,11 @@ +# Storage Location + +::: synapseclient.models.mixins.StorageLocationConfigurable + +--- + +::: synapseclient.models.mixins.ProjectSettingsMixin + +--- + +::: synapseclient.models.protocols.storage_location_mixin_protocol.StorageLocationConfigurableSynchronousProtocol diff --git a/docs/reference/experimental/sync/curator.md b/docs/reference/experimental/sync/curator.md index b02244aab..f0866edef 100644 --- a/docs/reference/experimental/sync/curator.md +++ b/docs/reference/experimental/sync/curator.md @@ -12,9 +12,14 @@ at your own risk. inherited_members: true members: - get + - get_status + - update_status + - set_active_grid_session - delete - store - list + - create_grid_session + - set_task_state --- [](){ #RecordSet-reference } @@ -56,6 +61,11 @@ at your own risk. members: - create - export_to_record_set + - synchronize + - download_csv + - import_csv + - delete + - list --- [](){ #query-reference } ::: synapseclient.models.Query diff --git a/docs/reference/experimental/sync/download_list.md b/docs/reference/experimental/sync/download_list.md new file mode 100644 index 000000000..8e3a1829c --- /dev/null +++ b/docs/reference/experimental/sync/download_list.md @@ -0,0 +1,42 @@ +[](){ #download-list-reference-sync } +# Download List + +The Synapse Download List (cart) lets you queue files for bulk download via the Synapse +web UI or API. Files are downloaded individually rather than packaged into a zip because +download lists can exceed 100 GB. Successfully downloaded files are removed from the cart +automatically, so interrupted runs are safely resumable. + +## Example + +```python +from synapseclient import Synapse +from synapseclient.operations import download_list_files + +syn = Synapse() +syn.login() + +# Download all files in the cart to a local directory +manifest_path = download_list_files(download_location="./downloads") +``` + +## API Reference + +::: synapseclient.operations.download_list_files + +::: synapseclient.operations.download_list_manifest + +::: synapseclient.operations.download_list_add + +::: synapseclient.operations.download_list_remove + +::: synapseclient.operations.download_list_clear + +--- + +[](){ #download-list-item-reference-sync } +## DownloadListItem + +Identifies a specific file version in the download list. Used as input to +download_list_add and download_list_remove. + +::: synapseclient.operations.DownloadListItem diff --git a/docs/reference/experimental/sync/folder.md b/docs/reference/experimental/sync/folder.md index 43272ea30..9246f0cf4 100644 --- a/docs/reference/experimental/sync/folder.md +++ b/docs/reference/experimental/sync/folder.md @@ -27,6 +27,8 @@ at your own risk. - copy - walk - sync_from_synapse + - sync_to_synapse + - generate_sync_manifest - flatten_file_list - map_directory_to_all_contained_files - get_permissions @@ -41,3 +43,9 @@ at your own risk. - get_schema_derived_keys - get_schema_validation_statistics - get_invalid_validation + - set_storage_location + - get_project_setting + - delete_project_setting + - get_sts_storage_token + - index_files_for_migration + - migrate_indexed_files diff --git a/docs/reference/experimental/sync/project.md b/docs/reference/experimental/sync/project.md index 4e2f35a26..3df9de76b 100644 --- a/docs/reference/experimental/sync/project.md +++ b/docs/reference/experimental/sync/project.md @@ -26,6 +26,8 @@ at your own risk. - delete - walk - sync_from_synapse + - sync_to_synapse + - generate_sync_manifest - flatten_file_list - map_directory_to_all_contained_files - get_permissions @@ -40,3 +42,9 @@ at your own risk. - get_schema_derived_keys - get_schema_validation_statistics - get_invalid_validation + - set_storage_location + - get_project_setting + - delete_project_setting + - get_sts_storage_token + - index_files_for_migration + - migrate_indexed_files diff --git a/docs/reference/experimental/sync/storage_location.md b/docs/reference/experimental/sync/storage_location.md new file mode 100644 index 000000000..0ee7b4c6d --- /dev/null +++ b/docs/reference/experimental/sync/storage_location.md @@ -0,0 +1,23 @@ +[](){ #storage-location-reference-sync } +# StorageLocation + +Contained within this file are experimental interfaces for working with the Synapse Python +Client. Unless otherwise noted these interfaces are subject to change at any time. Use +at your own risk. + +## API Reference + +::: synapseclient.models.StorageLocation + options: + inherited_members: true + members: + - store + - get + +--- + +::: synapseclient.models.StorageLocationType + +--- + +::: synapseclient.models.UploadType diff --git a/docs/scripts/object_orientated_programming_poc/synapse_project.py b/docs/scripts/object_orientated_programming_poc/synapse_project.py index 9ee8186a9..d5a5acf21 100644 --- a/docs/scripts/object_orientated_programming_poc/synapse_project.py +++ b/docs/scripts/object_orientated_programming_poc/synapse_project.py @@ -10,6 +10,7 @@ All steps also include setting a number of annotations for the objects. """ + import os import uuid from datetime import datetime, timedelta, timezone diff --git a/docs/tutorials/authentication.md b/docs/tutorials/authentication.md index 347dc1e88..91db36ee5 100644 --- a/docs/tutorials/authentication.md +++ b/docs/tutorials/authentication.md @@ -41,10 +41,9 @@ For writing code using the Synapse Python client that is easy to share with othe The Synapse Python Client supports multiple profiles within the `~/.synapseConfig` file, enabling users to manage credentials for multiple accounts. Each profile is defined in its own `[profile ]` section. A default profile can still be defined using `[default]`. -When installing the Synapse Python client, the `~/.synapseConfig` is added to your home directory. +### Create/Modify the `~/.synapseConfig` file with the Command line Client -### Automatically modifying the `~/.synapseConfig` file with the Command line Client -You may modify the `~/.synapseConfig` file by utilizing the [command line client command and following the interactive prompts](./command_line_client.md#config): +After installing the Synapse Python client, run the [command line client command and following the interactive prompts](./command_line_client.md#config) to create a `.synapseConfig` file in your home directory. You may also modify the `~/.synapseConfig` file by utilizing this command. #### Modifying the synapse config for multiple profiles diff --git a/docs/tutorials/configuration.md b/docs/tutorials/configuration.md index 021d635d4..38d3b15c8 100644 --- a/docs/tutorials/configuration.md +++ b/docs/tutorials/configuration.md @@ -2,32 +2,107 @@ The Synapse Python client can be configured either programmatically or by using a configuration file. -**The default configuration file does not need to be modified for most use-cases**. +!!! note "Default Configuration" + The default configuration file does not need to be modified for most use-cases +After installing the Synapse Python client, run the `synapse config` CLI command to create a `.synapseConfig` file in your home directory. This file stores configuration options including your Synapse auth token, cache location, multi-threading settings, and storage credentials. -When installing the Synapse Python client, the `.synapseConfig` is added to your home directory. This configuration file is used to store a number of configuration options, including your Synapse authtoken, cache, and multi-threading settings. - -A full example `.synapseConfig` can be found in the [github repository](https://github.com/Sage-Bionetworks/synapsePythonClient/blob/develop/synapseclient/.synapseConfig). +For a reference showing the available settings and their meanings, see the full annotated example `.synapseConfig` in the [GitHub repository](https://github.com/Sage-Bionetworks/synapsePythonClient/blob/develop/synapseclient/.synapseConfig). ## `.synapseConfig` sections -### `[authentication]` +### `[default]` and `[profile ]` + +Holds Synapse login credentials. `[default]` is used when no profile is specified; named profiles use `[profile ]` syntax. See the [authentication](./authentication.md) document for full details including how to create tokens, select profiles, and use environment variables. + +### `[sftp://hostname]` + +Credentials for files stored on SFTP servers. Use one section per server; the section name is the full SFTP URL. + +| Key | Description | +| --- | --- | +| `username` | Username for the SFTP server. | +| `password` | Password for the SFTP server. | + +```ini +[sftp://some.sftp.url.com] +username = sftpuser +password = sftppassword +``` -See details on this section in the [authentication](./authentication.md) document. +### `[https://s3.amazonaws.com/bucket_name]` + +Credentials for files stored in AWS S3 or S3-compatible storage that Synapse does not manage access for. Use one section per bucket; the section name is the full endpoint URL including the bucket name. + +| Key | Description | +| --- | --- | +| `profile_name` | Name of an AWS CLI profile from `~/.aws/credentials`. If omitted, the `default` AWS profile is used. | + +```ini +[https://s3.amazonaws.com/bucket_name] +profile_name = local_credential_profile_name +``` + +For more information on AWS credentials files, see the [AWS CLI documentation](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html). ### `[cache]` -Your downloaded files are cached to avoid repeat downloads of the same file. Change 'location' to use a different folder on your computer as the cache location +Downloaded files are cached to avoid repeat downloads of the same file. + +| Key | Description | +| --- | --- | +| `location` | Path to the cache directory. Supports `~` and environment variables. Default: `~/.synapseCache`. | + +```ini +[cache] +location = ~/.synapseCache +``` + +### `[debug]` + +When this section is present (no keys required), the client prints debug-level log output. Equivalent to passing `debug=True` to the `Synapse()` constructor. + +```ini +[debug] +``` ### `[endpoints]` -Configuring these will cause the Python client to use these as Synapse service endpoints instead of the default prod endpoints. +Override the default Synapse production service endpoints. Useful for testing against staging or development environments. + +| Key | Description | +| --- | --- | +| `repoEndpoint` | Synapse repository REST API endpoint. | +| `authEndpoint` | Synapse authentication service endpoint. | +| `fileHandleEndpoint` | Synapse file service endpoint. | +| `portalEndpoint` | Synapse web portal URL. | + +Note: The following are the default endpoints. + +```ini +[endpoints] +repoEndpoint = https://repo-prod.prod.sagebase.org/repo/v1 +authEndpoint = https://auth-prod.prod.sagebase.org/auth/v1 +fileHandleEndpoint = https://file-prod.prod.sagebase.org/file/v1 +portalEndpoint = https://www.synapse.org/ +``` ### `[transfer]` -Settings to configure how Synapse uploads/downloads data. +Settings to configure how Synapse uploads and downloads data. + +| Key | Description | +| --- | --- | +| `max_threads` | Number of concurrent threads/connections for file transfers. Applies to AWS S3 transfers (uploads and downloads). Default: `min(cpu_count + 4, 128)`. Maximum: `128`. Minimum: `1`. | +| `use_boto_sts` | If `true`, use AWS STS (Security Token Service) to obtain temporary credentials for S3 transfers instead of using stored AWS credentials directly. Valid values: `true` or `false` (case-insensitive). Default: `false`. | + +```ini +[transfer] +max_threads = 16 +use_boto_sts = false +``` -You may also set the `max_threads` programmatically via: +You may also set `max_threads` programmatically: ```python import synapseclient diff --git a/docs/tutorials/python/activity.md b/docs/tutorials/python/activity.md index a256965c4..994c87dc6 100644 --- a/docs/tutorials/python/activity.md +++ b/docs/tutorials/python/activity.md @@ -1,13 +1,9 @@ # Activity/Provenance -[See the current available tutorial](../python_client.md#provenance) -![Under Construction](../../assets/under_construction.png) - -Provenance is a concept describing the origin of something. In Synapse, it is used to describe the connections between the workflow steps used to create a particular file or set of results. Data analysis often involves multiple steps to go from a raw data file to a finished analysis. Synapse’s provenance tools allow users to keep track of each step involved in an analysis and share those steps with other users. +Provenance is a concept describing the origin of something. In Synapse, it is used to describe the connections between the workflow steps used to create a particular file or set of results. Data analysis often involves multiple steps to go from a raw data file to a finished analysis. Synapse's provenance tools allow users to keep track of each step involved in an analysis and share those steps with other users. The model Synapse uses for provenance is based on the [W3C provenance spec](https://www.w3.org/TR/prov-n/) where items are derived from an activity which has components that were **used** and components that were **executed**. Think of the **used** items as input files and **executed** items as software or code. Both **used** and **executed** items can reside in Synapse or in URLs such as a link to a GitHub commit or a link to a specific version of a software tool. -[Dive into Activity/Provenance further here](../../explanations/domain_models_of_synapse.md#activityprovenance) ## Tutorial Purpose In this tutorial you will: @@ -18,4 +14,108 @@ In this tutorial you will: 1. Delete an activity ## Prerequisites -- In order to follow this tutorial you will need to have a [Project](./project.md) created with at least one [File](./file.md) with multiple [Versions](./versions.md). +- In order to follow this tutorial you will need to have a [Project](./project.md) created with a Folder named `biospecimen_experiment_1` containing at least one [File](./file.md). You will also need the Synapse ID of that file (e.g. `synNNNNN`). + +## 1. Add a new Activity to your File + +#### First retrieve the project, folder and a file is created within that folder to track provenance + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/activity.py:retrieve_project_folder_file" +``` + +#### Create an Activity and attach it to the file + +An `Activity` captures what was **used** (input data and reference URLs) and **executed** (code and software) to produce a file. Here we record a QC pipeline run on the biospecimen data: + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/activity.py:create_activity" +``` + +
+ You'll notice the output looks like: + +``` +Stored file: fileA.txt (version 1) with activity: Quality Control Analysis +``` +
+ + +## 2. Add a new Activity to a specific version of your File + +Each time you store an updated file, Synapse creates a new version. You can associate a distinct activity with each version to capture the full history of how the data evolved. Here we record a downstream analysis step that used the QC-passed data from version 1: + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/activity.py:add_activity_to_version" +``` + +
+ You'll notice the output looks like: + +``` +Stored activity 'Downstream Analysis' on file fileA.txt (version 2) +``` +
+ + +## 3. Print stored activities on your File + +Use `Activity.from_parent()` to retrieve the provenance for any version of a file. Pass a `parent_version_number` to retrieve the activity for a specific older version: + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/activity.py:print_activities" +``` + +
+ You'll notice the output looks like: + +``` +Activity on latest version (v1): + Name: Downstream Analysis + Description: Downstream analysis of QC-passed biospecimen samples. + Used: UsedURL(name='Seurat v5.0.0', url='https://github.com/satijalab/seurat/releases/tag/v5.0.0') + Used: UsedEntity(target_id='syn12345678', target_version_number=1) + Executed: UsedURL(name='Downstream Analysis Script', url='https://github.com/Sage-Bionetworks/analysis-scripts/blob/v1.0/downstream_analysis.py') + +Activity on version 1: + Name: Quality Control Analysis + Description: Initial QC analysis of biospecimen data using the FastQC pipeline. +``` +
+ + +## 4. Delete an activity + +Deleting an activity is a two-step process: first call `disassociate_from_entity()` to remove the link between the activity and the file version, then call `delete()` to remove the activity record from Synapse entirely: + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/activity.py:delete_activity" +``` + +
+ You'll notice the output looks like: + +``` +Deleted activity from: fileA.txt (version 2) +Activity after deletion: None +``` +
+ + +## Source code for this tutorial + +
+ Click to show me + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/activity.py" +``` +
+ +## References used in this tutorial + +- [Activity][synapseclient.models.Activity] +- [UsedEntity][synapseclient.models.UsedEntity] +- [UsedURL][synapseclient.models.UsedURL] +- [File][file-reference-sync] +- [syn.login][synapseclient.Synapse.login] diff --git a/docs/tutorials/python/annotation.md b/docs/tutorials/python/annotation.md index 735351225..5cb12dc2f 100644 --- a/docs/tutorials/python/annotation.md +++ b/docs/tutorials/python/annotation.md @@ -25,19 +25,19 @@ In this tutorial you will: #### First let's retrieve all of the Synapse IDs we are going to use ```python -{!docs/tutorials/python/tutorial_scripts/annotation.py!lines=5-23} +--8<-- "docs/tutorials/python/tutorial_scripts/annotation.py:retrieve_synapse_ids" ``` #### Next let's define the annotations I want to set ```python -{!docs/tutorials/python/tutorial_scripts/annotation.py!lines=25-31} +--8<-- "docs/tutorials/python/tutorial_scripts/annotation.py:define_annotations" ``` #### Finally we'll loop over all of the files and set their annotations ```python -{!docs/tutorials/python/tutorial_scripts/annotation.py!lines=33-51} +--8<-- "docs/tutorials/python/tutorial_scripts/annotation.py:set_annotations_loop" ``` @@ -64,7 +64,7 @@ In order for the following script to work please replace the files with ones tha already exist on your local machine. ```python -{!docs/tutorials/python/tutorial_scripts/annotation.py!lines=53-78} +--8<-- "docs/tutorials/python/tutorial_scripts/annotation.py:upload_with_annotations" ```
@@ -115,7 +115,7 @@ files in the synapse web UI. It should look similar to: Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/annotation.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/annotation.py" ```
diff --git a/docs/tutorials/python/dataset.md b/docs/tutorials/python/dataset.md index 61e234efe..6078f5db6 100644 --- a/docs/tutorials/python/dataset.md +++ b/docs/tutorials/python/dataset.md @@ -29,7 +29,7 @@ In this tutorial, you will: Let's get started by authenticating with Synapse and retrieving the ID of your project. ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=3-24} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:setup" ``` ## 2. Create your Dataset @@ -37,7 +37,7 @@ Let's get started by authenticating with Synapse and retrieving the ID of your p Next, we will create the dataset. We will use the project ID to tell Synapse where we want the dataset to be created. After this step, we will have a Dataset object with all of the needed information to start building the dataset. ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=29-30} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:create_dataset" ``` Because we haven't added any files to the dataset yet, it will be empty, but if you view the dataset's schema in the UI, you will notice that datasets come with default columns that help to describe each file that we add to the dataset. @@ -50,20 +50,20 @@ Let's add some files to the dataset now. There are three ways to add files to a 1. Add an Entity Reference to a file with its ID and version ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=34-36} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:add_entity_ref" ``` 2. Add a File with its ID and version ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=38-40} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:add_file" ``` 3. Add a Folder. When adding a folder, all child files inside of the folder are added to the dataset recursively. ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=42-44} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:add_folder" ``` Whenever we make changes to the dataset, we need to call the `store()` method to save the changes to Synapse. ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=46} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:store_dataset" ``` And now we are able to see our dataset with all of the files that we added to it. @@ -75,7 +75,7 @@ And now we are able to see our dataset with all of the files that we added to it Now that we have a dataset with some files in it, we can retrieve the dataset from Synapse the next time we need to use it. ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=50-52} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:retrieve_dataset" ``` ## 5. Query the dataset @@ -83,7 +83,7 @@ Now that we have a dataset with some files in it, we can retrieve the dataset fr Now that we have a dataset with some files in it, we can query the dataset to find files that match certain criteria. ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=56-59} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:query_dataset" ``` ## 6. Add a custom column to the dataset @@ -91,13 +91,13 @@ Now that we have a dataset with some files in it, we can query the dataset to fi We can also add a custom column to the dataset. This will allow us to annotate files in the dataset with additional information. ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=63-69} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:add_custom_column" ``` Our custom column isn't all that useful empty, so let's update the dataset with some values. ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=72-80} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:update_custom_column_values" ``` ## 7. Save a snapshot of the dataset @@ -105,7 +105,7 @@ Our custom column isn't all that useful empty, so let's update the dataset with Finally, let's save a snapshot of the dataset. This creates a read-only version of the dataset that captures the current state of the dataset and can be referenced later. ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!lines=84-88} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py:snapshot_dataset" ``` ## Source Code for this Tutorial @@ -114,7 +114,7 @@ Finally, let's save a snapshot of the dataset. This creates a read-only version Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/dataset.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset.py" ``` diff --git a/docs/tutorials/python/dataset_collection.md b/docs/tutorials/python/dataset_collection.md index 9f132d6d7..939650452 100644 --- a/docs/tutorials/python/dataset_collection.md +++ b/docs/tutorials/python/dataset_collection.md @@ -23,7 +23,7 @@ In this tutorial, you will: Let's get started by authenticating with Synapse and retrieving the ID of your project. ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!lines=3-16} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py:setup" ``` ## 2. Create your Dataset Collection @@ -31,7 +31,7 @@ Let's get started by authenticating with Synapse and retrieving the ID of your p Next, we will create the Dataset Collection using the project ID to tell Synapse where we want the Dataset Collection to be created. After this step, we will have a Dataset Collection object with all of the necessary information to start building the collection. ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!lines=25-33} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py:create_collection" ``` Because we haven't added any datasets to the collection yet, it will be empty, but if you view the Dataset Collection's schema in the UI, you will notice that Dataset Collections come with default columns. @@ -43,13 +43,13 @@ Because we haven't added any datasets to the collection yet, it will be empty, b Now, let's add some datasets to the collection. We will loop through our dataset ids and add each dataset to the collection using the `add_item` method. ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!lines=37-38} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py:add_datasets" ``` Whenever we make changes to the Dataset Collection, we need to call the `store()` method to save the changes to Synapse. ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!lines=40} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py:store_collection" ``` And now we are able to see our Dataset Collection with all of the datasets that we added to it. @@ -61,7 +61,7 @@ And now we are able to see our Dataset Collection with all of the datasets that Now that our Dataset Collection has been created and we have added some Datasets to it, we can retrieve the Dataset Collection from Synapse the next time we need to use it. ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!lines=44-46} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py:retrieve_collection" ``` ## 5. Add a custom column to the Dataset Collection @@ -69,13 +69,13 @@ Now that our Dataset Collection has been created and we have added some Datasets In addition to the default columns, you may want to annotate items in your DatasetCollection using custom columns. ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!lines=50-56} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py:add_custom_column" ``` Our custom column isn't all that useful empty, so let's update the Dataset Collection with some values. ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!lines=59-67} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py:update_custom_column_values" ``` ## 6. Query the Dataset Collection @@ -83,7 +83,7 @@ Our custom column isn't all that useful empty, so let's update the Dataset Colle If you want to query your DatasetCollection for items that match certain criteria, you can do so using the `query` method. ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!lines=71-74} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py:query_collection" ``` ## 7. Save a snapshot of the Dataset Collection @@ -91,7 +91,7 @@ If you want to query your DatasetCollection for items that match certain criteri Finally, let's save a snapshot of the Dataset Collection. This creates a read-only version of the Dataset Collection that captures the current state of the Dataset Collection and can be referenced later. ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!lines=77} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py:snapshot_collection" ``` ## Source Code for this Tutorial @@ -100,7 +100,7 @@ Finally, let's save a snapshot of the Dataset Collection. This creates a read-on Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/dataset_collection.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/dataset_collection.py" ``` diff --git a/docs/tutorials/python/download_data_by_synid.md b/docs/tutorials/python/download_data_by_synid.md new file mode 100644 index 000000000..cb9d6abda --- /dev/null +++ b/docs/tutorials/python/download_data_by_synid.md @@ -0,0 +1,66 @@ +[](){ #tutorial-downloading-data-by-synapse-id } +# Downloading data by Synapse ID + +This tutorial shows how to download any set of files from Synapse using their +Synapse IDs. Rather than syncing an entire project or folder, this approach lets +you target exactly the files you need and download them **concurrently** — even +directing each file to a different local directory. + + +## Tutorial Purpose +In this tutorial you will: + +1. Build a mapping of Synapse IDs to local download directories +1. Download all files concurrently using the async API + + +## Prerequisites +* Make sure that you have completed the following tutorials: + * [Folder](./folder.md) + * [File](./file.md) +* The target directories (`~/temp/subdir1`, etc.) must exist before running the + script. Create them or replace them with directories of your choice. + + +## 1. Build a mapping of Synapse IDs to download directories + +Create a dictionary that maps each Synapse ID to the local path where that file +should be saved. Files can be directed to different directories as needed. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_by_synid.py:syn_id_mapping" +``` + + +## 2. Download all files concurrently + +Use `File.get_async()` together with `asyncio.gather` to kick off every download +at the same time and wait for them all to finish. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_by_synid.py:concurrent_download" +``` + +
+ After all downloads finish you'll see output like: +``` +Retrieved 12 files +``` +
+ + +## Source code for this tutorial + +
+ Click to show me + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_by_synid.py" +``` +
+ +## References used in this tutorial + +- [File][synapseclient.models.File] +- [File.get_async][synapseclient.models.File.get_async] +- [syn.login][synapseclient.Synapse.login] diff --git a/docs/tutorials/python/download_data_in_bulk.md b/docs/tutorials/python/download_data_in_bulk.md index 04e27ec92..08bd8a60d 100644 --- a/docs/tutorials/python/download_data_in_bulk.md +++ b/docs/tutorials/python/download_data_in_bulk.md @@ -28,6 +28,7 @@ With a project that has this example layout: In this tutorial you will: 1. Download all files/folder from a project +1. Control manifest CSV generation during download 1. Download all files/folders for a specific folder within the project 1. Loop over all files/folders on the project/folder object instances @@ -44,48 +45,75 @@ another desired directory exists. #### First let's set up some constants we'll use in this script ```python -{!docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py!lines=5-19} +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py:setup" ``` #### Next we'll create an instance of the Project we are going to sync ```python -{!docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py!lines=20-22} +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py:get_project" ``` #### Finally we'll sync the project from synapse to your local machine ```python -{!docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py!lines=23-28} +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py:sync_project" ```
While syncing your project you'll see results like: ``` -Syncing Project (syn53185532:My uniquely named project about Alzheimer's Disease) from Synapse. -Syncing Folder (syn53205630:experiment_notes) from Synapse. -Syncing Folder (syn53205632:notes_2022) from Synapse. -Syncing Folder (syn53205629:single_cell_RNAseq_batch_1) from Synapse. -Syncing Folder (syn53205656:single_cell_RNAseq_batch_2) from Synapse. -Syncing Folder (syn53205631:notes_2023) from Synapse. -Downloading [####################]100.00% 4.0bytes/4.0bytes (1.8kB/s) fileA.txt Done... -Downloading [####################]100.00% 3.0bytes/3.0bytes (1.1kB/s) SRR92345678_R1.fastq.gz Done... -Downloading [####################]100.00% 4.0bytes/4.0bytes (1.7kB/s) SRR12345678_R1.fastq.gz Done... -Downloading [####################]100.00% 4.0bytes/4.0bytes (1.9kB/s) fileC.txt Done... -Downloading [####################]100.00% 4.0bytes/4.0bytes (2.7kB/s) fileB.txt Done... -Downloading [####################]100.00% 4.0bytes/4.0bytes (2.7kB/s) SRR12345678_R2.fastq.gz Done... -Downloading [####################]100.00% 4.0bytes/4.0bytes (2.6kB/s) SRR12345678_R2.fastq.gz Done... -Downloading [####################]100.00% 4.0bytes/4.0bytes (1.8kB/s) SRR12345678_R1.fastq.gz Done... -Downloading [####################]100.00% 3.0bytes/3.0bytes (1.5kB/s) SRR92345678_R2.fastq.gz Done... -Downloading [####################]100.00% 4.0bytes/4.0bytes (1.6kB/s) fileD.txt Done... -['single_cell_RNAseq_batch_2', 'single_cell_RNAseq_batch_1', 'experiment_notes'] +[syn74583648:My uniquely named project about Alzheimer's Disease]: Syncing Project from Synapse. +[syn74584000:biospecimen_experiment_1]: Syncing Folder from Synapse. +[syn74584007:single_cell_RNAseq_batch_2]: Syncing Folder from Synapse. +[syn74584001:biospecimen_experiment_2]: Syncing Folder from Synapse. +[syn74584006:single_cell_RNAseq_batch_1]: Syncing Folder from Synapse. +[syn74584146]: Downloaded to /biospecimen_experiment_1/fileB.png +[syn74584154]: Downloaded to /biospecimen_experiment_2/fileD.png +[syn74584155]: Downloaded to /biospecimen_experiment_2/fileC.png +[syn74584188]: Downloaded to /single_cell_RNAseq_batch_1/SRR12345678_R1.fastq.png +[syn74584147]: Downloaded to /biospecimen_experiment_1/fileA.png +[syn74584206]: Downloaded to /single_cell_RNAseq_batch_2/SRR12345678_R1.fastq.png +[syn74584189]: Downloaded to /single_cell_RNAseq_batch_1/SRR12345678_R2.fastq.png +[syn74584207]: Downloaded to /single_cell_RNAseq_batch_2/SRR12345678_R2.fastq.png +Downloading files: 100%|████████████████████| 1.31M/1.31M [00:02<00:00, 606kB/s] +Project(id='syn74583648', name="My uniquely named project about Alzheimer's Disease", files=[], folders=[ + Folder(id='syn74584000', name='biospecimen_experiment_1', parent_id='syn74583648', files=[ + File(id='syn74584147', name='fileA.png', path='/biospecimen_experiment_1/fileA.png', parent_id='syn74584000', ...), + File(id='syn74584146', name='fileB.png', path='/biospecimen_experiment_1/fileB.png', parent_id='syn74584000', ...) + ], folders=[], ...), + Folder(id='syn74584001', name='biospecimen_experiment_2', parent_id='syn74583648', files=[ + File(id='syn74584155', name='fileC.png', path='/biospecimen_experiment_2/fileC.png', parent_id='syn74584001', ...), + File(id='syn74584154', name='fileD.png', path='/biospecimen_experiment_2/fileD.png', parent_id='syn74584001', ...) + ], folders=[], ...), + Folder(id='syn74584006', name='single_cell_RNAseq_batch_1', parent_id='syn74583648', files=[ + File(id='syn74584188', name='SRR12345678_R1.fastq.png', path='/single_cell_RNAseq_batch_1/SRR12345678_R1.fastq.png', parent_id='syn74584006', ...), + File(id='syn74584189', name='SRR12345678_R2.fastq.png', path='/single_cell_RNAseq_batch_1/SRR12345678_R2.fastq.png', parent_id='syn74584006', ...) + ], folders=[], ...), + Folder(id='syn74584007', name='single_cell_RNAseq_batch_2', parent_id='syn74583648', files=[ + File(id='syn74584206', name='SRR12345678_R1.fastq.png', path='/single_cell_RNAseq_batch_2/SRR12345678_R1.fastq.png', parent_id='syn74584007', ...), + File(id='syn74584207', name='SRR12345678_R2.fastq.png', path='/single_cell_RNAseq_batch_2/SRR12345678_R2.fastq.png', parent_id='syn74584007', ...) + ], folders=[], ...) +], ...) ```
-## 2. Download all files/folders for a specific folder within the project +## 2. Control manifest CSV generation during download + +By default (`manifest="all"`), `sync_from_synapse` writes a `manifest.csv` into every +synced directory. The manifest.csv is interoperable with sync_to_synapse, the Synapse UI download cart, and `download_list_files`. + +Use `manifest="root"` to write a single manifest at the root path, or +`manifest="suppress"` to skip manifest generation entirely. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py:sync_project_with_root_manifest" +``` + +## 3. Download all files/folders for a specific folder within the project Following the same set of steps let's sync a specific folder ```python -{!docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py!lines=30-36} +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py:sync_folder" ```
@@ -105,12 +133,12 @@ download the content again. If you were to use an `if_collision` of `"overwrite. you would see that when the content on your machine does not match Synapse the file will be overwritten. -## 3. Loop over all files/folders on the project/folder object instances +## 4. Loop over all files/folders on the project/folder object instances Using `sync_from_synapse` will load into memory the state of all Folders and Files retrieved from Synapse. This will allow you to loop over the contents of your container. ```python -{!docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py!lines=37-47} +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py:loop_over_project_folder" ```
@@ -140,6 +168,6 @@ File in single_cell_RNAseq_batch_2: SRR12345678_R2.fastq.gz Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py" ```
diff --git a/docs/tutorials/python/entityview.md b/docs/tutorials/python/entityview.md index b6feacba0..0bd929520 100644 --- a/docs/tutorials/python/entityview.md +++ b/docs/tutorials/python/entityview.md @@ -44,7 +44,7 @@ and [File](./file.md) tutorials. First let's set up some constants we'll use in this script, and find the ID of our project ```python -{!docs/tutorials/python/tutorial_scripts/entityview.py!lines=5-22} +--8<-- "docs/tutorials/python/tutorial_scripts/entityview.py:setup" ``` ## 2. Create a EntityView with Columns @@ -53,19 +53,19 @@ Now, we will create 4 columns to add to our EntityView. Recall that any data add these columns will be stored as an annotation on the underlying File. ```python -{!docs/tutorials/python/tutorial_scripts/entityview.py!lines=24-31} +--8<-- "docs/tutorials/python/tutorial_scripts/entityview.py:create_columns" ``` Next we're going to store what we have to Synapse and print out the results ```python -{!docs/tutorials/python/tutorial_scripts/entityview.py!lines=33-47} +--8<-- "docs/tutorials/python/tutorial_scripts/entityview.py:create_view" ``` ## 3. Query the EntityView ```python -{!docs/tutorials/python/tutorial_scripts/entityview.py!lines=49-54} +--8<-- "docs/tutorials/python/tutorial_scripts/entityview.py:query_view" ```
@@ -85,7 +85,7 @@ value. Since the results were returned as a Pandas DataFrame you have many options to search through and set values on your data. ```python -{!docs/tutorials/python/tutorial_scripts/entityview.py!lines=56-66} +--8<-- "docs/tutorials/python/tutorial_scripts/entityview.py:update_rows" ``` A note on `wait_for_eventually_consistent_view`: EntityViews in Synapse are eventually @@ -104,7 +104,7 @@ to include in your view. In order to accomplish this you may modify the `scope_i attribute on your view. ```python -{!docs/tutorials/python/tutorial_scripts/entityview.py!lines=69-73} +--8<-- "docs/tutorials/python/tutorial_scripts/entityview.py:update_scope" ``` ## 6. Update the types of Entities included in your EntityView @@ -113,7 +113,7 @@ You may also want to change what types of Entities may be included in your view. accomplish this you'll be modifying the `view_type_mask` attribute on your view. ```python -{!docs/tutorials/python/tutorial_scripts/entityview.py!lines=75-79} +--8<-- "docs/tutorials/python/tutorial_scripts/entityview.py:update_view_type_mask" ``` ## Results @@ -128,7 +128,7 @@ Synapse web UI. It should look similar to: Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/entityview.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/entityview.py" ```
diff --git a/docs/tutorials/python/evaluation.md b/docs/tutorials/python/evaluation.md index 12fc8328a..9a1363053 100644 --- a/docs/tutorials/python/evaluation.md +++ b/docs/tutorials/python/evaluation.md @@ -20,7 +20,7 @@ In this tutorial you will: In this first part, we'll be showing you how to interact with an Evaluation object as well as introducing you to its two core functionalities `store()` and `get()`. ```python -{!docs/tutorials/python/tutorial_scripts/evaluation.py!lines=5-46} +--8<-- "docs/tutorials/python/tutorial_scripts/evaluation.py:create_and_update" ``` ## 2. Update the ACL of an Evaluation on Synapse @@ -28,13 +28,13 @@ In this first part, we'll be showing you how to interact with an Evaluation obje Like Synapse entities, Evaluations have ACLs that can be used to control who has access to your evaluations and what level of access they have. Updating the ACL of an Evaluation object is slightly different from updating other Evaluation components, because the ACL is not an attribute of the Evaluation object. Let's see an example of how this looks: ```python -{!docs/tutorials/python/tutorial_scripts/evaluation.py!lines=54-64} +--8<-- "docs/tutorials/python/tutorial_scripts/evaluation.py:update_acl" ``` You can also remove principals from an ACL by simply feeding `update_acl` an empty list for the `access_type` argument, like so: ```python -{!docs/tutorials/python/tutorial_scripts/evaluation.py!lines=66-67} +--8<-- "docs/tutorials/python/tutorial_scripts/evaluation.py:remove_from_acl" ``` ## 3. Retrieve and delete all Evaluations from a given Project @@ -42,7 +42,7 @@ You can also remove principals from an ACL by simply feeding `update_acl` an emp Now we will show how you can retrieve lists of Evaluation objects, rather than retrieving them one-by-one with `get()`. This is a powerful tool if you want to perform the same action on all the evaluations in a given project, for example, like what we're about to do here: ```python -{!docs/tutorials/python/tutorial_scripts/evaluation.py!lines=69-75} +--8<-- "docs/tutorials/python/tutorial_scripts/evaluation.py:retrieve_and_delete" ``` ## Source code for this tutorial @@ -51,7 +51,7 @@ Now we will show how you can retrieve lists of Evaluation objects, rather than r Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/evaluation.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/evaluation.py" ```
diff --git a/docs/tutorials/python/file.md b/docs/tutorials/python/file.md index d5032d2ee..1261622a8 100644 --- a/docs/tutorials/python/file.md +++ b/docs/tutorials/python/file.md @@ -50,19 +50,19 @@ In this tutorial you will: #### First let's retrieve all of the Synapse IDs we are going to use ```python -{!docs/tutorials/python/tutorial_scripts/file.py!lines=5-30} +--8<-- "docs/tutorials/python/tutorial_scripts/file.py:retrieve_folder_ids" ``` #### Next let's create all of the File objects to upload content ```python -{!docs/tutorials/python/tutorial_scripts/file.py!lines=32-75} +--8<-- "docs/tutorials/python/tutorial_scripts/file.py:create_file_objects" ``` #### Finally we'll store the files in Synapse ```python -{!docs/tutorials/python/tutorial_scripts/file.py!lines=77-85} +--8<-- "docs/tutorials/python/tutorial_scripts/file.py:store_files" ``` @@ -83,7 +83,7 @@ Uploading [####################]100.00% 2.0bytes/2.0bytes (1.8bytes/s) SRR1234 ## 2. Print stored attributes about your files ```python -{!docs/tutorials/python/tutorial_scripts/file.py!lines=87-99} +--8<-- "docs/tutorials/python/tutorial_scripts/file.py:print_attributes" ```
@@ -103,7 +103,7 @@ My file was last modified on: 2023-12-28T21:55:17.971Z Now that your project has a number of Folders and Files let's explore how we can traverse the content stored within the Project. ```python -{!docs/tutorials/python/tutorial_scripts/file.py!lines=101-112} +--8<-- "docs/tutorials/python/tutorial_scripts/file.py:walk_project" ``` @@ -138,7 +138,7 @@ Now that you have created your files you'll be able to inspect this on the Files Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/file.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/file.py" ```
diff --git a/docs/tutorials/python/folder.md b/docs/tutorials/python/folder.md index 346c341fe..9bbe42368 100644 --- a/docs/tutorials/python/folder.md +++ b/docs/tutorials/python/folder.md @@ -55,13 +55,13 @@ In this tutorial you will: ## 1. Create a new folder ```python -{!docs/tutorials/python/tutorial_scripts/folder.py!lines=5-35} +--8<-- "docs/tutorials/python/tutorial_scripts/folder.py:create_folder" ``` ## 2. Print stored attributes about your folder ```python -{!docs/tutorials/python/tutorial_scripts/folder.py!lines=35-49} +--8<-- "docs/tutorials/python/tutorial_scripts/folder.py:print_attributes" ```
@@ -79,7 +79,7 @@ My folder was last modified on: 2023-12-28T20:52:50.193Z ## 3. Create 2 sub-folders ```python -{!docs/tutorials/python/tutorial_scripts/folder.py!lines=52-59} +--8<-- "docs/tutorials/python/tutorial_scripts/folder.py:create_subfolders" ``` ## Results @@ -94,7 +94,7 @@ Now that you have created your folders you'll be able to inspect this on the Fil Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/folder.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/folder.py" ```
diff --git a/docs/tutorials/python/json_schema.md b/docs/tutorials/python/json_schema.md index dac4e51f5..42367d698 100644 --- a/docs/tutorials/python/json_schema.md +++ b/docs/tutorials/python/json_schema.md @@ -25,13 +25,13 @@ By the end of this tutorial, you will: ## 1. Set Up Synapse Python Client ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=1-10} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:setup" ``` ## 2. Take a Look at the Constants and Structure of the JSON Schema ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=13-43} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:constants_and_schema" ``` Derived annotations allow you to define default values for annotations based on schema rules, ensuring consistency and reducing manual input errors. As you can see here, you could use derived annotations to prescribe default annotation values. Please read more about derived annotations [here](https://help.synapse.org/docs/JSON-Schemas.3107291536.html#JSONSchemas-DerivedAnnotations). @@ -40,12 +40,12 @@ Derived annotations allow you to define default values for annotations based on ## 3. Try Create Test Organization and JSON Schema if They Do Not Exist Next, try creating a test organization and register a schema if they do not already exist: ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=46-62} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:create_org_and_schema" ``` Note: If you update your schema, you can re-register it with the organization by assigning a new version number to reflect the changes. Synapse does not allow re-creating a schema with the same version number, so please ensure that each schema version within an organization is unique: ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=64-97} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:update_schema_version" ``` ## 4. Bind the JSON Schema to the Folder @@ -54,7 +54,7 @@ After creating the organization, you can now bind your json schema to a test fol When you bind the schema, you may also include the boolean property `enable_derived_annotations` to have Synapse automatically calculate derived annotations based on the schema: ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=100-108} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:bind_schema" ```
@@ -77,7 +77,7 @@ JSON schema was bound successfully. Please see details below: ## 5. Retrieve the Bound Schema Next, we can retrieve the bound schema: ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=110-113} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:retrieve_bound_schema" ```
@@ -106,12 +106,12 @@ JSON Schema was retrieved successfully. Please see details below: ## 6. Add Invalid Annotations to the Folder and Store, and Validate the Folder against the Schema Try adding invalid annotations to your folder: This step and the step below demonstrate how the system handles invalid annotations and how the schema validation process works. ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=115-119} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:add_invalid_annotations" ``` Try validating the folder. You should be able to see messages related to invalid annotations. ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=123-125} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:validate_folder" ``` @@ -147,12 +147,12 @@ This step is only relevant for container entities, such as a folder or a project Try creating a test file locally and store the file in the folder that we created earlier. Then, try adding invalid annotations to that file. This step demonstrates how the files inside a folder also inherit the schema from the parent entity. ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=129-134} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:create_file_with_invalid_annotations" ``` You could then use `get_schema_validation_statistics` to get information such as the number of children with invalid annotations inside a container. ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=137-141} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:validation_statistics" ``` @@ -171,7 +171,7 @@ Validation statistics were retrieved successfully. Please see details below: You could also use `get_invalid_validation` to see more detailed results of all the children inside a container, which includes all validation messages and validation exception details. ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!lines=143-146} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py:invalid_validation_details" ```
@@ -206,7 +206,7 @@ In the synapse web UI, you could also see your invalid annotations being marked Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/json_schema.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/json_schema.py" ```
diff --git a/docs/tutorials/python/materializedview.md b/docs/tutorials/python/materializedview.md index fdd0e40c7..956c87fd5 100644 --- a/docs/tutorials/python/materializedview.md +++ b/docs/tutorials/python/materializedview.md @@ -30,7 +30,7 @@ You will want to replace `"My uniquely named project about Alzheimer's Disease"` the name of your project. ```python -{!docs/tutorials/python/tutorial_scripts/materializedview.py!lines=3-72} +--8<-- "docs/tutorials/python/tutorial_scripts/materializedview.py:setup" ``` ## 2. Create and query a Materialized View @@ -39,7 +39,7 @@ First, we will create a simple Materialized View that selects all rows from a ta then query it to retrieve the results. ```python -{!docs/tutorials/python/tutorial_scripts/materializedview.py!lines=75-97} +--8<-- "docs/tutorials/python/tutorial_scripts/materializedview.py:basic_view" ```
@@ -62,7 +62,7 @@ Next, we will create a Materialized View that combines data from two tables usin clause and then query it to retrieve the results. ```python -{!docs/tutorials/python/tutorial_scripts/materializedview.py!lines=100-130} +--8<-- "docs/tutorials/python/tutorial_scripts/materializedview.py:join_view" ```
@@ -86,7 +86,7 @@ rows from another table using a LEFT JOIN clause and then query it to retrieve t results. ```python -{!docs/tutorials/python/tutorial_scripts/materializedview.py!lines=133-163} +--8<-- "docs/tutorials/python/tutorial_scripts/materializedview.py:left_join_view" ```
@@ -111,7 +111,7 @@ matches rows from another table using a RIGHT JOIN clause and then query it to r the results. ```python -{!docs/tutorials/python/tutorial_scripts/materializedview.py!lines=166-196} +--8<-- "docs/tutorials/python/tutorial_scripts/materializedview.py:right_join_view" ```
@@ -135,7 +135,7 @@ Finally, we can create a Materialized View that combines rows from two tables us UNION clause and then query it to retrieve the results. ```python -{!docs/tutorials/python/tutorial_scripts/materializedview.py!lines=199-229} +--8<-- "docs/tutorials/python/tutorial_scripts/materializedview.py:union_view" ```
@@ -160,7 +160,7 @@ Results from the materialized view with UNION: Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/materializedview.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/materializedview.py" ```
diff --git a/docs/tutorials/python/migration.md b/docs/tutorials/python/migration.md new file mode 100644 index 000000000..d8f8476cb --- /dev/null +++ b/docs/tutorials/python/migration.md @@ -0,0 +1,100 @@ +# Migrating Files to a New Storage Location + +Storage location migration lets you move files from one Synapse storage location +to another — for example, from Synapse-managed S3 (`SYNAPSE_S3`) to your own +S3 bucket (`EXTERNAL_S3`). The process is intentionally two-phase so you can +review exactly what will be moved before committing to the transfer. + +This tutorial demonstrates how to index a folder's files and then migrate them +to a new storage location using the Python client. + +[Read more about Custom Storage Locations](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html) +[Read more about setting up storage location](./storage_location.md) + +## Tutorial Purpose + +In this tutorial you will: + +1. Set up and get a project and folder +2. Index files in a folder for migration to a destination storage location +3. Review the index results CSV +4. Migrate the indexed files +5. Review the migration results CSV + +## Prerequisites + +* Make sure that you have completed the [Installation](../installation.md) and + [Authentication](../authentication.md) setup. +* You must have a [Project](./project.md) and a destination storage location + already created. See the [Storage Locations tutorial](./storage_location.md). +* Migration is currently supported **only** between S3 storage locations + (`SYNAPSE_S3` and `EXTERNAL_S3`) that reside in the **same AWS region**. + +## How Migration Works + +Migration is a two-phase process: + +1. **Index** — scan the project or folder and record every file that needs to + move into a local SQLite database. +2. **Migrate** — read the index database and copy each file to the destination + storage location, updating the entity's file handle. + +Separating the phases lets you inspect what will be migrated before committing +to the move. + +> **Warning:** Migration modifies existing entities. Always run against a test +> project first and review the index results before migrating production data. + +## 1. Set up and get project + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/migration.py:setup" +``` + +## 2. Index and migrate files + +Phase 1 scans the folder and records all files that need to move. The result is +a `MigrationResult` whose `db_path` points to the local SQLite database. Use +`as_csv` to export the index for review before proceeding. + +Phase 2 reads the index database and performs the actual migration, returning +another `MigrationResult`. Set `continue_on_error=True` to record failures in +the database rather than aborting. Set `force=True` to skip the interactive +confirmation prompt. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/migration.py:index_and_migrate_files" +``` + +Review the index CSV to confirm what was discovered before migration runs: + +![indexresults](./tutorial_screenshots/index_results.png) + +After migration, inspect the results CSV for status details and any errors. +Detailed tracebacks are saved in the exception column of the CSV: + +![migrationresults](./tutorial_screenshots/migration_results.png) + +## Source code for this tutorial + +
+ Click to show me + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/migration.py" +``` +
+ +## References used in this tutorial + +- [Folder][synapseclient.models.Folder] +- [Project][synapseclient.models.Project] +- [FailureStrategy][synapseclient.models.FailureStrategy] +- [MigrationResult][synapseclient.models.services.MigrationResult] +- [syn.login][synapseclient.Synapse.login] +- [Custom Storage Locations Documentation](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html) + +## See also + +- [Storage Location Tutorial](./storage_location.md) — How to create and manage storage locations +- [Storage Location Architecture](../../explanations/storage_location_architecture.md) — In-depth architecture diagrams and design documentation diff --git a/docs/tutorials/python/project.md b/docs/tutorials/python/project.md index 7a3dabc71..7f864f90b 100644 --- a/docs/tutorials/python/project.md +++ b/docs/tutorials/python/project.md @@ -89,7 +89,7 @@ I just got my project: My uniquely named project about Alzheimer's Disease, id: Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/project.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/project.py" ```
diff --git a/docs/tutorials/python/proxy_storage_location.md b/docs/tutorials/python/proxy_storage_location.md new file mode 100644 index 000000000..c9eb8141f --- /dev/null +++ b/docs/tutorials/python/proxy_storage_location.md @@ -0,0 +1,110 @@ +# Proxy Storage Locations in Synapse + +A proxy storage location delegates file access to a proxy server that controls +authentication and access to the underlying storage. Synapse stores only the +metadata; the proxy server handles the actual file retrieval. + +This tutorial demonstrates how to create a proxy storage location, register a +file via a `ProxyFileHandle`, and associate it with a Synapse File entity. + +[Read more about Custom Storage Locations](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html) + +## Tutorial Purpose + +In this tutorial you will: + +1. Set up and get a project +2. Create a proxy storage location and assign it to a folder +3. Register a file by creating a `ProxyFileHandle` via the REST API +4. Associate the `ProxyFileHandle` with a Synapse File entity + +## Prerequisites + +* Make sure that you have completed the [Installation](../installation.md) and + [Authentication](../authentication.md) setup. +* You must have a [Project](./project.md) created and replace the one used in + this tutorial. +* A running proxy server with a shared secret key. See the + [Synapse Proxy Storage documentation](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html) + for proxy server requirements. + +## 1. Set up and get project + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/proxy_storage_location.py:setup" +``` + +## 2. Create a proxy storage location + +Create a `StorageLocation` of type `PROXY`, providing your proxy server URL and +the shared secret key. Setting `benefactor_id` to the project or folder ensures that +access control is inherited from the project or folder. Assign it to a folder so that +files uploaded there are served through the proxy. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/proxy_storage_location.py:create_proxy_storage_location" +``` + +
+ You'll notice the output looks like: + +``` +Created proxy storage location: 12345 + Proxy URL: https://my-proxy-server.example.com + Benefactor ID: syn123456 +``` +
+ +## 3. Register a file via ProxyFileHandle + +Files in proxy storage are **not** uploaded through the UI or Python client. Instead, you +register a file that already exists on the proxy server by posting a +`ProxyFileHandle` to the Synapse file service. You provide the file's MD5, +size, and the relative path used by the proxy to serve it. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/proxy_storage_location.py:create_proxy_file_handle" +``` + +
+ You'll notice the output looks like: + +``` +{"id": ..., "etag":..., ..., "filePath":...} +``` +
+ +## 4. Associate the ProxyFileHandle with a File entity + +Create a `File` entity using the `data_file_handle_id` returned above. Synapse +stores the metadata and uses the `ProxyFileHandle` to serve downloads through +your proxy server. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/proxy_storage_location.py:associate_proxy_file_handle" +``` + +## Source code for this tutorial + +
+ Click to show me + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/proxy_storage_location.py" +``` +
+ +## References used in this tutorial + +- [StorageLocation][synapseclient.models.StorageLocation] +- [StorageLocationType][synapseclient.models.StorageLocationType] +- [Folder][synapseclient.models.Folder] +- [File][synapseclient.models.File] +- [Project][synapseclient.models.Project] +- [syn.login][synapseclient.Synapse.login] +- [Custom Storage Locations Documentation](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html) + +## See also + +- [Storage Locations Tutorial](./storage_location.md) — How to create and manage all storage location types +- [Storage Location Architecture](../../explanations/storage_location_architecture.md) — In-depth architecture diagrams and design documentation diff --git a/docs/tutorials/python/sharing_settings.md b/docs/tutorials/python/sharing_settings.md index 78744ec1c..23685a542 100644 --- a/docs/tutorials/python/sharing_settings.md +++ b/docs/tutorials/python/sharing_settings.md @@ -77,13 +77,13 @@ The inheritance and benefactor concepts apply consistently across all entity typ **⚠️ IMPORTANT**: Before running the tutorial, you MUST edit the script to set a valid `PRINCIPAL_ID`. ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=17-34} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:setup_and_project" ``` ## 2. Create a main folder to set custom sharing settings ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=36-45} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:create_main_folder" ``` ## 3. Examine Current Permissions @@ -91,7 +91,7 @@ The inheritance and benefactor concepts apply consistently across all entity typ The `get_permissions()` method returns the permissions that the current user has on an entity ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=46-49} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:get_permissions" ```
@@ -108,7 +108,7 @@ Current user permissions on main folder: ['READ', 'UPDATE', 'CREATE', 'DELETE', The `get_acl()` method gets the specific permissions for a given principal (user or team): ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=51-62} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:get_acl" ``` Depending on if you've already given permissions to given user/team your may see @@ -129,7 +129,7 @@ Principal ######## permissions on folder only: [] Use `set_permissions()` to grant specific permissions to a user or team: ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=63-79} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:set_permissions" ```
@@ -149,7 +149,7 @@ Verified new permissions: ['DOWNLOAD', 'READ'] Create a sub-folder and give it more restrictive permissions than its parent: ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=80-101} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:create_sub_folder" ```
@@ -171,7 +171,7 @@ the `log_tree=True` argument for the method we can get an ascii tree representat the ACLs on the project. Alternatively you will also be able to loop over the data. ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=102-117} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:list_acl" ```
@@ -209,7 +209,7 @@ Using `overwrite=False` will allow you to add Permissions Non-destructively. **Note:** The default behavior is `overwrite=True` which will replace the permissions for the given Principal. ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=118-132} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:advanced_permissions" ```
@@ -227,7 +227,7 @@ Updated permissions after adding UPDATE: ['READ', 'DOWNLOAD', 'UPDATE'] Remove permissions for a specific principal by setting an empty access type list: ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=133-147} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:remove_permissions" ```
@@ -245,7 +245,7 @@ After removal - Principal ####### permissions: [] Use `delete_permissions()` to remove entire ACLs and revert to inheritance: ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=149-167} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:delete_acls" ```
@@ -282,7 +282,7 @@ Sub-folder now inherits permissions: [] Get a complete view of your permission structure: ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!lines=168-172} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py:final_overview" ```
@@ -344,7 +344,7 @@ ACL Tree Structure: Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/sharing_settings.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/sharing_settings.py" ```
diff --git a/docs/tutorials/python/storage_location.md b/docs/tutorials/python/storage_location.md new file mode 100644 index 000000000..65b9f1530 --- /dev/null +++ b/docs/tutorials/python/storage_location.md @@ -0,0 +1,220 @@ +# Storage Locations in Synapse + +Storage locations allow you to configure where files uploaded to Synapse are +stored. By default, files are stored in Synapse's internal S3 storage, but you +can configure projects or folders to use your own AWS S3 buckets, Google Cloud +Storage buckets, or other external storage. + +This tutorial demonstrates how to use the Python client to manage storage +locations using the new object-oriented models. + +[Read more about Custom Storage Locations](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html) + +## Tutorial Purpose +In this tutorial you will: + +1. Create an external S3 storage location and assign it to a folder +2. Create a Google Cloud Storage location and assign it to a folder +3. Create an SFTP storage location and assign it to a folder +4. Create an HTTPS storage location and assign it to a folder +5. Create an External Object Store location and assign it to a folder +6. Create a Proxy storage location, register a proxy file handle, and assign it to a folder +7. Retrieve and inspect storage location settings +8. Update a storage location (create a replacement and reassign) +9. Index and migrate files to a new storage location + +## Prerequisites + +* Make sure that you have completed the [Installation](../installation.md) and + [Authentication](../authentication.md) setup. +* You must have a [Project](./project.md) created and replace the one used in + this tutorial. +* An AWS S3 bucket properly configured for use with Synapse, including an + `owner.txt` file. See + [Custom Storage Locations](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html). +* (Optional) `boto3` installed for STS credential examples. +* For SFTP: `pysftp` installed (`pip install "synapseclient[pysftp]"`). +* For Object Store: AWS credentials configured in your environment. +* For Proxy: a running proxy server and its shared secret key. + +## Understanding Storage Location Types + +Synapse supports several types of storage locations: + +- **SYNAPSE_S3**: Synapse-managed S3 storage (default) +- **EXTERNAL_S3**: User-owned AWS S3 bucket, accessed by Synapse on + your behalf. Synapse transfers the data for uploads and downloads. Requires + an `owner.txt` file in the bucket to verify ownership. +- **EXTERNAL_GOOGLE_CLOUD**: User-owned Google Cloud Storage bucket +- **EXTERNAL_SFTP**: External SFTP server +- **EXTERNAL_HTTPS**: External HTTPS server (uploading via client is **not** + supported right now.) +- **EXTERNAL_OBJECT_STORE**: An S3-compatible store (e.g., MinIO, OpenStack + Swift) that Synapse does **not** access. The client transfers data directly + to the object store using credentials configured in your environment; Synapse + only stores the file metadata. +- **PROXY**: A proxy server that controls access to the underlying storage + +## Storage Location Settings + +Each storage type exposes a different set of configuration fields on +`StorageLocation`. When you retrieve a stored location, only the fields +relevant to its type are populated: + +| Type | Key fields | +|------|-----------| +| `SYNAPSE_S3` | `base_key`, `sts_enabled` | +| `EXTERNAL_S3` | `bucket`, `base_key`, `sts_enabled`, `endpoint_url` | +| `EXTERNAL_GOOGLE_CLOUD` | `bucket`, `base_key` | +| `EXTERNAL_SFTP` / `EXTERNAL_HTTPS` | `url`, `supports_subfolders` | +| `EXTERNAL_OBJECT_STORE` | `bucket`, `endpoint_url` | +| `PROXY` | `proxy_url`, `secret_key`, `benefactor_id` | + +Common attributes are: `concrete_type`, `storage_location_id`, `storage_type`, `upload_type`, `banner`, `description`, `etag`, `created_on`, `created_by` + +## 1. Set up and get project + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:setup" +``` + +## 2. Create an external S3 storage location + +Create a storage location backed by your own S3 bucket. The bucket must be +properly configured with an `owner.txt` file. Synapse will transfer data +directly to and from this bucket on the user's behalf. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:create_s3_storage_location" +``` + +
+ You'll notice the output looks like: + +``` +Created storage location: 12345 +storage location type: StorageLocationType.EXTERNAL_S3 +``` +
+ +## 3. Set up a folder with external S3 storage + +Create a folder and assign it the S3 storage location. All files uploaded into +this folder will be stored in your S3 bucket. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:create_folder_with_s3_storage_location" +``` + +
+ You'll notice the output looks like: + +``` +ProjectSetting(id=..., project_id=..., settings_type='upload', locations=[...], concrete_type='org.sagebionetworks.repo.model.project.UploadDestinationListSetting', etag='...') +``` +
+ +## 4. Create a Google Cloud Storage location + +Create a storage location backed by a Google Cloud Storage bucket and assign it +to a folder. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:create_gcs_storage_location" +``` + +## 5. Create an SFTP storage location + +SFTP storage locations point to an external SFTP server, where files are stored outside of Synapse. Synapse only manages the metadata and does not handle the file transfer itself. This setup requires the pysftp package, and files must be uploaded separately through the **client** once configured. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:create_sftp_storage_location" +``` +## 6. Create an HTTPS storage location + +`EXTERNAL_HTTPS` uses the same underlying API type as `EXTERNAL_SFTP` but is +used when the external server is accessed over HTTPS. Note that the Python +client does NOT support uploading files to HTTPS storage locations directly yet. To add files, use the Synapse REST API directly. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:create_https_storage_location" +``` + +## 7. Create an External Object Store storage location + +Use `EXTERNAL_OBJECT_STORE` for S3-compatible stores that are not directly +accessed by Synapse. Unlike `EXTERNAL_S3`, the Python client transfers data +directly to the object store using locally configured AWS credentials — +Synapse is never involved in the data transfer, only in storing the metadata. + +Configure your AWS credentials using any method supported by the AWS SDK +(environment variables, `~/.aws/credentials`, IAM roles, etc.). See the +[AWS documentation on credential configuration](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) +for details. + +Once credentials are configured, add a matching profile section to `~/.synapseConfig` +so the client knows which profile to use for a given endpoint and bucket: + +``` +[https://s3.us-east-1.amazonaws.com/test-external-object-store] +profile_name = my-s3-profile +``` + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:create_object_store_storage_location" +``` + +## 8. Create a Proxy storage location + +Proxy storage locations delegate file access to a proxy server that controls +authentication and access to the underlying storage. Files are registered by +creating a `ProxyFileHandle` via the REST API. Then, files can be uploaded via store function with data_file_handle_id. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:create_proxy_storage_location" +``` + +## 9. Retrieve and inspect storage location settings + +You can retrieve a storage location by ID. Only fields relevant to the storage +type are populated. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:retrieve_storage_location" +``` + +
+ You'll notice the output looks like: + +``` +Retrieved storage location ID: 12345 +Storage type: StorageLocationType.EXTERNAL_S3 +Bucket: my-synapse-bucket +Base key: synapse-data +``` +
+ +## 10. Update a storage location + +Storage locations are immutable — individual fields cannot be edited after +creation. To "update" a storage location, create a new one with the desired +settings and reassign it to the folder or project. + +```python +--8<-- "docs/tutorials/python/tutorial_scripts/storage_location.py:update_storage_location" +``` + +## References used in this tutorial + +- [StorageLocation][synapseclient.models.StorageLocation] +- [StorageLocationType][synapseclient.models.StorageLocationType] +- [Folder][synapseclient.models.Folder] +- [File][synapseclient.models.File] +- [Project][synapseclient.models.Project] +- [syn.login][synapseclient.Synapse.login] +- [Custom Storage Locations Documentation](https://help.synapse.org/docs/Custom-Storage-Locations.2048327803.html) + +## See also + +- [Storage Location Architecture](../../explanations/storage_location_architecture.md) - + In-depth architecture diagrams and design documentation diff --git a/docs/tutorials/python/submission.md b/docs/tutorials/python/submission.md index a77dfbdf1..12aecda36 100644 --- a/docs/tutorials/python/submission.md +++ b/docs/tutorials/python/submission.md @@ -65,7 +65,7 @@ As an organizer of a Synapse challenge, you will Script setup: ```python -{!docs/tutorials/python/tutorial_scripts/submission_participant.py!lines=13-30} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_participant.py:setup" ``` ## 1. Participating in a Synapse challenge @@ -73,37 +73,37 @@ Script setup: ### 1. Make a submission to an existing evaluation queue on Synapse ```python -{!docs/tutorials/python/tutorial_scripts/submission_participant.py!lines=32-54} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_participant.py:make_submission" ``` ### 2. Fetch your existing submission ```python -{!docs/tutorials/python/tutorial_scripts/submission_participant.py!lines=56-71} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_participant.py:fetch_submission" ``` ### 3. Count your submissions ```python -{!docs/tutorials/python/tutorial_scripts/submission_participant.py!lines=72-82} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_participant.py:count_submissions" ``` ### 4. Fetch all of your submissions from an existing evaluation queue on Synapse ```python -{!docs/tutorials/python/tutorial_scripts/submission_participant.py!lines=82-95} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_participant.py:fetch_all_submissions" ``` ### 5. Check the status of your submission ```python -{!docs/tutorials/python/tutorial_scripts/submission_participant.py!lines=97-119} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_participant.py:check_status" ``` ### 6. Cancel your submission ```python -{!docs/tutorials/python/tutorial_scripts/submission_participant.py!lines=120-137} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_participant.py:cancel_submission" ``` ## 2. Organizing a Synapse challenge @@ -112,37 +112,37 @@ Script setup: Script setup: ```python -{!docs/tutorials/python/tutorial_scripts/submission_organizer.py!lines=12-31} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_organizer.py:setup" ``` ### 1. Annotate a submission to score it ```python -{!docs/tutorials/python/tutorial_scripts/submission_organizer.py!lines=33-60} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_organizer.py:annotate_submission" ``` ### 2. Batch-update submission statuses ```python -{!docs/tutorials/python/tutorial_scripts/submission_organizer.py!lines=62-99} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_organizer.py:batch_update" ``` ### 3. Fetch the submission bundle for a given submission ```python -{!docs/tutorials/python/tutorial_scripts/submission_organizer.py!lines=101-136} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_organizer.py:fetch_bundle" ``` ### 4. Allow cancellation of submissions ```python -{!docs/tutorials/python/tutorial_scripts/submission_organizer.py!lines=138-177} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_organizer.py:allow_cancellation" ``` ### 5. Delete submissions ```python -{!docs/tutorials/python/tutorial_scripts/submission_organizer.py!lines=179-209} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_organizer.py:delete_submissions" ``` ## Source code for this tutorial @@ -151,7 +151,7 @@ Script setup: Click to show me (source code for Participant) ```python -{!docs/tutorials/python/tutorial_scripts/submission_participant.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_participant.py" ```
@@ -159,7 +159,7 @@ Script setup: Click to show me (source code for Organizer) ```python -{!docs/tutorials/python/tutorial_scripts/submission_organizer.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/submission_organizer.py" ```
diff --git a/docs/tutorials/python/submissionview.md b/docs/tutorials/python/submissionview.md index b8a29b495..8a7c64d3e 100644 --- a/docs/tutorials/python/submissionview.md +++ b/docs/tutorials/python/submissionview.md @@ -35,7 +35,7 @@ The name of the Evaluation must also be globally unique. Please update `"Test Evaluation Queue for Alzheimer conference"` with a new value. ```python -{!docs/tutorials/python/tutorial_scripts/submissionview.py!lines=13-44} +--8<-- "docs/tutorials/python/tutorial_scripts/submissionview.py:setup_and_evaluation" ``` ## 2. Create a SubmissionView for the evaluation queue @@ -45,7 +45,7 @@ in its scope. We'll also add custom columns for metrics that will be used for sc submissions. ```python -{!docs/tutorials/python/tutorial_scripts/submissionview.py!lines=46-82} +--8<-- "docs/tutorials/python/tutorial_scripts/submissionview.py:create_submissionview" ``` ## 3. Create and submit a file to the evaluation queue @@ -54,7 +54,7 @@ Now let's create a test file and submit it to our evaluation queue. For convenie we'll use a temporary file that will be automatically cleaned up after execution. ```python -{!docs/tutorials/python/tutorial_scripts/submissionview.py!lines=84-105} +--8<-- "docs/tutorials/python/tutorial_scripts/submissionview.py:submit_file" ``` ## 4. Query and update the submission status @@ -66,7 +66,7 @@ Note: Due to Synapse's eventual consistency model, we need to wait briefly for t submission to appear in the view. ```python -{!docs/tutorials/python/tutorial_scripts/submissionview.py!lines=107-126} +--8<-- "docs/tutorials/python/tutorial_scripts/submissionview.py:query_and_update" ```
@@ -93,7 +93,7 @@ The name of the Evaluation must also be globally unique. Please update `"Second Test Evaluation Queue for Alzheimer conference"` with a new value. ```python -{!docs/tutorials/python/tutorial_scripts/submissionview.py!lines=128-143} +--8<-- "docs/tutorials/python/tutorial_scripts/submissionview.py:modify_scope" ``` ## 6. Create a snapshot of the view @@ -102,7 +102,7 @@ SubmissionViews support creating snapshots, which capture the state of all submi specific point in time. This is useful for archiving or comparing submission states. ```python -{!docs/tutorials/python/tutorial_scripts/submissionview.py!lines=145-152} +--8<-- "docs/tutorials/python/tutorial_scripts/submissionview.py:create_snapshot" ``` ## 7. Query the snapshot @@ -111,7 +111,7 @@ After creating a snapshot, you can query it to retrieve the state of submissions time the snapshot was created. This is useful for historical analysis or auditing. ```python -{!docs/tutorials/python/tutorial_scripts/submissionview.py!lines=153-162} +--8<-- "docs/tutorials/python/tutorial_scripts/submissionview.py:query_snapshot" ``` ## Source Code for this Tutorial @@ -120,7 +120,7 @@ time the snapshot was created. This is useful for historical analysis or auditin Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/submissionview.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/submissionview.py" ```
diff --git a/docs/tutorials/python/tutorial_screenshots/index_results.png b/docs/tutorials/python/tutorial_screenshots/index_results.png new file mode 100644 index 000000000..f8955cc48 Binary files /dev/null and b/docs/tutorials/python/tutorial_screenshots/index_results.png differ diff --git a/docs/tutorials/python/tutorial_screenshots/migration_results.png b/docs/tutorials/python/tutorial_screenshots/migration_results.png new file mode 100644 index 000000000..e629860f2 Binary files /dev/null and b/docs/tutorials/python/tutorial_screenshots/migration_results.png differ diff --git a/docs/tutorials/python/tutorial_scripts/activity.py b/docs/tutorials/python/tutorial_scripts/activity.py new file mode 100644 index 000000000..54a669bcf --- /dev/null +++ b/docs/tutorials/python/tutorial_scripts/activity.py @@ -0,0 +1,156 @@ +""" +Here is where you'll find the code for the Activity/Provenance tutorial. +""" + +# Step 1: Add a new Activity to your File +# --8<-- [start:retrieve_project_folder_file] +import os +import tempfile + +import synapseclient +from synapseclient.models import Activity, File, Folder, Project, UsedEntity, UsedURL + +syn = synapseclient.login() + +# Set project and folder name that exists within the project +PROJECT_NAME = "My uniquely named project about Alzheimer's Disease" +FOLDER_NAME = "biospecimen_experiment_1" + +# Retrieve the project and folder IDs +my_project_id = Project(name=PROJECT_NAME).get().id + +biospecimen_experiment_1_folder = Folder( + name=FOLDER_NAME, parent_id=my_project_id +).get() + +with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp: + tmp.write("First biospecimen data - post-QC analysis results") + tmp_path = tmp.name +# Store a first version of the file in Synapse +my_file = File( + path=tmp_path, + name="biospecimen_data.txt", + parent_id=biospecimen_experiment_1_folder.id, +) +my_file.store() + +# --8<-- [end:retrieve_project_folder_file] + +# --8<-- [start:create_activity] +# Create an Activity describing the analysis step that produced this file +analysis_activity = Activity( + name="Quality Control Analysis", + description="Initial QC analysis of biospecimen data using the FastQC pipeline.", + used=[ + UsedURL( + name="FastQC v0.12.1", + url="https://github.com/s-andrews/FastQC/releases/tag/v0.12.1", + ), + UsedEntity(target_id=my_project_id), + ], + executed=[ + UsedURL( + name="QC Analysis Script", + url="https://github.com/Sage-Bionetworks/analysis-scripts/blob/v1.0/qc_analysis.py", + ), + ], +) + +# Attach the activity to the file and store it +my_file.activity = analysis_activity +my_file = my_file.store() + +first_version_number = my_file.version_number +print( + f"Stored file: {my_file.name} (version {first_version_number}) " + f"with activity: {my_file.activity.name}" +) +# --8<-- [end:create_activity] + +# --8<-- [start:add_activity_to_version] +# Step 2: Add a new Activity to a specific version of your File +# Each time you store an updated file, Synapse creates a new version. +# You can track a different activity for each version to capture the +# full history of what was done to produce each version of the file. + +# Create a dummy file and upload it as a new version +with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as tmp: + tmp.write("Updated biospecimen data - post-QC analysis results") + tmp_path = tmp.name + +updated_file = File( + path=tmp_path, + name="biospecimen_data.txt", + parent_id=biospecimen_experiment_1_folder.id, +) +updated_file.store() +second_version_number = updated_file.version_number + +downstream_activity = Activity( + name="Downstream Analysis", + description="Downstream analysis of QC-passed biospecimen samples.", + used=[ + UsedURL( + name="Seurat v5.0.0", + url="https://github.com/satijalab/seurat/releases/tag/v5.0.0", + ), + UsedEntity( + target_id=my_file.id, + target_version_number=first_version_number, + ), + ], + executed=[ + UsedURL( + name="Downstream Analysis Script", + url="https://github.com/Sage-Bionetworks/analysis-scripts/blob/v1.0/downstream_analysis.py", + ), + ], +) + +# Store the activity on the new version using Activity.store() +downstream_activity.store(parent=updated_file) +print( + f"Stored activity '{downstream_activity.name}' on file " + f"{updated_file.name} (version {second_version_number})" +) +# --8<-- [end:add_activity_to_version] + +# --8<-- [start:print_activities] +# Step 3: Print stored activities on your File +# Retrieve and print the activity on the latest version of the file +current_activity = Activity.from_parent(parent=my_file) +print(f"\nActivity on latest version (v{my_file.version_number}):") +print(f" Name: {current_activity.name}") +print(f" Description: {current_activity.description}") +for item in current_activity.used: + print(f" Used: {item}") +for item in current_activity.executed: + print(f" Executed: {item}") + +# Retrieve and print the activity for the first version +first_activity = Activity.from_parent( + parent=my_file, + parent_version_number=first_version_number, +) +print(f"\nActivity on version {first_version_number}:") +print(f" Name: {first_activity.name}") +print(f" Description: {first_activity.description}") +# --8<-- [end:print_activities] + +# --8<-- [start:delete_activity] +# Step 4: Delete an activity +# Deleting an activity disassociates it from the entity and removes it from +# Synapse once it is no longer referenced by any entity. + +current_activity.disassociate_from_entity(parent=updated_file) +current_activity.delete(parent=updated_file) +print( + f"\nDeleted activity from: {updated_file.name} (version {updated_file.version_number})" +) + +# Verify the activity was removed +deleted_activity = Activity.from_parent( + parent=updated_file, parent_version_number=updated_file.version_number +) +print(f"Activity after deletion: {deleted_activity}") +# --8<-- [end:delete_activity] diff --git a/docs/tutorials/python/tutorial_scripts/annotation.py b/docs/tutorials/python/tutorial_scripts/annotation.py index 71d9a168a..23ce36056 100644 --- a/docs/tutorials/python/tutorial_scripts/annotation.py +++ b/docs/tutorials/python/tutorial_scripts/annotation.py @@ -2,6 +2,7 @@ Here is where you'll find the code for the Annotation tutorial. """ +# --8<-- [start:retrieve_synapse_ids] # Step 1: Add several annotations to stored files import os @@ -21,7 +22,9 @@ ).get() print(f"Batch 1 Folder ID: {batch_1_folder.id}") +# --8<-- [end:retrieve_synapse_ids] +# --8<-- [start:define_annotations] # Define the annotations I want to set annotation_values = { "species": "Homo sapiens", @@ -29,7 +32,9 @@ "assay": "SCRNA-seq", "fileFormat": "fastq", } +# --8<-- [end:define_annotations] +# --8<-- [start:set_annotations_loop] batch_1_folder.sync_from_synapse(download_file=False) # Loop over all of the files and set their annotations for file_batch_1 in batch_1_folder.files: @@ -48,7 +53,9 @@ print( f"Set the annotations for File: {file_batch_1.name}, ID: {file_batch_1.id}, Annotations: {existing_annotations_for_file}" ) +# --8<-- [end:set_annotations_loop] +# --8<-- [start:upload_with_annotations] # Step 2: Upload 2 new files and set the annotations at the same time # In order for the following script to work please replace the files with ones that # already exist on your local machine. @@ -75,3 +82,4 @@ print( f"Stored file: {batch_1_scrnaseq_new_file_2.name}, ID: {batch_1_scrnaseq_new_file_2.id}, Annotations: {batch_1_scrnaseq_new_file_2.annotations}" ) +# --8<-- [end:upload_with_annotations] diff --git a/docs/tutorials/python/tutorial_scripts/dataset.py b/docs/tutorials/python/tutorial_scripts/dataset.py index 61c3a5b06..2ac626fa7 100644 --- a/docs/tutorials/python/tutorial_scripts/dataset.py +++ b/docs/tutorials/python/tutorial_scripts/dataset.py @@ -1,5 +1,6 @@ """Here is where you'll find the code for the dataset tutorial.""" +# --8<-- [start:setup] import pandas as pd from synapseclient import Synapse @@ -22,44 +23,61 @@ ).get() # Replace with your project name project_id = project.id print(f"My project ID is {project_id}") +# --8<-- [end:setup] # Next, let's create the dataset. We'll use the project id as the parent id. # To begin, the dataset will be empty, but if you view the dataset's schema in the UI, # you will notice that datasets come with default columns. + +# --8<-- [start:create_dataset] my_new_dataset = Dataset(parent_id=project_id, name="My New Dataset").store() print(f"My Dataset's ID is {my_new_dataset.id}") +# --8<-- [end:create_dataset] # Now, let's add some files to the dataset. There are three ways to add files to a dataset: # 1. Add an Entity Reference to a file with its ID and version +# --8<-- [start:add_entity_ref] my_new_dataset.add_item( EntityRef(id="syn51790029", version=1) ) # Replace with the ID of the file you want to add +# --8<-- [end:add_entity_ref] # 2. Add a File with its ID and version +# --8<-- [start:add_file] my_new_dataset.add_item( File(id="syn51790028", version_number=1) ) # Replace with the ID of the file you want to add +# --8<-- [end:add_file] # 3. Add a Folder. In this case, all child files of the folder are added to the dataset recursively. +# --8<-- [start:add_folder] my_new_dataset.add_item( Folder(id="syn64893446") ) # Replace with the ID of the folder you want to add +# --8<-- [end:add_folder] # Our changes won't be persisted to Synapse until we call the store() method. +# --8<-- [start:store_dataset] my_new_dataset.store() +# --8<-- [end:store_dataset] # Now that our Dataset with all of our files has been created, the next time # we want to use it, we can retrieve it from Synapse. +# --8<-- [start:retrieve_dataset] my_retrieved_dataset = Dataset(id=my_new_dataset.id).get() print(f"My Dataset's ID is {my_retrieved_dataset.id}") print(len(my_retrieved_dataset.items)) +# --8<-- [end:retrieve_dataset] # If you want to query your dataset for files that match certain criteria, you can do so # using the query method. +# --8<-- [start:query_dataset] rows = Dataset.query( query=f"SELECT * FROM {my_retrieved_dataset.id} WHERE name like '%test%'" ) print(rows) +# --8<-- [end:query_dataset] # In addition to the default columns, you may want to annotate items in your dataset using # custom columns. +# --8<-- [start:add_custom_column] my_retrieved_dataset.add_column( column=Column( name="my_annotation", @@ -67,8 +85,10 @@ ) ) my_retrieved_dataset.store() +# --8<-- [end:add_custom_column] # Now that our custom column has been added, we can update the dataset with new values. +# --8<-- [start:update_custom_column_values] modified_data = pd.DataFrame( { "id": "syn51790028", # The ID of one of our Files @@ -78,11 +98,14 @@ my_retrieved_dataset.update_rows( values=modified_data, primary_keys=["id"], dry_run=False ) +# --8<-- [end:update_custom_column_values] # Finally, let's save a snapshot of the dataset. +# --8<-- [start:snapshot_dataset] snapshot_info = my_retrieved_dataset.snapshot( comment="My first snapshot", label="My first snapshot", ) print(snapshot_info) +# --8<-- [end:snapshot_dataset] diff --git a/docs/tutorials/python/tutorial_scripts/dataset_collection.py b/docs/tutorials/python/tutorial_scripts/dataset_collection.py index b1e095aa0..94b7b52ac 100644 --- a/docs/tutorials/python/tutorial_scripts/dataset_collection.py +++ b/docs/tutorials/python/tutorial_scripts/dataset_collection.py @@ -1,5 +1,6 @@ """Here is where you'll find the code for the DatasetCollection tutorial.""" +# --8<-- [start:setup] import pandas as pd from synapseclient import Synapse @@ -14,6 +15,7 @@ ).get() # Replace with your project name project_id = project.id print(f"My project ID is {project_id}") +# --8<-- [end:setup] # This tutorial assumes that you have already created datasets that you would like to add to a DatasetCollection. # If you need help creating datasets, you can refer to the dataset tutorial. @@ -22,6 +24,7 @@ # Let's create the DatasetCollection. We'll use the project id as the parent id. # At first, the DatasetCollection will be empty, but if you view the DatasetCollection's schema in the UI, # you will notice that DatasetCollections come with default columns. +# --8<-- [start:create_collection] DATASET_IDS = [ "syn65987017", "syn65987019", @@ -31,22 +34,30 @@ parent_id=project_id, name="test_dataset_collection" ).store() print(f"My DatasetCollection's ID is {test_dataset_collection.id}") +# --8<-- [end:create_collection] # Now, let's add some datasets to the collection. We will loop through our dataset ids and add each dataset to the # collection using the `add_item` method. +# --8<-- [start:add_datasets] for dataset_id in DATASET_IDS: test_dataset_collection.add_item(Dataset(id=dataset_id).get()) +# --8<-- [end:add_datasets] # Our changes won't be persisted to Synapse until we call the `store` method on our DatasetCollection. +# --8<-- [start:store_collection] test_dataset_collection.store() +# --8<-- [end:store_collection] # Now that our DatasetCollection with all of our datasets has been created, the next time we want to use it, # we can retrieve it from Synapse. +# --8<-- [start:retrieve_collection] my_retrieved_dataset_collection = DatasetCollection(id=test_dataset_collection.id).get() print(f"My DatasetCollection's ID is still {my_retrieved_dataset_collection.id}") print(f"My DatasetCollection has {len(my_retrieved_dataset_collection.items)} items") +# --8<-- [end:retrieve_collection] # In addition to the default columns, you may want to annotate items in your DatasetCollection using # custom columns. +# --8<-- [start:add_custom_column] my_retrieved_dataset_collection.add_column( column=Column( name="my_annotation", @@ -54,8 +65,10 @@ ) ) my_retrieved_dataset_collection.store() +# --8<-- [end:add_custom_column] # Now that our custom column has been added, we can update the DatasetCollection with new annotations. +# --8<-- [start:update_custom_column_values] modified_data = pd.DataFrame( { "id": DATASET_IDS, @@ -65,13 +78,18 @@ my_retrieved_dataset_collection.update_rows( values=modified_data, primary_keys=["id"], dry_run=False ) +# --8<-- [end:update_custom_column_values] # If you want to query your DatasetCollection for items that match certain criteria, you can do so # using the `query` method. +# --8<-- [start:query_collection] rows = my_retrieved_dataset_collection.query( query=f"SELECT id, my_annotation FROM {my_retrieved_dataset_collection.id} WHERE my_annotation = 'good dataset'" ) print(rows) +# --8<-- [end:query_collection] # Create a snapshot of the DatasetCollection +# --8<-- [start:snapshot_collection] my_retrieved_dataset_collection.snapshot(comment="test snapshot") +# --8<-- [end:snapshot_collection] diff --git a/docs/tutorials/python/tutorial_scripts/download_data_by_synid.py b/docs/tutorials/python/tutorial_scripts/download_data_by_synid.py new file mode 100644 index 000000000..8147d4830 --- /dev/null +++ b/docs/tutorials/python/tutorial_scripts/download_data_by_synid.py @@ -0,0 +1,47 @@ +""" +Here is where you'll find the code for the downloading files by synapse ids tutorial. +""" + +import asyncio + +from synapseclient import Synapse +from synapseclient.models import File + +syn = Synapse() +syn.login() + +# --8<-- [start:syn_id_mapping] +# A mapping of Synapse IDs to the local directory each file should be downloaded to. +# Files can be directed to different directories as needed. +SYN_IDS_AND_PATHS = { + "syn60584250": "~/temp/subdir1", + "syn60584256": "~/temp/subdir1", + "syn60584248": "~/temp/subdir1", + "syn60584252": "~/temp/subdir1", + "syn60584258": "~/temp/subdir1", + "syn60584260": "~/temp/subdir1", + "syn60584257": "~/temp/subdir1", + "syn60584251": "~/temp/subdir1", + "syn60584253": "~/temp/subdir1", + "syn60584390": "~/temp/subdir1", + "syn60584405": "~/temp/subdir2", + "syn60584400": "~/temp/subdir3", +} +# --8<-- [end:syn_id_mapping] + + +# --8<-- [start:concurrent_download] +async def main(): + # Build a list of concurrent download tasks — one per Synapse ID + tasks = [] + for syn_id, path in SYN_IDS_AND_PATHS.items(): + tasks.append(File(id=syn_id, path=path).get_async()) + + # Download all files concurrently and wait for every one to finish + results = await asyncio.gather(*tasks) + + print(f"Retrieved {len(results)} files") + + +asyncio.run(main()) +# --8<-- [end:concurrent_download] diff --git a/docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py b/docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py index 0ca946f13..1757c1037 100644 --- a/docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py +++ b/docs/tutorials/python/tutorial_scripts/download_data_in_bulk.py @@ -2,6 +2,7 @@ Here is where you'll find the code for the downloading data in bulk tutorial. """ +# --8<-- [start:setup] import os import synapseclient @@ -16,32 +17,55 @@ DIRECTORY_TO_SYNC_FOLDER_TO = os.path.join( DIRECTORY_TO_SYNC_PROJECT_TO, FOLDER_NAME_TO_SYNC ) +# --8<-- [end:setup] -# Step 1: Create an instance of the container I want to sync the data from and sync -project = Project(name="My uniquely named project about Alzheimer's Disease") -# We'll set the `if_collision` to `keep.local` so that we don't overwrite any files +# Step 1: Get an instance of the container I want to sync the data from and sync +# --8<-- [start:get_project] +project = Project(name="My uniquely named project about Alzheimer's Disease").get() +# --8<-- [end:get_project] + +# By default, sync_from_synapse generates a manifest.csv in each synced directory. +# The manifest.csv is interoperable with sync_to_synapse, the Synapse +# UI download cart, and `download_list_files`. +# --8<-- [start:sync_project] +# We'll set the `if_collision` to `keep.local` so that we don't overwrite any files. project.sync_from_synapse(path=DIRECTORY_TO_SYNC_PROJECT_TO, if_collision="keep.local") # Print out the contents of the directory where the data was synced to # Explore the directory to see the contents have been recursively synced. print(os.listdir(DIRECTORY_TO_SYNC_PROJECT_TO)) +# --8<-- [end:sync_project] +# Or, use `manifest="root"` to generate a single manifest.csv at the root directory +# instead of one in each sub-directory. Use `manifest="suppress"` to skip +# manifest generation entirely. + +# --8<-- [start:sync_project_with_root_manifest] +project.sync_from_synapse( + path=DIRECTORY_TO_SYNC_PROJECT_TO, + if_collision="keep.local", + manifest="root", +) +print(os.listdir(DIRECTORY_TO_SYNC_PROJECT_TO)) +# --8<-- [end:sync_project_with_root_manifest] -# Step 2: The same as step 1, but for a single folder +# Step 3: The same as step 1, but for a single folder +# --8<-- [start:sync_folder] folder = Folder(name=FOLDER_NAME_TO_SYNC, parent_id=project.id) folder.sync_from_synapse(path=DIRECTORY_TO_SYNC_FOLDER_TO, if_collision="keep.local") print(os.listdir(os.path.expanduser(DIRECTORY_TO_SYNC_FOLDER_TO))) +# --8<-- [end:sync_folder] -# Step 3: Loop over all files/folders on the project/folder object instances +# Step 4: Loop over all files/folders on the project/folder object instances +# --8<-- [start:loop_over_project_folder] for folder_at_root in project.folders: print(f"Folder at root: {folder_at_root.name}") - for file_in_root_folder in folder_at_root.files: print(f"File in {folder_at_root.name}: {file_in_root_folder.name}") - for folder_in_folder in folder_at_root.folders: print(f"Folder in {folder_at_root.name}: {folder_in_folder.name}") for file_in_folder in folder_in_folder.files: print(f"File in {folder_in_folder.name}: {file_in_folder.name}") +# --8<-- [end:loop_over_project_folder] diff --git a/docs/tutorials/python/tutorial_scripts/entityview.py b/docs/tutorials/python/tutorial_scripts/entityview.py index dcac79236..07406cb5a 100644 --- a/docs/tutorials/python/tutorial_scripts/entityview.py +++ b/docs/tutorials/python/tutorial_scripts/entityview.py @@ -2,6 +2,7 @@ Here is where you'll find the code for the EntityView tutorial. """ +# --8<-- [start:setup] import pandas as pd from synapseclient import Synapse @@ -20,7 +21,9 @@ # First let's get the project we want to create the EntityView in my_project = Project(name="My uniquely named project about Alzheimer's Disease").get() project_id = my_project.id +# --8<-- [end:setup] +# --8<-- [start:create_columns] # Next let's add some columns to the EntityView, the data in these columns will end up # being stored as annotations on the files columns = [ @@ -29,7 +32,9 @@ Column(name="assay", column_type=ColumnType.STRING), Column(name="fileFormat", column_type=ColumnType.STRING), ] +# --8<-- [end:create_columns] +# --8<-- [start:create_view] # Then we will create a EntityView that is scoped to the project, and will contain a row # for each file in the project view = EntityView( @@ -45,14 +50,18 @@ # When the columns are printed you'll notice that it contains a number of columns that # are automatically added by Synapse in addition to the ones we added print(view.columns.keys()) +# --8<-- [end:create_view] +# --8<-- [start:query_view] # Query the EntityView results_as_dataframe: pd.DataFrame = query( query=f"SELECT id, name, species, dataType, assay, fileFormat, path FROM {view.id} WHERE path like '%single_cell_RNAseq_batch_1%'", include_row_id_and_row_version=False, ) print(results_as_dataframe) +# --8<-- [end:query_view] +# --8<-- [start:update_rows] # Finally let's update the annotations on the files in the project results_as_dataframe["species"] = ["Homo sapiens"] * len(results_as_dataframe) results_as_dataframe["dataType"] = ["geneExpression"] * len(results_as_dataframe) @@ -64,16 +73,21 @@ primary_keys=["id"], wait_for_eventually_consistent_view=True, ) +# --8<-- [end:update_rows] +# --8<-- [start:update_scope] # Over time you may have a need to add or remove scopes from the EntityView, you may # use `add` or `remove` along with the Synapse ID of the scope you wish to add/remove view.scope_ids.add("syn1234") # view.scope_ids.remove("syn1234") view.store() +# --8<-- [end:update_scope] +# --8<-- [start:update_view_type_mask] # You may also need to add or remove the types of Entities that may show up in your view # You will be able to specify multiple types using the bitwise OR operator, or a single value view.view_type_mask = ViewTypeMask.FILE | ViewTypeMask.FOLDER # view.view_type_mask = ViewTypeMask.FILE view.store() +# --8<-- [end:update_view_type_mask] diff --git a/docs/tutorials/python/tutorial_scripts/evaluation.py b/docs/tutorials/python/tutorial_scripts/evaluation.py index 430c13a11..a885285a3 100644 --- a/docs/tutorials/python/tutorial_scripts/evaluation.py +++ b/docs/tutorials/python/tutorial_scripts/evaluation.py @@ -2,6 +2,7 @@ Here is where you'll find the code for the Evaluation tutorial. """ +# --8<-- [start:create_and_update] from synapseclient import Synapse from synapseclient.models import Evaluation, Project @@ -44,6 +45,7 @@ print("Evaluation has been updated with the following name and description:") print(evaluation.name) print(evaluation.description) +# --8<-- [end:create_and_update] # Confirm what's in Synapse matches the evaluation stored from_synapse = Evaluation(id=evaluation.id).get() @@ -51,6 +53,7 @@ print("The following evaluation has been retrieved from Synapse:") print(from_synapse) +# --8<-- [start:update_acl] # Update the Evaluation's ACL on Synapse by adding a new user assert ( PRINCIPAL_ID is not None @@ -62,10 +65,14 @@ acl = evaluation.get_acl() print("The following ACL has been retrieved from Synapse:") print(acl) +# --8<-- [end:update_acl] +# --8<-- [start:remove_from_acl] # Now let's remove the user we just added from the Evaluation's ACL evaluation.update_acl(principal_id=PRINCIPAL_ID, access_type=[]) +# --8<-- [end:remove_from_acl] +# --8<-- [start:retrieve_and_delete] # Finally let's retrieve all Evaluations stored within this project, including the one we just created evaluations_list = Evaluation.get_evaluations_by_project(project_id) @@ -73,3 +80,4 @@ # for evaluation_to_delete in evaluations_list: # print(f"Deleting evaluation: {evaluation_to_delete.name}") # evaluation_to_delete.delete() +# --8<-- [end:retrieve_and_delete] diff --git a/docs/tutorials/python/tutorial_scripts/file.py b/docs/tutorials/python/tutorial_scripts/file.py index e881627b7..1ddfdffcb 100644 --- a/docs/tutorials/python/tutorial_scripts/file.py +++ b/docs/tutorials/python/tutorial_scripts/file.py @@ -2,6 +2,7 @@ Here is where you'll find the code for the File tutorial. """ +# --8<-- [start:retrieve_folder_ids] # Step 1: Upload several files to Synapse import os @@ -27,7 +28,9 @@ biospecimen_experiment_2_folder = Folder( parent_id=my_project.id, name="biospecimen_experiment_2" ).get() +# --8<-- [end:retrieve_folder_ids] +# --8<-- [start:create_file_objects] # Create a File object for each file I want to upload biospecimen_experiment_1_a_2022 = File( path=os.path.expanduser("~/my_ad_project/biospecimen_experiment_1/fileA.txt"), @@ -72,7 +75,9 @@ ), parent_id=batch_2_folder.id, ) +# --8<-- [end:create_file_objects] +# --8<-- [start:store_files] # Upload each file to Synapse biospecimen_experiment_1_a_2022.store() biospecimen_experiment_1_b_2022.store() @@ -82,7 +87,9 @@ batch_1_scrnaseq_file_2.store() batch_2_scrnaseq_file_1.store() batch_2_scrnaseq_file_2.store() +# --8<-- [end:store_files] +# --8<-- [start:print_attributes] # Step 2: Print stored attributes about your file batch_1_scrnaseq_file_1_id = batch_1_scrnaseq_file_1.id print(f"My file ID is: {batch_1_scrnaseq_file_1_id}") @@ -96,7 +103,9 @@ ) print(f"My file was last modified on: {batch_1_scrnaseq_file_1.modified_on}") +# --8<-- [end:print_attributes] +# --8<-- [start:walk_project] # Step 3: List all Folders and Files within my project my_project.sync_from_synapse(download_file=False) dir_mapping = my_project.map_directory_to_all_contained_files("./") @@ -104,3 +113,4 @@ print(f"Directory: {directory_name}") for file_entity in file_entities: print(f"\tFile: {file_entity.name}, ID: {file_entity.id}") +# --8<-- [end:walk_project] diff --git a/docs/tutorials/python/tutorial_scripts/folder.py b/docs/tutorials/python/tutorial_scripts/folder.py index 5eef14533..7d6c13d12 100644 --- a/docs/tutorials/python/tutorial_scripts/folder.py +++ b/docs/tutorials/python/tutorial_scripts/folder.py @@ -2,6 +2,7 @@ Here is where you'll find the code for the Folder tutorial. """ +# --8<-- [start:create_folder] # Step 1: Create a new folder import synapseclient from synapseclient.models import Folder, Project @@ -31,7 +32,9 @@ name="biospecimen_experiment_2", parent_id=my_project.id ) biospecimen_experiment_2_folder.store() +# --8<-- [end:create_folder] +# --8<-- [start:print_attributes] # Step 2: Print stored attributes about your folder my_scrnaseq_batch_1_folder_id = my_scrnaseq_batch_1_folder.id print(f"My folder ID is: {my_scrnaseq_batch_1_folder_id}") @@ -46,6 +49,9 @@ print(f"My folder was last modified on: {my_scrnaseq_batch_1_folder.modified_on}") +# --8<-- [end:print_attributes] + +# --8<-- [start:create_subfolders] # Step 3: Create 2 sub-folders hierarchical_root_folder = Folder(name="experiment_notes", parent_id=my_project.id) hierarchical_root_folder.store() @@ -55,3 +61,4 @@ folder_notes_2022 = Folder(name="notes_2022", parent_id=hierarchical_root_folder.id) folder_notes_2022.store() +# --8<-- [end:create_subfolders] diff --git a/docs/tutorials/python/tutorial_scripts/json_schema.py b/docs/tutorials/python/tutorial_scripts/json_schema.py index d05574f0b..e5bf16942 100644 --- a/docs/tutorials/python/tutorial_scripts/json_schema.py +++ b/docs/tutorials/python/tutorial_scripts/json_schema.py @@ -1,3 +1,4 @@ +# --8<-- [start:setup] import time from pprint import pprint @@ -8,8 +9,10 @@ # 1. Set up Synapse Python client syn = synapseclient.Synapse() syn.login() +# --8<-- [end:setup] # 2. Take a look at the constants and structure of the JSON schema +# --8<-- [start:constants_and_schema] # Replace your own project name here PROJECT_ENT = Project(name="My uniquely named project about Alzheimer's Disease").get() # Replace your own json schema organization name here @@ -41,8 +44,10 @@ }, }, } +# --8<-- [end:constants_and_schema] # 3. Try create test organization and json schema if they do not exist +# --8<-- [start:create_org_and_schema] organization = SchemaOrganization(name=ORG_NAME) try: organization.store() @@ -60,7 +65,9 @@ schema.store(schema_body=schema_body, version=VERSION) schema.get_body() +# --8<-- [end:create_org_and_schema] +# --8<-- [start:update_schema_version] # If you want to make an update, you can re-register your schema with the organization: updated_schema = { "$schema": "http://json-schema.org/draft-07/schema#", @@ -95,8 +102,10 @@ schema.store(schema_body=updated_schema, version=NEW_VERSION) schema.get_body(version=VERSION) +# --8<-- [end:update_schema_version] # 4. Bind the JSON schema to the folder +# --8<-- [start:bind_schema] # Create a test folder for JSON schema experiments test_folder = Folder(name="test_folder", parent_id=PROJECT_ENT.id).store() @@ -106,25 +115,33 @@ json_schema_version_info = bound_schema.json_schema_version_info syn.logger.info("JSON schema was bound successfully. Please see details below:") pprint(vars(json_schema_version_info)) +# --8<-- [end:bind_schema] +# --8<-- [start:retrieve_bound_schema] # 5. Retrieve the Bound Schema schema = test_folder.get_schema() syn.logger.info("JSON Schema was retrieved successfully. Please see details below:") pprint(vars(schema)) +# --8<-- [end:retrieve_bound_schema] +# --8<-- [start:add_invalid_annotations] # 6. Add Invalid Annotations to the Folder and Store test_folder.annotations = { "patient_id": "1234", "cognitive_score": "invalid str", } test_folder.store() +# --8<-- [end:add_invalid_annotations] time.sleep(2) +# --8<-- [start:validate_folder] validation_results = test_folder.validate_schema() syn.logger.info("Validation was completed. Please see details below:") +# --8<-- [end:validate_folder] pprint(vars(validation_results)) +# --8<-- [start:create_file_with_invalid_annotations] # 7. Create a File with Invalid Annotations and Upload It # Then, view validation statistics and invalid validation results path_to_file = make_bogus_data_file(n=5) @@ -132,16 +149,21 @@ annotations = {"patient_id": "123456", "cognitive_score": "invalid child str"} child_file = File(path=path_to_file, parent_id=test_folder.id, annotations=annotations) +# --8<-- [end:create_file_with_invalid_annotations] child_file = child_file.store() time.sleep(2) +# --8<-- [start:validation_statistics] validation_statistics = test_folder.get_schema_validation_statistics() syn.logger.info( "Validation statistics were retrieved successfully. Please see details below:" ) +# --8<-- [end:validation_statistics] pprint(vars(validation_statistics)) +# --8<-- [start:invalid_validation_details] invalid_validation = invalid_results = test_folder.get_invalid_validation() for child in invalid_validation: syn.logger.info("See details of validation results: ") + # --8<-- [end:invalid_validation_details] pprint(vars(child)) diff --git a/docs/tutorials/python/tutorial_scripts/materializedview.py b/docs/tutorials/python/tutorial_scripts/materializedview.py index 4ce1186a4..ba140f907 100644 --- a/docs/tutorials/python/tutorial_scripts/materializedview.py +++ b/docs/tutorials/python/tutorial_scripts/materializedview.py @@ -1,5 +1,6 @@ """Here is where you'll find the code for the MaterializedView tutorial.""" +# --8<-- [start:setup] import pandas as pd from synapseclient import Synapse @@ -70,8 +71,10 @@ ] ) table2.upsert_rows(values=data2, primary_keys=["sample_id"]) +# --8<-- [end:setup] +# --8<-- [start:basic_view] def create_materialized_view(): """ Example: Create a new materialized view with a defining SQL query. @@ -97,6 +100,10 @@ def create_materialized_view(): print(query_result) +# --8<-- [end:basic_view] + + +# --8<-- [start:join_view] def create_materialized_view_with_join(): """ Example: Create a materialized view with a JOIN clause. @@ -130,6 +137,10 @@ def create_materialized_view_with_join(): print(query_result) +# --8<-- [end:join_view] + + +# --8<-- [start:left_join_view] def create_materialized_view_with_left_join(): """ Example: Create a materialized view with a LEFT JOIN clause. @@ -163,6 +174,10 @@ def create_materialized_view_with_left_join(): print(query_result) +# --8<-- [end:left_join_view] + + +# --8<-- [start:right_join_view] def create_materialized_view_with_right_join(): """ Example: Create a materialized view with a RIGHT JOIN clause. @@ -196,6 +211,10 @@ def create_materialized_view_with_right_join(): print(query_result) +# --8<-- [end:right_join_view] + + +# --8<-- [start:union_view] def create_materialized_view_with_union(): """ Example: Create a materialized view with a UNION clause. @@ -229,6 +248,9 @@ def create_materialized_view_with_union(): print(query_result) +# --8<-- [end:union_view] + + def main(): create_materialized_view() create_materialized_view_with_join() diff --git a/docs/tutorials/python/tutorial_scripts/migration.py b/docs/tutorials/python/tutorial_scripts/migration.py new file mode 100644 index 000000000..e359e6724 --- /dev/null +++ b/docs/tutorials/python/tutorial_scripts/migration.py @@ -0,0 +1,37 @@ +"""Tutorial code for Index and migrate files to the new storage location""" + +# --8<-- [start:setup] +import synapseclient +from synapseclient.models import Folder, Project + +syn = synapseclient.login() +my_project = Project(name="My uniquely named project about Alzheimer's Disease").get() +# --8<-- [end:setup] +MY_S3_STORAGE_LOCATION_ID = "1234567890" +# --8<-- [start:index_and_migrate_files] +# WARNING: This will actually migrate files associated with the project/folder. +# Run against a test project first and review the index (MigrationResult) before +# migrating production data. +my_migration_folder = Folder( + name="my-data-migration-folder", parent_id=my_project.id +).get() +index_result = my_migration_folder.index_files_for_migration( + dest_storage_location_id=MY_S3_STORAGE_LOCATION_ID, + db_path="/path/to/your/migration.db", + include_table_files=False, # Set True if you also want table-attached files +) +index_result.as_csv("/path/to/your/index_results.csv") +print(f"Migration index database: {index_result.db_path}") +print(f"Indexed counts by status: {index_result.counts_by_status}") + +migrate_result = my_migration_folder.migrate_indexed_files( + db_path="/path/to/your/migration.db", + continue_on_error=True, + force=True, # Skip interactive confirmation for tutorial purposes +) +migrate_result.as_csv("/path/to/your/migrate_results.csv") +if migrate_result is not None: + print(f"Migrated counts by status: {migrate_result.counts_by_status}") +else: + print("Migration was aborted (confirmation declined).") +# --8<-- [end:migrate_indexed_files] diff --git a/docs/tutorials/python/tutorial_scripts/proxy_storage_location.py b/docs/tutorials/python/tutorial_scripts/proxy_storage_location.py new file mode 100644 index 000000000..f08343641 --- /dev/null +++ b/docs/tutorials/python/tutorial_scripts/proxy_storage_location.py @@ -0,0 +1,92 @@ +"""Tutorial code for creating a Proxy storage location and uploading a file via ProxyFileHandle.""" + +# --8<-- [start:setup] +import asyncio +import hashlib +import json +import os + +import synapseclient +from synapseclient.models import ( + File, + Folder, + Project, + StorageLocation, + StorageLocationType, +) + +syn = synapseclient.login() + +my_project = Project(name="My uniquely named project about Alzheimer's Disease").get() +# --8<-- [end:setup] + +# --8<-- [start:create_proxy_storage_location] +# Replace with your proxy server URL and provide the shared secret key via the +# MY_PROXY_SECRET_KEY environment variable. +MY_PROXY_URL = "https://my-proxy-server.example.com" +MY_PROXY_SECRET_KEY = os.environ.get("MY_PROXY_SECRET_KEY") + +# Replace with the path to a local file to register via the proxy +FILE_PATH = "/path/to/your/file.csv" + +# Use this when a proxy server controls access to the underlying storage. +my_proxy_folder = Folder(name="my-folder-for-proxy", parent_id=my_project.id) +my_proxy_folder = my_proxy_folder.store() + +proxy_storage = StorageLocation( + storage_type=StorageLocationType.PROXY, + proxy_url=MY_PROXY_URL, + secret_key=MY_PROXY_SECRET_KEY, + benefactor_id=my_project.id, + description="Proxy-controlled storage", +).store() + +print(f"Created proxy storage location: {proxy_storage.storage_location_id}") +print(f" Proxy URL: {proxy_storage.proxy_url}") +print(f" Benefactor ID: {proxy_storage.benefactor_id}") + +my_proxy_folder.set_storage_location( + storage_location_id=proxy_storage.storage_location_id +) +my_proxy_folder_storage_location = my_proxy_folder.get_project_setting() +# --8<-- [end:create_proxy_storage_location] + +# Register a file in the proxy storage location by creating a ProxyFileHandle via +# the REST API, then associate it with a Synapse File entity. +# --8<-- [start:create_proxy_file_handle] +with open(FILE_PATH, "rb") as f: + content_md5 = hashlib.md5(f.read(), usedforsecurity=False).hexdigest() +file_size = os.path.getsize(FILE_PATH) +file_name = os.path.basename(FILE_PATH) + + +async def create_proxy_file_handle() -> str: + proxy_file_handle = await syn.rest_post_async( + "/externalFileHandle/proxy", + body=json.dumps( + { + "concreteType": "org.sagebionetworks.repo.model.file.ProxyFileHandle", + "storageLocationId": proxy_storage.storage_location_id, + "filePath": file_name, + "fileName": file_name, + "contentType": "text/csv", + "contentMd5": content_md5, + "contentSize": file_size, + } + ), + endpoint=syn.fileHandleEndpoint, + ) + print(f"File handle ID: {proxy_file_handle['id']}") + return proxy_file_handle["id"] + + +proxy_file_handle_id = asyncio.run(create_proxy_file_handle()) +# --8<-- [end:create_proxy_file_handle] +# --8<-- [start:associate_proxy_file_handle] +proxy_file = File( + parent_id=my_proxy_folder.id, + name=file_name, + data_file_handle_id=proxy_file_handle_id, +).store() +print(f"Synapse entity: {proxy_file.id}") +# --8<-- [end:associate_proxy_file_handle] diff --git a/docs/tutorials/python/tutorial_scripts/sharing_settings.py b/docs/tutorials/python/tutorial_scripts/sharing_settings.py index e0e0b06cd..a3a8e117e 100644 --- a/docs/tutorials/python/tutorial_scripts/sharing_settings.py +++ b/docs/tutorials/python/tutorial_scripts/sharing_settings.py @@ -14,6 +14,7 @@ - Public access: 273949 """ +# --8<-- [start:setup_and_project] import synapseclient from synapseclient.models import Folder, Project @@ -32,7 +33,9 @@ # Step 1: Get or create a project for this tutorial print("=== Step 1: Getting project ===") project = Project(name="My uniquely named project about Alzheimer's Disease").get() +# --8<-- [end:setup_and_project] +# --8<-- [start:create_main_folder] # Step 2: Create a main folder to set custom sharing settings print("\n=== Step 2: Creating main to set custom permissions ===") main_folder = Folder( @@ -43,11 +46,15 @@ main_folder = main_folder.store() print(f"Created main folder: {main_folder.name} (ID: {main_folder.id})") +# --8<-- [end:create_main_folder] +# --8<-- [start:get_permissions] # Step 3: Demonstrate get_permissions() - Get current user's permissions print("\n=== Step 3: Getting current user's permissions ===") permissions = main_folder.get_permissions() print(f"Current user permissions on main folder: {permissions.access_types}") +# --8<-- [end:get_permissions] +# --8<-- [start:get_acl] # Step 4: Demonstrate get_acl() - Get ACL for specific principal print("\n=== Step 4: Getting ACL for specific principal ===") # First check what permissions the principal currently has (likely inherited from project) @@ -60,6 +67,8 @@ folder_only_acl = main_folder.get_acl(principal_id=PRINCIPAL_ID, check_benefactor=False) print(f"Principal {PRINCIPAL_ID} permissions on folder only: {folder_only_acl}") +# --8<-- [end:get_acl] +# --8<-- [start:set_permissions] # Step 5: Demonstrate set_permissions() - Set specific permissions for the main folder print("\n=== Step 5: Setting permissions on main folder ===") main_folder_permissions = ["READ", "DOWNLOAD"] @@ -77,6 +86,8 @@ new_acl = main_folder.get_acl(principal_id=PRINCIPAL_ID, check_benefactor=False) print(f"Verified new permissions: {new_acl}") +# --8<-- [end:set_permissions] +# --8<-- [start:create_sub_folder] # Step 6: Create a sub-folder with different sharing settings print("\n=== Step 6: Creating sub-folder with different permissions ===") sub_folder = Folder( @@ -99,6 +110,8 @@ f"Set more restrictive permissions for principal {PRINCIPAL_ID} on sub-folder: {sub_folder_permissions}" ) +# --8<-- [end:create_sub_folder] +# --8<-- [start:list_acl] # Step 7: Demonstrate list_acl() - List all ACLs print("\n=== Step 7: Listing ACLs ===") @@ -115,6 +128,8 @@ for acl_entry in entity_acl.acl_entries: print(f" Principal {acl_entry.principal_id}: {acl_entry.permissions}") +# --8<-- [end:list_acl] +# --8<-- [start:advanced_permissions] # Step 8: Demonstrate additional set_permissions() options print("\n=== Step 8: Demonstrating additional permission options ===") @@ -130,6 +145,8 @@ updated_acl = main_folder.get_acl(principal_id=PRINCIPAL_ID) print(f"Updated permissions after adding UPDATE: {updated_acl}") +# --8<-- [end:advanced_permissions] +# --8<-- [start:remove_permissions] # Step 9: Demonstrate permission removal using set_permissions with empty list print("\n=== Step 9: Removing specific permissions ===") print( @@ -145,7 +162,9 @@ removed_acl = main_folder.get_acl(principal_id=PRINCIPAL_ID) print(f"After removal - Principal {PRINCIPAL_ID} permissions: {removed_acl}") +# --8<-- [end:remove_permissions] +# --8<-- [start:delete_acls] # Step 10: Demonstrate delete_permissions() with dry run print("\n=== Step 10: Demonstrating delete_permissions() ===") @@ -165,8 +184,11 @@ inherited_acl = sub_folder.get_acl(principal_id=PRINCIPAL_ID) print(f"Sub-folder now inherits permissions: {inherited_acl}") +# --8<-- [end:delete_acls] +# --8<-- [start:final_overview] # Step 11: Final ACL overview print("\n=== Step 11: Final ACL overview ===") final_overview = main_folder.list_acl( recursive=True, include_container_content=True, log_tree=True ) +# --8<-- [end:final_overview] diff --git a/docs/tutorials/python/tutorial_scripts/storage_location.py b/docs/tutorials/python/tutorial_scripts/storage_location.py new file mode 100644 index 000000000..cb231fc62 --- /dev/null +++ b/docs/tutorials/python/tutorial_scripts/storage_location.py @@ -0,0 +1,219 @@ +""" +Tutorial code for the Storage Location and project settings. +""" + +# --8<-- [start:setup] +import os + +import synapseclient +from synapseclient.models import Folder, Project, StorageLocation, StorageLocationType + +syn = synapseclient.login() + +# Step 1: Retrieve the project +my_project = Project(name="My uniquely named project about Alzheimer's Disease").get() +# --8<-- [end:setup] + +# Step 2: Create an External S3 Storage Location that in the same region as the current storage location +# Replace with your S3 bucket name (must have owner.txt configured) +# --8<-- [start:create_s3_storage_location] +MY_BUCKET_NAME = "my-synapse-bucket" +MY_BASE_KEY = "synapse-data" + +external_s3_storage_location = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket=MY_BUCKET_NAME, + base_key=MY_BASE_KEY, + description="External S3 storage location", +).store() + +print(f"Created storage location: {external_s3_storage_location.storage_location_id}") +print(f"storage location type: {external_s3_storage_location.storage_type}") +# --8<-- [end:create_s3_storage_location] + +# Step 3. Create a Folder with the new storage location +# --8<-- [start:create_folder_with_s3_storage_location] +external_s3_folder = Folder(name="my-folder-for-external-s3", parent_id=my_project.id) +external_s3_folder = external_s3_folder.store() + +# Set the storage location for the folder +external_s3_folder.set_storage_location( + storage_location_id=external_s3_storage_location.storage_location_id +) +# --8<-- [end:create_folder_with_s3_storage_location] + +# Step 4: Create a Google Cloud Storage location +# --8<-- [start:create_gcs_storage_location] +MY_GCS_BUCKET = "my-gcs-bucket" +MY_GCS_BASE_KEY = "synapse-data" +gcs_storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + bucket=MY_GCS_BUCKET, + base_key=MY_GCS_BASE_KEY, + description="External Google Cloud Storage location", +).store() + +print(f"Created GCS storage location: {gcs_storage.storage_location_id}") +print(f"storage location type: {gcs_storage.storage_type}") + +gcs_folder = Folder(name="my-folder-for-gcs", parent_id=my_project.id) +gcs_folder = gcs_folder.store() + +# Set the storage location for the folder +gcs_folder.set_storage_location(storage_location_id=gcs_storage.storage_location_id) +# --8<-- [end:create_gcs_storage_location] + +# Step 5: Create an SFTP storage location +# --8<-- [start:create_sftp_storage_location] +MY_SFTP_URL = "sftp://your-sftp-server.example.com/upload" +sftp_storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_SFTP, + url=MY_SFTP_URL, + supports_subfolders=True, + description="External SFTP server", +).store() + +print(f"Created SFTP storage location: {sftp_storage.storage_location_id}") +print(f"storage location type: {sftp_storage.storage_type}") + +sftp_folder = Folder(name="my-folder-for-sftp", parent_id=my_project.id) +sftp_folder = sftp_folder.store() + +# Set the storage location for the folder +sftp_folder.set_storage_location(storage_location_id=sftp_storage.storage_location_id) +# --8<-- [end:create_sftp_storage_location] + +# Step 6: Create an HTTPS storage location +# EXTERNAL_HTTPS shares the same underlying API type as EXTERNAL_SFTP but is used +# when the external server is accessed over HTTPS rather than SFTP. +# --8<-- [start:create_https_storage_location] +MY_HTTPS_URL = "https://my-https-server.example.com" + +https_storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_HTTPS, + url=MY_HTTPS_URL, + description="External HTTPS server", +).store() + +print(f"Created HTTPS storage location: {https_storage.storage_location_id}") +print(f"storage location type: {https_storage.storage_type}") + +my_https_folder = Folder(name="my-folder-for-https", parent_id=my_project.id) +my_https_folder = my_https_folder.store() + +# Set the storage location for the folder +my_https_folder.set_storage_location( + storage_location_id=https_storage.storage_location_id +) +# --8<-- [end:create_https_storage_location] + +# Note: The Python client does not support uploading files directly to HTTPS +# storage locations. To add files, use the Synapse web UI or REST API directly. + +# Step 7: Create an External Object Store storage location +# Use this for S3-compatible stores not accessed by Synapse. +# --8<-- [start:create_object_store_storage_location] +MY_OBJECT_STORE_BUCKET = "test-external-object-store" +MY_OBJECT_STORE_ENDPOINT_URL = "https://s3.us-east-1.amazonaws.com" + +object_store_storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_OBJECT_STORE, + bucket=MY_OBJECT_STORE_BUCKET, + endpoint_url=MY_OBJECT_STORE_ENDPOINT_URL, + description="External S3-compatible object store", +).store() + +print(f"Created object store location: {object_store_storage.storage_location_id}") +print(f"storage location type: {object_store_storage.storage_type}") + +# create a folder with the object store storage location +object_store_folder = Folder(name="my-folder-for-object-store", parent_id=my_project.id) +object_store_folder = object_store_folder.store() + +# Set the storage location for the folder +object_store_folder.set_storage_location( + storage_location_id=object_store_storage.storage_location_id +) +# --8<-- [end:create_object_store_storage_location] + +# Step 8: Create a Proxy storage location +# Use this when a proxy server controls access to the underlying storage. +# --8<-- [start:create_proxy_storage_location] + +# Replace with your proxy server URL and provide the shared secret key via the +# MY_PROXY_SECRET_KEY environment variable. +MY_PROXY_URL = "https://my-proxy-server.example.com" +MY_PROXY_SECRET_KEY = os.environ.get("MY_PROXY_SECRET_KEY") +proxy_storage = StorageLocation( + storage_type=StorageLocationType.PROXY, + proxy_url=MY_PROXY_URL, + secret_key=MY_PROXY_SECRET_KEY, + benefactor_id=my_project.id, + description="Proxy-controlled storage", +).store() + +print(f"Created proxy storage location: {proxy_storage.storage_location_id}") +print(f" Proxy URL: {proxy_storage.proxy_url}") +print(f" Benefactor ID: {proxy_storage.benefactor_id}") + +my_proxy_folder = Folder(name="my-folder-for-proxy", parent_id=my_project.id) +my_proxy_folder = my_proxy_folder.store() + +# Set the storage location for the folder +my_proxy_folder.set_storage_location( + storage_location_id=proxy_storage.storage_location_id +) +# --8<-- [end:create_proxy_storage_location] + +# Step 9: Retrieve and inspect storage location settings +# Only fields that belong to the storage type are populated after retrieval. +# --8<-- [start:retrieve_storage_location] +retrieved_storage = StorageLocation( + storage_location_id=external_s3_storage_location.storage_location_id +).get() +print(f"Retrieved storage location ID: {retrieved_storage.storage_location_id}") +print(f"Storage type: {retrieved_storage.storage_type}") +print(f"Bucket: {retrieved_storage.bucket}") +print(f"Base key: {retrieved_storage.base_key}") +# --8<-- [end:retrieve_storage_location] + +# Step 10: Update a storage location + +# Storage locations are immutable in Synapse — individual fields cannot be edited +# after creation. To "update" a storage location, create a new one with the desired +# settings and reassign it to the folder or project. +# --8<-- [start:update_storage_location] +# Example: change the base key of the External S3 storage location used by +# external_s3_folder from MY_BASE_KEY to "synapse-data-v2". + +updated_s3_storage_location = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket=MY_BUCKET_NAME, + base_key="synapse-data-v2", + description="External S3 storage location (updated base key)", +).store() + +print(f"New storage location ID: {updated_s3_storage_location.storage_location_id}") + +# Reassign the folder to point at the new storage location +external_s3_folder.set_storage_location( + storage_location_id=updated_s3_storage_location.storage_location_id +) +updated_folder_setting = external_s3_folder.get_project_setting() + +print( + f"Folder now uses the updated storage location: {updated_s3_storage_location.storage_location_id}" +) + +# Step 10b: Partial update — add a storage location without removing existing ones +# +# `set_storage_location` is a destructive replacement. To append a new location +# while keeping the ones already configured, read the current ProjectSetting, +# append to its `locations` list, and call store() on the setting directly. + +setting = external_s3_folder.get_project_setting() +if setting is not None: + setting.locations.append(gcs_storage.storage_location_id) + setting.store() + print(f"Updated locations after partial update: {setting.locations}") +# --8<-- [end:update_storage_location] diff --git a/docs/tutorials/python/tutorial_scripts/submission_organizer.py b/docs/tutorials/python/tutorial_scripts/submission_organizer.py index 4fe67a466..44c9002c6 100644 --- a/docs/tutorials/python/tutorial_scripts/submission_organizer.py +++ b/docs/tutorials/python/tutorial_scripts/submission_organizer.py @@ -9,6 +9,8 @@ 5. Delete submissions """ +# --8<-- [start:setup] + from synapseclient import Synapse from synapseclient.models import SubmissionBundle, SubmissionStatus @@ -29,7 +31,9 @@ print(f"Working with Evaluation: {EVALUATION_ID}") print(f"Managing Submission: {SUBMISSION_ID}") +# --8<-- [end:setup] +# --8<-- [start:annotate_submission] # ============================================================================== # 1. Annotate a submission to score it # ============================================================================== @@ -58,7 +62,9 @@ print(f"Annotations added:") for key, value in updated_status.submission_annotations.items(): print(f" {key}: {value}") +# --8<-- [end:annotate_submission] +# --8<-- [start:batch_update] # ============================================================================== # 2. Batch-update submission statuses # ============================================================================== @@ -97,6 +103,9 @@ else: print("No submissions found with 'RECEIVED' status to update") +# --8<-- [end:batch_update] + +# --8<-- [start:fetch_bundle] # ============================================================================== # 3. Fetch the submission bundle for a given submission # ============================================================================== @@ -134,6 +143,9 @@ if key in ["accuracy", "f1_score", "precision", "recall"]: print(f" {key}: {value}") +# --8<-- [end:fetch_bundle] + +# --8<-- [start:allow_cancellation] # ============================================================================== # 4. Allow cancellation of submissions # ============================================================================== @@ -173,6 +185,9 @@ target_status = target_status.store() print(f"Cancellation enabled for submission {SUBMISSION_ID}") +# --8<-- [end:allow_cancellation] + +# --8<-- [start:delete_submissions] # ============================================================================== # 5. Delete submissions # ============================================================================== @@ -206,3 +221,4 @@ print(f"Uncomment the deletion code if you want to test this functionality.") print(f"\n=== Organizer tutorial completed! ===") +# --8<-- [end:delete_submissions] diff --git a/docs/tutorials/python/tutorial_scripts/submission_participant.py b/docs/tutorials/python/tutorial_scripts/submission_participant.py index f3195573b..1fa5e17b1 100644 --- a/docs/tutorials/python/tutorial_scripts/submission_participant.py +++ b/docs/tutorials/python/tutorial_scripts/submission_participant.py @@ -10,6 +10,7 @@ 6. Cancel your submission """ +# --8<-- [start:setup] from synapseclient import Synapse from synapseclient.models import Submission, SubmissionStatus @@ -28,7 +29,9 @@ print(f"Working with Evaluation: {EVALUATION_ID}") print(f"Submitting Entity: {ENTITY_ID}") +# --8<-- [end:setup] +# --8<-- [start:make_submission] # ============================================================================== # 1. Make a submission to an existing evaluation queue on Synapse # ============================================================================== @@ -52,7 +55,9 @@ # Store the submission ID for later use submission_id = submission.id +# --8<-- [end:make_submission] +# --8<-- [start:fetch_submission] # ============================================================================== # 2. Fetch your existing submission # ============================================================================== @@ -69,6 +74,9 @@ print(f" Submitter: {retrieved_submission.submitter_alias}") print(f" Created On: {retrieved_submission.created_on}") +# --8<-- [end:fetch_submission] + +# --8<-- [start:count_submissions] # ============================================================================== # 3. Count your submissions # ============================================================================== @@ -79,8 +87,10 @@ submission_count = Submission.get_submission_count(evaluation_id=EVALUATION_ID) print(f"Total submissions in evaluation: {submission_count}") +# --8<-- [end:count_submissions] +# --8<-- [start:fetch_all_submissions] # ============================================================================== # 4. Fetch all of your submissions from an existing evaluation queue # ============================================================================== @@ -93,7 +103,9 @@ print(f"Found {len(user_submissions)} submissions from the current user:") for i, sub in enumerate(user_submissions, 1): print(f" {i}. ID: {sub.id}, Name: {sub.name}, Created: {sub.created_on}") +# --8<-- [end:fetch_all_submissions] +# --8<-- [start:check_status] # ============================================================================== # 5. Check the status of your submission # ============================================================================== @@ -117,6 +129,9 @@ else: print(f" No submission annotations available") +# --8<-- [end:check_status] + +# --8<-- [start:cancel_submission] # ============================================================================== # 6. Cancel your submission (optional) # ============================================================================== @@ -139,3 +154,4 @@ print(f"\n=== Tutorial completed! ===") print(f"Your submission ID {submission_id} is ready for evaluation.") print(f"Check back later to see if the organizers have scored your submission.") +# --8<-- [end:cancel_submission] diff --git a/docs/tutorials/python/tutorial_scripts/submissionview.py b/docs/tutorials/python/tutorial_scripts/submissionview.py index 0b42c8096..40533dd55 100644 --- a/docs/tutorials/python/tutorial_scripts/submissionview.py +++ b/docs/tutorials/python/tutorial_scripts/submissionview.py @@ -11,6 +11,8 @@ """ +# --8<-- [start:setup_and_evaluation] + import tempfile import pandas as pd @@ -42,7 +44,9 @@ ) evaluation = syn.store(evaluation) print(f"Created evaluation queue with ID: {evaluation.id}") +# --8<-- [end:setup_and_evaluation] +# --8<-- [start:create_submissionview] # Step 2: Create a SubmissionView for the evaluation queue view = SubmissionView( name="SubmissionView for Alzheimer conference", @@ -80,7 +84,9 @@ view.store() print("Available columns in the view:", list(view.columns.keys())) +# --8<-- [end:create_submissionview] +# --8<-- [start:submit_file] # Step 3: Create and submit a file to the evaluation queue with tempfile.NamedTemporaryFile( mode="w", suffix=".txt", delete=True, delete_on_close=False @@ -103,7 +109,9 @@ ) print(f"Created submission with ID: {submission.id}") +# --8<-- [end:submit_file] +# --8<-- [start:query_and_update] # Step 4: Query and update the submission status # Query the SubmissionView to see our submission query = f"SELECT * FROM {view.id} WHERE id = '{submission.id}'" @@ -124,7 +132,9 @@ submission_status = syn.store(submission_status) print(f"Updated submission status to: {submission_status.status}") +# --8<-- [end:query_and_update] # Step 5: Modify the SubmissionView scope +# --8<-- [start:modify_scope] # First let's make sure we have the latest view from Synapse: view.get() @@ -141,7 +151,9 @@ view.scope_ids.append(second_evaluation.id) view.store() # Store the updated view print("Updated SubmissionView scope. Current scope IDs:", view.scope_ids) +# --8<-- [end:modify_scope] +# --8<-- [start:create_snapshot] # Step 6: Create a snapshot of the view snapshot_info = view.snapshot( comment="Initial submission review snapshot", @@ -150,6 +162,8 @@ print(snapshot_info) snapshot_version = snapshot_info.snapshot_version_number print(f"Snapshot version number: {snapshot_version}") +# --8<-- [end:create_snapshot] +# --8<-- [start:query_snapshot] # Step 7: Query the snapshot we just created # You may also get the snapshot version from the view object directly by looking at the version number @@ -160,3 +174,4 @@ snapshot_results = view.query(snapshot_query) print("Query results from the snapshot:") print(snapshot_results) +# --8<-- [end:query_snapshot] diff --git a/docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py b/docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py index 56cbe91ef..7d7d6f45d 100644 --- a/docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py +++ b/docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py @@ -2,62 +2,59 @@ Here is where you'll find the code for the uploading data in bulk tutorial. """ -import os +# --8<-- [start:imports_and_constants] +import pandas as pd import synapseclient -import synapseutils +from synapseclient.models import Project syn = synapseclient.Synapse() syn.login() -# Create some constants to store the paths to the data -DIRECTORY_FOR_MY_PROJECT = os.path.expanduser(os.path.join("~", "my_ad_project")) -PATH_TO_MANIFEST_FILE = os.path.expanduser(os.path.join("~", "manifest-for-upload.tsv")) - -# Step 1: Let's find the synapse ID of our project: -my_project_id = syn.findEntityId( - name="My uniquely named project about Alzheimer's Disease" -) +# Step 1: Create some constants to store the paths to the data +DIRECTORY_FOR_MY_PROJECT = "test_folder" # This should exist with your files in it +PATH_TO_MANIFEST_FILE = "test_manifest.csv" # This doesn't need to exist yet +SYNAPSE_PROJECT_ID = "" # Put your Synapse project ID here. This is the project where you want to upload your data. +project = Project(id=SYNAPSE_PROJECT_ID) +# --8<-- [end:imports_and_constants] -# Step 2: Create a manifest TSV file to upload data in bulk +# --8<-- [start:generate_manifest] +# Step 2: Create a manifest CSV file with the paths to the files and their parent folders # Note: When this command is run it will re-create your directory structure within # Synapse. Be aware of this before running this command. # If folders with the exact names already exists in Synapse, those folders will be used. -synapseutils.generate_sync_manifest( - syn=syn, +project.generate_sync_manifest( directory_path=DIRECTORY_FOR_MY_PROJECT, - parent_id=my_project_id, manifest_path=PATH_TO_MANIFEST_FILE, ) +# --8<-- [end:generate_manifest] +# --8<-- [start:sync_to_synapse] # Step 3: After generating the manifest file, we can upload the data in bulk -synapseutils.syncToSynapse( - syn=syn, manifestFile=PATH_TO_MANIFEST_FILE, sendMessages=False -) +project.sync_to_synapse(manifest_path=PATH_TO_MANIFEST_FILE, send_messages=False) +# --8<-- [end:sync_to_synapse] +# --8<-- [start:add_annotation] # Step 4: Let's add an annotation to our manifest file # Pandas is a powerful data manipulation library in Python, although it is not required # for this tutorial, it is used here to demonstrate how you can manipulate the manifest # file before uploading it to Synapse. -import pandas as pd -# Read TSV file into a pandas DataFrame -df = pd.read_csv(PATH_TO_MANIFEST_FILE, sep="\t") +# Read CSV file into a pandas DataFrame +df = pd.read_csv(PATH_TO_MANIFEST_FILE) # Add a new column to the DataFrame df["species"] = "Homo sapiens" # Write the DataFrame back to the manifest file -df.to_csv(PATH_TO_MANIFEST_FILE, sep="\t", index=False) +df.to_csv(PATH_TO_MANIFEST_FILE, index=False) -synapseutils.syncToSynapse( - syn=syn, - manifestFile=PATH_TO_MANIFEST_FILE, - sendMessages=False, -) +project.sync_to_synapse(manifest_path=PATH_TO_MANIFEST_FILE, send_messages=False) +# --8<-- [end:add_annotation] +# --8<-- [start:add_provenance] # Step 5: Let's create an Activity/Provenance -# First let's find the row in the TSV we want to update. This code finds the row number +# First let's find the row in the CSV we want to update. This code finds the row number # that we would like to update. row_index = df[ df["path"] == f"{DIRECTORY_FOR_MY_PROJECT}/biospecimen_experiment_1/fileA.txt" @@ -66,9 +63,9 @@ # After finding the row we want to update let's go ahead and add a relationship to # another file in our manifest. This allows us to say "We used 'this' file in some way". -df.loc[ - row_index, "used" -] = f"{DIRECTORY_FOR_MY_PROJECT}/single_cell_RNAseq_batch_1/SRR12345678_R1.fastq.gz" +df.loc[row_index, "used"] = ( + f"{DIRECTORY_FOR_MY_PROJECT}/single_cell_RNAseq_batch_1/SRR12345678_R1.fastq.gz" +) # Let's also link to the pipeline that we ran in order to produce these results. In a # real scenario you may want to link to a specific run of the tool where the results @@ -76,15 +73,12 @@ df.loc[row_index, "executed"] = "https://nf-co.re/rnaseq/3.14.0" # Let's also add a description for this Activity/Provenance -df.loc[ - row_index, "activityDescription" -] = "Experiment results created as a result of the linked data while running the pipeline." +df.loc[row_index, "activityDescription"] = ( + "Experiment results created as a result of the linked data while running the pipeline." +) # Write the DataFrame back to the manifest file -df.to_csv(PATH_TO_MANIFEST_FILE, sep="\t", index=False) +df.to_csv(PATH_TO_MANIFEST_FILE, index=False) -synapseutils.syncToSynapse( - syn=syn, - manifestFile=PATH_TO_MANIFEST_FILE, - sendMessages=False, -) +project.sync_to_synapse(manifest_path=PATH_TO_MANIFEST_FILE, send_messages=False) +# --8<-- [end:add_provenance] diff --git a/docs/tutorials/python/tutorial_scripts/virtualtable.py b/docs/tutorials/python/tutorial_scripts/virtualtable.py index 123a203ff..9c6f96708 100644 --- a/docs/tutorials/python/tutorial_scripts/virtualtable.py +++ b/docs/tutorials/python/tutorial_scripts/virtualtable.py @@ -1,5 +1,6 @@ """Here is where you'll find the code for the VirtualTable tutorial.""" +# --8<-- [start:setup] import pandas as pd from synapseclient import Synapse @@ -70,10 +71,12 @@ ] ) table2.upsert_rows(values=data2, primary_keys=["sample_id"]) +# --8<-- [end:setup] # Note: VirtualTables do not support JOIN or UNION operations in the defining_sql. # If you need to combine data from multiple tables, consider using a MaterializedView instead. +# --8<-- [start:basic_view] def create_basic_virtual_table(): """ Example: Create a basic virtual table with a simple SELECT query. @@ -99,6 +102,10 @@ def create_basic_virtual_table(): print(query_result) +# --8<-- [end:basic_view] + + +# --8<-- [start:column_selection] def create_virtual_table_with_column_selection(): """ Example: Create a virtual table that selects only specific columns. @@ -124,6 +131,10 @@ def create_virtual_table_with_column_selection(): print(query_result) +# --8<-- [end:column_selection] + + +# --8<-- [start:filtering] def create_virtual_table_with_filtering(): """ Example: Create a virtual table with a WHERE clause for filtering. @@ -149,6 +160,10 @@ def create_virtual_table_with_filtering(): print(query_result) +# --8<-- [end:filtering] + + +# --8<-- [start:ordering] def create_virtual_table_with_ordering(): """ Example: Create a virtual table with an ORDER BY clause. @@ -174,6 +189,10 @@ def create_virtual_table_with_ordering(): print(query_result) +# --8<-- [end:ordering] + + +# --8<-- [start:aggregation] def create_virtual_table_with_aggregation(): """ Example: Create a virtual table with an aggregate function. @@ -199,6 +218,9 @@ def create_virtual_table_with_aggregation(): print(query_result) +# --8<-- [end:aggregation] + + def main(): create_basic_virtual_table() create_virtual_table_with_column_selection() diff --git a/docs/tutorials/python/tutorial_scripts/wiki.py b/docs/tutorials/python/tutorial_scripts/wiki.py index 41c49ce36..93b59fd13 100644 --- a/docs/tutorials/python/tutorial_scripts/wiki.py +++ b/docs/tutorials/python/tutorial_scripts/wiki.py @@ -12,6 +12,8 @@ 7. Delete wiki pages """ + +# --8<-- [start:setup_and_imports] import os from synapseclient import Synapse @@ -28,7 +30,9 @@ # Get the project project = Project(name="My uniquely named project about Alzheimer's Disease").get() +# --8<-- [end:setup_and_imports] +# --8<-- [start:create_root_wiki_plain_text] # Section1: Create, read, and update wiki pages # Create a new wiki page for the project with plain text markdown root_wiki_page = WikiPage( @@ -36,7 +40,9 @@ title="My Root Wiki Page", markdown="# Welcome to My Root Wiki\n\nThis is a sample root wiki page created with the Synapse client.", ).store() +# --8<-- [end:create_root_wiki_plain_text] +# --8<-- [start:create_root_wiki_from_file] # OR you can create a wiki page with an existing markdown file. More instructions can be found in section 2. markdown_file_path = "path/to/your_markdown_file.md" root_wiki_page = WikiPage( @@ -44,7 +50,9 @@ title="My First Root Wiki Page Version with existing markdown file", markdown=markdown_file_path, ).store() +# --8<-- [end:create_root_wiki_from_file] +# --8<-- [start:update_wiki_page] # Update the wiki page root_wiki_page_new = WikiPage( owner_id=project.id, @@ -52,12 +60,16 @@ markdown="# Welcome to My Root Wiki NEW\n\nThis is a sample root wiki page created with the Synapse client.", id=root_wiki_page.id, ).store() +# --8<-- [end:update_wiki_page] +# --8<-- [start:restore_wiki_page] # Restore the wiki page to the original version wiki_page_restored = WikiPage( owner_id=project.id, id=root_wiki_page.id, wiki_version="0" ).restore() +# --8<-- [end:restore_wiki_page] +# --8<-- [start:verify_restore] # check if the content is restored assert ( root_wiki_page.markdown_file_handle_id == wiki_page_restored.markdown_file_handle_id @@ -68,15 +80,22 @@ assert ( root_wiki_page.title == wiki_page_restored.title ), "Wiki page title does not match after restore" +# --8<-- [end:verify_restore] # Get the wiki page + +# --8<-- [start:get_wiki_by_id] # Once you know the Wiki page id, you can retrieve the Wiki page with the id retrieved_wiki = WikiPage(owner_id=project.id, id=root_wiki_page.id).get() +# --8<-- [end:get_wiki_by_id] # Or you can retrieve the Wiki page with the title +# --8<-- [start:get_wiki_by_title] retrieved_wiki = WikiPage(owner_id=project.id, title=root_wiki_page.title).get() +# --8<-- [end:get_wiki_by_title] # Check if the retrieved Wiki page is the same as the original Wiki page +# --8<-- [start:verify_retrieved_wiki] assert ( root_wiki_page.markdown_file_handle_id == retrieved_wiki.markdown_file_handle_id ), "Markdown file handle ID does not match retrieved wiki page" @@ -86,15 +105,19 @@ assert ( root_wiki_page.title == retrieved_wiki.title ), "Wiki page title does not match retrieved wiki page" +# --8<-- [end:verify_retrieved_wiki] # Create a sub-wiki page +# --8<-- [start:create_sub_wiki] sub_wiki_1 = WikiPage( owner_id=project.id, title="Sub Wiki Page 1", parent_id=root_wiki_page.id, markdown="# Sub Page 1\n\nThis is a sub-page of another wiki.", ).store() +# --8<-- [end:create_sub_wiki] +# --8<-- [start:create_wiki_from_markdown_string] # Section 2: WikiPage Markdown Operations # Create wiki page from markdown text markdown_content = """# Sample Markdown Content @@ -119,8 +142,10 @@ title="Sub Page 2 created from markdown text", markdown=markdown_content, ).store() +# --8<-- [end:create_wiki_from_markdown_string] +# --8<-- [start:create_wiki_from_markdown_file] # Create a wiki page from a markdown file markdown_file_path = "~/temp/temp_markdown_file.md.gz" @@ -131,7 +156,9 @@ title="Sub Page 3 created from markdown file", markdown=markdown_file_path, ).store() +# --8<-- [end:create_wiki_from_markdown_file] +# --8<-- [start:get_markdown_file_url] # Download the markdown file # Note: If the markdown is generated from plain text using the client, the downloaded file will be named wiki_markdown_.md.gz. If it is generated from an existing markdown file, the downloaded file will retain the original filename. # Download the markdown file for sub_wiki_2 that is created from markdown text @@ -141,17 +168,23 @@ ).get_markdown_file( download_file=False, ) +# --8<-- [end:get_markdown_file_url] +# --8<-- [start:download_markdown_from_text] # Download the markdown file for sub_wiki_2 that is created from markdown text wiki_page_markdown_2 = WikiPage( owner_id=project.id, id=sub_wiki_2.id ).get_markdown_file(download_file=True, download_location=".") +# --8<-- [end:download_markdown_from_text] +# --8<-- [start:download_markdown_from_file] # Download the markdown file for sub_wiki_3 that is created from a markdown file wiki_page_markdown_3 = WikiPage( owner_id=project.id, id=sub_wiki_3.id ).get_markdown_file(download_file=True, download_location=".") +# --8<-- [end:download_markdown_from_file] +# --8<-- [start:create_wiki_with_attachment] # Section 3: WikiPage with Attachments # Create a temporary file for the attachment attachment_file_name = "path/to/temp_attachment.txt" @@ -167,7 +200,9 @@ markdown=f"# Sub Page 4 with Attachments\n\nThis is a attachment: ${{previewattachment?fileName={attachment_file_name_reformatted}}}", attachments=[attachment_file_name], ).store() +# --8<-- [end:create_wiki_with_attachment] +# --8<-- [start:create_wiki_with_image] # Inlucde images in the markdown file image_file_path = "path/to/test_image.png" # use the original file name instead of the gzipped file name for images @@ -180,12 +215,16 @@ markdown=markdown_content, attachments=[image_file_path], ).store() +# --8<-- [end:create_wiki_with_image] +# --8<-- [start:get_attachment_handles] # Get attachment handles attachment_handles = WikiPage( owner_id=project.id, id=sub_wiki_4.id ).get_attachment_handles() +# --8<-- [end:get_attachment_handles] +# --8<-- [start:get_attachment_url] # Get attachment URL without downloading wiki_page_attachment_url = WikiPage( owner_id=project.id, id=sub_wiki_4.id @@ -193,7 +232,9 @@ file_name=os.path.basename(attachment_file_name), download_file=False, ) +# --8<-- [end:get_attachment_url] +# --8<-- [start:download_attachment] # Download an attachment wiki_page_attachment = WikiPage(owner_id=project.id, id=sub_wiki_4.id).get_attachment( file_name=os.path.basename(attachment_file_name), @@ -202,8 +243,11 @@ ) # Unzip the attachment file unzipped_attachment_file_path = WikiPage.unzip_gzipped_file(wiki_page_attachment) +# --8<-- [end:download_attachment] # Download an attachment preview. Instead of using the file_name from the attachmenthandle response when isPreview=True, you should use the original file name in the get_attachment_preview request. The downloaded file will still be named according to the file_name provided in the response when isPreview=True. + +# --8<-- [start:get_attachment_preview_url] # Get attachment preview URL without downloading attachment_preview_url = WikiPage( owner_id=project.id, id=sub_wiki_4.id @@ -211,7 +255,9 @@ file_name=os.path.basename(attachment_file_name), download_file=False, ) +# --8<-- [end:get_attachment_preview_url] +# --8<-- [start:download_attachment_preview] # Download an attachment preview attachment_preview = WikiPage( owner_id=project.id, id=sub_wiki_4.id @@ -220,18 +266,27 @@ download_file=True, download_location=".", ) +# --8<-- [end:download_attachment_preview] +# --8<-- [start:get_wiki_header_tree] # Section 4: WikiHeader - Working with Wiki Hierarchy # Get wiki header tree (hierarchy) headers = WikiHeader.get(owner_id=project.id) +# --8<-- [end:get_wiki_header_tree] +# --8<-- [start:get_wiki_history] # Section 5. WikiHistorySnapshot - Version History # Get wiki history for root_wiki_page history = WikiHistorySnapshot.get(owner_id=project.id, id=root_wiki_page.id) +# --8<-- [end:get_wiki_history] +# --8<-- [start:get_order_hint] # Section 6. WikiOrderHint - Ordering Wiki Pages # Set the wiki order hint order_hint = WikiOrderHint(owner_id=project.id).get() +# --8<-- [end:get_order_hint] + +# --8<-- [start:set_order_hint] # As you can see from the printed message, the order hint is not set by default, so you need to set it explicitly at the beginning. order_hint.id_list = [ root_wiki_page.id, @@ -242,7 +297,9 @@ sub_wiki_5.id, ] order_hint.store() +# --8<-- [end:set_order_hint] +# --8<-- [start:update_order_hint] # Update wiki order hint order_hint = WikiOrderHint(owner_id=project.id).get() order_hint.id_list = [ @@ -254,7 +311,9 @@ sub_wiki_5.id, ] order_hint.store() +# --8<-- [end:update_order_hint] +# --8<-- [start:delete_wiki_page] # Delete a wiki page sub_wiki_6 = WikiPage( owner_id=project.id, @@ -263,3 +322,4 @@ markdown=f"# Sub Page 6 to be deleted\n\nThis is a sub page to be deleted.", ).store() wiki_page_to_delete = WikiPage(owner_id=project.id, id=sub_wiki_6.id).delete() +# --8<-- [end:delete_wiki_page] diff --git a/docs/tutorials/python/upload_data_in_bulk.md b/docs/tutorials/python/upload_data_in_bulk.md index 01de64dde..b017163b1 100644 --- a/docs/tutorials/python/upload_data_in_bulk.md +++ b/docs/tutorials/python/upload_data_in_bulk.md @@ -1,4 +1,5 @@ # Uploading data in bulk + This tutorial will follow a [Flattened Data Layout](../../explanations/structuring_your_project.md#flattened-data-layout-example). With a project that has this example layout: @@ -19,21 +20,29 @@ With a project that has this example layout: ``` ## Tutorial Purpose + In this tutorial you will: -1. Find the synapse ID of your project -1. Create a manifest TSV file to upload data in bulk +1. Set up constants for your project +1. Create a manifest CSV file to upload data in bulk 1. Upload all of the files for our project 1. Add an annotation to all of our files 1. Add a provenance/activity record to one of our files +!!! tip "Preferred API" + The recommended way to upload files in bulk is + [`Project.sync_to_synapse`][synapseclient.models.mixins.StorableContainer.sync_to_synapse] + (or `Folder.sync_to_synapse`). + The legacy `synapseutils.syncToSynapse` is deprecated and will be removed in v5.0.0. + !!! warning "Uploading Very Large Files" - The bulk upload approach using `synapseutils.syncToSynapse()` is optimized for uploading many files efficiently. However, if you are uploading very large files (>100 GiB each), consider using **sequential uploads with async API** instead. + The bulk upload approach using `Project.sync_to_synapse()` is optimized for uploading many files efficiently. However, if you are uploading very large files (>100 GiB each), consider using **sequential uploads with async API** instead. For very large file uploads, see the `execute_walk_file_sequential()` function in [uploadBenchmark.py](https://github.com/Sage-Bionetworks/synapsePythonClient/blob/develop/docs/scripts/uploadBenchmark.py#L286) as a reference implementation. This approach uses `asyncio.run(file.store_async())` with the newer async API, which has been optimized for handling very large files efficiently. In benchmarks, this pattern successfully uploaded 45 files of 100 GB each (4.5 TB total) in approximately 20.6 hours. ## Prerequisites + * Make sure that you have completed the following tutorials: * [Project](./project.md) * This tutorial is setup to upload the data from `~/my_ad_project`, make sure that this or @@ -41,58 +50,59 @@ another desired directory exists. * Pandas is used in this tutorial. Refer to our [installation guide](../installation.md#pypi) to install it. Feel free to skip this portion of the tutorial if you do not wish to use Pandas. You may also use external -tools to open and manipulate Tab Separated Value (TSV) files. +tools to open and manipulate CSV files. -## 1. Find the synapse ID of your project +## 1. Set up constants -First let's set up some constants we'll use in this script, and find the ID of our project +First let's set up some constants we'll use in this script ```python -{!docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py!lines=5-20} +--8<-- "docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py:imports_and_constants" ``` -## 2. Create a manifest TSV file to upload data in bulk +## 2. Create a manifest CSV file to upload data in bulk + +We call `Project.generate_sync_manifest` on the project we want to mirror into. +It walks our local directory and produces a CSV manifest that maps each file to +the correct parent folder in Synapse (creating folders as needed). The output +is ready to hand directly to `sync_to_synapse`. -Let's "walk" our directory on disk to create a manifest file for upload ```python -{!docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py!lines=21-31} +--8<-- "docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py:generate_manifest" ```
- After this has been run if you inspect the TSV file created you'll see it will look + After this has been run if you inspect the CSV file created you'll see it will look similar to this: ``` -path parent -/home/user_name/my_ad_project/single_cell_RNAseq_batch_2/SRR12345678_R2.fastq.gz syn60109537 -/home/user_name/my_ad_project/single_cell_RNAseq_batch_2/SRR12345678_R1.fastq.gz syn60109537 -/home/user_name/my_ad_project/biospecimen_experiment_2/fileD.txt syn60109543 -/home/user_name/my_ad_project/biospecimen_experiment_2/fileC.txt syn60109543 -/home/user_name/my_ad_project/single_cell_RNAseq_batch_1/SRR12345678_R2.fastq.gz syn60109534 -/home/user_name/my_ad_project/single_cell_RNAseq_batch_1/SRR12345678_R1.fastq.gz syn60109534 -/home/user_name/my_ad_project/biospecimen_experiment_1/fileA.txt syn60109540 -/home/user_name/my_ad_project/biospecimen_experiment_1/fileB.txt syn60109540 +path,parentId +/home/user_name/my_ad_project/single_cell_RNAseq_batch_2/SRR12345678_R2.fastq.gz,syn60109537 +/home/user_name/my_ad_project/single_cell_RNAseq_batch_2/SRR12345678_R1.fastq.gz,syn60109537 +/home/user_name/my_ad_project/biospecimen_experiment_2/fileD.txt,syn60109543 +/home/user_name/my_ad_project/biospecimen_experiment_2/fileC.txt,syn60109543 +/home/user_name/my_ad_project/single_cell_RNAseq_batch_1/SRR12345678_R2.fastq.gz,syn60109534 +/home/user_name/my_ad_project/single_cell_RNAseq_batch_1/SRR12345678_R1.fastq.gz,syn60109534 +/home/user_name/my_ad_project/biospecimen_experiment_1/fileA.txt,syn60109540 +/home/user_name/my_ad_project/biospecimen_experiment_1/fileB.txt,syn60109540 ```
## 3. Upload the data in bulk ```python -{!docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py!lines=32-36} +--8<-- "docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py:sync_to_synapse" ```
While this is running you'll see output in your console similar to: ``` -Validation and upload of: /home/user_name/manifest-for-upload.tsv -Validating columns of manifest.....OK -Validating that all paths exist...........OK -Validating that all files are unique...OK -Validating that all the files are not empty...OK +Validating manifest: /home/user_name/manifest-for-upload.csv +Validating that all paths exist... +Validating that all files are unique... +Validating that all the files are not empty... Validating file names... -OK -Validating provenance...OK -Validating that parents exist and are containers...OK -We are about to upload 8 files with a total size of 8. +Validating provenance and parent containers... +About to upload 8 files with a total size of 8 bytes. Uploading 8 files: 100%|███████████████████| 8.00/8.00 [00:01<00:00, 6.09B/s] ```
@@ -100,12 +110,12 @@ Uploading 8 files: 100%|██████████████████ ## 4. Add an annotation to our manifest file -At this point in the tutorial we will start to use pandas to manipulate a TSV file. If +At this point in the tutorial we will use pandas to manipulate the CSV manifest. If you are not comfortable with pandas you may use any tool that can open and manipulate -TSV such as excel or google sheets. +CSV files such as Excel or Google Sheets. ```python -{!docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py!lines=37-57} +--8<-- "docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py:add_annotation" ``` Now that you have uploaded and annotated your files you'll be able to inspect your data @@ -120,14 +130,14 @@ Let's create an [Activity/Provenance](../../explanations/domain_models_of_synaps record for one of our files. In otherwords, we will record the steps taken to generate the file. -In this code we are finding a row in our TSV file and pointing to the file path of +In this code we are finding a row in our CSV file and pointing to the file path of another file within our manifest. By doing this we are creating a relationship between the two files. This is a simple example of how you can create a provenance record in Synapse. Additionally we'll link off to a sample URL that describes a process that we may have executed to generate the file. ```python -{!docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py!lines=58-90} +--8<-- "docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py:add_provenance" ``` After running this code we may again inspect the synapse web UI. In this screenshot i've @@ -146,14 +156,16 @@ navigated to the Files tab and selected the file that we added a Provenance reco Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/upload_data_in_bulk.py" ```
## References used in this tutorial - [syn.login][synapseclient.Synapse.login] -- [syn.findEntityId][synapseclient.Synapse.findEntityId] -- [synapseutils.generate_sync_manifest][] -- [synapseutils.syncToSynapse][] +- [Project.generate_sync_manifest][synapseclient.models.Project.generate_sync_manifest] +- [Project.sync_to_synapse][synapseclient.models.Project.sync_to_synapse] +- [Folder.generate_sync_manifest][synapseclient.models.Folder.generate_sync_manifest] +- [Folder.sync_to_synapse][synapseclient.models.Folder.sync_to_synapse] +- [Manifest CSV format](../../explanations/manifest_csv.md) - [Activity/Provenance](../../explanations/domain_models_of_synapse.md#activityprovenance) diff --git a/docs/tutorials/python/virtualtable.md b/docs/tutorials/python/virtualtable.md index e04ca13ba..3ee691340 100644 --- a/docs/tutorials/python/virtualtable.md +++ b/docs/tutorials/python/virtualtable.md @@ -31,7 +31,7 @@ You will want to replace `"My uniquely named project about Alzheimer's Disease"` the name of your project. ```python -{!docs/tutorials/python/tutorial_scripts/virtualtable.py!lines=3-72} +--8<-- "docs/tutorials/python/tutorial_scripts/virtualtable.py:setup" ``` **Note**: Virtual Tables do not support JOIN or UNION operations in the defining SQL query. @@ -44,7 +44,7 @@ First, we will create a simple Virtual Table that selects all rows from a table then query it to retrieve the results. ```python -{!docs/tutorials/python/tutorial_scripts/virtualtable.py!lines=77-99} +--8<-- "docs/tutorials/python/tutorial_scripts/virtualtable.py:basic_view" ```
@@ -66,7 +66,7 @@ Results from the basic virtual table: Next, we'll create a Virtual Table that selects only specific columns from the source table. ```python -{!docs/tutorials/python/tutorial_scripts/virtualtable.py!lines=102-124} +--8<-- "docs/tutorials/python/tutorial_scripts/virtualtable.py:column_selection" ```
@@ -88,7 +88,7 @@ Results from the virtual table with column selection: We can create a Virtual Table that filters rows from the source table using a WHERE clause. ```python -{!docs/tutorials/python/tutorial_scripts/virtualtable.py!lines=127-149} +--8<-- "docs/tutorials/python/tutorial_scripts/virtualtable.py:filtering" ```
@@ -107,7 +107,7 @@ Results from the virtual table with filtering: You can also create a Virtual Table that orders the rows from the source table using an ORDER BY clause. ```python -{!docs/tutorials/python/tutorial_scripts/virtualtable.py!lines=152-174} +--8<-- "docs/tutorials/python/tutorial_scripts/virtualtable.py:ordering" ```
@@ -129,7 +129,7 @@ Results from the virtual table with ordering: Finally, we can create a Virtual Table that aggregates data using functions like COUNT, along with GROUP BY. ```python -{!docs/tutorials/python/tutorial_scripts/virtualtable.py!lines=177-199} +--8<-- "docs/tutorials/python/tutorial_scripts/virtualtable.py:aggregation" ```
@@ -148,7 +148,7 @@ Results from the virtual table with aggregation: Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/virtualtable.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/virtualtable.py" ```
diff --git a/docs/tutorials/python/wiki.md b/docs/tutorials/python/wiki.md index c43614575..e38332c68 100644 --- a/docs/tutorials/python/wiki.md +++ b/docs/tutorials/python/wiki.md @@ -31,16 +31,16 @@ In this tutorial you will: ## 1. Create a Wiki page ### Initial setup ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=15-31} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:setup_and_imports" ``` A Wiki page requires an owner object, a title, and markdown. Here is an example to create a new root Wiki page for your project with plain text markdown: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=33-38} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:create_root_wiki_plain_text" ``` Alternatively, you can create a Wiki page from an existing markdown file. ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=41-46} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:create_root_wiki_from_file" ```
You'll notice the output looks like: @@ -57,7 +57,7 @@ Created Wiki page: My Root Wiki Page with ID: ... To update an existing Wiki page, create a new WikiPage object with the same `id` and new content: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=48-54} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:update_wiki_page" ```
@@ -74,12 +74,12 @@ Updated Wiki page: My First Root Wiki Page NEW with ID: ... You can restore a Wiki page to any previous version by specifying the `Wiki_version` parameter: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=56-59} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:restore_wiki_page" ``` Check if the content is restored. ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=61-71} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:verify_restore" ``` ## 4. Get a Wiki page @@ -87,23 +87,23 @@ You can retrieve Wiki pages in several ways. To find a Wiki page id, you can get Once you know the Wiki page id, you can retrieve a specific Wiki page: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=73-75} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:get_wiki_by_id" ``` Alternatively, you can retrieve a Wiki page by its title: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=77-78} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:get_wiki_by_title" ``` Verify that the retrieved Wiki page matches the original Wiki page ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=80-89} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:verify_retrieved_wiki" ``` ## 5. Create a sub-Wiki page You can create a sub-Wiki page under an existing Wiki page. ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=91-96} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:create_sub_wiki" ```
You'll notice the output looks like: @@ -119,7 +119,7 @@ Created sub-wiki page: Sub Wiki Page 1 with ID: ... under parent: ... You can create a Wiki page from a single-line or multi-line Python string. Here is an example of creating a Wiki page from a multi-line Python string: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=98-121} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:create_wiki_from_markdown_string" ``` ### Create a Wiki page from a markdown file @@ -127,7 +127,7 @@ You can create a Wiki page from a single-line or multi-line Python string. Here You can also create a Wiki page from an existing markdown file. Markdown files may be uploaded in either non-gzipped or gzipped format: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=123-133} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:create_wiki_from_markdown_file" ``` ## 7. Download Wiki page markdown @@ -135,12 +135,12 @@ You can download the markdown content of a Wiki page back to a file. ### Download the markdown file URL for a Wiki page ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=135-143} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:get_markdown_file_url" ``` ### Download the markdown file for a Wiki page that is created from plain text, the downloaded file will be named `wiki_markdown_.md` ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=145-148} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:download_markdown_from_text" ```
@@ -153,7 +153,7 @@ Downloaded and unzipped the markdown file for wiki page ... to path/to/wiki_mark ### Download the markdown file for a Wiki page that is created from a markdown file ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=150-153} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:download_markdown_from_file" ```
You'll notice the output looks like: @@ -171,7 +171,7 @@ Wiki pages can include file attachments, which are useful for sharing supplement First, create a file to attach. Then create a Wiki page with the attachment. Note that attachment file names in markdown need special formatting: replace `.` with `%2E` and `_` with `%5F`. You can utilize the static method `WikiPage.reformat_attachment_file_name` to reformat the file name. ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=155-169} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:create_wiki_with_attachment" ```
@@ -185,7 +185,7 @@ Created sub-wiki page: Sub Page 4 with Attachments with ID: ... under parent: .. To include images in your Wiki page, you DO NOT need to reformat the file name for image files (e.g., PNG, JPG, JPEG). ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=171-182} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:create_wiki_with_image" ```
@@ -202,7 +202,7 @@ Created sub-wiki page: Sub Page 5 with Attachments with ID: ... under parent: .. Retrieve the file handles of all attachments on a Wiki page: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=184-187} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:get_attachment_handles" ```
@@ -217,7 +217,7 @@ Retrieve the file handles of all attachments on a Wiki page: You can retrieve the URL of an attachment without downloading it. Attachment file name can be in either non-gzipped or gzipped format. ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=189-195} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:get_attachment_url" ```
@@ -231,14 +231,14 @@ You can retrieve the URL of an attachment without downloading it. Attachment fil Download an attachment file to your local machine and unzip it using `WikiPage.unzip_gzipped_file` function. ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=197-204} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:download_attachment" ``` ### Get attachment preview URL You can also retrieve preview URLs for attachments. When using `get_attachment_preview`, specify the original file name, not the file name returned in the attachment handle response when isPreview=True. The file name can be in either non-gzipped or gzipped format. The downloaded file will still be named according to the file name provided in the response when isPreview=True. Note that image attachments do not have preview files. ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=206-213} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:get_attachment_preview_url" ``` ### Download an attachment preview @@ -246,7 +246,7 @@ The downloaded file will still be named according to the file name provided in t Download the preview version of an attachment: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=215-223} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:download_attachment_preview" ``` The downloaded preview file will be named `preview.` (or according to the file name in the attachment handle response when `isPreview=True`). @@ -260,7 +260,7 @@ WikiHeader allows you to retrieve the hierarchical structure of Wiki pages withi Retrieve the complete Wiki page hierarchy for a project: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=224-226} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:get_wiki_header_tree" ```
@@ -279,7 +279,7 @@ WikiHistorySnapshot provides access to the version history of Wiki pages, allowi Retrieve the version history for a specific Wiki page: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=228-230} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:get_wiki_history" ```
You'll notice the output shows the history of versions: @@ -297,7 +297,7 @@ WikiOrderHint allows you to control the order in which Wiki pages are displayed. First, retrieve the current order hint (which may be empty initially): ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=232-234} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:get_order_hint" ```
@@ -312,7 +312,7 @@ WikiOrderHint(owner_id='...', owner_object_type='ENTITY', id_list=[], etag='...' Set the order of Wiki pages by providing a list of Wiki page IDs in the desired order: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=235-244} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:set_order_hint" ```
@@ -327,7 +327,7 @@ WikiOrderHint(id_list=['...', '...', ...]) You can update the order hint at any time by retrieving it, modifying the `id_list`, and storing it again: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=246-256} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:update_order_hint" ``` ## 13. Delete Wiki pages @@ -335,7 +335,7 @@ You can update the order hint at any time by retrieving it, modifying the `id_li Delete a Wiki page by providing the owner ID and Wiki page ID: ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!lines=258-265} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py:delete_wiki_page" ``` ## Source Code for this Tutorial @@ -343,7 +343,7 @@ Delete a Wiki page by providing the owner ID and Wiki page ID:
Click to show me ```python -{!docs/tutorials/python/tutorial_scripts/wiki.py!} +--8<-- "docs/tutorials/python/tutorial_scripts/wiki.py" ```
diff --git a/docs/tutorials/python_client.md b/docs/tutorials/python_client.md index e1f912c44..ef15e1b79 100644 --- a/docs/tutorials/python_client.md +++ b/docs/tutorials/python_client.md @@ -30,6 +30,7 @@ By the end of these tutorials you'll have: - A [Team](./python/team.md) created with one or more members - Methods to [upload data in bulk](./python/upload_data_in_bulk.md) - Methods to [download data in bulk](./python/download_data_in_bulk.md) +- Methods to [download files by Synapse ID](./python/download_data_by_synid.md) - Methods to [move files and folders](./python/move_files_and_folders.md) - Methods to [migrate data to other storage locations](./python/migrate_data_to_other_storage_locations.md) diff --git a/docs/tutorials/reticulate.md b/docs/tutorials/reticulate.md index 791299990..6940a5e06 100644 --- a/docs/tutorials/reticulate.md +++ b/docs/tutorials/reticulate.md @@ -6,6 +6,23 @@ While the separate [synapser](https://github.com/Sage-Bionetworks/synapser) R pa ## Installation +### Setting up a fresh RStudio environment with Python + +The following Docker command starts an RStudio instance with Python pre-installed, which you can use to follow this guide: + +```bash +docker run --rm -it -p 8787:8787 \ + -e PASSWORD=rstudio \ + rocker/rstudio:latest \ + bash -lc " + apt-get update && + apt-get install -y python3 python3-venv python3-pip python3-dev build-essential libcurl4-openssl-dev libssl-dev libxml2-dev && + /init + " +``` + +Then open `http://localhost:8787` in your browser (username: `rstudio`, password: `rstudio`). + ### Installing reticulate This article assumes that reticulate is installed and available in your R environment. If not it can be installed as follows: @@ -18,14 +35,12 @@ install.packages("reticulate") The Python synapseclient can be installed either directly into the Python installation you intend to use with reticulate or from within R using the reticulate library. -synapseclient has the same requirements and dependencies when installed for use with reticulate as it does in other usage. In particular note that synapseclient requires a Python version of 3.6 or greater. +synapseclient has the same requirements and dependencies when installed for use with reticulate as it does in other usage. In particular note that synapseclient requires a Python version of 3.10 or greater. #### Installing into Python The Python synapseclient is available on the [PyPi package repository](https://pypi.org/project/synapseclient/) and can be installed through Python tools that interface with the repository, such as **pip**. To install synapseclient for use with reticulate directly into a Python environment, first ensure that the current Python interpreter is the one you intend to use with reticulate. This may be a particular installation of Python, or a loaded [virtual environment](https://docs.python.org/3/tutorial/venv.html). See reticulate's [Python version configuration documentation](https://rstudio.github.io/reticulate/articles/versions.html) for more information on how reticulate can be configured to use particular Python environments. -For help installing a reticulate compatible Python, see the reticulate version of the [SynapseShinyApp](https://github.com/Sage-Bionetworks/SynapseShinyApp/tree/reticulate). - Once you have ensured you are interacting with your intended Python interpreter, follow the standard synapseclient [installation instructions](./installation.md) to install synapseclient. #### Installing from R/Reticulate @@ -38,101 +53,55 @@ library(reticulate) Once loaded, ensure that reticulate will use the Python installation you intend. You may need to provide reticulate a hint or otherwise [point it at the proper Python installation](https://rstudio.github.io/reticulate/articles/versions.html). -Next install the synapseclient using reticulate's [py_install](https://rstudio.github.io/reticulate/reference/py_install.html) command, e.g. - -```r -py_install("synapseclient") -``` - -You may also want to install some of synapseclient's optional dependencies, such as [Pandas](https://pandas.pydata.org/) for table support. +Next install the synapseclient using reticulate's [py_install](https://rstudio.github.io/reticulate/reference/py_install.html) command. We recommend installing with the `pandas` and `curator` optional dependencies: ```r -py_install("pandas") +py_install("synapseclient[pandas,curator]") ``` See synapseclient's [installation instructions](./installation.md) for more information on optional dependencies. ## Usage -Once synapseclient is installed it can be used once it is imported through R's [import](https://rstudio.github.io/reticulate/reference/import.html) command: +Once synapseclient is installed, import the top-level module and the `models` and `operations` submodules through R's [import](https://rstudio.github.io/reticulate/reference/import.html) command: ```r synapseclient <- import("synapseclient") +models <- import("synapseclient.models") +operations <- import("synapseclient.operations") ``` -If you are using synapseclient with reticulate when writing an R package, you will want to wrap the import in an onLoad and use the delay_load option, .e.g. - -```r -synapseclient <- NULL +The `models` module contains dataclass entity types (e.g. `File`, `Project`, `Folder`). Each instance exposes methods like `store()`, `get()`, and `delete()` directly. The `operations` module provides top-level functions — most usefully `operations$get()` for retrieving an entity by Synapse ID when the entity type is not known in advance. -.onLoad <- function(libname, pkgname) { - synapseclient <<- reticulate::import("synapseclient", delay_load = TRUE) -} -``` - -This will allow users of your package to configure their reticulate usage properly regardless of when they load your package. More information on this technique can be found [here](https://rstudio.github.io/reticulate/articles/package.html). - -If you are familiar with the **synapser** R package, many of the commands will be similar, but unlike in synapser where package functions and classes are made available in the global namespace through the search path, when using synapseclient through reticulate, classes are accessed through the imported synapseclient module and functionality is provided through an instantiated Synapse instance. - -For example classes that were globally available are now available through the imported synapseclient module. - -```r -# File from synapser -synapseclient$File - -# Table from synapser -synapseclient$Table -``` +## Credentials -And various syn functions are now methods on the Synapse object: +synapseclient accessed through reticulate supports the same authentication options as it does when accessed directly from Python. Log in once per session using the `Synapse` class and your auth token: ```r -# using synapseclient with reticulate we must instantiate a Synapse instance syn <- synapseclient$Synapse() - -# synLogin from synapser syn$login() - -# synGet from synapser -syn$get(identifier) - -# synStore from syanpser -syn$store(entity) -``` - -Each synapse object has its own state, such as configuration and login credentials. - -## Credentials - -synapseclient accessed through reticulate supports the same authentication options as it does when accessed directly from Python, for example: - -```r -syn <- synapseclient$synapse() - -# one time login -syn$login('') ``` See [Managing Synapse Credentials](./authentication.md) for complete documentation on how synapseclient handles credentials and authentication. ## Accessing Data -The following illustrates some examples of storing and retrieving data in Synapse using synapseclient through reticulate. +The following illustrates storing and retrieving data in Synapse using the new OOP models and operations API. See [here](./python_client.md#accessing-data) for more details on available data access APIs. -Create a project with a unique name +### Create a project ```r -# use hex_digits to generate random string and use it to name a project +# use hex_digits to generate a random string for the project name hex_digits <- c(as.character(0:9), letters[1:6]) projectName <- sprintf("My unique project %s", paste0(sample(hex_digits, 32, replace = TRUE), collapse = "")) -project <- synapseclient$Project(projectName) -project <- syn$store(project) +project <- models$Project(name = projectName) +project <- project$store() ``` -Create, store, and retrieve a file +### Create, store, and retrieve a file ```r filePath <- tempfile() @@ -140,41 +109,50 @@ connection <- file(filePath) writeChar("a \t b \t c \n d \t e \t f \n", connection, eos = NULL) close(connection) -file <- synapseclient$File(path = filePath, parent = project) -file <- syn$store(file) -synId <- file$properties$id +# store a file inside the project +fileEntity <- models$File(path = filePath, parent_id = project$id) +fileEntity <- fileEntity$store() +synId <- fileEntity$id -# download the file using its identifier to specific path -fileEntity <- syn$get(synId, downloadLocation="/path/to/folder") +# retrieve the file by its Synapse ID +# use operations$get when you don't know the entity type in advance +fileEntity <- operations$get(synId) -# view the file meta data in the console +# view the file metadata in the console print(fileEntity) -# view the file on the web -syn$onweb(synId) +# open the file on the web +operations$onweb(synId) ``` -Create folder and add files to the folder: +### Create a folder and add files to it ```r -dataFolder <- synapseclient$Folder("Data", parent = project) -dataFolder <- syn$store(dataFolder) +dataFolder <- models$Folder(name = "Data", parent_id = project$id) +dataFolder <- dataFolder$store() filePath <- tempfile() connection <- file(filePath) writeChar("this is the content of the file", connection, eos = NULL) close(connection) -file <- synapseclient$File(path = filePath, parent = dataFolder) -file <- syn$store(file) + +fileEntity <- models$File(path = filePath, parent_id = dataFolder$id) +fileEntity <- fileEntity$store() ``` ## Annotating Synapse Entities -This illustrates adding annotations to a Synapse entity. +Annotations can be stored directly on model objects via the `annotations` attribute and then stored to Synapse: ```r -# first retrieve the existing annotations object -annotations <- syn$get_annotations(project) +project$annotations <- list(foo = "bar", fooList = list("bar", "baz")) +project <- project$store() +``` + +Alternatively, retrieve and update annotations directly: + +```r +annotations <- syn$get_annotations(project$id) annotations$foo <- "bar" annotations$fooList <- list("bar", "baz") @@ -191,21 +169,26 @@ This example illustrates creating an entity with associated provenance. See [here][synapseclient.activity] for more information on Activity/Provenance related APIs. ```r -act <- synapseclient$Activity( +act <- models$Activity( name = "clustering", description = "whizzy clustering", - used = c("syn1234", "syn1235"), - executed = "syn4567") -``` + used = list( + models$UsedEntity(target_id = "syn1234"), + models$UsedEntity(target_id = "syn1235") + ), + executed = list( + models$UsedURL(url = "https://github.com/my-org/my-repo") + ) +) -```r filePath <- tempfile() connection <- file(filePath) writeChar("some test", connection, eos = NULL) close(connection) -file = synapseclient$File(filePath, name="provenance_file.txt", parent=project) -file <- syn$store(file, activity = act) +fileEntity <- models$File(path = filePath, name = "provenance_file.txt", parent_id = project$id) +fileEntity$activity <- act +fileEntity <- fileEntity$store() ``` ## Tables @@ -214,10 +197,10 @@ These examples illustrate manipulating Synapse Tables. Note that you must have installed the Pandas dependency into the Python environment as described above in order to use this feature. -See [here][synapseclient.table] for more information on tables. +See [here][synapseclient.models.Table] for more information on tables. -The following illustrates building a table from an R data frame. The schema will be generated -from the data types of the values within the data frame. +The following illustrates building a table from an R data frame with the schema automatically +inferred from the data types of the columns. ```r # start with an R data frame @@ -229,17 +212,21 @@ genes <- data.frame( Strand = c("+", "+", "-", "-", "+", "+"), TranscriptionFactor = c(F, F, F, F, T, F)) -# build a Synapse table from the data frame. -# a schema is automatically generated -# note that reticulate will automatically convert from an R data frame to Pandas -table <- synapseclient$build_table("My Favorite Genes", project, genes) +# create the table schema in Synapse +table <- models$Table(name = "My Favorite Genes", parent_id = project$id) +table <- table$store() -table <- syn$store(table) +# upload rows — reticulate auto-converts the R data frame to a pandas DataFrame. +# INFER_FROM_DATA automatically creates columns from the data frame's schema. +table$store_rows( + values = genes, + schema_storage_strategy = models$SchemaStorageStrategy$INFER_FROM_DATA +) ``` -Alternately the schema can be specified. At this time when using date values it is necessary -to use a date string formatted in "YYYY-MM-dd HH:mm:ss.mmm" format or integer unix epoch millisecond -value and explicitly specify the type in the schema due to how dates are translated to the Python client. +Alternately the schema can be specified explicitly using `Column` objects. When using date +values it is necessary to use a date string formatted in "YYYY-MM-dd HH:mm:ss.mmm" format +or an integer unix epoch millisecond value and explicitly specify the column type. ```r prez_birthdays <- data.frame( @@ -247,152 +234,31 @@ prez_birthdays <- data.frame( Time = c("1732-02-22 11:23:11.024", "1743-04-13 00:00:00.000", "1809-02-12 01:02:03.456")) cols <- list( - synapseclient$Column(name = "Name", columnType = "STRING", maximumSize = 20), - synapseclient$Column(name = "Time", columnType = "DATE")) - -schema <- synapseclient$Schema(name = "President Birthdays", columns = cols, parent = project) -table <- synapseclient$Table(schema, prez_birthdays) - -# store the table in Synapse -table <- syn$store(table) -``` - -We can query a table as in the following: - -```r -tableId <- table$tableId - -results <- syn$tableQuery(sprintf("select * from %s where Name='George Washington'", tableId)) -results$asDataFrame() -``` - -## Wikis - -This example illustrates creating a wiki. - -See [here][synapseclient.wiki] for more information on wiki APIs. - -```r -content <- " -# My Wiki Page -Here is a description of my **fantastic** project! -" - -# attachment -filePath <- tempfile() -connection <- file(filePath) -writeChar("this is the content of the file", connection, eos = NULL) -close(connection) -wiki <- synapseclient$Wiki( - owner = project, - title = "My Wiki Page", - markdown = content, - attachments = list(filePath) + models$Column(name = "Name", column_type = models$ColumnType$STRING, maximum_size = 20L), + models$Column(name = "Time", column_type = models$ColumnType$DATE) ) -wiki <- syn$store(wiki) -``` - -An existing wiki can be updated as follows. - -```r -wiki <- syn$getWiki(project) -wiki$markdown <- " -# My Wiki Page -Here is a description of my **fantastic** project! Let's -*emphasize* the important stuff. -" -wiki <- syn$store(wiki) -``` - -## Evaluations - -An Evaluation is a Synapse construct useful for building processing pipelines and -for scoring predictive modeling and data analysis challenges. -See [here][synapseclient.evaluation] for more information on Evaluations. - -Creating an Evaluation: - -```r -eval <- synapseclient$Evaluation( - name = sprintf("My unique evaluation created on %s", format(Sys.time(), "%a %b %d %H%M%OS4 %Y")), - description = "testing", - contentSource = project, - submissionReceiptMessage = "Thank you for your submission!", - submissionInstructionsMessage = "This evaluation only accepts files.") - -eval <- syn$store(eval) - -eval <- syn$getEvaluation(eval$id) +table <- models$Table(name = "President Birthdays", parent_id = project$id, columns = cols) +table <- table$store() +table$store_rows(values = prez_birthdays) ``` -Submitting a file to an existing Evaluation: +We can query a table using `models$query`, which returns a pandas DataFrame that reticulate +automatically converts to an R data frame: ```r -# first create a file to submit -filePath <- tempfile() -connection <- file(filePath) -writeChar("this is my first submission", connection, eos = NULL) -close(connection) -file <- synapseclient$File(path = filePath, parent = project) -file <- syn$store(file) -# submit the created file -submission <- syn$submit(eval, file) -``` - -List submissions: - -```r -submissions <- syn$getSubmissionBundles(eval) - -# submissions are returned as a generator -list(iterate(submissions)) -``` - -Retrieving submission by id: - -```r -submission <- syn$getSubmission(submission$id) -``` - -Retrieving the submission status: - -```r -submissionStatus <- syn$getSubmissionStatus(submission) -submissionStatus -``` - -Query an evaluation: - -```r -queryString <- sprintf("query=select * from evaluation_%s LIMIT %s OFFSET %s'", eval$id, 10, 0) -syn$restGET(paste("/evaluation/submission/query?", URLencode(queryString), sep = "")) -``` - -## Sharing Access to Content - -The following illustrates sharing access to a Synapse Entity. - -See [here](../explanations/access_control.md) for more information on Access Control including all available permissions. - -```r -# get permissions on an entity -# to get permissions for a user/group pass a principalId identifier, -# otherwise the assumed permission will apply to the public - -# make the project publicly accessible -acl <- syn$setPermissions(project, accessType = list("READ")) - -perms = syn$getPermissions(project) +results <- models$query( + sprintf("select * from %s where Name='George Washington'", table$id) +) +results ``` ## Views -A view is a view of all entities (File, Folder, Project, Table, Docker Repository, View) within one or more Projects or Folders. Views can: +An EntityView provides a table-like interface over entities (Files, Folders, Projects, Tables, etc.) spread across one or more Projects or Folders. You can query a view with SQL, making it easy to filter, sort, and inspect entity metadata at scale. The following examples illustrate some view operations. -See [here](../guides/views.md) for more information on Views. A view is implemented as a Table, -see [here][synapseclient.table] for more information on Tables. +See [here](../guides/views.md) for more information on Views. First create some files we can use in a view: @@ -401,77 +267,68 @@ filePath1 <- tempfile() connection <- file(filePath1) writeChar("this is the content of the first file", connection, eos = NULL) close(connection) -file1 <- synapseclient$File(path = filePath1, parent = project) -file1 <- syn$store(file1) +fileEntity1 <- models$File(path = filePath1, parent_id = project$id) +fileEntity1 <- fileEntity1$store() + filePath2 <- tempfile() connection2 <- file(filePath2) -writeChar("this is the content of the second file", connection, eos = NULL) +writeChar("this is the content of the second file", connection2, eos = NULL) close(connection2) -file2 <- synapseclient$File(path = filePath2, parent = project) -file2 <- syn$store(file2) +fileEntity2 <- models$File(path = filePath2, parent_id = project$id) +fileEntity2 <- fileEntity2$store() -# add some annotations -fileAnnotations1 <- syn$get_annotations(file1) -fileAnnotations2 <- syn$get_annotations(file2) +# add some annotations and re-store +fileEntity1$annotations <- list(contributor = "Sage", class = "V") +fileEntity1 <- fileEntity1$store() -fileAnnotations1$contributor <- "Sage" -fileAnnotations1$class <- "V" -syn$set_annotations(fileAnnotations1) - -fileAnnotations2$contributor = "UW" -fileAnnotations2$rank = "X" -syn$set_annotations(fileAnnotations2) +fileEntity2$annotations <- list(contributor = "UW", rank = "X") +fileEntity2 <- fileEntity2$store() ``` -Now create a view: +Now create an `EntityView` scoped to the project. Use `bitwOr` to combine `ViewTypeMask` +values for multiple entity types: ```r -columns = c( - synapseclient$Column(name = "contributor", columnType = "STRING"), - synapseclient$Column(name = "class", columnType = "STRING"), - synapseclient$Column(name = "rank", columnType = "STRING") -) - -view <- synapseclient$EntityViewSchema( - name = "my first file view", - columns = columns, - parent = project, - scopes = project, - includeEntityTypes = c(synapseclient$EntityViewType$FILE, synapseclient$EntityViewType$FOLDER), - addDefaultViewColumns = TRUE +view <- models$EntityView( + name = "my first file view", + parent_id = project$id, + scope_ids = list(project$id), + view_type_mask = bitwOr( + as.integer(models$ViewTypeMask$FILE), + as.integer(models$ViewTypeMask$FOLDER) + ) ) -view <- syn$store(view) +view <- view$store() ``` -We can now see content of our view (note that views are not created synchronously it may take a few seconds for the view table to be queryable). +We can now query the view (note that views are not created synchronously; it may take a few seconds for the view table to be queryable): ```r -queryResults <- syn$tableQuery(sprintf("select * from %s", view$properties$id)) -data <- queryResults$asDataFrame() -data +results <- models$query(sprintf("select * from %s", view$id)) +results ``` -We can update annotations using a view as follows: +## Using with a Shiny App -```r -data["class"] <- c("V", "VI") -syn$store(synapseclient$Table(view$properties$id, data)) +Reticulate and the Python synapseclient can be used to workaround an issue that exists when using synapser with a Shiny App. Since synapser shares a Synapse client instance within the R process, multiple users of a synapser integrated Shiny App may end up sharing a login if precautions aren't taken. When using reticulate with synapseclient, session scoped Synapse client objects can be created that avoid this issue. -# the change in annotations is reflected in get_annotations(): -syn$get_annotations(file2$properties$id) -``` +See [SynapseShinyApp](https://github.com/Sage-Bionetworks/SynapseShinyApp) for a sample application and a discussion of the issue, and the [reticulate](https://github.com/Sage-Bionetworks/SynapseShinyApp/tree/reticulate) branch for an alternative implementation using reticulate with synapseclient. -## Update View's Content +## Developing an R Package with synapseclient and reticulate -```r -# A view can contain different types of entity. To change the types of entity that will show up in a view: -view <- syn$get(view$properties$id) -view$set_entity_types(list(synapseclient$EntityViewType$FILE)) -``` +If you are building an R package that wraps synapseclient, wrap all imports in `.onLoad` and use the `delay_load` option. This lets users configure their Python environment before any import occurs, regardless of when they load your package: -## Using with a Shiny App +```r +synapseclient <- NULL +models <- NULL +operations <- NULL -Reticulate and the Python synapseclient can be used to workaround an issue that exists when using synapser with a Shiny App. Since synapser shares a Synapse client instance within the R process, multiple users of a synapser integrated Shiny App may end up sharing a login if precautions aren't taken. When using reticulate with synapseclient, session scoped Synapse client objects can be created that avoid this issue. +.onLoad <- function(libname, pkgname) { + synapseclient <<- reticulate::import("synapseclient", delay_load = TRUE) + models <<- reticulate::import("synapseclient.models", delay_load = TRUE) + operations <<- reticulate::import("synapseclient.operations", delay_load = TRUE) +} +``` -See [SynapseShinyApp](https://github.com/Sage-Bionetworks/SynapseShinyApp) for a sample application and a discussion of the issue, and the [reticulate](https://github.com/Sage-Bionetworks/SynapseShinyApp/tree/reticulate) branch for an alternative implementation using reticulate with synapseclient. +More information on this technique can be found [here](https://rstudio.github.io/reticulate/articles/package.html). diff --git a/mkdocs.yml b/mkdocs.yml index eda90ac08..135436e09 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -31,7 +31,7 @@ nav: - Submission: tutorials/python/submission.md - Annotation: tutorials/python/annotation.md # - Versions: tutorials/python/versions.md - # - Activity/Provenance: tutorials/python/activity.md + - Activity/Provenance: tutorials/python/activity.md - Entity View: tutorials/python/entityview.md - Table: tutorials/python/table.md # - Using a Table: tutorials/python/table_crud.md @@ -45,10 +45,13 @@ nav: # - Team: tutorials/python/team.md - Upload data in bulk: tutorials/python/upload_data_in_bulk.md - Download data in bulk: tutorials/python/download_data_in_bulk.md + - Download data by Synapse ID: tutorials/python/download_data_by_synid.md # - Creating JSON Schema: tutorials/python/schema_operations.md - Working with JSON Schema: tutorials/python/json_schema.md + - Storage Location: tutorials/python/storage_location.md + - Proxy Storage Location: tutorials/python/proxy_storage_location.md + - Migration: tutorials/python/migration.md # - Move Files and Folders: tutorials/python/move_files_and_folders.md - # - Migrate data to other storage locations: tutorials/python/migrate_data_to_other_storage_locations.md - Working with the Command Line Client: tutorials/command_line_client.md # - Working with file versioning: tutorials/file_versioning.md # - Working with tables: tutorials/tables.md @@ -58,9 +61,11 @@ nav: # - Using Entity Views: guides/views.md - Data Storage: guides/data_storage.md - Access the REST API: guides/accessing_the_rest_api.md + - Synapse MCP Server: guides/synapse_mcp.md - Extensions: - Curator JSONschemas: guides/extensions/curator/schema_operations.md - - Curator: guides/extensions/curator/metadata_curation.md + - Curator (administrators): guides/extensions/curator/metadata_curation.md + - Curator (contributors): guides/extensions/curator/metadata_contribution.md # - Expermental Features: # - Validating Annotations: guides/validate_annotations.md - API Reference: @@ -112,6 +117,8 @@ nav: - JSONSchema: reference/experimental/sync/json_schema.md - Wiki: reference/experimental/sync/wiki.md - FormGroup and Form: reference/experimental/sync/form.md + - StorageLocation: reference/experimental/sync/storage_location.md + - Download List: reference/experimental/sync/download_list.md - Extensions: - Curator: reference/extensions/curator.md - Asynchronous: @@ -140,6 +147,8 @@ nav: - JSONSchema: reference/experimental/async/json_schema.md - Wiki: reference/experimental/async/wiki.md - FormGroup and Form: reference/experimental/async/form.md + - StorageLocation: reference/experimental/async/storage_location.md + - Download List: reference/experimental/async/download_list.md - Mixins: - AccessControllable: reference/experimental/mixins/access_controllable.md - StorableContainer: reference/experimental/mixins/storable_container.md @@ -149,17 +158,19 @@ nav: - ContainerEntityJSONSchema: reference/experimental/mixins/container_json_schema.md - FormData: reference/experimental/mixins/form_data.md - FormGroup: reference/experimental/mixins/form_group.md + - StorageLocationConfigurable: reference/experimental/mixins/storage_location_configurable.md - Further Reading: - Home: explanations/home.md - Domain Models of Synapse: explanations/domain_models_of_synapse.md - Access Control: explanations/access_control.md - Properties vs Annotations: explanations/properties_vs_annotations.md - - Manifest TSV: explanations/manifest_tsv.md + - Manifest CSV: explanations/manifest_csv.md - Benchmarking: explanations/benchmarking.md - Structuring Your Project: explanations/structuring_your_project.md - Asyncio Changes in Python 3.14: explanations/asyncio_in_python_3_14.md - Curator Data model: explanations/curator_data_model.md + - Storage Location Architecture: explanations/storage_location_architecture.md - News: - news.md - Contact Us: https://sagebionetworks.jira.com/servicedesk/customer/portal/9/group/16/create/206 @@ -259,6 +270,9 @@ markdown_extensions: - markdown_include.include: + - pymdownx.snippets: + base_path: ["."] + - toc: permalink: true - attr_list diff --git a/setup.cfg b/setup.cfg index 9f5218d45..59ba8de9e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -44,7 +44,7 @@ include_package_data = True packages = find: python_requires = >=3.10, <3.15 install_requires = - requests>=2.22.0,<3.0 + requests>=2.34.0,<3.0 urllib3>=2.6.3 deprecated>=1.2.4,<2.0 opentelemetry-api>=1.21.0 diff --git a/synapseclient/.synapseConfig b/synapseclient/.synapseConfig index c8aecaec3..08649e342 100644 --- a/synapseclient/.synapseConfig +++ b/synapseclient/.synapseConfig @@ -5,10 +5,16 @@ ## Used for logging in to Synapse. See https://python-docs.synapse.org/tutorials/authentication/ ## for information on retrieving an auth token. +## The [default] section is used when no profile is specified. +## username is optional; if both username and authtoken are provided, they must match. + #[default] #username = default_user #authtoken = default_auth_token +## Named profiles can be selected via syn.login(profile="user1"), +## the SYNAPSE_PROFILE environment variable, or the --profile CLI flag. + #[profile user1] #username = user1 #authtoken = user1_auth_token @@ -17,30 +23,37 @@ #username = user2 #authtoken = user2_auth_token -## If you have projects with file stored on SFTP servers, you can specify your credentials here -## You can specify multiple sftp credentials +## If you have projects with files stored on SFTP servers, you can specify your credentials here. +## You can specify multiple SFTP credentials — use one section per server. + #[sftp://some.sftp.url.com] -#username= -#password= +#username = +#password = #[sftp://a.different.sftp.url.com] -#username= -#password= +#username = +#password = -## If you have projects that need to be stored in an S3-like (e.g. AWS S3, Openstack) storage but cannot allow Synapse -## to manage access your storage you may put your credentials here. -## To avoid duplicating credentials with that used by the AWS Command Line Client, -## simply put the profile name form your ~/.aws/credentials file -## more information about aws credentials can be found here http://docs.aws.amazon.com/cli/latest/userguide/cli-config-files.html -#[https://s3.amazonaws.com/bucket_name] # this is the bucket's endpoint -#profile_name=local_credential_profile_name +## If you have projects that need to be stored in an S3-like (e.g. AWS S3, OpenStack) storage +## but cannot allow Synapse to manage access to your storage, you may put your credentials here. +## To avoid duplicating credentials already used by the AWS CLI, specify the profile name from +## your ~/.aws/credentials file. If profile_name is omitted, the "default" AWS profile is used. +## More information about AWS credentials: https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html +## Use one section per bucket; the section name is the full endpoint URL including the bucket name. + +#[https://s3.amazonaws.com/bucket_name] +#profile_name = local_credential_profile_name ########################### # Caching # ########################### -## your downloaded files are cached to avoid repeat downloads of the same file. change 'location' to use a different folder on your computer as the cache location +## Downloaded files are cached to avoid repeat downloads of the same file. +## Change 'location' to use a different folder on your computer as the cache location. +## Supports ~ for the home directory and environment variable expansion. +## Default: ~/.synapseCache + #[cache] #location = ~/.synapseCache @@ -49,20 +62,30 @@ # Advanced Configurations # ########################### -## If this section is specified, then the synapseclient will print out debug information +## If this section is present, the synapseclient will print debug-level log output. #[debug] -## Configuring these will cause the Python client to use these as Synapse service endpoints instead of the default prod endpoints. +## Configuring these will cause the Python client to use these as Synapse service endpoints +## instead of the default production endpoints. Useful for testing against staging environments. #[endpoints] -#repoEndpoint= -#authEndpoint= -#fileHandleEndpoint= -#portalEndpoint= +#repoEndpoint = +#authEndpoint = +#fileHandleEndpoint = +#portalEndpoint = + -## Settings to configure how Synapse uploads/downloads data +## Settings to configure how Synapse uploads/downloads data. #[transfer] -# use this to configure the default for how many threads/connections Synapse will use to perform file transfers. -# Currently this applies only to files whose underlying storage is AWS S3. -# max_threads=16 +## max_threads: number of concurrent threads/connections used for file transfers. +## Applies to AWS S3 transfers (uploads and downloads). +## Default: min(cpu_count + 4, 128). Maximum: 128. Minimum: 1. +## Can also be set programmatically: syn.max_threads = 16 +#max_threads = 16 + +## use_boto_sts: if true, use AWS STS (Security Token Service) to obtain temporary +## credentials for S3 transfers instead of using stored AWS credentials directly. +## Useful when your storage location is configured with STS-based access. +## Valid values: true or false (case-insensitive). Default: false. +#use_boto_sts = false diff --git a/synapseclient/__main__.py b/synapseclient/__main__.py index c86ecf75d..fa462bb4c 100644 --- a/synapseclient/__main__.py +++ b/synapseclient/__main__.py @@ -676,7 +676,12 @@ def submit(args, syn): def get_download_list(args, syn: synapseclient.Synapse) -> None: """Download files from the Synapse download cart""" - manifest_path = syn.get_download_list(downloadLocation=args.downloadLocation) + from synapseclient.operations import download_list_files + + manifest_path = download_list_files( + download_location=args.downloadLocation, + synapse_client=syn, + ) syn.logger.info(f"Manifest file: {manifest_path}") diff --git a/synapseclient/activity.py b/synapseclient/activity.py index 369f42b29..8d91deee4 100644 --- a/synapseclient/activity.py +++ b/synapseclient/activity.py @@ -223,9 +223,9 @@ def used( resource = target if "concreteType" not in resource: - resource[ - "concreteType" - ] = "org.sagebionetworks.repo.model.provenance.UsedEntity" + resource["concreteType"] = ( + "org.sagebionetworks.repo.model.provenance.UsedEntity" + ) # -- Used URL elif is_used_url(target): @@ -234,9 +234,9 @@ def used( resource = target if "concreteType" not in resource: - resource[ - "concreteType" - ] = "org.sagebionetworks.repo.model.provenance.UsedURL" + resource["concreteType"] = ( + "org.sagebionetworks.repo.model.provenance.UsedURL" + ) # -- Synapse Entity elif is_synapse_entity(target): diff --git a/synapseclient/api/CLAUDE.md b/synapseclient/api/CLAUDE.md new file mode 100644 index 000000000..9d267a2b2 --- /dev/null +++ b/synapseclient/api/CLAUDE.md @@ -0,0 +1,87 @@ + + +## Project + +REST API service layer — thin async functions that map to Synapse REST endpoints. One file per resource type. Called by model layer, never by end users directly. + +## Reference + +- [Synapse REST API docs](https://rest-docs.synapse.org/rest/) + +## Conventions + +### Function signature pattern +```python +async def verb_resource( + required_param: str, + optional_param: str = None, + *, + synapse_client: Optional["Synapse"] = None, +) -> Dict[str, Any]: +``` +- All functions are `async def` +- `synapse_client` is **always** `Optional["Synapse"] = None` — never make it required. Callers omit it to use the cached singleton returned by `Synapse.get_client()`. +- `synapse_client` is always the last parameter, keyword-only (after `*`) +- Use `Synapse.get_client(synapse_client=synapse_client)` to get the client instance +- Use `TYPE_CHECKING` guard for `Synapse` import — avoids circular dependencies between `api/` and `client.py` +- Construct a `query_params` dictionary for non-null optional args, and pass it to the `params` arg of the REST call. See `entity_services.py` for the pattern. + +### Docstring conventions +Module-level — every file opens with boilerplate linking to the Synapse REST controller: +```python +"""This module is responsible for exposing the services defined at: + +""" +``` +Function-level (Google style): +```python +""" +One-line summary. + + + +Arguments: + param: Description. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + +Returns: + Description of return value. +""" +``` +- The `synapse_client` argument description is boilerplate — always copy it verbatim, not paraphrased. +- The REST endpoint URL uses `` format (angled brackets), not markdown `[text](url)`. +- Parameter descriptions in `Arguments:` must be copied verbatim from the Synapse REST API docs for that endpoint — do not paraphrase or infer. + +### REST call pattern +```python +client = Synapse.get_client(synapse_client=synapse_client) +return await client.rest_post_async(uri="/endpoint", body=json.dumps(request)) +``` +Available methods: `rest_get_async`, `rest_post_async`, `rest_put_async`, `rest_delete_async`. Pass `endpoint=client.fileHandleEndpoint` for file handle operations; omit for the default repository endpoint. Use `json.dumps()` for request bodies — not raw dicts. Always assign the response to a named `response` variable before returning or extracting attributes from it. + +### Return values +- Most functions return raw `Dict[str, Any]` — transformation happens in the model layer via `fill_from_dict()` +- Some return typed dataclass instances (e.g., `EntityHeader` from `entity_services.py`) when the data is only used internally +- Delete operations return `None` + +### Pagination +Use async pagination helpers when the API endpoint returns a list of results. For single-object responses, a simple `return` is sufficient. + +Helpers from `api_client.py`: +- `rest_get_paginated_async()` — for GET endpoints with limit/offset. Expects `results` or `children` key in response. +- `rest_post_paginated_async()` — for POST endpoints with `nextPageToken`. Expects `page` array in response. +Both are async generators yielding individual items. Reference `entity_services.py`, `table_services.py`, or `evaluation_services.py` for pagination patterns. + +### Entity factory (`entity_factory.py`) +Polymorphic entity deserialization via concrete type dispatch. Maps Java class names from `core/constants/concrete_types.py` to model classes. When adding a new entity type, register the type mapping here. + +### When to add a new service file vs. update an existing one +Add a new file when the Synapse REST controller is different (each file maps to one controller). Update an existing file when adding endpoints under the same controller. + +### Adding a new service file +1. Create `synapseclient/api/new_service.py` +2. Add all public functions to `api/__init__.py` imports and `__all__` — every public function must be re-exported +3. Use `json.dumps()` for request bodies (not dict) +4. Reference `entity_services.py` for CRUD pattern, `table_services.py` or `evaluation_services.py` for pagination pattern diff --git a/synapseclient/api/__init__.py b/synapseclient/api/__init__.py index 2f9e454ea..c55e43410 100644 --- a/synapseclient/api/__init__.py +++ b/synapseclient/api/__init__.py @@ -21,12 +21,19 @@ delete_curation_task, delete_grid_session, get_curation_task, + get_curation_task_status, list_curation_tasks, list_grid_sessions, update_curation_task, + update_curation_task_status, ) from .docker_commit_services import get_docker_tag from .docker_services import get_entity_id_by_repository_name +from .download_list_services import ( + add_to_download_list_async, + clear_download_list_async, + remove_from_download_list_async, +) from .entity_bundle_services_v2 import ( get_entity_id_bundle2, get_entity_id_version_bundle2, @@ -95,6 +102,7 @@ get_file_handle, get_file_handle_for_download, get_file_handle_for_download_async, + get_file_handle_presigned_url, post_external_filehandle, post_external_object_store_filehandle, post_external_s3_file_handle, @@ -131,6 +139,16 @@ update_organization_acl, validate_entity_with_json_schema, ) +from .project_setting_services import ( + create_project_setting, + delete_project_setting, + get_project_setting, + update_project_setting, +) +from .storage_location_services import ( + create_storage_location_setting, + get_storage_location_setting, +) from .table_services import ( ViewEntityType, ViewTypeMask, @@ -196,6 +214,7 @@ "post_external_object_store_filehandle", "post_external_s3_file_handle", "get_file_handle", + "get_file_handle_presigned_url", "post_external_filehandle", "post_file_multipart_presigned_urls", "put_file_multipart_add", @@ -315,9 +334,15 @@ "delete_curation_task", "delete_grid_session", "get_curation_task", + "get_curation_task_status", "list_curation_tasks", "list_grid_sessions", "update_curation_task", + "update_curation_task_status", + # download_list_services + "add_to_download_list_async", + "clear_download_list_async", + "remove_from_download_list_async", # docker_commit_services "get_docker_tag", # docker_services @@ -360,4 +385,11 @@ "create_form_data", "list_form_data", "list_form_data_sync", + # storage_location_services + "create_storage_location_setting", + "get_storage_location_setting", + "get_project_setting", + "create_project_setting", + "update_project_setting", + "delete_project_setting", ] diff --git a/synapseclient/api/curation_services.py b/synapseclient/api/curation_services.py index 0505edb24..3f13e99f5 100644 --- a/synapseclient/api/curation_services.py +++ b/synapseclient/api/curation_services.py @@ -94,6 +94,63 @@ async def update_curation_task( ) +async def get_curation_task_status( + task_id: int, + *, + synapse_client: "Synapse | None" = None, +) -> dict[str, Any]: + """ + Get the status of a CurationTask by its ID. + + + + Arguments: + task_id: The unique identifier of the task. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The CurationTaskStatus. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + + return await client.rest_get_async(uri=f"/curation/task/{task_id}/status") + + +async def update_curation_task_status( + task_id: int, + curation_task_status: dict[str, Any], + *, + synapse_client: "Synapse | None" = None, +) -> dict[str, Any]: + """ + Update the status of a CurationTask. + + + + Arguments: + task_id: The unique identifier of the task. + curation_task_status: The complete CurationTaskStatus object to update. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The updated CurationTaskStatus. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + + return await client.rest_put_async( + uri=f"/curation/task/{task_id}/status", + body=json.dumps(curation_task_status), + ) + + async def delete_curation_task( task_id: int, *, @@ -123,8 +180,11 @@ async def delete_curation_task( async def list_curation_tasks( project_id: str, *, + assigned_to_me: Optional[bool] = None, + assignee_ids: Optional[list[str]] = None, + state_filter: Optional[list[str]] = None, synapse_client: Optional["Synapse"] = None, -) -> AsyncGenerator[Dict[str, Any], None]: +) -> AsyncGenerator[dict[str, Any], None]: """ Generator to get a list of CurationTasks for a project. @@ -132,6 +192,14 @@ async def list_curation_tasks( Arguments: project_id: The synId of the project. + assigned_to_me: When True, only return tasks that are assigned to the + current user. When False or None, the filter is not applied. + Cannot be combined with assignee_ids when True. Defaults to None. + assignee_ids: Optional list of principal IDs (users or teams) to filter + tasks by assignee. Cannot be combined with assigned_to_me=True. + Defaults to None. + state_filter: Optional list of TaskState string values to filter tasks by + their current state. Defaults to None (all states returned). synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -143,7 +211,13 @@ async def list_curation_tasks( client = Synapse.get_client(synapse_client=synapse_client) - request_body = {"projectId": project_id} + request_body: Dict[str, Any] = {"projectId": project_id} + if assigned_to_me: + request_body["assignedToMe"] = True + if assignee_ids is not None: + request_body["assigneeIds"] = assignee_ids + if state_filter is not None: + request_body["stateFilter"] = state_filter async for item in rest_post_paginated_async( "/curation/task/list", body=request_body, synapse_client=client diff --git a/synapseclient/api/docker_commit_services.py b/synapseclient/api/docker_commit_services.py index c90c4bba4..19c7fa8dd 100644 --- a/synapseclient/api/docker_commit_services.py +++ b/synapseclient/api/docker_commit_services.py @@ -1,6 +1,7 @@ """This module is responsible for exposing the services defined at: """ + from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: diff --git a/synapseclient/api/download_list_services.py b/synapseclient/api/download_list_services.py new file mode 100644 index 000000000..003b8c973 --- /dev/null +++ b/synapseclient/api/download_list_services.py @@ -0,0 +1,93 @@ +"""This module is responsible for exposing the services defined at: + +""" + +import json +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from synapseclient import Synapse + from synapseclient.operations.download_list_operations import DownloadListItem + + +async def clear_download_list_async( + *, + synapse_client: Optional["Synapse"] = None, +) -> None: + """Clear all files from the user's Synapse download list. + + + + Arguments: + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + await client.rest_delete_async("/download/list") + + +async def add_to_download_list_async( + files: list["DownloadListItem"], + *, + synapse_client: Optional["Synapse"] = None, +) -> int: + """Add a batch of specific file versions to the user's Synapse download list. + + + + Arguments: + files: List of DownloadListItem objects identifying the file versions to add. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The number of files added to the download list. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + batch = [ + {"fileEntityId": item.file_entity_id, "versionNumber": item.version_number} + for item in files + ] + request_body = {"batchToAdd": batch} + response = await client.rest_post_async( + "/download/list/add", body=json.dumps(request_body) + ) + return response["numberOfFilesAdded"] + + +async def remove_from_download_list_async( + files: list["DownloadListItem"], + *, + synapse_client: Optional["Synapse"] = None, +) -> int: + """Remove a batch of specific file versions from the user's Synapse download list. + + + + Arguments: + files: List of DownloadListItem objects identifying the file versions to remove. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The number of files removed from the download list. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + batch = [ + {"fileEntityId": item.file_entity_id, "versionNumber": item.version_number} + for item in files + ] + request_body = {"batchToRemove": batch} + response = await client.rest_post_async( + "/download/list/remove", body=json.dumps(request_body) + ) + return response["numberOfFilesRemoved"] diff --git a/synapseclient/api/entity_services.py b/synapseclient/api/entity_services.py index 009dc1791..27561da16 100644 --- a/synapseclient/api/entity_services.py +++ b/synapseclient/api/entity_services.py @@ -1726,3 +1726,37 @@ async def main(): return True raise return True + + +async def update_entity_file_handle_version( + entity_id: str, + version: int, + old_file_handle_id: str, + new_file_handle_id: str, + *, + synapse_client: Optional["Synapse"] = None, +) -> None: + """Update the file handle for a specific version of an entity. + + Arguments: + entity_id: The Synapse ID of the entity. + version: The version number to update. + old_file_handle_id: The file handle ID being replaced. + new_file_handle_id: The new file handle ID to assign. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + client.logger.info(f"Updating file handle for {entity_id} version {version}") + await client.rest_put_async( + f"/entity/{entity_id}/version/{version}/filehandle", + body=json.dumps( + { + "oldFileHandleId": old_file_handle_id, + "newFileHandleId": new_file_handle_id, + } + ), + ) diff --git a/synapseclient/api/file_services.py b/synapseclient/api/file_services.py index 1a3c19d73..75e451dec 100644 --- a/synapseclient/api/file_services.py +++ b/synapseclient/api/file_services.py @@ -269,7 +269,7 @@ async def post_external_filehandle( "contentSize": file_size, } if mimetype is None: - (mimetype, _) = mimetypes.guess_type(external_url, strict=False) + mimetype, _ = mimetypes.guess_type(external_url, strict=False) if mimetype is not None: file_handle["contentType"] = mimetype return await client.rest_post_async( @@ -384,6 +384,45 @@ async def get_file_handle( ) +async def get_file_handle_presigned_url( + file_handle_id: str, + *, + synapse_client: Optional["Synapse"] = None, +) -> str: + """ + Get a pre-signed URL for a file handle owned by the current user. + Unlike get_file_handle_for_download_async, this does not require an + associated Synapse entity — only that the caller is the creator of the + file handle. + + + + Arguments: + file_handle_id: The ID of the file handle. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Raises: + SynapseFileNotFoundError: If the fileHandleId is not found in Synapse. + SynapseAuthorizationError: If the caller is not the creator of the + file handle. + + Returns: + A pre-signed URL string for downloading the file. The Synapse endpoint + returns the URL as text/plain when redirect=false, so rest_get_async + returns a plain string rather than a dict for this call. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + + return await client.rest_get_async( + f"/fileHandle/{file_handle_id}/url?redirect=false", + endpoint=client.fileHandleEndpoint, + ) + + async def get_file_handle_for_download_async( file_handle_id: str, synapse_id: str, diff --git a/synapseclient/api/project_setting_services.py b/synapseclient/api/project_setting_services.py new file mode 100644 index 000000000..5cef49e83 --- /dev/null +++ b/synapseclient/api/project_setting_services.py @@ -0,0 +1,121 @@ +"""Services for interacting with project settings in Synapse. + +This module provides async REST wrappers for creating, retrieving, updating, +and deleting project settings. +""" + +import json +from typing import TYPE_CHECKING, Any, Dict, Optional + +if TYPE_CHECKING: + from synapseclient import Synapse + + +async def get_project_setting( + project_id: str, + setting_type: str = "upload", + *, + synapse_client: Optional["Synapse"] = None, +) -> Optional[Dict[str, Any]]: + """Retrieve the project setting of a particular setting type for the project or folder. + Only users with READ access on a project can retrieve its project settings. + + Arguments: + project_id: The Synapse ID of the project or folder. + setting_type: The type of project setting to retrieve. Currently supports 'upload' only. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The upload destination list setting matching . + If the storage location is Synapse S3, the response will be an empty string. + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + response = await client.rest_get_async( + uri=f"/projectSettings/{project_id}/type/{setting_type}", + ) + return response + + +async def create_project_setting( + request: Dict[str, Any], + *, + synapse_client: Optional["Synapse"] = None, +) -> Dict[str, Any]: + """Create a project setting for a project or folder. + Only the users with CREATE access to the project or folder can add a project setting. + Currently, only the "upload" project setting is supported. This is implemented using UploadDestinationListSetting matching . + A project can have a maximum of 10 storage locations. + + Arguments: + request: The project setting request body matching . + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The created project setting matching . + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + return await client.rest_post_async( + uri="/projectSettings", + body=json.dumps(request), + ) + + +async def update_project_setting( + request: Dict[str, Any], + *, + synapse_client: Optional["Synapse"] = None, +) -> None: + """Update an existing project setting for a project or folder. + Only the users with UPDATE access to the project or folder can update a project setting. + Currently, only the "upload" project setting is supported. This is implemented using UploadDestinationListSetting matching . + A project can have a maximum of 10 storage locations. + + Arguments: + request: The project setting request body including the id field matching . + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + None + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + return await client.rest_put_async( + uri="/projectSettings", + body=json.dumps(request), + ) + + +async def delete_project_setting( + setting_id: str, + *, + synapse_client: Optional["Synapse"] = None, +) -> None: + """Delete a project setting for a project or folder. + Only the users with DELETE access to the project or folder can delete a project setting. + + Arguments: + setting_id: The ID of the project setting to delete. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + None + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + await client.rest_delete_async( + uri=f"/projectSettings/{setting_id}", + ) diff --git a/synapseclient/api/storage_location_services.py b/synapseclient/api/storage_location_services.py new file mode 100644 index 000000000..fff84b26b --- /dev/null +++ b/synapseclient/api/storage_location_services.py @@ -0,0 +1,66 @@ +"""Services for interacting with storage location settings in Synapse. + +This module provides async REST wrappers for creating and retrieving +storage location settings. +""" + +import json +from typing import TYPE_CHECKING, Any, Dict, Optional + +if TYPE_CHECKING: + from synapseclient import Synapse + + +async def create_storage_location_setting( + request: Dict[str, Any], + *, + synapse_client: Optional["Synapse"] = None, +) -> Dict[str, Any]: + """Create a new storage location in Synapse that can be linked to a project, + allowing users to upload their data to a storage location they own. + + Storage location creation is idempotent per user - if the same user creates + a storage location with identical properties, the existing one is returned. + + Arguments: + request: The storage location setting matching . + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The created storage location setting matching . + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + return await client.rest_post_async( + uri="/storageLocation", + body=json.dumps(request), + ) + + +async def get_storage_location_setting( + storage_location_id: int, + *, + synapse_client: Optional["Synapse"] = None, +) -> Dict[str, Any]: + """Retrieve a storage location setting by its ID. + + Only the creator of a StorageLocationSetting can retrieve it by its ID. + + Arguments: + storage_location_id: The ID of the storage location setting to retrieve. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The created storage location setting matching . + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + return await client.rest_get_async( + uri=f"/storageLocation/{storage_location_id}", + ) diff --git a/synapseclient/client.py b/synapseclient/client.py index 2e9c543cb..d3ae76fcc 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -2,6 +2,7 @@ The `Synapse` object encapsulates a connection to the Synapse service and is used for building projects, uploading and retrieving data, and recording provenance of data analysis. """ + import asyncio import collections import collections.abc @@ -607,9 +608,7 @@ def _init_logger(self): logger_name = ( SILENT_LOGGER_NAME if self.silent - else DEBUG_LOGGER_NAME - if self.debug - else DEFAULT_LOGGER_NAME + else DEBUG_LOGGER_NAME if self.debug else DEFAULT_LOGGER_NAME ) self.logger = logging.getLogger(logger_name) logging.getLogger("py.warnings").handlers = self.logger.handlers @@ -2672,9 +2671,11 @@ async def main(): fileHandle = await upload_file_handle_async( self, parent_id_for_upload, - local_state["path"] - if (synapseStore or local_state_fh.get("externalURL") is None) - else local_state_fh.get("externalURL"), + ( + local_state["path"] + if (synapseStore or local_state_fh.get("externalURL") is None) + else local_state_fh.get("externalURL") + ), synapse_store=synapseStore, md5=local_file_md5_hex or local_state_fh.get("contentMd5"), file_size=local_state_fh.get("contentSize"), @@ -3146,15 +3147,68 @@ async def upload_file(): ############################################################ # Download List # ############################################################ - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_clear()` instead. " + "See synapseclient.operations.download_list_operations for the new interface." + ), + ) def clear_download_list(self): - """Clear all files from download list""" + """Clear all files from download list + + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # syn.clear_download_list() + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_clear + + syn = Synapse() + syn.login() + + # Clear all files from the download list + download_list_clear() + ``` + """ self.restDELETE("/download/list") - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_remove(files)` instead. " + "See synapseclient.operations.download_list_operations for the new interface." + ), + ) def remove_from_download_list(self, list_of_files: typing.List[typing.Dict]) -> int: """Remove a batch of files from download list + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # syn.remove_from_download_list([ + # {"fileEntityId": "syn123", "versionNumber": 1}, + # ]) + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_remove, DownloadListItem + + syn = Synapse() + syn.login() + + # Remove specific file versions from the download list + download_list_remove([ + DownloadListItem(file_entity_id="syn123", version_number=1), + ]) + ``` + Arguments: list_of_files: Array of files in the format of a mapping {fileEntityId: synid, versionNumber: version} @@ -3167,7 +3221,13 @@ def remove_from_download_list(self, list_of_files: typing.List[typing.Dict]) -> ) return num_files_removed - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_manifest()` instead. " + "See synapseclient.operations.download_list_operations for the new interface." + ), + ) def _generate_manifest_from_download_list( self, quoteCharacter: str = '"', @@ -3176,8 +3236,25 @@ def _generate_manifest_from_download_list( separator: str = ",", header: bool = True, ): - """ - Creates a download list manifest generation request + """Creates a download list manifest generation request + + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # manifest_handle = syn._generate_manifest_from_download_list() + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_manifest + + syn = Synapse() + syn.login() + + # Generate and download the manifest CSV + manifest_path = download_list_manifest() + ``` Arguments: quoteCharacter: The character to be used for quoted elements in the resulting file. @@ -3203,10 +3280,34 @@ def _generate_manifest_from_download_list( uri="/download/list/manifest/async", request=request_body ) - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_manifest()` instead. " + "See synapseclient.operations.download_list_operations for the new interface." + ), + ) def get_download_list_manifest(self): """Get the path of the download list manifest file + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # manifest_path = syn.get_download_list_manifest() + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_manifest + + syn = Synapse() + syn.login() + + # Generate and download the manifest CSV + manifest_path = download_list_manifest() + ``` + Returns: Path of download list manifest file """ @@ -3230,10 +3331,34 @@ def get_download_list_manifest(self): ) return downloaded_path - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1439 + @deprecated( + version="4.13.0", + reason=( + "Use `synapseclient.operations.download_list_files(download_location=...)` instead. " + "See synapseclient.operations.download_list_operations for the new interface." + ), + ) def get_download_list(self, downloadLocation: str = None) -> str: """Download all files from your Synapse download list + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # manifest_path = syn.get_download_list(downloadLocation="./downloads") + + # New approach (RECOMMENDED) + from synapseclient import Synapse + from synapseclient.operations import download_list_files + + syn = Synapse() + syn.login() + + # Download all files in the cart and get the result manifest path + manifest_path = download_list_files(download_location="./downloads") + ``` + Arguments: downloadLocation: Directory to download files to. @@ -3243,9 +3368,10 @@ def get_download_list(self, downloadLocation: str = None) -> str: dl_list_path = self.get_download_list_manifest() downloaded_files = [] new_manifest_path = f"manifest_{time.time_ns()}.csv" - with open(dl_list_path) as manifest_f, open( - new_manifest_path, "w" - ) as write_obj: + with ( + open(dl_list_path) as manifest_f, + open(new_manifest_path, "w") as write_obj, + ): reader = csv.DictReader(manifest_f) columns = reader.fieldnames columns.extend(["path", "error"]) @@ -4925,9 +5051,11 @@ def _convertProvenanceList(self, usedList: list, limitSearch: str = None) -> lis if usedList is None: return None usedList = [ - self.get(target, limitSearch=limitSearch) - if (os.path.isfile(target) if isinstance(target, str) else False) - else target + ( + self.get(target, limitSearch=limitSearch) + if (os.path.isfile(target) if isinstance(target, str) else False) + else target + ) for target in usedList ] return usedList @@ -5468,7 +5596,7 @@ def _createExternalFileHandle( "contentSize": fileSize, } if mimetype is None: - (mimetype, enc) = mimetypes.guess_type(externalURL, strict=False) + mimetype, enc = mimetypes.guess_type(externalURL, strict=False) if mimetype is not None: fileHandle["contentType"] = mimetype return self.restPOST( @@ -5512,6 +5640,11 @@ def _createExternalObjectStoreFileHandle( "/externalFileHandle", json.dumps(file_handle), self.fileHandleEndpoint ) + @deprecated( + version="4.12.0", + reason="To be removed in 5.0.0. " + "Use `synapseclient.api.post_external_s3_file_handle()` instead.", + ) def create_external_s3_file_handle( self, bucket_name, @@ -5650,7 +5783,11 @@ def _getUserCredentials( # Project/Folder storage location settings # ############################################ - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441 + @deprecated( + version="4.12.0", + reason="To be removed in 5.0.0. " + "Use `StorageLocation(...).store()` from synapseclient.models instead.", + ) def createStorageLocationSetting(self, storage_type, **kwargs): """ Creates an IMMUTABLE storage location based on the specified type. @@ -5707,7 +5844,12 @@ def createStorageLocationSetting(self, storage_type, **kwargs): return self.restPOST("/storageLocation", body=json.dumps(kwargs)) - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441 + @deprecated( + version="4.12.0", + reason="To be removed in 5.0.0. " + "Use `StorageLocation(storage_location_id=id).get()` from " + "synapseclient.models instead.", + ) def getMyStorageLocationSetting(self, storage_location_id): """ Get a StorageLocationSetting by its id. @@ -5721,7 +5863,12 @@ def getMyStorageLocationSetting(self, storage_location_id): """ return self.restGET("/storageLocation/%s" % storage_location_id) - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441 + @deprecated( + version="4.12.0", + reason="To be removed in 5.0.0. " + "Use `Folder(id=...).set_storage_location(...)` or " + "`Project(id=...).set_storage_location(...)` from synapseclient.models instead.", + ) def setStorageLocation(self, entity, storage_location_id): """ Sets the storage location for a Project or Folder @@ -5759,7 +5906,12 @@ def setStorageLocation(self, entity, storage_location_id): "/projectSettings", body=json.dumps(project_destination) ) - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441 + @deprecated( + version="4.12.0", + reason="To be removed in 5.0.0. " + "Use `Folder(id=...).get_project_setting(...)` or " + "`Project(id=...).get_project_setting(...)` from synapseclient.models instead.", + ) def getProjectSetting(self, project, setting_type): """ Gets the ProjectSetting for a project. @@ -5787,7 +5939,12 @@ def getProjectSetting(self, project, setting_type): response if response else None ) # if no project setting, a empty string is returned as the response - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441 + @deprecated( + version="4.12.0", + reason="To be removed in 5.0.0. " + "Use `Folder(id=...).get_sts_storage_token(...)` or " + "`Project(id=...).get_sts_storage_token(...)` from synapseclient.models instead.", + ) def get_sts_storage_token( self, entity, permission, *, output_format="json", min_remaining_life=None ): @@ -5820,7 +5977,7 @@ def get_sts_storage_token( min_remaining_life=min_remaining_life, ) - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441 + @deprecated(version="4.12.0", reason="To be removed in 5.0.0. ") def create_s3_storage_location( self, *, @@ -5862,7 +6019,10 @@ def create_s3_storage_location( ) ) - # TODO: Deprecate method in https://sagebionetworks.jira.com/browse/SYNPY-1441 + @deprecated( + version="4.12.0", + reason="To be removed in 5.0.0. ", + ) async def create_s3_storage_location_async( self, *, @@ -5893,16 +6053,16 @@ async def create_s3_storage_location_async( } if bucket_name: - storage_location_kwargs[ - "concreteType" - ] = concrete_types.EXTERNAL_S3_STORAGE_LOCATION_SETTING + storage_location_kwargs["concreteType"] = ( + concrete_types.EXTERNAL_S3_STORAGE_LOCATION_SETTING + ) storage_location_kwargs["bucket"] = bucket_name if base_key: storage_location_kwargs["baseKey"] = base_key else: - storage_location_kwargs[ - "concreteType" - ] = concrete_types.SYNAPSE_S3_STORAGE_LOCATION_SETTING + storage_location_kwargs["concreteType"] = ( + concrete_types.SYNAPSE_S3_STORAGE_LOCATION_SETTING + ) storage_location_setting = self.restPOST( "/storageLocation", json.dumps(storage_location_kwargs) diff --git a/synapseclient/core/CLAUDE.md b/synapseclient/core/CLAUDE.md new file mode 100644 index 000000000..6b91fee72 --- /dev/null +++ b/synapseclient/core/CLAUDE.md @@ -0,0 +1,62 @@ + + +## Project + +Infrastructure layer — authentication, file transfer, retry logic, caching, OpenTelemetry tracing, and the `async_to_sync` decorator that powers the dual sync/async API. + +## Conventions + +### async_to_sync decorator (`async_utils.py`) +- Scans class for `*_async` methods and creates sync wrappers stripping the suffix +- Uses `ClassOrInstance` descriptor — methods work on both class and instance +- Detects running event loop: uses `nest_asyncio.apply()` for nested loops (Python <3.14), raises `RuntimeError` on Python 3.14+ instructing users to call async directly +- `wrap_async_to_sync()` for standalone functions (not class methods) — used in `operations/` layer +- `wrap_async_generator_to_sync_generator()` for async generators — must call `aclose()` in finally block +- `@skip_async_to_sync` decorator excludes specific methods from sync wrapper generation (sets `_skip_conversion = True`) +- `@otel_trace_method()` wraps async methods with OpenTelemetry spans. Format: `f"{ClassName}_{Operation}: ID: {self.id}, Name: {self.name}"` + +### Retry patterns (`retry.py`) +- `with_retry()` — count-based exponential backoff (default 3 retries), jitter 0.5-1.5x multiplier +- `with_retry_time_based_async()` — time-bounded (default 20 min), exponential backoff with 0.01-0.1 random jitter +- Default retryable status codes: `[429, 500, 502, 503, 504]` +- `NON_RETRYABLE_ERRORS` list overrides status code retry (currently: `["is not a table or view"]`) +- 429 throttling: wait bumps to 16 seconds minimum +- Sets OTel span attribute `synapse.retries` on retry + +### Credentials chain (`credentials/`) +Provider chain tries in order: login args → config file → env var (`SYNAPSE_AUTH_TOKEN`) → AWS SSM. Credentials implement `requests.auth.AuthBase`, adding `Authorization: Bearer` header. Profile selection via `SYNAPSE_PROFILE` env var or `--profile` arg. + +### Upload/download +- Both use 60-retry params spanning ~30 minutes for resilience +- Upload determines storage location from project settings, supports S3/SFTP/GCP +- Download validates MD5 post-transfer, raises `SynapseMd5MismatchError` on mismatch +- Progress via `tqdm`; multi-threaded uploads suppress per-file messages via `cumulative_transfer_progress` + +### concrete_types.py (`core/constants/concrete_types.py`) +Maps Java class names from Synapse REST API for polymorphic deserialization. When adding a new entity type, add its concrete type string here AND in `api/entity_factory.py` type map AND in `models/mixins/asynchronous_job.py` ASYNC_JOB_URIS if it's an async job type. + +### Key reusable utilities (`utils.py`) +- `delete_none_keys(d)` — removes None-valued keys from dict. MUST call before all API requests — Synapse rejects null values. +- `id_of(obj)` — extracts Synapse ID from entity, dict, or string +- `concrete_type_of(entity)` — gets the concrete type string from an entity +- `get_synid_and_version(id_str)` — parses "synXXX.N" strings into (id, version) tuples +- `merge_dataclass_entities(source, dest, ...)` — merges fields from one dataclass into another +- `log_dataclass_diff(obj1, obj2)` — logs field-by-field differences between two dataclass instances +- `snake_case(name)` — converts camelCase to snake_case +- `normalize_whitespace(s)` — collapses whitespace +- `MB`, `KB`, `GB` — byte size constants +- `make_bogus_data_file()`, `make_bogus_binary_file(n)`, `make_bogus_uuid_file()` — test file generators (in production code, used by tests) + +### Exception hierarchy (`exceptions.py`) +`SynapseError` base with 14+ subclasses: `SynapseHTTPError`, `SynapseMd5MismatchError`, `SynapseFileNotFoundError`, `SynapseNotFoundError`, `SynapseAuthenticationError`, etc. `_raise_for_status()` and `_raise_for_status_httpx()` handle HTTP error responses with Bearer token redaction via `BEARER_TOKEN_PATTERN` regex. + +### Rolled-up subdirectories + +**`core/models/`** — Internal dataclasses for ACL, Permission, DictObject (dict-like base class), and custom JSON serialization utilities. `DictObject` (`dict_object.py`) provides dot-notation access to dict entries. + +**`core/multithread_download/`** — Threaded download manager with `shared_executor()` context manager for external thread pool configuration. Uses `DownloadRequest` dataclass. Default part size: `SYNAPSE_DEFAULT_DOWNLOAD_PART_SIZE`. + +## Constraints + +- Bearer tokens must never appear in logs — use `BEARER_TOKEN_PATTERN` regex for redaction. +- `delete_none_keys()` must be called on all dicts before sending to the API — Synapse rejects null values. diff --git a/synapseclient/core/constants/CLAUDE.md b/synapseclient/core/constants/CLAUDE.md new file mode 100644 index 000000000..d5d42ff72 --- /dev/null +++ b/synapseclient/core/constants/CLAUDE.md @@ -0,0 +1,22 @@ + + +## Project + +Centralized constants used across the codebase — concrete type mappings, API limits, collision modes, and config file keys. + +## Conventions + +### concrete_types.py — 3-way registration required +Maps Java class name strings (e.g., `org.sagebionetworks.repo.model.FileEntity`) for polymorphic entity deserialization. When adding a new entity or job type, register in THREE places: +1. `concrete_types.py` — add the constant string +2. `api/entity_factory.py` — add to the type dispatch map +3. `models/mixins/asynchronous_job.py` `ASYNC_JOB_URIS` — add if it's an async job type + +### limits.py +`MAX_FILE_HANDLE_PER_COPY_REQUEST = 100` and other API batch size limits. + +### method_flags.py +Collision handling modes for file downloads: `COLLISION_OVERWRITE_LOCAL`, `COLLISION_KEEP_LOCAL`, `COLLISION_KEEP_BOTH`. + +### config_file_constants.py +Section and key names for the `~/.synapseConfig` file. `AUTHENTICATION_SECTION_NAME` identifies the auth section. diff --git a/synapseclient/core/constants/concrete_types.py b/synapseclient/core/constants/concrete_types.py index fba11dbdb..d8cbdbd59 100644 --- a/synapseclient/core/constants/concrete_types.py +++ b/synapseclient/core/constants/concrete_types.py @@ -9,7 +9,23 @@ EXTERNAL_S3_STORAGE_LOCATION_SETTING = ( "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting" ) -# EXTERNAL_GCP_STORAGE_LOCATION_SETTING = 'org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting' # noqa: E501 +EXTERNAL_GCP_STORAGE_LOCATION_SETTING = ( + "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting" +) +EXTERNAL_STORAGE_LOCATION_SETTING = ( + "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting" +) +EXTERNAL_OBJECT_STORAGE_LOCATION_SETTING = ( + "org.sagebionetworks.repo.model.project.ExternalObjectStorageLocationSetting" +) +PROXY_STORAGE_LOCATION_SETTINGS = ( + "org.sagebionetworks.repo.model.project.ProxyStorageLocationSettings" +) + +# Concrete types for ProjectSettings +UPLOAD_DESTINATION_LIST_SETTING = ( + "org.sagebionetworks.repo.model.project.UploadDestinationListSetting" +) # Concrete types for UploadDestinations SYNAPSE_S3_UPLOAD_DESTINATION = ( @@ -116,9 +132,20 @@ RECORD_BASED_METADATA_TASK_PROPERTIES = ( "org.sagebionetworks.repo.model.curation.metadata.RecordBasedMetadataTaskProperties" ) +GRID_EXECUTION_DETAILS = ( + "org.sagebionetworks.repo.model.curation.execution.GridExecutionDetails" +) + +# Download List +DOWNLOAD_LIST_MANIFEST_REQUEST = ( + "org.sagebionetworks.repo.model.download.DownloadListManifestRequest" +) # Grid Session Types CREATE_GRID_REQUEST = "org.sagebionetworks.repo.model.grid.CreateGridRequest" +DOWNLOAD_FROM_GRID_REQUEST = ( + "org.sagebionetworks.repo.model.grid.DownloadFromGridRequest" +) GRID_RECORD_SET_EXPORT_REQUEST = ( "org.sagebionetworks.repo.model.grid.GridRecordSetExportRequest" ) @@ -128,3 +155,8 @@ LIST_GRID_SESSIONS_RESPONSE = ( "org.sagebionetworks.repo.model.grid.ListGridSessionsResponse" ) +SYNCHRONIZE_GRID_REQUEST = "org.sagebionetworks.repo.model.grid.SynchronizeGridRequest" +GRID_CSV_IMPORT_REQUEST = "org.sagebionetworks.repo.model.grid.GridCsvImportRequest" +UPLOAD_TO_TABLE_PREVIEW_REQUEST = ( + "org.sagebionetworks.repo.model.table.UploadToTablePreviewRequest" +) diff --git a/synapseclient/core/credentials/CLAUDE.md b/synapseclient/core/credentials/CLAUDE.md new file mode 100644 index 000000000..85b758f46 --- /dev/null +++ b/synapseclient/core/credentials/CLAUDE.md @@ -0,0 +1,23 @@ + + +## Project + +Authentication credential providers implementing a chain-of-responsibility pattern for token resolution. + +## Conventions + +### Provider chain order (priority) +1. **UserArgsCredentialsProvider** — explicit login args passed to `syn.login()` +2. **ConfigFileCredentialsProvider** — `~/.synapseConfig` file (profile-aware via sections) +3. **EnvironmentVariableCredentialsProvider** — `SYNAPSE_AUTH_TOKEN` env var +4. **AWSParameterStoreCredentialsProvider** — AWS SSM Parameter Store (via `SYNAPSE_TOKEN_AWS_SSM_PARAMETER_NAME` env var) + +### Profile selection +Select profile via `SYNAPSE_PROFILE` env var or `--profile` CLI arg. If username provided in login args differs from config file username, config credentials are rejected — prevents ambiguity. + +### Token handling +`SynapseAuthTokenCredentials` implements `requests.auth.AuthBase`, adding `Authorization: Bearer` header. JWT validation failure is silent (logs warning, does not raise) — allows tokens with unrecognized formats to attempt API calls. + +## Constraints + +- Bearer tokens must never appear in logs — redact with `BEARER_TOKEN_PATTERN` regex before logging. diff --git a/synapseclient/core/download/CLAUDE.md b/synapseclient/core/download/CLAUDE.md new file mode 100644 index 000000000..456905b05 --- /dev/null +++ b/synapseclient/core/download/CLAUDE.md @@ -0,0 +1,26 @@ + + +## Project + +File download from Synapse storage with MD5 validation, collision handling, and progress tracking. + +## Conventions + +### Primary download path +`download_async.py` is the primary async download implementation. `download_functions.py` contains shared helpers and the sync download wrapper. The default part size of 8 MiB was empirically optimized for Synapse download throughput — do not change it without benchmarking. + +### MD5 validation +Post-transfer MD5 validation is mandatory. Raises `SynapseMd5MismatchError` on mismatch — the download is retried automatically (60 retries spanning ~30 minutes). + +### Collision handling +Controlled by `if_collision` parameter, using constants from `core/constants/method_flags.py`: +- `overwrite.local` — replace existing local file +- `keep.local` — skip download if local file exists +- `keep.both` — rename downloaded file to avoid collision + +### Progress tracking +Uses `shared_download_progress_bar` from `core/transfer_bar.py` for tqdm-based progress. Multi-file downloads track cumulative progress via `cumulative_transfer_progress`. + +### Key helpers +- `ensure_download_location_is_directory()` — validates/creates download directory +- `download_by_file_handle()` — downloads a file given its handle metadata diff --git a/synapseclient/core/download/download_functions.py b/synapseclient/core/download/download_functions.py index 56a67cdb2..5595359a4 100644 --- a/synapseclient/core/download/download_functions.py +++ b/synapseclient/core/download/download_functions.py @@ -479,13 +479,13 @@ async def download_by_file_handle( while retries > 0: try: - file_handle_result: Dict[ - str, str - ] = await get_file_handle_for_download_async( - file_handle_id=file_handle_id, - synapse_id=synapse_id, - entity_type=entity_type, - synapse_client=syn, + file_handle_result: Dict[str, str] = ( + await get_file_handle_for_download_async( + file_handle_id=file_handle_id, + synapse_id=synapse_id, + entity_type=entity_type, + synapse_client=syn, + ) ) file_handle = file_handle_result["fileHandle"] concrete_type = file_handle["concreteType"] diff --git a/synapseclient/core/dozer.py b/synapseclient/core/dozer.py index ecb09ec4a..7ae2d8b11 100644 --- a/synapseclient/core/dozer.py +++ b/synapseclient/core/dozer.py @@ -5,6 +5,7 @@ sleep while checking registered _listeners """ + import time from opentelemetry import trace diff --git a/synapseclient/core/models/custom_json.py b/synapseclient/core/models/custom_json.py index f1d4d8183..531f531cc 100644 --- a/synapseclient/core/models/custom_json.py +++ b/synapseclient/core/models/custom_json.py @@ -1,6 +1,7 @@ """ When imported, monkey-patches the 'json' module's encoder with a custom json encoding function. """ + import datetime import json diff --git a/synapseclient/core/otel_config.py b/synapseclient/core/otel_config.py index ebadfe284..57e4efc1b 100644 --- a/synapseclient/core/otel_config.py +++ b/synapseclient/core/otel_config.py @@ -1,4 +1,5 @@ """OpenTelemetry configuration for Synapse Python Client.""" + import os import sys from typing import Any, Dict, List, Optional diff --git a/synapseclient/core/remote_file_storage_wrappers.py b/synapseclient/core/remote_file_storage_wrappers.py index 811cfdbd9..fbcd3d8d8 100644 --- a/synapseclient/core/remote_file_storage_wrappers.py +++ b/synapseclient/core/remote_file_storage_wrappers.py @@ -316,8 +316,10 @@ def progress_callback(*args, **kwargs) -> None: progress_bar.update(args[0] - progress_bar.n) parsedURL = SFTPWrapper._parse_for_sftp(url) + # If the port is not specified, it will default to 22 + port_kwargs = {"port": parsedURL.port} if parsedURL.port else {} with _retry_pysftp_connection( - parsedURL.hostname, username=username, password=password + parsedURL.hostname, username=username, password=password, **port_kwargs ) as sftp: sftp.makedirs(parsedURL.path) with sftp.cd(parsedURL.path): diff --git a/synapseclient/core/transfer_bar.py b/synapseclient/core/transfer_bar.py index cd87c0336..736ef41d4 100644 --- a/synapseclient/core/transfer_bar.py +++ b/synapseclient/core/transfer_bar.py @@ -1,4 +1,5 @@ """Logic used to handle progress bars for file uploads and downloads.""" + try: import threading as _threading except ImportError: diff --git a/synapseclient/core/upload/CLAUDE.md b/synapseclient/core/upload/CLAUDE.md new file mode 100644 index 000000000..b28de12f7 --- /dev/null +++ b/synapseclient/core/upload/CLAUDE.md @@ -0,0 +1,38 @@ + + +## Project + +Multipart file upload to Synapse storage (S3, GCP, SFTP). Dual implementation: sync (requests) and async (httpx). + +## Conventions + +### Constants +- `MAX_NUMBER_OF_PARTS = 10000` +- `MIN_PART_SIZE = 5 MB` +- `DEFAULT_PART_SIZE = 8 MB` +- `MAX_RETRIES = 7` +- Upload retry: 60 retries spanning ~30 minutes for resilience + +### Sync vs async duality +`multipart_upload.py` (sync/requests) and `multipart_upload_async.py` (async/httpx) must be kept in feature parity. Both implement `UploadAttempt` / `UploadAttemptAsync` classes orchestrating multi-part uploads with presigned URL batching. + +### Async-specific patterns +- `HandlePartResult` dataclass tracks individual part uploads +- `shared_progress_bar()` context manager for tqdm integration across concurrent tasks +- Explicit `gc.collect()` calls and psutil memory monitoring during large uploads — prevents memory pressure +- Uses `asyncio.Lock` for thread-safe state management + +### Sync-specific patterns +- Thread-local `requests.Session` storage for persistent HTTP connections per thread +- `shared_executor()` context manager allows callers to provide their own thread pool + +### Upload flow +1. Pre-upload: MD5 calculation, MIME type detection, storage location determination from project settings +2. Presigned URL batch fetching with expiry detection and refresh +3. Multi-part upload with retry per part +4. Post-upload: complete upload API call, retrieve file handle + +### upload_utils.py +- `get_partial_file_chunk()` — binary file chunk reader with offset tracking +- `get_partial_dataframe_chunk()` — DataFrame chunk reader (iterates in 100-row increments) +- MD5 calculation, MIME type guessing, part size computation diff --git a/synapseclient/core/upload/multipart_upload_async.py b/synapseclient/core/upload/multipart_upload_async.py index 7e6ac3d6e..ac077369e 100644 --- a/synapseclient/core/upload/multipart_upload_async.py +++ b/synapseclient/core/upload/multipart_upload_async.py @@ -698,6 +698,8 @@ async def multipart_upload_dataframe_async( force_restart: True to restart a previously initiated upload from scratch, False to try to resume. storage_str: Optional string to append to the upload message. + to_csv_kwargs: Additional arguments to pass to the `pd.DataFrame.to_csv` + function when writing the data to a CSV file. """ trace.get_current_span().set_attributes( { diff --git a/synapseclient/core/utils.py b/synapseclient/core/utils.py index e02c0487b..186035b18 100644 --- a/synapseclient/core/utils.py +++ b/synapseclient/core/utils.py @@ -26,7 +26,8 @@ import zipfile from dataclasses import asdict, fields, is_dataclass from email.message import Message -from typing import TYPE_CHECKING, List, Optional, TypeVar +from enum import Enum +from typing import TYPE_CHECKING, List, Optional, TypeVar, Union import requests from deprecated import deprecated @@ -40,6 +41,7 @@ from synapseclient.models.dataset import EntityRef R = TypeVar("R") +E = TypeVar("E", bound=Enum) UNIX_EPOCH = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) ISO_FORMAT = "%Y-%m-%dT%H:%M:%S.000Z" @@ -278,7 +280,7 @@ def id_of(obj: typing.Union[str, collections.abc.Mapping, numbers.Number]) -> st def validate_submission_id( - submission_id: typing.Union[str, int, collections.abc.Mapping] + submission_id: typing.Union[str, int, collections.abc.Mapping], ) -> str: """ Ensures that a given submission ID is either an integer or a string that @@ -354,7 +356,7 @@ def get_properties(entity): return entity.properties if hasattr(entity, "properties") else entity -def is_url(s): +def is_url(s) -> bool: """Return True if the string appears to be a valid URL.""" if isinstance(s, str): try: @@ -462,7 +464,7 @@ def is_synapse_id_str(obj: str) -> typing.Union[str, None]: def get_synid_and_version( - obj: typing.Union[str, collections.abc.Mapping] + obj: typing.Union[str, collections.abc.Mapping], ) -> typing.Tuple[str, typing.Union[int, None]]: """Extract the Synapse ID and version number from input entity @@ -698,7 +700,7 @@ def to_unix_epoch_time(dt: typing.Union[datetime.date, datetime.datetime, str]) def to_unix_epoch_time_secs( - dt: typing.Union[datetime.date, datetime.datetime] + dt: typing.Union[datetime.date, datetime.datetime], ) -> float: """ Convert either [datetime.date or datetime.datetime objects](http://docs.python.org/2/library/datetime.html) to UNIX time. @@ -1567,3 +1569,50 @@ def test_import_pandas() -> None: # catch other errors (see SYNPY-177) except: # noqa raise + + +def test_import_sqlite3() -> None: + """This function is called within other functions and methods to ensure that sqlite3 is installed.""" + try: + import sqlite3 # noqa F401 + # used to catch when sqlite3 isn't installed + except ModuleNotFoundError: + raise ModuleNotFoundError( + """\n\nThe sqlite3 package is required for this function!\n + Most functions in the synapseclient package don't require sqlite3, + but some do. sqlite3 is included in the Python standard library but + may be missing in certain minimal environments. Please refer to the + Python documentation at: https://docs.python.org/3/library/sqlite3.html + or ensure your Python installation includes the sqlite3 module. + \n\n\n""" + ) + # catch other errors (see SYNPY-177) + except: # noqa + raise + + +def coerce_enum_list(enum_class: type[E], values: list[Union[E, str]]) -> list[str]: + """Normalize a list of values to string equivalents of an enum class. + + Accepts enum members or strings. Unrecognized values raise ValueError with + the list of valid values. + + Arguments: + enum_class: The Enum subclass to coerce values against. + values: List of enum members or equivalent strings to coerce. + + Returns: + List of string values corresponding to each enum member. + + Raises: + ValueError: If any element is not a valid enum member or string. + """ + result = [] + for value in values: + try: + result.append(enum_class(value).value) + except ValueError as exc: + raise ValueError( + f"Invalid value {value!r}. Valid values are: {[e.value for e in enum_class]}" + ) from exc + return result diff --git a/synapseclient/extensions/curator/CLAUDE.md b/synapseclient/extensions/curator/CLAUDE.md new file mode 100644 index 000000000..c7402f2ae --- /dev/null +++ b/synapseclient/extensions/curator/CLAUDE.md @@ -0,0 +1,26 @@ + + +## Project + +Schema curation tools for data modeling — JSON Schema generation from CSV/JSONLD data models, schema registration/binding to Synapse entities, and metadata task creation for file-based and record-based curation workflows. + +## Stack + +Optional dependencies (gated by `[curator]` extras): pandas, pandarallel, networkx, rdflib, inflection, dataclasses-json. + +## Conventions + +### schema_generation.py +Largest file in the codebase. Uses networkx (DiGraph, MultiDiGraph) for node/edge relationships and cycle detection (via multiprocessing). Many deprecated validation rule enums marked for removal (SYNPY-1724, SYNPY-1692). Active development area. + +### schema_management.py +Uses `wrap_async_to_sync()` for sync versions (not class decorator). `fix_schema_name()` replaces dashes/underscores with periods for Synapse compliance. + +### utils.py +`project_id_from_entity_id()` — traverses folder hierarchy up to project (max 1000 iterations). Uses `operations.get` in a loop — known tech debt. + +## Constraints + +- This area is under active development with frequent PRs. Be cautious about large refactors — coordinate with the curator team. +- `schema_generation.py` contains deprecated patterns (SYNPY-1724) that are still in use — do not remove without verifying the deprecation timeline. +- Uses `urllib.request` in one place instead of httpx (has TODO to replace) — do not propagate this pattern elsewhere. diff --git a/synapseclient/extensions/curator/file_based_metadata_task.py b/synapseclient/extensions/curator/file_based_metadata_task.py index 1e9c6f5fe..cd2a39191 100644 --- a/synapseclient/extensions/curator/file_based_metadata_task.py +++ b/synapseclient/extensions/curator/file_based_metadata_task.py @@ -41,6 +41,7 @@ def create_json_schema_entity_view( syn: Synapse, synapse_entity_id: str, entity_view_name: str = "JSON Schema view", + view_type_mask: Union[int, ViewTypeMask] = ViewTypeMask.FILE, ) -> str: """ Creates a Synapse entity view based on a JSON Schema that is bound to a Synapse entity @@ -50,6 +51,10 @@ def create_json_schema_entity_view( syn: A Synapse object thats been logged in synapse_entity_id: The ID of the entity in Synapse to bind the JSON Schema to entity_view_name: The name the crated entity view will have + view_type_mask: The view type mask for the EntityView. Defaults to + ViewTypeMask.FILE. Additional types can be added using bitwise OR + (e.g., ViewTypeMask.FILE | ViewTypeMask.DOCKER). Accepts either a + ViewTypeMask enum member or its raw integer value. Returns: The Synapse id of the crated entity view @@ -69,7 +74,7 @@ def create_json_schema_entity_view( name=entity_view_name, parent_id=synapse_entity_id, scope_ids=[synapse_entity_id], - view_type_mask=ViewTypeMask.FILE, + view_type_mask=view_type_mask, columns=columns, ).store(synapse_client=syn) # This reorder is so that these show up in the front of the EntityView in Synapse @@ -319,6 +324,7 @@ def create_file_based_metadata_task( schema_uri: Optional[str] = None, enable_derived_annotations: bool = False, assignee_principal_id: Optional[Union[str, int]] = None, + view_type_mask: Union[int, ViewTypeMask] = ViewTypeMask.FILE, *, synapse_client: Optional[Synapse] = None, ) -> Tuple[str, str]: @@ -332,6 +338,7 @@ def create_file_based_metadata_task( ```python import synapseclient from synapseclient.extensions.curator import create_file_based_metadata_task + from synapseclient.models import ViewTypeMask syn = synapseclient.Synapse() syn.login() @@ -344,7 +351,8 @@ def create_file_based_metadata_task( attach_wiki=False, entity_view_name="Biospecimen Metadata View", schema_uri="sage.schemas.v2571-amp.Biospecimen.schema-0.0.1", - assignee_principal_id=123456 # Optional: Assign to a user or team (can be str or int) + assignee_principal_id=123456, # Optional: Assign to a user or team (can be str or int) + view_type_mask=ViewTypeMask.FILE | ViewTypeMask.DOCKER, # Optional: include additional entity types in the view ) ``` @@ -365,6 +373,10 @@ def create_file_based_metadata_task( (default), the task will be unassigned. For metadata tasks, this determines the owner of the grid session. Team members can all join grid sessions owned by their team, while user-owned grid sessions are restricted to that user only. + view_type_mask: The view type mask for the EntityView. Defaults to + ViewTypeMask.FILE. Additional types can be added using bitwise OR + (e.g., ViewTypeMask.FILE | ViewTypeMask.DOCKER). Accepts either a + ViewTypeMask enum member or its raw integer value. synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -415,6 +427,7 @@ def create_file_based_metadata_task( syn=synapse_client, synapse_entity_id=folder_id, entity_view_name=entity_view_name, + view_type_mask=view_type_mask, ) except Exception as e: synapse_client.logger.exception("Error creating entity view") diff --git a/synapseclient/extensions/curator/readme.md b/synapseclient/extensions/curator/readme.md index 6cfdd60e9..cb8bfc5d8 100644 --- a/synapseclient/extensions/curator/readme.md +++ b/synapseclient/extensions/curator/readme.md @@ -1,6 +1,6 @@ # Synapse Curator Extension - Developer Guide -This document describes the design principles and architecture of the `synapseclient.extensions.curator` module. For user-facing documentation, see `metadata_curation.md` in the docs folder. +This document describes the design principles and architecture of the `synapseclient.extensions.curator` module. For user-facing documentation, see `docs/guides/extensions/curator/metadata_curation.md` (administrators) and `docs/guides/extensions/curator/metadata_contribution.md` (contributors). ## Design Goals diff --git a/synapseclient/extensions/curator/record_based_metadata_task.py b/synapseclient/extensions/curator/record_based_metadata_task.py index b56ab50c7..c97a49260 100644 --- a/synapseclient/extensions/curator/record_based_metadata_task.py +++ b/synapseclient/extensions/curator/record_based_metadata_task.py @@ -5,8 +5,9 @@ This module provides library functions for creating record-based metadata curation tasks in Synapse, including RecordSet creation, CurationTask setup, and Grid view initialization. """ + import tempfile -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Union from synapseclient import Synapse from synapseclient.core.typing_utils import DataFrame as DATA_FRAME_TYPE @@ -113,10 +114,15 @@ def create_record_based_metadata_task( *, synapse_client: Optional[Synapse] = None, project_id: Optional[str] = None, # Deprecated, will be removed in v5.0.0 -) -> Tuple[RecordSet, CurationTask, Grid]: + create_grid: bool = True, # Deprecated, will be removed in v5.0.0 + # TODO: https://sagebionetworks.jira.com/browse/SYNPY-1838 + # remove Grid tuple here +) -> tuple[RecordSet, CurationTask, Grid] | tuple[RecordSet, CurationTask]: """ - Generate and upload CSV templates as a RecordSet for record-based metadata, - create a CurationTask, and also create a Grid to bootstrap the ValidationStatistics. + This function: + - Generates and uploads CSV templates as a RecordSet for record-based metadata + - Creates a CurationTask + - Optionally creates a Grid if create_grid is True (deprecated, will be removed in v5.0.0) A number of schema URIs that are already registered to Synapse can be found at: @@ -141,7 +147,7 @@ def create_record_based_metadata_task( syn = synapseclient.Synapse() syn.login() - record_set, task, grid = create_record_based_metadata_task( + record_set, curation_task = create_record_based_metadata_task( synapse_client=syn, folder_id="syn87654321", record_set_name="BiospecimenMetadata_RecordSet", @@ -150,7 +156,8 @@ def create_record_based_metadata_task( upsert_keys=["specimenID"], instructions="Please curate this metadata according to the schema requirements", schema_uri="schema-org-schema.name.schema-v1.0.0", - assignee_principal_id=123456 # Optional: Assign to a user or team (can be str or int) + assignee_principal_id=123456, # Optional: Assign to a user or team (can be str or int) + create_grid=False, # Opt out of deprecated Grid creation ) ``` @@ -179,9 +186,13 @@ def create_record_based_metadata_task( `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. project_id: Deprecated, will be removed in v5.0.0 + create_grid: If True (default), creates a Grid for the RecordSet and returns it as the + third element of the tuple. Deprecated — Grid creation will be removed in v5.0.0. + Pass False to opt out early and receive only (RecordSet, CurationTask). Returns: - Tuple containing the created RecordSet, CurationTask, and Grid objects + If create_grid is True: tuple of (RecordSet, CurationTask, Grid). + If create_grid is False: tuple of (RecordSet, CurationTask). Raises: ValueError: If required parameters are missing or if schema_uri is not provided. @@ -211,6 +222,13 @@ def create_record_based_metadata_task( synapse_client = Synapse.get_client(synapse_client=synapse_client) + # TODO: https://sagebionetworks.jira.com/browse/SYNPY-1838 + # remove this warning + if create_grid: + synapse_client.logger.warning( + "A Grid object will no longer be created by this function starting in v5.0.0." + ) + project_id = project_id_from_entity_id( entity_id=folder_id, synapse_client=synapse_client ) @@ -277,19 +295,20 @@ def create_record_based_metadata_task( synapse_client.logger.error(f"Error creating CurationTask in Synapse: {e}") raise e - try: - curation_grid: Grid = Grid( - record_set_id=record_set_id, - ) - curation_grid.create(synapse_client=synapse_client) - curation_grid = curation_grid.export_to_record_set( - synapse_client=synapse_client - ) - synapse_client.logger.info( - f"Created Grid view for RecordSet ID: {record_set_id} for curation task {curation_task_name}" - ) - except Exception as e: - synapse_client.logger.exception("Error creating Grid view in Synapse") - raise e + # TODO: https://sagebionetworks.jira.com/browse/SYNPY-1838 + # stop creating Grid + if create_grid: + try: + curation_grid: Grid = Grid( + record_set_id=record_set_id, + ) + curation_grid.create(synapse_client=synapse_client) + synapse_client.logger.info( + f"Created Grid view for RecordSet ID: {record_set_id} for curation task {curation_task_name}" + ) + except Exception as e: + synapse_client.logger.exception("Error creating Grid view in Synapse") + raise e + return record_set_with_data, curation_task, curation_grid - return record_set_with_data, curation_task, curation_grid + return record_set_with_data, curation_task diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index decbf2028..508cf7f62 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -343,18 +343,18 @@ def find_and_convert_ints( if ( dataframe.size < large_manifest_cutoff_size ): # If small manifest, iterate as normal for improved performance - ints = dataframe.map( # type:ignore + ints = dataframe.map( # type: ignore lambda cell: convert_ints(cell), na_action="ignore" ).fillna(False) else: # parallelize iterations for large manifests pandarallel.initialize(verbose=1) - ints = dataframe.parallel_applymap( # type:ignore + ints = dataframe.parallel_applymap( # type: ignore lambda cell: convert_ints(cell), na_action="ignore" ).fillna(False) # Identify cells converted to integers - is_int = ints.map(is_integer) # type:ignore + is_int = ints.map(is_integer) # type: ignore assert isinstance(ints, DataFrame) assert isinstance(is_int, DataFrame) @@ -2049,6 +2049,7 @@ class PropertyTemplate: @dataclass class ClassTemplate: "Class Template" + magic_id: str = field(default="", metadata=config(field_name="@id")) magic_type: str = field(default="rdfs:Class", metadata=config(field_name="@type")) magic_comment: str = field(default="", metadata=config(field_name="rdfs:comment")) @@ -3876,7 +3877,7 @@ def match_node_names_with_reserved_names( def create_reserve_name_error_messages( - reserved_names_found: list[Tuple[str, str]] + reserved_names_found: list[Tuple[str, str]], ) -> list[str]: """Creates the error messages when a reserved name is used @@ -3943,7 +3944,7 @@ def get_missing_fields_from( def create_missing_fields_error_messages( - missing_fields: list[Tuple[str, str]] + missing_fields: list[Tuple[str, str]], ) -> list[str]: """Creates the error message for when a node is missing a required field @@ -4119,7 +4120,7 @@ def export_schema(schema: dict, file_path: str, logger: Logger) -> None: def parsed_model_as_dataframe( - parsed_model: dict[str, dict[str, Any]] + parsed_model: dict[str, dict[str, Any]], ) -> DATA_FRAME_TYPE: """Convert parsed model dictionary to an unpacked pandas DataFrame. Args: @@ -5168,7 +5169,7 @@ def add_conditional_dependency( @staticmethod def _convert_conditional_properties_to_all_of( - conditional_dependencies: dict[tuple[str, str], list[str]] + conditional_dependencies: dict[tuple[str, str], list[str]], ) -> list[AllOf]: """ Converts the conditional dependencies dict to a list of JSON Schema allOf conditions diff --git a/synapseclient/models/CLAUDE.md b/synapseclient/models/CLAUDE.md new file mode 100644 index 000000000..b00c467e1 --- /dev/null +++ b/synapseclient/models/CLAUDE.md @@ -0,0 +1,71 @@ + + +## Project + +Dataclass-based entity models for the Synapse REST API. Each model represents a Synapse resource (Project, File, Folder, Table, etc.) with async-first methods and auto-generated sync wrappers. + +## Conventions + +### New model checklist +1. Decorate with `@dataclass()` above `@async_to_sync` (follow existing models — `@dataclass` is the outer decorator) +2. Inherit from the model-specific `*SynchronousProtocol` base (e.g., `FileSynchronousProtocol`, `ProjectSynchronousProtocol`), then mixins (`AccessControllable`, `StorableContainer`, etc.) when appropriate +3. Create a matching `*SynchronousProtocol` file in `protocols/` with sync method signatures +4. Register concrete type in `core/constants/concrete_types.py` +5. Add to `models/__init__.py` exports and `__all__` +6. Add to entity factory type map in `api/entity_factory.py` if it's an entity type +7. Add to `ASYNC_JOB_URIS` in `models/mixins/asynchronous_job.py` if it uses async jobs +8. Summarize the model and its core functions in class-level docstrings with examples of distinct API use-cases. Add method-level docstrings with at least one example. Refer to existing models (`evaluation.py`, `materializedview.py`) for guidance on syntax, tone, and scope. +9. Methods are typically scoped to a single API communication (e.g., "download file", "list evaluations") and should wrap functionality that already exists in `api/`. Refer to existing models as guides. +10. Abstract any reusable logic to a new or existing `.py` in `api/` and, if appropriate, in `mixins/` + +### Standard fields every mutable entity model must have +```python +id: Optional[str] = None +name: Optional[str] = None +etag: Optional[str] = None +created_on: Optional[str] = field(default=None, compare=False) +modified_on: Optional[str] = field(default=None, compare=False) +created_by: Optional[str] = field(default=None, compare=False) +modified_by: Optional[str] = field(default=None, compare=False) +create_or_update: bool = field(default=True, repr=False) +_last_persistent_instance: Optional["Self"] = field(default=None, repr=False, compare=False) +``` + +Use `compare=False` for read-only timestamps, child collections, annotations, and internal state — this makes `has_changed` compare only user-modifiable fields. + +### fill_from_dict() pattern +Maps camelCase REST keys to snake_case fields via `.get("camelCaseKey", None)`. Must return `self`. Handle annotations separately with `set_annotations` parameter. Reference: `folder.py`, `file.py`. + +### Annotations handling +Annotations are deserialized separately from `fill_from_dict()` — they use a `set_annotations` flag parameter. The `Annotations` model wraps key-value metadata. When storing, annotations are sent via a separate API call in `models/services/storable_entity_components.py`. + +### Activity/provenance pattern +`Activity` model tracks provenance (what data/code produced an entity). Contains `used` and `executed` lists of `UsedEntity`/`UsedURL` references. Activity is stored as a separate component — the `associate_activity_to_new_version` flag on File controls whether activity transfers to new versions. + +### _last_persistent_instance lifecycle +- Set via `_set_last_persistent_instance()` after every successful `store_async()` and `get_async()` +- Uses `dataclasses.replace(self)` with `deepcopy` for annotations +- Enables `has_changed` property — skips redundant API calls when nothing changed +- Drives `create_or_update` logic: if no `_last_persistent_instance`, attempts merge with existing Synapse entity via `merge_dataclass_entities()` from `core/utils.py`. When calling this, set `fields_to_preserve_from_source` to attributes only mutable by the API (e.g., `id`, `etag`, `content_source`, `owner_id`, `created_on`). See `evaluation.py` for an example. + +### @otel_trace_method on every async method +Apply to all async methods that call Synapse. Format: `f"{ClassName}_{Operation}: ID: {self.id}, Name: {self.name}"`. + +### delete_none_keys() before API calls +Always call `delete_none_keys()` on request dicts before passing to `store_entity()` — the Synapse API rejects `None` values. + +### EnumCoercionMixin for enum fields +If a model has enum-typed fields, inherit from `EnumCoercionMixin` and declare `_ENUM_FIELDS: ClassVar[Dict[str, type]]` mapping field names to enum classes. Auto-coerces strings to enums on assignment via `__setattr__`. + +### OOP models vs legacy synapseclient/ classes +Modern dataclass models live in `models/`. Legacy classes in the package root (`synapseclient/entity.py`, `synapseclient/table.py`, etc.) are kept for backwards compatibility. New features go in `models/`. When introducing a new OOP model that replaces a legacy class, add a `DeprecationWarning` to the legacy counterpart and update its docstring to point users to the new model. + +### Business logic in services/ +Complex orchestration logic lives in `models/services/` (storable_entity, storable_entity_components, search) — not directly on model classes. This keeps models thin. + +## Constraints + +- Never manually write sync methods on models — `@async_to_sync` generates them. Use `@skip_async_to_sync` to exclude specific methods. +- All direct API communications must go in `api/`. Model methods wrap `api/` functions — only skip the `api/` layer if the API call is truly one-off and won't be reused. +- Protocol files must exactly match the async method signatures (minus `_async` suffix) — they exist for IDE type hints, not runtime dispatch. +- Child collections (files, folders, tables) must use `compare=False` to avoid breaking `has_changed`. diff --git a/synapseclient/models/__init__.py b/synapseclient/models/__init__.py index 7a85b6b83..c68bfe2ec 100644 --- a/synapseclient/models/__init__.py +++ b/synapseclient/models/__init__.py @@ -9,9 +9,13 @@ from synapseclient.models.annotations import Annotations from synapseclient.models.curation import ( CurationTask, + CurationTaskStatus, FileBasedMetadataTaskProperties, Grid, + GridExecutionDetails, RecordBasedMetadataTaskProperties, + TaskExecutionDetails, + TaskState, ) from synapseclient.models.dataset import Dataset, DatasetCollection, EntityRef from synapseclient.models.docker import DockerRepository @@ -24,9 +28,15 @@ from synapseclient.models.materializedview import MaterializedView from synapseclient.models.mixins.table_components import QueryMixin from synapseclient.models.project import Project +from synapseclient.models.project_setting import ProjectSetting from synapseclient.models.recordset import RecordSet from synapseclient.models.schema_organization import JSONSchema, SchemaOrganization from synapseclient.models.services import FailureStrategy +from synapseclient.models.storage_location import ( + StorageLocation, + StorageLocationType, + UploadType, +) from synapseclient.models.submission import Submission from synapseclient.models.submission_bundle import SubmissionBundle from synapseclient.models.submission_status import SubmissionStatus @@ -88,9 +98,13 @@ "TeamMember", "TeamMembershipStatus", "CurationTask", + "CurationTaskStatus", "FileBasedMetadataTaskProperties", "RecordBasedMetadataTaskProperties", + "TaskState", "Grid", + "GridExecutionDetails", + "TaskExecutionDetails", "UserProfile", "UserPreference", "UserGroupHeader", @@ -155,6 +169,12 @@ # Form models "FormGroup", "FormData", + # Storage Location models + "StorageLocation", + "StorageLocationType", + "UploadType", + # Project Setting models + "ProjectSetting", ] # Static methods to expose as functions diff --git a/synapseclient/models/agent.py b/synapseclient/models/agent.py index c5bc60c74..256d97d68 100644 --- a/synapseclient/models/agent.py +++ b/synapseclient/models/agent.py @@ -199,9 +199,9 @@ class AgentSession(AgentSessionSynchronousProtocol): """The unique ID of the agent session. Can only be used by the user that created it.""" - access_level: Optional[ - AgentSessionAccessLevel - ] = AgentSessionAccessLevel.PUBLICLY_ACCESSIBLE + access_level: Optional[AgentSessionAccessLevel] = ( + AgentSessionAccessLevel.PUBLICLY_ACCESSIBLE + ) """The access level of the agent session. One of PUBLICLY_ACCESSIBLE, READ_YOUR_PRIVATE_DATA, or WRITE_YOUR_PRIVATE_DATA. Defaults to PUBLICLY_ACCESSIBLE. diff --git a/synapseclient/models/annotations.py b/synapseclient/models/annotations.py index d6c1d609c..6323e2a56 100644 --- a/synapseclient/models/annotations.py +++ b/synapseclient/models/annotations.py @@ -93,9 +93,7 @@ async def store_async( return self @classmethod - def from_dict( - cls, synapse_annotations: dict - ) -> Union[ + def from_dict(cls, synapse_annotations: dict) -> Union[ Dict[ str, Union[ diff --git a/synapseclient/models/curation.py b/synapseclient/models/curation.py index 89107c1d2..918ab0850 100644 --- a/synapseclient/models/curation.py +++ b/synapseclient/models/curation.py @@ -5,8 +5,23 @@ data or metadata in Synapse. """ +import asyncio +import os +from abc import ABC, abstractmethod +from copy import deepcopy from dataclasses import dataclass, field, replace -from typing import Any, AsyncGenerator, Dict, Generator, Optional, Protocol, Union +from datetime import datetime, timezone +from enum import Enum +from typing import ( + Any, + AsyncGenerator, + ClassVar, + Dict, + Generator, + Optional, + Protocol, + Union, +) from opentelemetry import trace @@ -16,27 +31,64 @@ delete_curation_task, delete_grid_session, get_curation_task, + get_curation_task_status, + get_file_handle, + get_file_handle_presigned_url, list_curation_tasks, list_grid_sessions, update_curation_task, + update_curation_task_status, ) from synapseclient.core.async_utils import ( async_to_sync, + otel_trace_method, skip_async_to_sync, wrap_async_generator_to_sync_generator, ) from synapseclient.core.constants.concrete_types import ( CREATE_GRID_REQUEST, + DOWNLOAD_FROM_GRID_REQUEST, FILE_BASED_METADATA_TASK_PROPERTIES, + GRID_CSV_IMPORT_REQUEST, + GRID_EXECUTION_DETAILS, GRID_RECORD_SET_EXPORT_REQUEST, LIST_GRID_SESSIONS_REQUEST, LIST_GRID_SESSIONS_RESPONSE, RECORD_BASED_METADATA_TASK_PROPERTIES, + SYNCHRONIZE_GRID_REQUEST, + UPLOAD_TO_TABLE_PREVIEW_REQUEST, +) +from synapseclient.core.download.download_functions import download_from_url +from synapseclient.core.upload.upload_functions_async import upload_synapse_s3 +from synapseclient.core.utils import ( + coerce_enum_list, + delete_none_keys, + merge_dataclass_entities, ) -from synapseclient.core.utils import delete_none_keys, merge_dataclass_entities from synapseclient.models.mixins.asynchronous_job import AsynchronousCommunicator +from synapseclient.models.mixins.enum_coercion import EnumCoercionMixin from synapseclient.models.recordset import ValidationSummary -from synapseclient.models.table_components import Query +from synapseclient.models.table_components import Column, CsvTableDescriptor, Query + + +class TaskState(str, Enum): + """ + The state of a CurationTask. + + See . + """ + + NOT_STARTED = "NOT_STARTED" + """The task has been created and assigned but work has not yet started.""" + + IN_PROGRESS = "IN_PROGRESS" + """The assignee has actively started the task.""" + + COMPLETED = "COMPLETED" + """The task has been completed and verified.""" + + CANCELED = "CANCELED" + """The task has been canceled and is no longer needed.""" @dataclass @@ -132,7 +184,7 @@ def to_synapse_request(self) -> Dict[str, Any]: def _create_task_properties_from_dict( - properties_dict: Dict[str, Any] + properties_dict: Dict[str, Any], ) -> Union[FileBasedMetadataTaskProperties, RecordBasedMetadataTaskProperties]: """ Factory method to create the appropriate FileBasedMetadataTaskProperties/RecordBasedMetadataTaskProperties @@ -156,6 +208,177 @@ def _create_task_properties_from_dict( ) +@dataclass +class TaskExecutionDetails(ABC): + """ + Base class for task-specific execution details attached to a CurationTaskStatus. + + + + The concrete subclass is determined by the concreteType field in the REST response. + """ + + @abstractmethod + def fill_from_dict( + self, synapse_response: dict[str, Any] + ) -> "TaskExecutionDetails": + """ + Converts a response from the REST API into this dataclass. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The TaskExecutionDetails object. + """ + ... + + @abstractmethod + def to_synapse_request(self) -> dict[str, Any]: + """ + Converts this dataclass to a dictionary suitable for a Synapse REST API request. + + Returns: + A dictionary representation of this object for API requests. + """ + ... + + +@dataclass +class GridExecutionDetails(TaskExecutionDetails): + """ + Execution details for a metadata curation task involving a collaborative grid session. + + + + Attributes: + active_session_id: The unique identifier of the active CRDT grid session linked to this task. + """ + + active_session_id: str | None = None + """The unique identifier of the active CRDT grid session linked to this task.""" + + def fill_from_dict( + self, synapse_response: dict[str, Any] + ) -> "GridExecutionDetails": + """ + Converts a response from the REST API into this dataclass. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The GridExecutionDetails object. + """ + self.active_session_id = synapse_response.get("activeSessionId") + return self + + def to_synapse_request(self) -> dict[str, Any]: + """ + Converts this dataclass to a dictionary suitable for a Synapse REST API request. + + Returns: + A dictionary representation of this object for API requests. + """ + request_dict: dict[str, Any] = {"concreteType": GRID_EXECUTION_DETAILS} + if self.active_session_id is not None: + request_dict["activeSessionId"] = self.active_session_id + return request_dict + + +TASK_EXECUTION_DETAILS_DICT: dict[str, type[TaskExecutionDetails]] = { + GRID_EXECUTION_DETAILS: GridExecutionDetails, +} + + +@dataclass +class CurationTaskStatus(EnumCoercionMixin): + """ + The status of a CurationTask in its lifecycle. + + + + Attributes: + task_id: The unique identifier of the associated curation task. + state: The state of a curation task in its lifecycle. + execution_details: Task-specific execution details. The concrete type + determines which task-type-specific properties are available. + last_updated_by: The principal ID of the user who last updated the status. + last_updated_on: Timestamp of when the status was last updated. + etag: Optimistic concurrency control token for the task status. + """ + + _ENUM_FIELDS: ClassVar[dict[str, type]] = {"state": TaskState} + + task_id: int | None = None + """The unique identifier of the associated curation task.""" + + state: str | TaskState | None = None + """The state of a curation task in its lifecycle.""" + + execution_details: TaskExecutionDetails | None = None + """Task-specific execution details. The concrete type determines which + task-type-specific properties are available.""" + + last_updated_by: str | None = None + """The principal ID of the user who last updated the status.""" + + last_updated_on: str | None = None + """Timestamp of when the status was last updated.""" + + etag: str | None = None + """Optimistic concurrency control token for the task status.""" + + def fill_from_dict(self, synapse_response: dict[str, Any]) -> "CurationTaskStatus": + """ + Converts a response from the REST API into this dataclass. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The CurationTaskStatus object. + """ + task_id_value = synapse_response.get("taskId") + self.task_id = int(task_id_value) if task_id_value is not None else None + self.state = synapse_response.get("state") + self.last_updated_by = synapse_response.get("lastUpdatedBy") + self.last_updated_on = synapse_response.get("lastUpdatedOn") + self.etag = synapse_response.get("etag") + + details_dict: dict[str, Any] | None = synapse_response.get("executionDetails") + if details_dict is None: + self.execution_details = None + else: + concrete_type = details_dict.get("concreteType", "") + cls = TASK_EXECUTION_DETAILS_DICT.get(concrete_type) + if cls is None: + raise ValueError( + f"Unknown concreteType for TaskExecutionDetails: {concrete_type}" + ) + self.execution_details = cls().fill_from_dict(details_dict) + return self + + def to_synapse_request(self) -> dict[str, Any]: + """ + Converts this dataclass to a dictionary suitable for a Synapse REST API request. + + Returns: + A dictionary representation of this object for API requests. + """ + request_dict: dict[str, Any] = { + "taskId": self.task_id, + "state": self.state.value if self.state is not None else None, + "etag": self.etag, + } + if self.execution_details is not None: + request_dict["executionDetails"] = ( + self.execution_details.to_synapse_request() + ) + delete_none_keys(request_dict) + return request_dict + + async def _get_existing_curation_task_id( project_id: str, data_type: str, @@ -213,6 +436,238 @@ def get(self, *, synapse_client: Optional[Synapse] = None) -> "CurationTask": """ return self + def get_status( + self, *, synapse_client: Synapse | None = None + ) -> "CurationTaskStatus": + """ + Gets the status of this CurationTask from Synapse. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The CurationTaskStatus object. + + Raises: + ValueError: If the CurationTask object does not have a task_id. + + Example: Get the status of a curation task +   + + ```python + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + status = CurationTask(task_id=123).get_status() + print(status.state) + ``` + """ + return CurationTaskStatus() + + def update_status( + self, + curation_task_status: "CurationTaskStatus", + *, + synapse_client: Synapse | None = None, + ) -> "CurationTaskStatus": + """ + Updates the status of this CurationTask on Synapse. + + Arguments: + curation_task_status: The complete CurationTaskStatus object to update. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The updated CurationTaskStatus object. + + Raises: + ValueError: If the CurationTask object does not have a task_id. + + Example: Update the status of a curation task +   + + ```python + from synapseclient import Synapse + from synapseclient.models import ( + CurationTask, + TaskState, + ) + + syn = Synapse() + syn.login() + + task = CurationTask(task_id=123) + current = task.get_status() + current.state = TaskState.COMPLETED + updated = task.update_status(curation_task_status=current) + print(updated.state) + ``` + """ + return CurationTaskStatus() + + def set_active_grid_session( + self, + active_session_id: str, + *, + synapse_client: Synapse | None = None, + ) -> "CurationTaskStatus": + """ + Set the active grid session on this CurationTask's status by replacing + execution_details with a GridExecutionDetails carrying the given session id. + + Does not transition the task state. + + Arguments: + active_session_id: The unique identifier of the active grid session to link. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The updated CurationTaskStatus object. + + Raises: + ValueError: If the CurationTask object does not have a task_id. + + Example: Link a grid session to a curation task +   + + ```python + from synapseclient import Synapse + from synapseclient.models import CurationTask, Grid + + syn = Synapse() + syn.login() + + grid = Grid(record_set_id="syn1234567").create() + CurationTask(task_id=123).set_active_grid_session( + active_session_id=grid.session_id + ) + ``` + """ + return CurationTaskStatus() + + def set_task_state( + self, + state: "TaskState | str", + *, + synapse_client: Synapse | None = None, + ) -> "CurationTaskStatus": + """ + Set the state on this CurationTask's status. + + Does not modify execution_details. Fetches the current CurationTaskStatus + first so the update carries a fresh etag. + + Arguments: + state: The state to set on this task's status. Accepts a + TaskState or a string exactly matching one of its members + (e.g. NOT_STARTED, IN_PROGRESS, COMPLETED, CANCELED). + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The updated CurationTaskStatus object. + + Raises: + ValueError: If the CurationTask object does not have a task_id, or + if state is a string that does not match a TaskState member. + + Example: Mark a curation task as completed +   + + ```python + from synapseclient import Synapse + from synapseclient.models import CurationTask, TaskState + + syn = Synapse() + syn.login() + + CurationTask(task_id=123).set_task_state( + state=TaskState.COMPLETED + ) + ``` + + Example: Mark a curation task as completed using a string +   + + ```python + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + CurationTask(task_id=123).set_task_state(state="COMPLETED") + ``` + """ + return CurationTaskStatus() + + def create_grid_session( + self, + *, + owner_principal_id: int | None = None, + timeout: int = 120, + synapse_client: Synapse | None = None, + ) -> "Grid": + """ + Create a Grid session for this CurationTask and link it to the task status. + + Picks the Grid seed from this task's task_properties: + + - RecordBasedMetadataTaskProperties uses record_set_id + - FileBasedMetadataTaskProperties uses an initial_query that selects from + the file_view_id + + Always creates a new Grid session. To attach an existing session to a task, + use set_active_grid_session instead. + + After the Grid is created, updates the CurationTaskStatus to point its + active_session_id at the new session. If that update fails for any reason, + the newly created Grid is deleted on a best-effort basis and the original + exception is re-raised. + + Arguments: + owner_principal_id: The principal ID (user or team) that will own the + created grid session. When not provided, the principal ID of the + caller is used. + timeout: Seconds to wait for the grid creation job. Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The newly created Grid. + + Raises: + ValueError: If task_id is unset or task_properties is of an unsupported type. + SynapseHTTPError: If the status update fails. The orphan Grid is + deleted on a best-effort basis before the error is re-raised. + + Example: Create a grid session for a curation task +   + + ```python + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + grid = CurationTask(task_id=123).create_grid_session() + print(grid.session_id) + ``` + """ + return Grid() + def delete( self, delete_source: bool = False, @@ -355,6 +810,9 @@ def list( cls, project_id: str, *, + assigned_to_me: Optional[bool] = None, + assignee_ids: Optional[list[str]] = None, + state_filter: Optional[list[Union["TaskState", str]]] = None, synapse_client: Optional[Synapse] = None, ) -> Generator["CurationTask", None, None]: """ @@ -362,13 +820,35 @@ def list( Arguments: project_id: The synId of the project. + assigned_to_me: When True, only return tasks assigned to the current user. + Cannot be combined with assignee_ids. + False does not mean "tasks not assigned to me". + Defaults to None. + assignee_ids: Optional list of principal IDs (users or teams) to filter + tasks by assignee. Cannot be combined with assigned_to_me=True. + Passing an empty list raises a ValueError; pass None to return tasks + for any assignee. Defaults to None. + state_filter: Optional list of TaskState values or exact-case strings to + filter tasks by their current state (e.g., "IN_PROGRESS"). Defaults to + None (all states returned). Passing an empty list raises a ValueError; + pass None to return tasks in any state. synapse_client: If not passed in and caching was not disabled by - `Synapse.allow_client_caching(False)` this will use the last created + Synapse.allow_client_caching(False) this will use the last created instance from the Synapse class constructor. Yields: CurationTask objects as they are retrieved from the API. + Raises: + ValueError: If state_filter is an empty list. + ValueError: If assignee_ids is an empty list. + ValueError: If assigned_to_me is True and assignee_ids is also provided. + ValueError: If any value in state_filter is not a TaskState member or + an exact-case string matching a TaskState value (e.g., "IN_PROGRESS"). + + Note: Due to generator semantics, argument validation runs on the first + iteration of the generator, not at the point where list() is called. + Example: List all curation tasks in a project   @@ -386,15 +866,74 @@ def list( print(f"Instructions: {task.instructions}") print("---") ``` - """ - yield from wrap_async_generator_to_sync_generator( - async_gen_func=cls.list_async, - project_id=project_id, - synapse_client=synapse_client, - ) + Example: List only curation tasks assigned to the current user +   -@dataclass + ```python + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + for task in CurationTask.list(project_id="syn9876543", assigned_to_me=True): + print(f"Task ID: {task.task_id}") + print(f"Data Type: {task.data_type}") + print("---") + ``` + + Example: List only in-progress curation tasks +   + + ```python + from synapseclient import Synapse + from synapseclient.models import CurationTask, TaskState + + syn = Synapse() + syn.login() + + for task in CurationTask.list( + project_id="syn9876543", + state_filter=[TaskState.IN_PROGRESS], + ): + print(f"Task ID: {task.task_id}") + print(f"Data Type: {task.data_type}") + print("---") + ``` + + Example: List only in-progress curation tasks using a string state filter +   + + state_filter also accepts plain strings matching TaskState names exactly. + + ```python + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + for task in CurationTask.list( + project_id="syn9876543", + state_filter=["IN_PROGRESS"], + ): + print(f"Task ID: {task.task_id}") + print(f"Data Type: {task.data_type}") + print("---") + ``` + """ + yield from wrap_async_generator_to_sync_generator( + async_gen_func=cls.list_async, + project_id=project_id, + assigned_to_me=assigned_to_me, + assignee_ids=assignee_ids, + state_filter=state_filter, + synapse_client=synapse_client, + ) + + +@dataclass @async_to_sync class CurationTask(CurationTaskSynchronousProtocol): """ @@ -510,6 +1049,9 @@ def _set_last_persistent_instance(self) -> None: determine if the object has been changed and needs to be updated in Synapse.""" del self._last_persistent_instance self._last_persistent_instance = replace(self) + self._last_persistent_instance.task_properties = ( + deepcopy(self.task_properties) if self.task_properties else None + ) def fill_from_dict( self, synapse_response: Union[Dict[str, Any], Any] @@ -539,10 +1081,14 @@ def fill_from_dict( self.assignee_principal_id = synapse_response.get("assigneePrincipalId", None) task_properties_dict = synapse_response.get("taskProperties", None) - if task_properties_dict: - self.task_properties = _create_task_properties_from_dict( - task_properties_dict + if task_properties_dict is None: + raise ValueError( + "taskProperties was not found in the Synapse response for this CurationTask. " + "This means it is likely an older CurationTask from before taskProperties was added. " + "It is recommended that this task be deleted: task.delete(delete_source=False) " + "and then recreate the task with the correct taskProperties." ) + self.task_properties = _create_task_properties_from_dict(task_properties_dict) return self @@ -587,6 +1133,7 @@ async def get_async( Raises: ValueError: If the CurationTask object does not have a task_id. + ValueError: If the Synapse response does not contain taskProperties. Example: Get a curation task asynchronously   @@ -623,6 +1170,54 @@ async def main(): self._set_last_persistent_instance() return self + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: ( + f"CurationTask_GetStatus: ID: {self.task_id}" + ) + ) + async def get_status_async( + self, *, synapse_client: Synapse | None = None + ) -> "CurationTaskStatus": + """ + Gets the status of this CurationTask from Synapse. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The CurationTaskStatus object. + + Raises: + ValueError: If the CurationTask object does not have a task_id. + + Example: Get the status of a curation task asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + async def main(): + status = await CurationTask(task_id=123).get_status_async() + print(status.state) + + asyncio.run(main()) + ``` + """ + if not self.task_id: + raise ValueError("task_id is required to get a CurationTask status") + + status_result = await get_curation_task_status( + task_id=self.task_id, synapse_client=synapse_client + ) + return CurationTaskStatus().fill_from_dict(status_result) + async def delete_async( self, delete_source: bool = False, @@ -836,29 +1431,464 @@ async def main(): self._set_last_persistent_instance() return self + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: ( + f"CurationTask_UpdateStatus: ID: {self.task_id}" + ) + ) + async def update_status_async( + self, + curation_task_status: "CurationTaskStatus", + *, + synapse_client: Synapse | None = None, + ) -> "CurationTaskStatus": + """ + Updates the status of this CurationTask on Synapse. + + Arguments: + curation_task_status: The complete CurationTaskStatus object to update. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The updated CurationTaskStatus object. + + Raises: + ValueError: If the CurationTask object does not have a task_id. + + Example: Update the status of a curation task asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import ( + CurationTask, + TaskState, + ) + + syn = Synapse() + syn.login() + + async def main(): + task = CurationTask(task_id=123) + current = await task.get_status_async() + current.state = TaskState.COMPLETED + updated = await task.update_status_async(curation_task_status=current) + print(updated.state) + + asyncio.run(main()) + ``` + """ + if not self.task_id: + raise ValueError("task_id is required to update a CurationTask status") + + status_result = await update_curation_task_status( + task_id=self.task_id, + curation_task_status=curation_task_status.to_synapse_request(), + synapse_client=synapse_client, + ) + return CurationTaskStatus().fill_from_dict(status_result) + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: ( + f"CurationTask_SetActiveGridSession: ID: {self.task_id}" + ) + ) + async def set_active_grid_session_async( + self, + active_session_id: str, + *, + synapse_client: Synapse | None = None, + ) -> "CurationTaskStatus": + """ + Set the active grid session on this CurationTask's status by replacing + execution_details with a GridExecutionDetails carrying the given session id. + + Arguments: + active_session_id: The unique identifier of the active grid session to link. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The updated CurationTaskStatus object. + + Raises: + ValueError: If the CurationTask object does not have a task_id. + + Example: Link a grid session to a curation task asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import CurationTask, Grid + + syn = Synapse() + syn.login() + + async def main(): + grid = await Grid(record_set_id="syn1234567").create_async() + await CurationTask(task_id=123).set_active_grid_session_async( + active_session_id=grid.session_id + ) + + asyncio.run(main()) + ``` + """ + status = await self.get_status_async(synapse_client=synapse_client) + status.execution_details = GridExecutionDetails( + active_session_id=active_session_id + ) + return await self.update_status_async( + curation_task_status=status, synapse_client=synapse_client + ) + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: ( + f"CurationTask_SetTaskState: ID: {self.task_id}" + ) + ) + async def set_task_state_async( + self, + state: "TaskState | str", + *, + synapse_client: Synapse | None = None, + ) -> "CurationTaskStatus": + """ + Set the state on this CurationTask's status. + + Does not modify execution_details. Fetches the current CurationTaskStatus + first so the update carries a fresh etag. + + Arguments: + state: The state to set on this task's status. Accepts a + TaskState or a string exactly matching one of its members + (e.g. NOT_STARTED, IN_PROGRESS, COMPLETED, CANCELED). + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The updated CurationTaskStatus object. + + Raises: + ValueError: If the CurationTask object does not have a task_id, or + if state is a string that does not match a TaskState member. + + Example: Mark a curation task as completed asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import CurationTask, TaskState + + syn = Synapse() + syn.login() + + async def main(): + await CurationTask(task_id=123).set_task_state_async( + state=TaskState.COMPLETED + ) + + asyncio.run(main()) + ``` + + Example: Mark a curation task as completed using a string asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + async def main(): + await CurationTask(task_id=123).set_task_state_async( + state="COMPLETED" + ) + + asyncio.run(main()) + ``` + """ + try: + coerced_state = TaskState(state) + except ValueError as exc: + raise ValueError( + f"{state!r} is not a valid TaskState. " + f"Expected one of: {[s.value for s in TaskState]}." + ) from exc + + status = await self.get_status_async(synapse_client=synapse_client) + status.state = coerced_state + return await self.update_status_async( + curation_task_status=status, synapse_client=synapse_client + ) + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: ( + f"CurationTask_CreateGridSession: ID: {self.task_id}" + ) + ) + async def create_grid_session_async( + self, + *, + owner_principal_id: int | None = None, + timeout: int = 120, + synapse_client: Synapse | None = None, + ) -> "Grid": + """ + Create a new Grid session for this CurationTask and set it as the active session. + + Picks the Grid seed from this task's task_properties: + + - RecordBasedMetadataTaskProperties uses record_set_id + - FileBasedMetadataTaskProperties uses an initial_query that selects from + the file_view_id + + Always creates a new Grid session. To attach an existing session to a task, + use set_active_grid_session_async instead. + + After the Grid is created, updates the CurationTaskStatus to point its + active_session_id at the new session. If that update fails for any reason, + the newly created Grid is deleted on a best-effort basis and the original + exception is re-raised. + + Arguments: + owner_principal_id: The principal ID (user or team) that will own the + created grid session. When not provided, the principal ID of the + caller is used. + timeout: Seconds to wait for the grid creation job. Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The newly created Grid. + + Raises: + ValueError: If task_id is unset or task_properties is of an unsupported type. + SynapseHTTPError: If the RecordSet or EntityView does not exist, or if the + status update fails. The orphan Grid is deleted on a best-effort basis + before the error is re-raised. + + Example: Create a grid session for a curation task asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + async def main(): + grid = await CurationTask(task_id=123).create_grid_session_async() + print(grid.session_id) + + asyncio.run(main()) + ``` + """ + if not self.task_id: + raise ValueError( + "task_id is required to create a CurationTask grid session" + ) + + if not self.task_properties: + await self.get_async(synapse_client=synapse_client) + + if isinstance(self.task_properties, RecordBasedMetadataTaskProperties): + if not self.task_properties.record_set_id: + raise ValueError( + "Cannot create grid session: " + "task_properties.record_set_id is missing." + ) + from synapseclient.models import RecordSet + + # raises SynapseHTTPError if RecordSet does not exist + await RecordSet(id=self.task_properties.record_set_id).get_async( + synapse_client=synapse_client + ) + grid = Grid( + record_set_id=self.task_properties.record_set_id, + owner_principal_id=owner_principal_id, + ) + elif isinstance(self.task_properties, FileBasedMetadataTaskProperties): + if not self.task_properties.file_view_id: + raise ValueError( + "Cannot create grid session: " + "task_properties.file_view_id is missing." + ) + from synapseclient.models import EntityView + + # raises SynapseHTTPError if EntityView does not exist + await EntityView(id=self.task_properties.file_view_id).get_async( + synapse_client=synapse_client + ) + grid = Grid( + initial_query=Query( + sql=f"SELECT * FROM {self.task_properties.file_view_id}" + ), + owner_principal_id=owner_principal_id, + ) + else: + raise ValueError( + "task_properties must be a FileBasedMetadataTaskProperties or " + "RecordBasedMetadataTaskProperties to create a grid session" + ) + + grid = await grid.create_async( + timeout=timeout, + synapse_client=synapse_client, + ) + + # Only one grid session can be set as the active one on a a given CurationTask + # at a any time, though multiple sessions can exist. + # If two users run this concurrently, one will lose the race and + # receive a 412 (precondition failed). In that case — or if recording the + # active session fails for any other reason — delete the session we just + # created so it doesn't become an orphan. If the delete also fails, log a + # warning so the caller knows manual cleanup is needed, then re-raise the + # original exception in all cases. + try: + await self.set_active_grid_session_async( + active_session_id=grid.session_id, synapse_client=synapse_client + ) + except Exception: + try: + await grid.delete_async(synapse_client=synapse_client) + except Exception: + Synapse.get_client(synapse_client=synapse_client).logger.warning( + "Failed to delete orphan grid session %s after status " + "update failure; manual cleanup may be required.", + grid.session_id, + ) + raise + + return grid + @skip_async_to_sync @classmethod async def list_async( cls, project_id: str, *, + assigned_to_me: Optional[bool] = None, + assignee_ids: Optional[list[str]] = None, + state_filter: Optional[list[Union["TaskState", str]]] = None, synapse_client: Optional[Synapse] = None, ) -> AsyncGenerator["CurationTask", None]: """ Generator that yields CurationTasks for a project as they become available. - Arguments: - project_id: The synId of the project. - synapse_client: If not passed in and caching was not disabled by - `Synapse.allow_client_caching(False)` this will use the last created - instance from the Synapse class constructor. + Arguments: + project_id: The synId of the project. + assigned_to_me: When True, only return tasks assigned to the current user. + Cannot be combined with assignee_ids. + False does not mean "tasks not assigned to me". + Defaults to None. + assignee_ids: Optional list of principal IDs (users or teams) to filter + tasks by assignee. Cannot be combined with assigned_to_me=True. + Passing an empty list raises a ValueError; pass None to return tasks + for any assignee. Defaults to None. + state_filter: Optional list of TaskState values or exact-case strings to + filter tasks by their current state (e.g., "IN_PROGRESS"). Defaults to + None (all states returned). Passing an empty list raises a ValueError; + pass None to return tasks in any state. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Yields: + CurationTask objects as they are retrieved from the API. + + Raises: + ValueError: If state_filter is an empty list. + ValueError: If assignee_ids is an empty list. + ValueError: If assigned_to_me is True and assignee_ids is also provided. + ValueError: If any value in state_filter is not a TaskState member or + an exact-case string matching a TaskState value (e.g., "IN_PROGRESS"). + ValueError: If the Synapse response for any task does not contain + taskProperties. + + Example: List all curation tasks in a project asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + async def main(): + # List all curation tasks in the project + async for task in CurationTask.list_async(project_id="syn9876543"): + print(f"Task ID: {task.task_id}") + print(f"Data Type: {task.data_type}") + print(f"Instructions: {task.instructions}") + print("---") + + asyncio.run(main()) + ``` + + Example: List only curation tasks assigned to the current user asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import CurationTask + + syn = Synapse() + syn.login() + + async def main(): + async for task in CurationTask.list_async( + project_id="syn9876543", assigned_to_me=True + ): + print(f"Task ID: {task.task_id}") + print(f"Data Type: {task.data_type}") + print("---") + + asyncio.run(main()) + ``` + + Example: List only in-progress curation tasks asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import CurationTask, TaskState + + syn = Synapse() + syn.login() - Yields: - CurationTask objects as they are retrieved from the API. + async def main(): + async for task in CurationTask.list_async( + project_id="syn9876543", + state_filter=[TaskState.IN_PROGRESS], + ): + print(f"Task ID: {task.task_id}") + print(f"Data Type: {task.data_type}") + print("---") - Example: List all curation tasks in a project asynchronously + asyncio.run(main()) + ``` + + Example: List only in-progress curation tasks using a string state filter asynchronously   + state_filter also accepts plain strings matching TaskState names exactly. + ```python import asyncio from synapseclient import Synapse @@ -868,16 +1898,34 @@ async def list_async( syn.login() async def main(): - # List all curation tasks in the project - async for task in CurationTask.list_async(project_id="syn9876543"): + async for task in CurationTask.list_async( + project_id="syn9876543", + state_filter=["IN_PROGRESS"], + ): print(f"Task ID: {task.task_id}") print(f"Data Type: {task.data_type}") - print(f"Instructions: {task.instructions}") print("---") asyncio.run(main()) ``` """ + if state_filter == []: + raise ValueError( + "state_filter must not be empty. Pass None to return tasks in any state." + ) + if assignee_ids == []: + raise ValueError( + "assignee_ids must not be empty. Pass None to return tasks for any assignee." + ) + if assigned_to_me is True and assignee_ids is not None: + raise ValueError( + f"assigned_to_me and assignee_ids are mutually exclusive " + f"and cannot be used together. Got assignee_ids={assignee_ids!r}." + ) + + if state_filter is not None: + state_filter = coerce_enum_list(TaskState, state_filter) + trace.get_current_span().set_attributes( { "synapse.project_id": project_id, @@ -885,7 +1933,11 @@ async def main(): ) async for task_dict in list_curation_tasks( - project_id=project_id, synapse_client=synapse_client + project_id=project_id, + assigned_to_me=assigned_to_me, + assignee_ids=assignee_ids, + state_filter=state_filter, + synapse_client=synapse_client, ): task = cls().fill_from_dict(synapse_response=task_dict) yield task @@ -904,6 +1956,11 @@ class CreateGridRequest(AsynchronousCommunicator): stored for the given record set id initial_query: Initialize a grid session from an EntityView. Mutually exclusive with record_set_id. + owner_principal_id: The owner of the grid determines who is allowed to join and participate in the grid's session. + The default owner will be the user that started the grid session, but only that user will have access to the grid. + In order to allow other users to access the grid, set this value to the id of a team. + When a team ID is provided as the owner, all members of that team will have equal access to the grid. + Note: If a team ID is provided, the creator of the grid must be a member of the team. session_id: The session ID of the created grid (populated from response) """ @@ -919,6 +1976,13 @@ class CreateGridRequest(AsynchronousCommunicator): """Initialize a grid session from an EntityView. Mutually exclusive with record_set_id.""" + owner_principal_id: int | None = None + """The owner of the grid determines who is allowed to join and participate in the grid's session. + The default owner will be the user that started the grid session, but only that user will have access to the grid. + In order to allow other users to access the grid, set this value to the id of a team. + When a team ID is provided as the owner, all members of that team will have equal access to the grid. + Note: If a team ID is provided, the creator of the grid must be a member of the team.""" + session_id: Optional[str] = None """The session ID of the created grid (populated from response)""" @@ -970,6 +2034,7 @@ def fill_grid_session_from_response(self, grid_session: "Grid") -> "Grid": grid_session.last_replica_id_service = data.get("lastReplicaIdService", None) grid_session.grid_json_schema_id = data.get("gridJsonSchema$Id", None) grid_session.source_entity_id = data.get("sourceEntityId", None) + grid_session.owner_principal_id = data.get("ownerPrincipalId") return grid_session @@ -985,6 +2050,229 @@ def to_synapse_request(self) -> Dict[str, Any]: request_dict["initialQuery"] = ( self.initial_query.to_synapse_request() if self.initial_query else None ) + request_dict["ownerPrincipalId"] = self.owner_principal_id + delete_none_keys(request_dict) + return request_dict + + +@dataclass +class GridCsvImportRequest(AsynchronousCommunicator): + """ + A request to import a CSV file into a grid. Currently supports only grid + created from a record set. + + This request is modeled from: + + The response is modeled from: + """ + + session_id: str + """The grid session ID.""" + + file_handle_id: str + """The id of the file handle that contains the CSV data.""" + + schema: list[Column] + """The list of ColumnModel that describe the CSV file. Currently this is required.""" + + concrete_type: str = GRID_CSV_IMPORT_REQUEST + """The concrete type for this request.""" + + csv_descriptor: CsvTableDescriptor = field(default_factory=CsvTableDescriptor) + """The description of a csv for upload or download.""" + + # Response fields (populated by fill_from_dict) + total_count: Optional[int] = field(default=None, compare=False) + """The total number of rows in the CSV.""" + + created_count: Optional[int] = field(default=None, compare=False) + """The number of rows that were created.""" + + updated_count: Optional[int] = field(default=None, compare=False) + """The number of rows that were updated.""" + + def fill_from_dict( + self, synapse_response: Dict[str, Any] + ) -> "GridCsvImportRequest": + """ + Converts a response from the REST API into this dataclass. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The GridCsvImportRequest object. + """ + self.session_id = synapse_response.get("sessionId", self.session_id) + self.total_count = synapse_response.get("totalCount", None) + self.created_count = synapse_response.get("createdCount", None) + self.updated_count = synapse_response.get("updatedCount", None) + return self + + def to_synapse_request(self) -> Dict[str, Any]: + """ + Converts this dataclass to a dictionary suitable for a Synapse REST API request. + + Returns: + A dictionary representation of this object for API requests. + """ + request_dict = { + "concreteType": self.concrete_type, + "sessionId": self.session_id, + "fileHandleId": self.file_handle_id, + "csvDescriptor": self.csv_descriptor.to_synapse_request(), + "schema": [col.to_synapse_request() for col in self.schema], + } + delete_none_keys(request_dict) + return request_dict + + +@dataclass +class DownloadFromGridRequest(AsynchronousCommunicator): + """ + A CSV Grid download request. + + This request is modeled from: + + The response is modeled from: + """ + + session_id: str + """The grid session ID.""" + + concrete_type: str = DOWNLOAD_FROM_GRID_REQUEST + """The concrete type for this request.""" + + write_header: bool = True + """Should the first line contain the columns names as a header in the resulting file? Set to 'true' to include the headers else, 'false'.""" + + include_row_id_and_row_version: bool = False + """Should the first two columns contain the row ID and row version?""" + + include_etag: bool = False + """Should the first (or third if includeRowIdAndRowVersion is true) column contain the row etag?""" + + csv_table_descriptor: CsvTableDescriptor = field(default_factory=CsvTableDescriptor) + """The description of a csv for upload or download.""" + + file_name: Optional[str] = None + """The optional name for the downloaded table.""" + + # Response fields (populated by fill_from_dict) + results_file_handle_id: Optional[str] = None + """The file handle ID of the generated CSV. Populated from the async job response.""" + + def fill_from_dict( + self, synapse_response: Dict[str, Any] + ) -> "DownloadFromGridRequest": + """ + Converts a response from the REST API into this dataclass. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The DownloadFromGridRequest object. + """ + self.results_file_handle_id = synapse_response.get("resultsFileHandleId") + return self + + def to_synapse_request(self) -> Dict[str, Any]: + """ + Converts this dataclass to a dictionary suitable for a Synapse REST API request. + + Returns: + A dictionary representation of this object for API requests. + """ + request_dict = { + "concreteType": self.concrete_type, + "sessionId": self.session_id, + "writeHeader": self.write_header, + "includeRowIdAndRowVersion": self.include_row_id_and_row_version, + "includeEtag": self.include_etag, + "csvTableDescriptor": self.csv_table_descriptor.to_synapse_request(), + "fileName": self.file_name, + } + delete_none_keys(request_dict) + return request_dict + + +@dataclass +class UploadToTablePreviewRequest(AsynchronousCommunicator): + """ + Request for a preview of an upload to a Table. + + This request is modeled from: + + This response is modeled from: + """ + + upload_file_handle_id: str + """The ID of the file handle for a type of UPLOAD""" + + concrete_type: str = UPLOAD_TO_TABLE_PREVIEW_REQUEST + """The concrete type for this request.""" + + lines_to_skip: Optional[int] = None + """The number of lines to skip from the start of the file. The default value of 0 will be used if this is not provided by the caller.""" + + csv_table_descriptor: CsvTableDescriptor = field(default_factory=CsvTableDescriptor) + """The description of a csv for upload or download.""" + + do_full_file_scan: Optional[bool] = None + """When set to true the full file will be scanned for a schema suggestions. A full scan is more accurate but can take more time. When set to false only a sub-set of the first rows will be scanned, which can be faster but is less accurate. The default value is false.""" + + # Response fields (populated by fill_from_dict) + suggested_columns: Optional[list[Column]] = field(default=None, compare=False) + """The suggested columns for the table based on the file scan.""" + + sample_rows: Optional[list[list[Optional[str]]]] = field( + default=None, compare=False + ) + """A sample of the rows in the file.""" + + rows_scanned: Optional[int] = field(default=None, compare=False) + """The number of rows scanned from the file.""" + + def fill_from_dict( + self, synapse_response: Dict[str, Any] + ) -> "UploadToTablePreviewRequest": + """ + Converts a response from the REST API into this dataclass. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The UploadToTablePreviewRequest object. + """ + suggested_columns_data = synapse_response.get("suggestedColumns", None) + if suggested_columns_data is not None: + self.suggested_columns = [ + Column().fill_from_dict(col) for col in suggested_columns_data + ] + + sample_rows_data = synapse_response.get("sampleRows", None) + if sample_rows_data is not None: + self.sample_rows = [row.get("values", []) for row in sample_rows_data] + + self.rows_scanned = synapse_response.get("rowsScanned", None) + return self + + def to_synapse_request(self) -> Dict[str, Any]: + """ + Converts this dataclass to a dictionary suitable for a Synapse REST API request. + + Returns: + A dictionary representation of this object for API requests. + """ + request_dict = { + "concreteType": self.concrete_type, + "uploadFileHandleId": self.upload_file_handle_id, + "linesToSkip": self.lines_to_skip, + "doFullFileScan": self.do_full_file_scan, + "csvTableDescriptor": self.csv_table_descriptor.to_synapse_request(), + } delete_none_keys(request_dict) return request_dict @@ -1078,6 +2366,53 @@ def to_synapse_request(self) -> Dict[str, Any]: return request_dict +@dataclass +class SynchronizeGridRequest(AsynchronousCommunicator): + """ + A request to synchronize a grid session. + + The request is modeled from: + + The response is modeled from: + """ + + grid_session_id: str + """The ID of the grid session to synchronize.""" + + concrete_type: str = field(default=SYNCHRONIZE_GRID_REQUEST) + """The concrete type for this request.""" + + error_messages: Optional[list[str]] = field(default=None, compare=False) + """Any error messages generated during the synchronization process.""" + + def fill_from_dict( + self, synapse_response: Dict[str, Any] + ) -> "SynchronizeGridRequest": + """ + Converts a response from the REST API into this dataclass. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The SynchronizeGridRequest object. + """ + self.error_messages = synapse_response.get("errorMessages", None) + return self + + def to_synapse_request(self) -> Dict[str, Any]: + """ + Converts this dataclass to a dictionary suitable for a Synapse REST API request. + + Returns: + A dictionary representation of this object for API requests. + """ + return { + "concreteType": self.concrete_type, + "gridSessionId": self.grid_session_id, + } + + @dataclass class GridSession: """ @@ -1298,27 +2633,109 @@ def create( """ return self - def export_to_record_set( - self, *, timeout: int = 120, synapse_client: Optional[Synapse] = None - ) -> "Grid": + def export_to_record_set( + self, *, timeout: int = 120, synapse_client: Optional[Synapse] = None + ) -> "Grid": + """ + Exports the grid session data back to a record set. This will create a new version + of the original record set with the modified data from the grid session. + + Arguments: + timeout: The number of seconds to wait for the job to complete or progress + before raising a SynapseTimeoutError. Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + GridSession: The GridSession object with export information populated. + + Raises: + ValueError: If session_id is not provided. + + Example: Export grid session data back to record set +   + + ```python + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + # Export modified grid data back to the record set + grid = Grid(session_id="abc-123-def") + grid = grid.export_to_record_set() + print(f"Exported to record set: {grid.record_set_id}") + print(f"Version number: {grid.record_set_version_number}") + if grid.validation_summary_statistics: + print(f"Valid records: {grid.validation_summary_statistics.number_of_valid_children}") + ``` + """ + return self + + def synchronize( + self, *, timeout: int = 120, synapse_client: Optional[Synapse] = None + ) -> "Grid": + """ + Synchronizes the grid session's schema and row data against its source entity. + + This is intended for grid sessions created from a file view via `initial_query`. + Grid sessions backed by a RecordSet should use `export_to_record_set` instead. + + Arguments: + timeout: The number of seconds to wait for the job to complete or progress + before raising a SynapseTimeoutError. Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + Grid: The Grid object. + + Raises: + ValueError: If session_id is not provided. + + Example: Synchronize a grid session created from a file view +   + + ```python + from synapseclient import Synapse + from synapseclient.models import Grid + from synapseclient.models.table_components import Query + + syn = Synapse() + syn.login() + + # First create a grid session from a file view + query = Query(sql="SELECT * FROM syn1234567") + grid = Grid(initial_query=query) + grid = grid.create() + + # Synchronize the grid with the latest state of the file view + grid = grid.synchronize() + ``` + """ + return self + + def delete(self, *, synapse_client: Optional[Synapse] = None) -> None: """ - Exports the grid session data back to a record set. This will create a new version - of the original record set with the modified data from the grid session. + Delete the grid session. + + Note: Only the user that created a grid session may delete it. Arguments: - timeout: The number of seconds to wait for the job to complete or progress - before raising a SynapseTimeoutError. Defaults to 120. synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. Returns: - GridSession: The GridSession object with export information populated. + None Raises: ValueError: If session_id is not provided. - Example: Export grid session data back to record set + Example: Delete a grid session   ```python @@ -1328,35 +2745,59 @@ def export_to_record_set( syn = Synapse() syn.login() - # Export modified grid data back to the record set + # Delete the grid session grid = Grid(session_id="abc-123-def") - grid = grid.export_to_record_set() - print(f"Exported to record set: {grid.record_set_id}") - print(f"Version number: {grid.record_set_version_number}") - if grid.validation_summary_statistics: - print(f"Valid records: {grid.validation_summary_statistics.number_of_valid_children}") + grid.delete() ``` """ - return self + return None - def delete(self, *, synapse_client: Optional[Synapse] = None) -> None: + def download_csv( + self, + *, + destination: Optional[str] = None, + write_header: bool = True, + include_row_id_and_row_version: bool = False, + include_etag: bool = False, + csv_table_descriptor: Optional[CsvTableDescriptor] = None, + file_name: Optional[str] = None, + timeout: int = 120, + synapse_client: Optional[Synapse] = None, + ) -> str: """ - Delete the grid session. + Download the current state of this grid session as a CSV file. - Note: Only the user that created a grid session may delete it. + Submits a DownloadFromGridRequest async job, waits for it to complete, + then downloads the resulting CSV to the local filesystem. Arguments: + destination: Local directory path where the CSV will be saved. + If not provided, defaults to the current working directory. The directory must already exist. + write_header: Whether the first line should contain column names + as a header. Defaults to True. + include_row_id_and_row_version: Whether the first two columns + should contain row ID and version. Defaults to False. + include_etag: Whether a column should contain the row etag. + Defaults to False. + csv_table_descriptor: The description of the CSV format (delimiter, + quote character, etc.). If not provided, the default CSV format + will be used. + file_name: The optional name for the downloaded file. If not + provided, defaults to `grid_{session_id}-{timestamp}.csv`. + timeout: The number of seconds to wait for the async job to + complete or progress before raising a SynapseTimeoutError. + Defaults to 120. synapse_client: If not passed in and caching was not disabled by - `Synapse.allow_client_caching(False)` this will use the last created - instance from the Synapse class constructor. + `Synapse.allow_client_caching(False)` this will use the last + created instance from the Synapse class constructor. Returns: - None + The local path to the downloaded CSV file. Raises: ValueError: If session_id is not provided. - Example: Delete a grid session + Example: Download a grid session as a CSV   ```python @@ -1366,12 +2807,27 @@ def delete(self, *, synapse_client: Optional[Synapse] = None) -> None: syn = Synapse() syn.login() - # Delete the grid session grid = Grid(session_id="abc-123-def") - grid.delete() + path = grid.download_csv(destination="./downloads") + print(f"Downloaded CSV to: {path}") + ``` + + Example: Download a grid session as a CSV with a custom file name +   + + ```python + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + grid = Grid(session_id="abc-123-def") + path = grid.download_csv(destination="./downloads", file_name="my_export.csv") + print(f"Downloaded CSV to: {path}") ``` """ - return None + return "" @classmethod def list( @@ -1427,6 +2883,53 @@ def list( ``` """ + def import_csv( + self, + path: str, + *, + timeout: int = 120, + csv_table_descriptor: Optional[CsvTableDescriptor] = None, + synapse_client: Optional[Synapse] = None, + ) -> "Grid": + """ + Import a CSV file into this grid session. Previews the file to determine + the column schema, then imports the data. Currently supports only grids + created from a record set. + + Arguments: + path: Local path to the CSV file to import. + csv_table_descriptor: The description of the CSV format (delimiter, + quote character, etc.). If not provided, the default CSV format + will be used. + timeout: The number of seconds to wait for each async job to complete + or progress before raising a SynapseTimeoutError. Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The Grid object. + + Raises: + ValueError: If session_id is not provided. + + Example: Import a CSV file into a grid session +   + + ```python + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + grid = Grid(session_id="abc-123-def") + grid = grid.import_csv(path="/local/path/to/data.csv") + print(f"Import complete for session: {grid.session_id}") + ``` + """ + return self + @dataclass @async_to_sync @@ -1440,6 +2943,9 @@ class Grid(GridSynchronousProtocol): record_set_id: The synId of the RecordSet to use for initializing the grid initial_query: Initialize a grid session from an EntityView. Mutually exclusive with record_set_id. + owner_principal_id: The principal ID (user or team) that will own the + created grid session. When not provided, the principal ID of the + caller is used. session_id: The unique sessionId that identifies the grid session started_by: The user that started this session started_on: The date-time when the session was started @@ -1504,6 +3010,10 @@ class Grid(GridSynchronousProtocol): """Initialize a grid session from an EntityView. Mutually exclusive with record_set_id.""" + owner_principal_id: int | None = None + """The principal ID (user or team) that will own the created grid session. + When not provided, the principal ID of the caller is used.""" + session_id: Optional[str] = None """The unique sessionId that identifies the grid session""" @@ -1619,7 +3129,9 @@ async def main(): # No existing session found, create a new one create_request = CreateGridRequest( - record_set_id=self.record_set_id, initial_query=self.initial_query + record_set_id=self.record_set_id, + initial_query=self.initial_query, + owner_principal_id=self.owner_principal_id, ) result = await create_request.send_job_and_wait_async( timeout=timeout, synapse_client=synapse_client @@ -1707,6 +3219,7 @@ def fill_from_dict(self, synapse_response: Dict[str, Any]) -> "Grid": ) self.grid_json_schema_id = synapse_response.get("gridJsonSchema$Id", None) self.source_entity_id = synapse_response.get("sourceEntityId", None) + self.owner_principal_id = synapse_response.get("ownerPrincipalId") return self @skip_async_to_sync @@ -1789,6 +3302,9 @@ def list( synapse_client=synapse_client, ) + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"Grid_Delete: ID: {self.session_id}" + ) async def delete_async(self, *, synapse_client: Optional[Synapse] = None) -> None: """ Delete the grid session. @@ -1838,3 +3354,318 @@ async def main(): await delete_grid_session( session_id=self.session_id, synapse_client=synapse_client ) + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"Grid_ImportCsv: ID: {self.session_id}" + ) + async def import_csv_async( + self, + path: str, + *, + timeout: int = 120, + csv_table_descriptor: Optional[CsvTableDescriptor] = None, + synapse_client: Optional[Synapse] = None, + ) -> "Grid": + """ + Import a CSV file into this grid session. Previews the file to determine + the column schema, then imports the data. Currently supports only grids + created from a record set. + + Arguments: + path: Local path to the CSV file to import. + csv_table_descriptor: The description of the CSV format (delimiter, + quote character, etc.). If not provided, the default CSV format + will be used. + timeout: The number of seconds to wait for each async job to complete + or progress before raising a SynapseTimeoutError. Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The Grid object. + + Raises: + ValueError: If session_id is not provided. + + Example: Import a CSV file into a grid session asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + async def main(): + grid = Grid(session_id="abc-123-def") + grid = await grid.import_csv_async(path="/local/path/to/data.csv") + print(f"Import complete for session: {grid.session_id}") + + asyncio.run(main()) + ``` + """ + + if not self.session_id: + raise ValueError( + "session_id is required to import a CSV into a GridSession" + ) + + if not os.path.isfile(path): + raise ValueError(f"Path '{path}' is not a valid file.") + + trace.get_current_span().set_attributes( + { + "synapse.session_id": self.session_id, + } + ) + + client = Synapse.get_client(synapse_client=synapse_client) + file_handle = await upload_synapse_s3(syn=client, file_path=path) + file_handle_id = file_handle["id"] + + effective_descriptor = csv_table_descriptor or CsvTableDescriptor() + + upload_to_table_preview = UploadToTablePreviewRequest( + csv_table_descriptor=effective_descriptor, + upload_file_handle_id=file_handle_id, + ) + + preview_response = await upload_to_table_preview.send_job_and_wait_async( + timeout=timeout, synapse_client=synapse_client + ) + if not preview_response.suggested_columns: + raise ValueError( + f"CSV preview for file handle {file_handle_id} returned no suggested " + f"columns (rows scanned: {preview_response.rows_scanned}). The file may " + f"be empty, contain only a header row, or use a separator different " + f"from the configured csv_table_descriptor " + f"(separator={repr(effective_descriptor.separator)})." + ) + + import_request = GridCsvImportRequest( + session_id=self.session_id, + file_handle_id=file_handle_id, + schema=preview_response.suggested_columns, + csv_descriptor=effective_descriptor, + ) + import_response = await import_request.send_job_and_wait_async( + timeout=timeout, synapse_client=synapse_client + ) + client.logger.info( + f"CSV import to grid session {self.session_id} completed successfully, " + f"total count: {import_response.total_count}, " + f"total created: {import_response.created_count}, " + f"total updated: {import_response.updated_count}" + ) + + return self + + @otel_trace_method( + method_to_trace_name=lambda self, *args, **kwargs: f"Grid_DownloadCsv: ID: {self.session_id}" + ) + async def download_csv_async( + self, + *, + destination: Optional[str] = None, + write_header: bool = True, + include_row_id_and_row_version: bool = False, + include_etag: bool = False, + csv_table_descriptor: Optional[CsvTableDescriptor] = None, + file_name: Optional[str] = None, + timeout: int = 120, + synapse_client: Optional[Synapse] = None, + ) -> str: + """ + Asynchronously download the current state of this grid session as a CSV file. + + Submits a DownloadFromGridRequest async job, waits for it to complete, + then downloads the resulting CSV to the local filesystem. + + Arguments: + destination: Local directory path where the CSV will be saved. The directory must already exist. + If not provided, defaults to the current working directory. + write_header: Whether the first line should contain column names + as a header. Defaults to True. + include_row_id_and_row_version: Whether the first two columns + should contain row ID and version. Defaults to False. + include_etag: Whether a column should contain the row etag. + Defaults to False. + csv_table_descriptor: The description of the CSV format (delimiter, + quote character, etc.). If not provided, the default CSV format + will be used. + file_name: The optional name for the downloaded file. If not + provided, defaults to `grid_{session_id}-{timestamp}.csv`. + timeout: The number of seconds to wait for the async job to + complete or progress before raising a SynapseTimeoutError. + Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last + created instance from the Synapse class constructor. + + Returns: + The local path to the downloaded CSV file. + + Raises: + ValueError: If session_id is not provided. + + Example: Download a grid session as a CSV asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + async def main(): + grid = Grid(session_id="abc-123-def") + path = await grid.download_csv_async(destination="./downloads") + print(f"Downloaded CSV to: {path}") + + asyncio.run(main()) + ``` + + Example: Download a grid session as a CSV with a custom file name asynchronously +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Grid + + syn = Synapse() + syn.login() + + async def main(): + grid = Grid(session_id="abc-123-def") + path = await grid.download_csv_async( + destination="./downloads", file_name="my_export.csv" + ) + print(f"Downloaded CSV to: {path}") + + asyncio.run(main()) + ``` + """ + if not self.session_id: + raise ValueError("session_id is required to download a GridSession as CSV") + + if not destination: + destination = os.getcwd() + + if not os.path.isdir(destination): + raise ValueError(f"Destination {destination} is not a valid directory.") + + trace.get_current_span().set_attributes({"synapse.session_id": self.session_id}) + + effective_descriptor = csv_table_descriptor or CsvTableDescriptor() + request = DownloadFromGridRequest( + session_id=self.session_id, + write_header=write_header, + include_row_id_and_row_version=include_row_id_and_row_version, + include_etag=include_etag, + csv_table_descriptor=effective_descriptor, + file_name=file_name, + ) + download_response = await request.send_job_and_wait_async( + timeout=timeout, synapse_client=synapse_client + ) + if not download_response.results_file_handle_id: + raise ValueError( + f"Download job for grid session '{self.session_id}' completed but " + "did not return a file handle ID. The CSV result may be empty or " + "the job may have failed silently." + ) + file_handle, presigned_url = await asyncio.gather( + get_file_handle( + file_handle_id=download_response.results_file_handle_id, + synapse_client=synapse_client, + ), + get_file_handle_presigned_url( + file_handle_id=download_response.results_file_handle_id, + synapse_client=synapse_client, + ), + ) + if not file_name: + timestamp = datetime.now(tz=timezone.utc).strftime("%Y%m%d%H%M%S") + file_name = f"grid_{self.session_id}-{timestamp}.csv" + file_path = os.path.join(destination, file_name) + return await asyncio.to_thread( + download_from_url, + url=presigned_url, + destination=file_path, + file_handle_id=file_handle["id"], + expected_md5=file_handle.get("contentMd5"), + url_is_presigned=True, + synapse_client=synapse_client, + ) + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"Grid_Synchronize: ID: {self.session_id}" + ) + async def synchronize_async( + self, *, timeout: int = 120, synapse_client: Optional[Synapse] = None + ) -> "Grid": + """ + Synchronizes the grid session's schema and row data against its source entity. + + This is intended for grid sessions created from a file view via `initial_query`. + Grid sessions backed by a RecordSet should use `export_to_record_set` instead. + + Arguments: + timeout: The number of seconds to wait for the job to complete or progress + before raising a SynapseTimeoutError. Defaults to 120. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + Grid: The Grid object. + + Raises: + ValueError: If session_id is not provided. + + Example: Synchronize a grid session created from a file view +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Grid + from synapseclient.models.table_components import Query + + syn = Synapse() + syn.login() + + async def main(): + # First create a grid session from a file view + query = Query(sql="SELECT * FROM syn1234567") + grid = Grid(initial_query=query) + grid = await grid.create_async() + + # Synchronize the grid with the latest state of the file view + grid = await grid.synchronize_async() + + asyncio.run(main()) + ``` + """ + if not self.session_id: + raise ValueError("session_id is required to synchronize a GridSession") + + request = SynchronizeGridRequest(grid_session_id=self.session_id) + result = await request.send_job_and_wait_async( + timeout=timeout, synapse_client=synapse_client + ) + + if result.error_messages: + client = Synapse.get_client(synapse_client=synapse_client) + client.logger.error( + f"Grid session '{self.session_id}' synchronization completed with " + f"error messages: {result.error_messages}" + ) + + return self diff --git a/synapseclient/models/entityview.py b/synapseclient/models/entityview.py index bbc0ae006..bd20c8b38 100644 --- a/synapseclient/models/entityview.py +++ b/synapseclient/models/entityview.py @@ -790,15 +790,21 @@ def to_synapse_request(self): "versionLabel": self.version_label, "versionComment": self.version_comment, "isLatestVersion": self.is_latest_version, - "columnIds": [ - column.id for column in self._last_persistent_instance.columns.values() - ] - if self._last_persistent_instance and self._last_persistent_instance.columns - else [], + "columnIds": ( + [ + column.id + for column in self._last_persistent_instance.columns.values() + ] + if self._last_persistent_instance + and self._last_persistent_instance.columns + else [] + ), "isSearchEnabled": self.is_search_enabled, - "viewTypeMask": self.view_type_mask.value - if isinstance(self.view_type_mask, ViewTypeMask) - else self.view_type_mask, + "viewTypeMask": ( + self.view_type_mask.value + if isinstance(self.view_type_mask, ViewTypeMask) + else self.view_type_mask + ), "scopeIds": list(scopes_without_syn) if scopes_without_syn else None, } delete_none_keys(entity) diff --git a/synapseclient/models/file.py b/synapseclient/models/file.py index d3af140d5..61d400ada 100644 --- a/synapseclient/models/file.py +++ b/synapseclient/models/file.py @@ -1053,9 +1053,11 @@ async def get_async( if_collision=self.if_collision, limit_search=self.synapse_container_limit or self.parent_id, download_file=self.download_file, - download_location=os.path.dirname(self.path) - if self.path and os.path.isfile(self.path) - else self.path, + download_location=( + os.path.dirname(self.path) + if self.path and os.path.isfile(self.path) + else self.path + ), md5=self.content_md5, synapse_client=syn, ) diff --git a/synapseclient/models/folder.py b/synapseclient/models/folder.py index a0658f521..3b5a0002e 100644 --- a/synapseclient/models/folder.py +++ b/synapseclient/models/folder.py @@ -16,6 +16,7 @@ from synapseclient.models.mixins import ( AccessControllable, ContainerEntityJSONSchema, + ProjectSettingsMixin, StorableContainer, ) from synapseclient.models.protocols.folder_protocol import FolderSynchronousProtocol @@ -47,6 +48,7 @@ class Folder( AccessControllable, StorableContainer, ContainerEntityJSONSchema, + ProjectSettingsMixin, ): """Folder is a hierarchical container for organizing data in Synapse. diff --git a/synapseclient/models/link.py b/synapseclient/models/link.py index 1c317aea0..9aa56ad9d 100644 --- a/synapseclient/models/link.py +++ b/synapseclient/models/link.py @@ -358,12 +358,14 @@ def to_synapse_request(self) -> Dict[str, Any]: "modifiedBy": self.modified_by, "parentId": self.parent_id, "concreteType": LINK_ENTITY, - "linksTo": { - "targetId": self.target_id, - "targetVersionNumber": self.target_version_number, - } - if self.target_id - else None, + "linksTo": ( + { + "targetId": self.target_id, + "targetVersionNumber": self.target_version_number, + } + if self.target_id + else None + ), "linksToClassName": self.links_to_class_name, } if request_dict["linksTo"]: diff --git a/synapseclient/models/mixins/CLAUDE.md b/synapseclient/models/mixins/CLAUDE.md new file mode 100644 index 000000000..90ec7a098 --- /dev/null +++ b/synapseclient/models/mixins/CLAUDE.md @@ -0,0 +1,27 @@ + + +## Project + +Composable behavior mixins for model classes — ACL management, container operations, async job orchestration, table CRUD, form submissions, and JSON schema validation. + +## Conventions + +### access_control.py +Uses `BenefactorTracker` dataclass to track ACL cascade when inheritance changes — maps entity→benefactor and benefactor→children relationships. Batch ACL operations use `asyncio.as_completed()` for concurrency with tqdm progress bars. + +### storable_container.py +Queue-based concurrent download/upload via `_worker()` coroutine processing `asyncio.Queue`. `FailureStrategy` enum (LOG_EXCEPTION vs RAISE_EXCEPTION) controls child entity error handling. Uses `wrap_async_generator_to_sync_generator()` for `get_children`. Child entity type dispatch via concrete type → model class mapping. + +### asynchronous_job.py +`ASYNC_JOB_URIS` dict maps concrete types to REST endpoints — when adding a new async job type, register here AND in `core/constants/concrete_types.py`. Subclasses must implement `to_synapse_request()` and `fill_from_dict()`. + +### table_components.py +Column type mapping between Python types and Synapse column types. Multiple TODOs for incomplete features (SYNPY-1651). + +### json_schema.py +Schema validation and creation via async jobs. Used by entities that support schema binding (Folder, Project). + +## Constraints + +- When adding a new async job type, register in BOTH `ASYNC_JOB_URIS` (here) and `concrete_types.py` — missing either causes runtime errors. +- Child collections on `StorableContainer` models must use `compare=False` in field definition to avoid breaking `has_changed` comparison. diff --git a/synapseclient/models/mixins/__init__.py b/synapseclient/models/mixins/__init__.py index 62ddcf017..2e64a1280 100644 --- a/synapseclient/models/mixins/__init__.py +++ b/synapseclient/models/mixins/__init__.py @@ -2,6 +2,7 @@ from synapseclient.models.mixins.access_control import AccessControllable from synapseclient.models.mixins.asynchronous_job import AsynchronousCommunicator +from synapseclient.models.mixins.enum_coercion import EnumCoercionMixin from synapseclient.models.mixins.form import ( FormChangeRequest, FormData, @@ -21,10 +22,17 @@ ValidationException, ) from synapseclient.models.mixins.storable_container import StorableContainer +from synapseclient.models.mixins.storage_location_mixin import ( + ProjectSettingsMixin, + StorageLocationConfigurable, +) __all__ = [ "AccessControllable", + "EnumCoercionMixin", + "ProjectSettingsMixin", "StorableContainer", + "StorageLocationConfigurable", "AsynchronousCommunicator", "BaseJSONSchema", "ContainerEntityJSONSchema", diff --git a/synapseclient/models/mixins/asynchronous_job.py b/synapseclient/models/mixins/asynchronous_job.py index fd3649bc1..3110f892b 100644 --- a/synapseclient/models/mixins/asynchronous_job.py +++ b/synapseclient/models/mixins/asynchronous_job.py @@ -14,11 +14,16 @@ AGENT_CHAT_REQUEST, CREATE_GRID_REQUEST, CREATE_SCHEMA_REQUEST, + DOWNLOAD_FROM_GRID_REQUEST, + DOWNLOAD_LIST_MANIFEST_REQUEST, GET_VALIDATION_SCHEMA_REQUEST, + GRID_CSV_IMPORT_REQUEST, GRID_RECORD_SET_EXPORT_REQUEST, QUERY_BUNDLE_REQUEST, QUERY_TABLE_CSV_REQUEST, + SYNCHRONIZE_GRID_REQUEST, TABLE_UPDATE_TRANSACTION_REQUEST, + UPLOAD_TO_TABLE_PREVIEW_REQUEST, ) from synapseclient.core.exceptions import ( SynapseError, @@ -29,12 +34,17 @@ ASYNC_JOB_URIS = { AGENT_CHAT_REQUEST: "/agent/chat/async", CREATE_GRID_REQUEST: "/grid/session/async", + DOWNLOAD_FROM_GRID_REQUEST: "/grid/download/csv/async", + DOWNLOAD_LIST_MANIFEST_REQUEST: "/download/list/manifest/async", GRID_RECORD_SET_EXPORT_REQUEST: "/grid/export/recordset/async", + SYNCHRONIZE_GRID_REQUEST: "/grid/synchronize/async", TABLE_UPDATE_TRANSACTION_REQUEST: "/entity/{entityId}/table/transaction/async", GET_VALIDATION_SCHEMA_REQUEST: "/schema/type/validation/async", CREATE_SCHEMA_REQUEST: "/schema/type/create/async", QUERY_TABLE_CSV_REQUEST: "/entity/{entityId}/table/download/csv/async", QUERY_BUNDLE_REQUEST: "/entity/{entityId}/table/query/async", + GRID_CSV_IMPORT_REQUEST: "/grid/import/csv/async", + UPLOAD_TO_TABLE_PREVIEW_REQUEST: "/table/upload/csv/preview/async", } diff --git a/synapseclient/models/mixins/enum_coercion.py b/synapseclient/models/mixins/enum_coercion.py new file mode 100644 index 000000000..77edf6b56 --- /dev/null +++ b/synapseclient/models/mixins/enum_coercion.py @@ -0,0 +1,33 @@ +"""Mixin for automatic enum coercion in dataclasses.""" + +from typing import Any, ClassVar, Dict + + +class EnumCoercionMixin: + """Mixin for dataclasses that auto-coerces string values to enum types. + This allows strings or enums to be used interchangeably for the same field. + + Subclasses declare a class-level ``_ENUM_FIELDS`` dict mapping field names + to their enum classes. On every ``__setattr__`` call the mixin checks + whether the target field is listed and, if the incoming value is not + already the correct enum type, coerces it via the enum constructor. + + Example:: + + @dataclass + class MyModel(EnumCoercionMixin): + _ENUM_FIELDS = {"status": StatusEnum} + status: Optional[Union[str, StatusEnum]] = None + """ + + _ENUM_FIELDS: ClassVar[Dict[str, type]] = {} + + def __setattr__(self, name: str, value: Any) -> None: + enum_cls = self._ENUM_FIELDS.get(name) + if ( + value is not None + and enum_cls is not None + and not isinstance(value, enum_cls) + ): + value = enum_cls(value) + super().__setattr__(name, value) diff --git a/synapseclient/models/mixins/storable_container.py b/synapseclient/models/mixins/storable_container.py index 25432a6b9..ba375653f 100644 --- a/synapseclient/models/mixins/storable_container.py +++ b/synapseclient/models/mixins/storable_container.py @@ -41,10 +41,21 @@ from synapseclient.core.constants.method_flags import COLLISION_OVERWRITE_LOCAL from synapseclient.core.exceptions import SynapseError from synapseclient.core.transfer_bar import shared_download_progress_bar +from synapseclient.core.upload.multipart_upload_async import ( + shared_progress_bar as upload_shared_progress_bar, +) from synapseclient.models.protocols.storable_container_protocol import ( + ManifestSetting, StorableContainerSynchronousProtocol, ) +from synapseclient.models.services.manifest import ( + generate_manifest_csv, + generate_sync_manifest, + read_manifest_for_upload, + upload_sync_files, +) from synapseclient.models.services.storable_entity_components import ( + MANIFEST_UPLOAD_MAX_RETRIES, FailureStrategy, wrap_coroutine, ) @@ -159,6 +170,7 @@ async def sync_from_synapse_async( link_hops: int = 1, queue: asyncio.Queue = None, include_types: Optional[List[str]] = None, + manifest: ManifestSetting = "all", *, synapse_client: Optional[Synapse] = None, ) -> Self: @@ -170,9 +182,10 @@ async def sync_from_synapse_async( If you only want to retrieve the full tree of metadata about your container specify `download_file` as False. - This works similar to [synapseutils.syncFromSynapse][], however, this does not - currently support the writing of data to a manifest TSV file. This will be a - future enhancement. + This works similar to [synapseutils.syncFromSynapse][], and generates a + `manifest.csv` file in each synced directory. The manifest uses CSV format + with `parentId` and `ID` columns, interoperable with the Synapse UI download + cart and `synapse get-download-list` CLI output. Supports syncing Files, Folders, Tables, EntityViews, SubmissionViews, Datasets, DatasetCollections, MaterializedViews, and VirtualTables from Synapse. The @@ -208,6 +221,11 @@ async def sync_from_synapse_async( `["folder", "file", "table", "entityview", "dockerrepo", "submissionview", "dataset", "datasetcollection", "materializedview", "virtualtable"]`. + manifest: Determines whether to generate a manifest CSV file. Options are: + + - `all` (default): generate `manifest.csv` in every synced directory + - `root`: generate `manifest.csv` only in the root `path` directory + - `suppress`: do not generate any manifest file synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -312,8 +330,38 @@ async def my_function(): asyncio.run(my_function()) ``` + Suppose I want to download all the children of a Project and all sub-folders and files and generate a manifest file: + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Project + + async def my_function(): + syn = Synapse() + syn.login() + + my_project = Project(id="syn12345") + await my_project.sync_from_synapse_async(path="/path/to/folder", manifest="all") + + asyncio.run(my_function()) + ``` + Suppose I want to download a manifest file at the root path: + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Project + async def my_function(): + syn = Synapse() + syn.login() + my_project = Project(id="syn12345") + await my_project.sync_from_synapse_async(path="/path/to/folder", manifest="root", download_file=False) + + asyncio.run(my_function()) + ``` Raises: ValueError: If the folder does not have an id set. @@ -375,6 +423,18 @@ async def my_function(): end end + opt manifest != "suppress" and path is set + alt manifest == "all" + loop For each directory path + sync_from_synapse->>manifest: call `generate_manifest_csv(files, dir_path, syn)` + manifest-->>sync_from_synapse: manifest.csv written to dir_path + end + else manifest == "root" + sync_from_synapse->>manifest: call `generate_manifest_csv(all_files, root_path, syn)` + manifest-->>sync_from_synapse: manifest.csv written to root_path + end + end + deactivate sync_from_synapse deactivate project_or_folder ``` @@ -397,6 +457,7 @@ async def my_function(): link_hops=link_hops, queue=queue, include_types=include_types, + manifest=manifest, synapse_client=syn, ) @@ -412,6 +473,7 @@ async def _sync_from_synapse_async( link_hops: int = 1, queue: asyncio.Queue = None, include_types: Optional[List[str]] = None, + manifest: ManifestSetting = "all", *, synapse_client: Optional[Synapse] = None, ) -> Self: @@ -420,12 +482,18 @@ async def _sync_from_synapse_async( All arguments are passed through from the wrapper function. """ + if manifest not in ("all", "root", "suppress"): + raise ValueError( + f"Invalid manifest value: {manifest}. Must be one of: 'all', 'root', 'suppress'." + ) + syn = Synapse.get_client(synapse_client=synapse_client) if not self._last_persistent_instance: await self.get_async(synapse_client=syn) syn.logger.info( f"[{self.id}:{self.name}]: Syncing {self.__class__.__name__} from Synapse." ) + path = os.path.expanduser(path) if path else None children = await self._retrieve_children( @@ -488,14 +556,227 @@ async def _sync_from_synapse_async( if create_workers: try: - # Wait until the queue is fully processed. + # Blocks until every queued item has been picked up and + # task_done() called by a worker. await queue.join() finally: + # Workers are now blocked waiting on an empty queue; cancel + # them so they don't hang the event loop. for task in worker_tasks: task.cancel() + if path and manifest != "suppress": + if manifest == "all": + for ( + directory_path, + file_entities, + ) in self.map_directory_to_all_contained_files(root_path=path).items(): + generate_manifest_csv( + all_files=file_entities, + path=directory_path, + syn=syn, + ) + elif manifest == "root": + generate_manifest_csv( + all_files=self.flatten_file_list(), + path=path, + syn=syn, + ) + return self + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"{self.__class__.__name__}_sync_to_synapse: {self.id}" + ) + async def sync_to_synapse_async( + self: Self, + manifest_path: str, + dry_run: bool = False, + send_messages: bool = True, + retries: int = MANIFEST_UPLOAD_MAX_RETRIES, + merge_existing_annotations: bool = True, + associate_activity_to_new_version: bool = False, + *, + synapse_client: Synapse | None = None, + ) -> list["File"]: + """Upload files to Synapse using a manifest CSV file. + + Accepts manifests produced by sync_from_synapse, the + synapse get-download-list CLI, or the Synapse UI download cart. + The manifest must have at minimum a path and parentId column. + All other columns that are not part of the standard manifest column set + are treated as file annotations. + + Standard manifest columns: + [ID, name, parentId, contentType, path, synapseStore, activityName, + activityDescription, forceVersion, used, executed] + + Arguments: + manifest_path: Path to the CSV manifest file. + dry_run: If True, perform full validation of the manifest + (including verifying that all parent containers exist in + Synapse) but skip the actual file upload. + send_messages: If True, send a Synapse notification message on + completion. + retries: Number of notification retries (only relevant when + send_messages=True). + merge_existing_annotations: If True, merge manifest annotations + with existing annotations on Synapse. If False, overwrite them. + associate_activity_to_new_version: If True and a version update + occurs, the existing Synapse activity is associated with the new + version. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last + created instance from the Synapse class constructor. + + Returns: + List of File entities that were created or updated. Returns an + empty list if dry_run=True or if no rows were eligible for + upload. + + Example: Using this function +   + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Project + + async def main(): + syn = Synapse() + syn.login() + + project = Project(id="syn12345") + await project.sync_to_synapse_async( + manifest_path="/path/to/manifest.csv" + ) + + asyncio.run(main()) + ``` + """ + from tqdm import tqdm + + from synapseutils.monitor import notify_me_async + + syn = Synapse.get_client(synapse_client=synapse_client) + + items, total_size = await read_manifest_for_upload( + manifest_path=manifest_path, + syn=syn, + merge_existing_annotations=merge_existing_annotations, + associate_activity_to_new_version=associate_activity_to_new_version, + ) + + syn.logger.info( + f"About to upload {len(items)} files with a total size of {total_size} bytes." + ) + + if dry_run: + syn.logger.info("Returning due to dry run.") + return [] + + if not items: + return [] + + progress_bar = tqdm( + total=total_size, + desc=f"Uploading {len(items)} files", + unit="B", + unit_scale=True, + smoothing=0, + leave=None, + ) + with upload_shared_progress_bar(progress_bar): + try: + if send_messages: + notify_decorator = notify_me_async( + syn, f"Upload from {manifest_path}", retries=retries + ) + wrapped = notify_decorator( + lambda items: upload_sync_files(items, syn=syn) + ) + uploaded_files = await wrapped(items) + else: + uploaded_files = await upload_sync_files(items, syn=syn) + progress_bar.update(total_size - progress_bar.n) + finally: + progress_bar.close() + return uploaded_files + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"{self.__class__.__name__}_generate_sync_manifest: {self.id}" + ) + async def generate_sync_manifest_async( + self: Self, + directory_path: str, + manifest_path: str, + *, + synapse_client: Synapse | None = None, + ) -> None: + """Walk a local directory, mirror its folder hierarchy under this + container in Synapse, and write a CSV manifest ready for + [sync_to_synapse_async][synapseclient.models.mixins.StorableContainer.sync_to_synapse_async]. + + The manifest has two columns: path (absolute, symlink-resolved) and + parentId (the Synapse ID of the file's containing folder). Existing + Synapse folders with matching names and parents are reused. Directory + symlinks inside directory_path are not followed; file symlinks record + the symlink path and upload the target's contents. Zero-byte files are + skipped with a warning — Synapse rejects empty files. I/O errors during + walk are logged and skipped; an empty source directory produces a + warning and a header-only manifest. + + Arguments: + directory_path: Path to the local directory to be pushed to + Synapse. + manifest_path: Path where the generated manifest CSV will be + written. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last + created instance from the Synapse class constructor. + + Raises: + ValueError: If this container's id is None, or if directory_path + does not exist or is not a directory, or if this container's + id exists in Synapse but is not a Folder or Project. + SynapseHTTPError: If this container's id does not exist in + Synapse. + + Example: Generate a manifest and upload the files + Mirror ./my_data under a Synapse project and then upload it. + + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.models import Project + + async def main(): + syn = Synapse() + syn.login() + + project = Project(id="syn12345") + await project.generate_sync_manifest_async( + directory_path="./my_data", + manifest_path="manifest.csv", + ) + await project.sync_to_synapse_async(manifest_path="manifest.csv") + + asyncio.run(main()) + ``` + """ + if self.id is None: + raise ValueError( + f"Cannot generate a sync manifest for a {type(self).__name__}" + " that has not been stored in Synapse. Set id on this" + " container (or store it) first." + ) + await generate_sync_manifest( + directory_path=directory_path, + parent_id=self.id, + manifest_path=manifest_path, + synapse_client=synapse_client, + ) + def flatten_file_list(self) -> List["File"]: """ Recursively loop over all of the already retrieved files and folders and return @@ -1106,6 +1387,7 @@ async def _wrap_recursive_get_children( synapse_client=synapse_client, queue=queue, include_types=include_types, + manifest="suppress", # The manifest is suppressed for child folders because they’re already accounted for when iterating through their parent folder. This is handled in the map_directory_to_all_contained_files function, which returns all files in the directory, including those in its child directories. ) def _create_task_for_child( diff --git a/synapseclient/models/mixins/storage_location_mixin.py b/synapseclient/models/mixins/storage_location_mixin.py new file mode 100644 index 000000000..01b4a4e71 --- /dev/null +++ b/synapseclient/models/mixins/storage_location_mixin.py @@ -0,0 +1,467 @@ +"""Mixins for entities that can have their storage location and project settings configured.""" + +import asyncio +from typing import Any, List, Optional, Union + +from synapseclient import Synapse +from synapseclient.core.async_utils import async_to_sync, otel_trace_method +from synapseclient.models.project_setting import ProjectSetting +from synapseclient.models.protocols.storage_location_mixin_protocol import ( + StorageLocationConfigurableSynchronousProtocol, +) +from synapseclient.models.services.migration import ( + index_files_for_migration_async as _index_files_for_migration_async, +) +from synapseclient.models.services.migration import ( + migrate_indexed_files_async as _migrate_indexed_files_async, +) +from synapseclient.models.services.migration_types import MigrationResult + +# Default storage location ID used by Synapse +DEFAULT_STORAGE_LOCATION_ID = 1 + + +@async_to_sync +class StorageLocationConfigurable(StorageLocationConfigurableSynchronousProtocol): + """Mixin for objects that can have their storage location configured. + + In order to use this mixin, the class must have an `id` attribute. + + This mixin provides methods for: + - Getting STS (AWS Security Token Service) credentials for direct S3 access + - Migrating files to a new storage location + """ + + id: Optional[str] = None + """The unique immutable ID for this entity.""" + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"Entity_GetStsStorageToken: {self.id}" + ) + async def get_sts_storage_token_async( + self, + permission: str, + *, + output_format: str = "json", + min_remaining_life: Optional[int] = None, + synapse_client: Optional[Synapse] = None, + ) -> Any: + """Get STS (AWS Security Token Service) credentials for direct access to + the storage location backing this entity. These credentials can be used + with AWS tools like awscli and boto3. + Note: The entity must use a storage location that has STS enabled. + + Arguments: + permission: The permission level for the token. Must be 'read_only' + or 'read_write'. + output_format: The output format for the credentials. Options: + 'json' (default), 'boto', 'shell', 'bash', 'cmd', 'powershell'. + min_remaining_life: The minimum remaining life (in seconds) for a + cached token before a new one is fetched. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The STS credentials in the requested format. + + Raises: + ValueError: If the entity does not have an id set. + + Example: Using credentials with boto3 + Get STS credentials for an STS-enabled folder and use with boto3: + + import asyncio + import boto3 + from synapseclient import Synapse + from synapseclient.models import Folder + + syn = Synapse() + syn.login() + + async def main(): + folder = await Folder(id="syn123").get_async() + credentials = await folder.get_sts_storage_token_async( + permission="read_write", + output_format="boto", + ) + s3_client = boto3.client('s3', **credentials) + + asyncio.run(main()) + """ + if not self.id: + raise ValueError("The entity must have an id set.") + + from synapseclient.core import sts_transfer + + client = Synapse.get_client(synapse_client=synapse_client) + + return await asyncio.to_thread( + sts_transfer.get_sts_credentials, + client, + self.id, + permission, + output_format=output_format, + min_remaining_life=min_remaining_life, + ) + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"Entity_IndexFilesForMigration: {self.id}" + ) + async def index_files_for_migration_async( + self, + dest_storage_location_id: int, + db_path: Optional[str] = None, + *, + source_storage_location_ids: Optional[List[int]] = None, + file_version_strategy: str = "new", + include_table_files: bool = False, + continue_on_error: bool = False, + synapse_client: Optional[Synapse] = None, + ) -> MigrationResult: + """Index files in this entity for migration to a new storage location. + + This is the first step in migrating files to a new storage location. + After indexing, use `migrate_indexed_files` to perform the actual migration. + + Arguments: + dest_storage_location_id: The destination storage location ID. + db_path: Path to the SQLite database file for tracking migration state. + If not provided, a temporary directory will be used. The path + can be retrieved from the returned MigrationResult.db_path. + source_storage_location_ids: Optional list of source storage location IDs + to filter which files to migrate. If None, all files are indexed. + file_version_strategy: Strategy for handling file versions. Options: + 'new' (default) - create new versions, 'all' - migrate all versions, + 'latest' - only migrate latest version, 'skip' - skip if file exists. + include_table_files: Whether to include files attached to tables. + continue_on_error: Whether to continue indexing if an error occurs. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + A MigrationResult object containing indexing statistics and the database + path (accessible via result.db_path). + + Example: Indexing files for migration + Index files in a project for migration: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import Project + + syn = Synapse() + syn.login() + + async def main(): + project = await Project(id="syn123").get_async() + result = await project.index_files_for_migration_async( + dest_storage_location_id=12345, + ) + print(f"Database path: {result.db_path}") + print(f"Indexed {result.counts_by_status}") + + asyncio.run(main()) + """ + if not self.id: + raise ValueError("The entity must have an id set.") + + return await _index_files_for_migration_async( + self, + dest_storage_location_id=str(dest_storage_location_id), + db_path=db_path, + source_storage_location_ids=( + [str(s) for s in source_storage_location_ids] + if source_storage_location_ids + else None + ), + file_version_strategy=file_version_strategy, + include_table_files=include_table_files, + continue_on_error=continue_on_error, + synapse_client=synapse_client, + ) + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"Entity_MigrateIndexedFiles: {self.id}" + ) + async def migrate_indexed_files_async( + self, + db_path: str, + *, + create_table_snapshots: bool = True, + continue_on_error: bool = False, + force: bool = False, + synapse_client: Optional[Synapse] = None, + ) -> Optional[MigrationResult]: + """Migrate files that have been indexed with `index_files_for_migration`. + + This is the second step in migrating files to a new storage location. + Files must first be indexed using `index_files_for_migration`. + + **Interactive confirmation:** When called from an interactive shell and + ``force=False`` (the default), this method will print the number of items + queued for migration and prompt for confirmation before proceeding. If + standard output is not connected to an interactive terminal (e.g. a script + or CI environment), migration is aborted unless ``force=True`` is set. + + Arguments: + db_path: Path to the SQLite database file created by + `index_files_for_migration`. You can get this from the + MigrationResult.db_path returned by index_files_for_migration. + create_table_snapshots: Whether to create table snapshots before + migrating table files. + continue_on_error: Whether to continue migration if an error occurs. + force: Skip the interactive confirmation prompt and proceed with + migration automatically. Set to ``True`` when running + non-interactively (scripts, CI, automated pipelines). + Defaults to False. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + A MigrationResult object containing migration statistics, or None + if migration was aborted (user declined the confirmation prompt, or + the session is non-interactive and force=False). + + Example: Migrating indexed files + Migrate previously indexed files: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import Project + + syn = Synapse() + syn.login() + + async def main(): + project = await Project(id="syn123").get_async() + + # Index first + index_result = await project.index_files_for_migration_async( + dest_storage_location_id=12345, + ) + + # Then migrate using the db_path from index result + result = await project.migrate_indexed_files_async( + db_path=index_result.db_path, + force=True, # Skip interactive confirmation + ) + print(f"Migrated {result.counts_by_status}") + + asyncio.run(main()) + """ + if not self.id: + raise ValueError("The entity must have an id set.") + + return await _migrate_indexed_files_async( + db_path=db_path, + create_table_snapshots=create_table_snapshots, + continue_on_error=continue_on_error, + force=force, + synapse_client=synapse_client, + ) + + +@async_to_sync +class ProjectSettingsMixin(StorageLocationConfigurable): + """Mixin for objects that can have their project settings configured. + + Extends StorageLocationConfigurable with methods for managing project + settings such as upload storage locations. + + In order to use this mixin, the class must have an `id` attribute. + """ + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"Entity_SetStorageLocation: {self.id}" + ) + async def set_storage_location_async( + self, + storage_location_id: Optional[ + Union[int, List[int]] + ] = DEFAULT_STORAGE_LOCATION_ID, + *, + synapse_client: Optional[Synapse] = None, + ) -> "ProjectSetting": + """Set the upload storage location for this entity. This configures where + files uploaded to this entity will be stored. + + **This is a destructive update.** The provided `storage_location_id` value(s) + will **replace** any storage locations previously configured on this entity. + To add a storage location without removing existing ones, first retrieve the + current setting via `get_project_setting_async`, append to its `locations` + list, and call `store_async` on the returned `ProjectSetting` directly. + The first ID in the list is the default upload destination. + To obtain a storage location ID, create a + [StorageLocation][synapseclient.models.StorageLocation] and use its + `storage_location_id`. See + [StorageLocationType][synapseclient.models.StorageLocationType] for the + available storage backend types. + + Arguments: + storage_location_id: The storage location ID(s) to set. Can be a single + ID, a list of IDs (first is default, max 10). By default, the + default Synapse S3 storage location is used. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The ProjectSetting object reflecting the current state after the operation. + + Raises: + ValueError: If the entity does not have an id set. + + Example: Replace all storage locations + Fully replace the storage location on a folder with a single location: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import Folder + + syn = Synapse() + syn.login() + + async def main(): + folder = await Folder(id="syn123").get_async() + setting = await folder.set_storage_location_async( + storage_location_id=12345 + ) + print(setting) + + asyncio.run(main()) + + Example: Partial update — add a storage location without removing existing ones + Retrieve the current setting and append a new location: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import Folder + + syn = Synapse() + syn.login() + + async def main(): + folder = await Folder(id="syn123").get_async() + setting = await folder.get_project_setting_async(setting_type="upload") + if setting: + setting.locations.append(67890) + await setting.store_async() + + asyncio.run(main()) + """ + if not self.id: + raise ValueError("The entity must have an id set.") + + if storage_location_id is None: + locations = [DEFAULT_STORAGE_LOCATION_ID] + elif isinstance(storage_location_id, list): + locations = storage_location_id + else: + locations = [storage_location_id] + setting = await ProjectSetting( + project_id=self.id, settings_type="upload" + ).get_async(synapse_client=synapse_client) + + if setting is None: + setting = ProjectSetting( + project_id=self.id, + settings_type="upload", + locations=locations, + ) + else: + setting.locations = locations + return await setting.store_async(synapse_client=synapse_client) + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"Entity_GetProjectSetting: {self.id}" + ) + async def get_project_setting_async( + self, + setting_type: str = "upload", + *, + synapse_client: Optional[Synapse] = None, + ) -> Optional["ProjectSetting"]: + """Get the project setting for this entity. + + Arguments: + setting_type: The type of setting to retrieve. Currently only 'upload' is supported. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The ProjectSetting object, or None if no setting exists. + + Raises: + ValueError: If the entity does not have an id set. + + Example: Using this function + Get the upload settings for a folder: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import Folder + + syn = Synapse() + syn.login() + + async def main(): + folder = await Folder(id="syn123").get_async() + setting = await folder.get_project_setting_async(setting_type="upload") + if setting: + print(f"Storage locations: {setting.locations}") + + asyncio.run(main()) + """ + if not self.id: + raise ValueError("The entity must have an id set.") + + return await ProjectSetting( + project_id=self.id, settings_type=setting_type + ).get_async(synapse_client=synapse_client) + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"Entity_DeleteProjectSetting: {self.id}" + ) + async def delete_project_setting_async( + self, + setting_id: str, + *, + synapse_client: Optional[Synapse] = None, + ) -> None: + """Delete a project setting by its setting ID. + + Arguments: + setting_id: The ID of the project setting to delete. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + None + + Raises: + ValueError: If the entity does not have an id set. + + Example: Using this function + Delete the upload settings for a folder: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import Folder + + syn = Synapse() + syn.login() + + async def main(): + folder = await Folder(id="syn123").get_async() + await folder.delete_project_setting_async(setting_id="123") + + asyncio.run(main()) + """ + if not setting_id: + raise ValueError("The id is required to delete a project setting.") + await ProjectSetting(id=setting_id).delete_async(synapse_client=synapse_client) diff --git a/synapseclient/models/mixins/table_components.py b/synapseclient/models/mixins/table_components.py index dc583975b..f37a1dfaf 100644 --- a/synapseclient/models/mixins/table_components.py +++ b/synapseclient/models/mixins/table_components.py @@ -13,6 +13,7 @@ from io import BytesIO from typing import Any, Dict, List, Optional, Protocol, Tuple, Union +import pandas as pd from tqdm import tqdm from tqdm.contrib.logging import logging_redirect_tqdm from typing_extensions import Self @@ -99,7 +100,7 @@ DEFAULT_QUOTE_CHARACTER = '"' DEFAULT_SEPARATOR = "," -DEFAULT_ESCAPSE_CHAR = "\\" +DEFAULT_ESCAPE_CHAR = "\\" # Taken from RESERVED_COLUMN_NAMES = [ @@ -138,10 +139,20 @@ def row_labels_from_rows(rows: List[Row]) -> List[Row]: ) -def convert_dtypes_to_json_serializable(df): +def convert_dtypes_to_json_serializable(df) -> pd.DataFrame: """ - Convert the dtypes of the int64 and float64 columns to object columns which are JSON serializable types. - Also, convert the ROW_ID, ROW_VERSION, and ROW_ID.1 columns to int columns which are JSON serializable types. + Prepare a DataFrame for JSON/CSV serialization by cleaning special values + and normalizing dtypes. Mutates the passed-in DataFrame in place (and also + returns it). + + - Recursively replaces `Ellipsis` with `"..."` and `pd.NA`/`np.nan`/`None` + with `None` inside nested `list`/`dict` values. + - Converts top-level `Ellipsis` to `"..."` and top-level `pd.NA`/`np.nan`/ + `None` to `None`. + - Runs `convert_dtypes()` then casts every column to `object` dtype (with + `pd.NA` -> `None`), except `ROW_ID`, `ROW_VERSION`, and `ROW_ID.1`, which + are cast back to `int` since the Synapse API requires them as integers. + Arguments: df: The dataframe to convert the dtypes of. Returns: @@ -163,16 +174,64 @@ def convert_dtypes_to_json_serializable(df): "datetime_list_col": [[datetime(2021, 1, 1), datetime(2021, 1, 2), datetime(2021, 1, 3)], [datetime(2021, 1, 4), datetime(2021, 1, 5), datetime(2021, 1, 6)], None, [datetime(2021, 1, 7), datetime(2021, 1, 8), datetime(2021, 1, 9)]], "entityid_list_col": [["syn123", "syn456", None], ["syn101", "syn102", "syn103"], None, ["syn104", "syn105", "syn106"]], "userid_list_col": [["user1", "user2", "user3"], ["user4", "user5", None], None, ["user7", "user8", "user9"]], + "json_col_with_quotes": [ + { + "id": 1, + "description": 'Text with "quotes" in the description field', + "references": [] + }, + { + "id": 2, + "description": 'Another description with "quoted text" here', + "references": ["ref1", "ref2"] + }, + { + "id": 3, + "description": 'Description containing "multiple" quoted "words"', + "references": [...] + }, + { + "id": 4, + "description": 'Description containing apostrophes sage\'s', + "references": [...] + } + + ], }).convert_dtypes() df = convert_dtypes_to_json_serializable(df) print(df) """ - import pandas as pd + + def _serialize_json_value(x): + if isinstance(x, (list, dict)): + + def _reformat_special_values(obj): + if obj is ...: + return "..." + if isinstance(obj, dict): + return {k: _reformat_special_values(v) for k, v in obj.items()} + if isinstance(obj, list): + return [_reformat_special_values(item) for item in obj] + # Catch pd.NA, np.nan, and None — none are valid JSON + if pd.isna(obj): + return None + return obj + + return _reformat_special_values(x) + # Handle standalone ellipsis + if x is ...: + return "..." + # Handle top-level pd.NA, np.nan, None + if pd.isna(x): + return None + return x for col in df.columns: - df[col] = ( - df[col].replace({pd.NA: None}).astype(object) - ) # this will convert the int64 and float64 columns to object columns + df[col] = df[col].apply(_serialize_json_value) + # restore the original values of the column especially for the int64 and float64 columns since apply function changes the dtype + df[col] = df[col].convert_dtypes() + df[col] = df[col].replace({pd.NA: None}).astype(object) + # Convert ROW_ prefixed columns back to int (like ROW_ID, ROW_VERSION) if col in [ "ROW_ID", @@ -426,7 +485,7 @@ async def _table_query( query=query, synapse_client=synapse_client, quote_character=kwargs.get("quote_character", DEFAULT_QUOTE_CHARACTER), - escape_character=kwargs.get("escape_character", DEFAULT_ESCAPSE_CHAR), + escape_character=kwargs.get("escape_character", DEFAULT_ESCAPE_CHAR), line_end=kwargs.get("line_end", str(os.linesep)), separator=kwargs.get("separator", DEFAULT_SEPARATOR), header=kwargs.get("header", True), @@ -1844,10 +1903,10 @@ def _construct_partial_rows_for_upsert( if ( isinstance(cell_value, list) and len(cell_value) > 0 ) or not cell_is_na: - partial_change_values[ - column_id - ] = _convert_pandas_row_to_python_types( - cell=cell_value, column_type=column_type + partial_change_values[column_id] = ( + _convert_pandas_row_to_python_types( + cell=cell_value, column_type=column_type + ) ) else: partial_change_values[column_id] = None @@ -2809,7 +2868,6 @@ async def main(): timeout=timeout, synapse_client=synapse_client, ) - if download_location: return csv_path @@ -2852,7 +2910,7 @@ async def main(): filepath=csv_path, separator=separator or DEFAULT_SEPARATOR, quote_char=quote_character or DEFAULT_QUOTE_CHARACTER, - escape_char=escape_character or DEFAULT_ESCAPSE_CHAR, + escape_char=escape_character or DEFAULT_ESCAPE_CHAR, row_id_and_version_in_index=False, date_columns=date_columns if date_columns else None, list_columns=list_columns if list_columns else None, @@ -3387,7 +3445,9 @@ async def store_rows_async( function when writing the data to a CSV file. This is only used when the `values` argument is a Pandas DataFrame. See - for complete list of supported arguments. + for complete list of supported arguments. Any kwargs you supply are + merged on top of the default `{"escapechar": "\\"}`, so you only need + to override `escapechar` explicitly if you want different behavior. job_timeout: The maximum amount of time to wait for a job to complete. This is used when inserting, and updating rows of data. Each individual @@ -3560,6 +3620,8 @@ async def main(): test_import_pandas() from pandas import DataFrame + to_csv_kwargs = {"escapechar": DEFAULT_ESCAPE_CHAR, **(to_csv_kwargs or {})} + original_values = values if isinstance(values, dict): values = DataFrame(values).convert_dtypes() @@ -3786,6 +3848,7 @@ async def _stream_and_update_from_df( "AppendableRowSetRequest", ] ] = None, + to_csv_kwargs: Optional[Dict[str, Any]] = None, ) -> None: """ Organize the process of reading in and uploading parts of the DataFrame we are @@ -3816,6 +3879,8 @@ async def _stream_and_update_from_df( being uploaded. changes: Additional changes to the table that should execute within this transaction. + to_csv_kwargs: Additional arguments to pass to the `pd.DataFrame.to_csv` + function when writing the data to a CSV file. """ file_handle_id = await multipart_upload_dataframe_async( syn=client, @@ -3828,6 +3893,7 @@ async def _stream_and_update_from_df( line_start=line_start, line_end=line_end, bytes_to_prepend=header, + to_csv_kwargs=to_csv_kwargs, ) # We are using a semaphore here because large tables can take a very long time # for the update to complete. This will allow us to wait for the update to @@ -4031,8 +4097,8 @@ async def _chunk_and_upload_df( to_csv_kwargs: Additional arguments to pass to the `pd.DataFrame.to_csv` function when writing the data to a CSV file. """ + df = convert_dtypes_to_json_serializable(df) # Loop over the rows of the DF to determine the size/boundries we'll be uploading - chunks_to_upload = [] size_of_chunk = 0 buffer = BytesIO() @@ -4142,6 +4208,7 @@ async def _chunk_and_upload_df( header=header_line, changes=changes, file_suffix=f"{part}", + to_csv_kwargs=to_csv_kwargs, ) ) ) @@ -4439,7 +4506,7 @@ def csv_to_pandas_df( filepath: Union[str, BytesIO], separator: str = DEFAULT_SEPARATOR, quote_char: str = DEFAULT_QUOTE_CHARACTER, - escape_char: str = DEFAULT_ESCAPSE_CHAR, + escape_char: str = DEFAULT_ESCAPE_CHAR, contain_headers: bool = True, lines_to_skip: int = 0, date_columns: Optional[List[str]] = None, @@ -4462,7 +4529,7 @@ def csv_to_pandas_df( Passed as `quotechar` to pandas. If `quotechar` is supplied as a `kwarg` it will be used instead of this `quote_char` argument. escape_char: The escape character for the file, - Defaults to `DEFAULT_ESCAPSE_CHAR`. + Defaults to `DEFAULT_ESCAPE_CHAR`. contain_headers: Whether the file contains headers, Defaults to `True`. lines_to_skip: The number of lines to skip at the beginning of the file, @@ -4514,10 +4581,14 @@ def csv_to_pandas_df( # Turn list columns into lists and convert items to their proper types if list_columns: for col in list_columns: - # Fill NA values with empty lists, it must be a string for json.loads to work - # json.loads will convert null values in boolean list, string list to None. - df.fillna({col: "[]"}, inplace=True) - df[col] = df[col].apply(json.loads) + # A CSV cell for a list column is either a JSON string like "[1, 2]" + # or NA. When every value is NA, convert_dtypes() infers a typed + # dtype (e.g. Int64) into which the string "[]" cannot be written, + # so fillna({col: "[]"}) raises. Parse strings and substitute [] + # for NA in a single pass. + df[col] = df[col].apply( + lambda x: json.loads(x) if isinstance(x, str) else [] + ) # Convert list items to their proper types based on column type if list_column_types and col in list_column_types: column_type = list_column_types[col] diff --git a/synapseclient/models/project.py b/synapseclient/models/project.py index a1a6a1c21..a5cc15074 100644 --- a/synapseclient/models/project.py +++ b/synapseclient/models/project.py @@ -16,6 +16,7 @@ from synapseclient.models.mixins import ( AccessControllable, ContainerEntityJSONSchema, + ProjectSettingsMixin, StorableContainer, ) from synapseclient.models.protocols.project_protocol import ProjectSynchronousProtocol @@ -46,6 +47,7 @@ class Project( AccessControllable, StorableContainer, ContainerEntityJSONSchema, + ProjectSettingsMixin, ): """A Project is a top-level container for organizing data in Synapse. diff --git a/synapseclient/models/project_setting.py b/synapseclient/models/project_setting.py new file mode 100644 index 000000000..ddc61e037 --- /dev/null +++ b/synapseclient/models/project_setting.py @@ -0,0 +1,362 @@ +"""ProjectSetting model for managing project settings in Synapse.""" + +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + +from synapseclient import Synapse +from synapseclient.api.project_setting_services import ( + create_project_setting, + delete_project_setting, + get_project_setting, + update_project_setting, +) +from synapseclient.core.async_utils import async_to_sync, otel_trace_method +from synapseclient.core.constants import concrete_types +from synapseclient.models.protocols.project_setting_protocol import ( + ProjectSettingSynchronousProtocol, +) + + +@dataclass() +@async_to_sync +class ProjectSetting(ProjectSettingSynchronousProtocol): + """Represents a project setting in Synapse, controlling how files are uploaded + and stored within a project or folder. + + Currently supports the ``"upload"`` settings type, which is backed by + ``UploadDestinationListSetting`` in the Synapse REST API. Additional settings + types (e.g. ``"external_sync"``, ``"requester_pays"``) may be introduced in + future subclasses. + + Attributes: + id: (Read Only) The unique ID of this project setting, assigned by the + server on creation. + project_id: The Synapse ID of the project or folder this setting belongs to. + Required for `get()` and `store()`. + settings_type: The type of project setting. Currently only ``"upload"`` + is supported. Default: ``"upload"``. + locations: The list of storage location IDs for upload. The first ID is the + default upload destination. A project may have at most 10 storage locations. + To obtain a storage location ID, create a + [StorageLocation][synapseclient.models.StorageLocation] and use its + `storage_location_id`. See + [StorageLocationType][synapseclient.models.StorageLocationType] for the + available storage backend types. + concrete_type: (Read Only) The concrete type returned by the Synapse REST API. + etag: (Read Only) Synapse employs an Optimistic Concurrency Control (OCC) + scheme. The etag changes every time the setting is updated; it must be + included on updates. + + Example: Creating a project setting from a new storage location: + Create a StorageLocation first, then use its ID when creating the + project setting: + + from synapseclient.models import ( + ProjectSetting, + StorageLocation, + StorageLocationType, + ) + + import synapseclient + synapseclient.login() + + storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket="my-bucket", + base_key="my/prefix", + ).store() + + setting = ProjectSetting( + project_id="syn123", + locations=[storage.storage_location_id], + ).store() + print(f"Created setting ID: {setting.id}") + + Example: Creating a project setting: + + from synapseclient.models import ProjectSetting + + import synapseclient + synapseclient.login() + + setting = ProjectSetting( + project_id="syn123", + settings_type="upload", + locations=[12345], + ).store() + print(f"Created setting ID: {setting.id}") + + Example: Updating an existing project setting + Retrieve and update the storage locations on an existing setting: + + from synapseclient.models import ProjectSetting + + import synapseclient + synapseclient.login() + + setting = ProjectSetting(project_id="syn123", settings_type="upload").get() + setting.locations = [12345, 67890] + setting.store() + print(f"Updated setting ID: {setting.id}") + + Example: Deleting a project setting + Remove the project setting entirely: + + from synapseclient.models import ProjectSetting + + import synapseclient + synapseclient.login() + + setting = ProjectSetting(project_id="syn123", settings_type="upload").get() + if setting: + setting.delete() + """ + + id: Optional[str] = None + """(Read Only) The unique ID of this project setting, assigned by the server on + creation.""" + + project_id: Optional[str] = None + """The Synapse ID of the project or folder this setting belongs to. Required for + `get()` and `store()`.""" + + settings_type: str = "upload" + """The type of project setting. Currently only ``"upload"`` is supported.""" + + locations: List[int] = field(default_factory=list) + """The list of storage location IDs for upload. The first ID is the default upload + destination. A project may have at most 10 storage locations. To obtain a storage + location ID, create a [StorageLocation][synapseclient.models.StorageLocation] and + use its `storage_location_id`. See + [StorageLocationType][synapseclient.models.StorageLocationType] for the available + storage backend types.""" + + concrete_type: Optional[str] = field(default=None, compare=False) + """(Read Only) The concrete type returned by the Synapse REST API.""" + + etag: Optional[str] = field(default=None, compare=False) + """(Read Only) Synapse employs an Optimistic Concurrency Control (OCC) scheme. + The etag changes every time the setting is updated.""" + + def fill_from_dict(self, synapse_response: Dict[str, Any]) -> "ProjectSetting": + """Populate this dataclass from a REST API response dict. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The ProjectSetting object. + """ + self.id = synapse_response.get("id", None) + self.project_id = synapse_response.get("projectId", None) + self.settings_type = synapse_response.get("settingsType", self.settings_type) + self.locations = synapse_response.get("locations", []) + self.concrete_type = synapse_response.get("concreteType", None) + self.etag = synapse_response.get("etag", None) + return self + + def _to_synapse_request(self) -> Dict[str, Any]: + """Convert this dataclass to a request body for the REST API. + + Returns: + A dictionary suitable for the REST API. + """ + request: Dict[str, Any] = { + "concreteType": concrete_types.UPLOAD_DESTINATION_LIST_SETTING, + "settingsType": self.settings_type, + "projectId": self.project_id, + "locations": self.locations, + } + if self.id is not None: + request["id"] = self.id + if self.etag is not None: + request["etag"] = self.etag + return request + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"ProjectSetting_Get: {self.project_id}" + ) + async def get_async( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> Optional["ProjectSetting"]: + """Retrieve this project setting from Synapse. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The ProjectSetting object populated with data from Synapse, or None if + no setting exists for this project and settings_type. + + Raises: + ValueError: If `project_id` is not set. + + Example: Using this function + Get the upload project setting for a project: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import ProjectSetting + + syn = Synapse() + syn.login() + + async def main(): + setting = await ProjectSetting( + project_id="syn123", settings_type="upload" + ).get_async() + if setting: + print(f"Storage locations: {setting.locations}") + + asyncio.run(main()) + """ + if not self.project_id: + raise ValueError("project_id is required to retrieve a project setting.") + + response = await get_project_setting( + project_id=self.project_id, + setting_type=self.settings_type, + synapse_client=synapse_client, + ) + if not response: + return None + self.fill_from_dict(response) + return self + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"ProjectSetting_Store: {self.project_id}" + ) + async def store_async( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> "ProjectSetting": + """Create or update this project setting in Synapse. + + If this setting does not yet have an `id`, a new project setting is created. + If `id` is already set (e.g. retrieved via `get_async()`), the existing + setting is updated. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The ProjectSetting object with server-assigned fields populated. + + Raises: + ValueError: If `project_id` is not set. + + Example: Creating a new project setting + Assign a custom storage location to a project for the first time: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import ProjectSetting + + syn = Synapse() + syn.login() + + async def main(): + setting = await ProjectSetting( + project_id="syn123", + settings_type="upload", + locations=[12345], + ).store_async() + print(f"Created setting ID: {setting.id}") + + asyncio.run(main()) + + Example: Updating an existing project setting + Retrieve and then update the storage locations on an existing setting: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import ProjectSetting + + syn = Synapse() + syn.login() + + async def main(): + setting = await ProjectSetting( + project_id="syn123", settings_type="upload" + ).get_async() + setting.locations = [12345, 67890] + await setting.store_async() + print(f"Updated setting ID: {setting.id}") + + asyncio.run(main()) + """ + if not self.project_id: + raise ValueError("project_id is required to store a project setting.") + + request = self._to_synapse_request() + + if self.id is None: + response = await create_project_setting( + request=request, + synapse_client=synapse_client, + ) + else: + await update_project_setting( + request=request, + synapse_client=synapse_client, + ) + response = await get_project_setting( + project_id=self.project_id, + setting_type=self.settings_type, + synapse_client=synapse_client, + ) + + self.fill_from_dict(response) + return self + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"ProjectSetting_Delete: {self.id}" + ) + async def delete_async( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> None: + """Delete this project setting from Synapse. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + None + + Raises: + ValueError: If `id` is not set. + + Example: Using this function + Delete a project setting: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import ProjectSetting + + syn = Synapse() + syn.login() + + async def main(): + await ProjectSetting( + id = "123" + ).delete_async() + asyncio.run(main()) + """ + if not self.id: + raise ValueError("id is required to delete a project setting.") + + await delete_project_setting( + setting_id=self.id, + synapse_client=synapse_client, + ) diff --git a/synapseclient/models/protocols/CLAUDE.md b/synapseclient/models/protocols/CLAUDE.md new file mode 100644 index 000000000..261f174ac --- /dev/null +++ b/synapseclient/models/protocols/CLAUDE.md @@ -0,0 +1,27 @@ + + +## Project + +Protocol classes providing sync method type hints for IDE autocompletion. Contains 18 protocol modules covering both individual model classes and shared behaviors/mixins; there is not always a strict 1:1 mapping between models and protocol files. + +## Conventions + +### Naming convention +- File: `{entity}_protocol.py` (e.g., `file_protocol.py`, `project_protocol.py`) +- Class: `{Entity}SynchronousProtocol` (e.g., `FileSynchronousProtocol`) + +### Signature matching +Every async method on a model must have a corresponding sync signature here — method name without `_async` suffix, same parameters (including `synapse_client: Optional["Synapse"] = None`). Method bodies use minimal placeholder implementations (e.g., `return self`, returning an empty list, or `...`), matching the existing pattern in each protocol file. Use placeholder return values that satisfy static type checkers — `...` alone will cause type errors for methods with non-None return types. Docstrings should match the async counterpart with updated examples showing sync usage. + +### Purpose +The `@async_to_sync` decorator generates the actual sync implementation at class definition time. These protocol files exist solely so IDEs can provide type hints, autocomplete, and documentation for the generated sync methods. + +### Adding a new method +1. Add async method to model class (e.g., `store_async()`) +2. Add sync signature to the corresponding protocol (e.g., `store()` with a placeholder body consistent with that file, such as `return self`, an empty collection, or `...`) +3. The decorator auto-generates the working sync implementation + +## Constraints + +- Protocol signatures must exactly match async signatures minus the `_async` suffix — mismatches cause IDE type hint errors. +- Do not add implementation logic to protocols — they are type stubs only. diff --git a/synapseclient/models/protocols/docker_protocol.py b/synapseclient/models/protocols/docker_protocol.py index ce83650bd..4f483d96f 100644 --- a/synapseclient/models/protocols/docker_protocol.py +++ b/synapseclient/models/protocols/docker_protocol.py @@ -1,4 +1,5 @@ """Protocol defining the synchronous interface for DockerRepository operations.""" + from typing import TYPE_CHECKING, Optional, Protocol from synapseclient import Synapse diff --git a/synapseclient/models/protocols/project_setting_protocol.py b/synapseclient/models/protocols/project_setting_protocol.py new file mode 100644 index 000000000..44b7847f9 --- /dev/null +++ b/synapseclient/models/protocols/project_setting_protocol.py @@ -0,0 +1,133 @@ +"""Protocol for the specific methods of ProjectSetting that have synchronous counterparts +generated at runtime.""" + +from typing import TYPE_CHECKING, Optional, Protocol + +from synapseclient import Synapse + +if TYPE_CHECKING: + from synapseclient.models.project_setting import ProjectSetting + + +class ProjectSettingSynchronousProtocol(Protocol): + """ + The protocol for methods that are asynchronous but also + have a synchronous counterpart that may also be called. + """ + + def get( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> "ProjectSetting": + """Retrieve this project setting from Synapse. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The ProjectSetting object populated with data from Synapse, or None if + no setting exists for this project and settings_type. + + Raises: + ValueError: If `project_id` is not set. + + Example: Using this function + Get the upload project setting for a project: + + from synapseclient.models import ProjectSetting + + import synapseclient + synapseclient.login() + + setting = ProjectSetting(project_id="syn123", settings_type="upload").get() + if setting: + print(f"Storage locations: {setting.locations}") + """ + return self + + def store( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> "ProjectSetting": + """Create or update this project setting in Synapse. + + If this setting does not yet have an `id`, a new project setting is created. + If `id` is already set (e.g. retrieved via `get()`), the existing setting + is updated. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The ProjectSetting object with server-assigned fields populated. + + Raises: + ValueError: If `project_id` is not set. + + Example: Creating a new project setting + Assign a custom storage location to a project for the first time: + + from synapseclient.models import ProjectSetting + + import synapseclient + synapseclient.login() + + setting = ProjectSetting( + project_id="syn123", + settings_type="upload", + locations=[12345], + ).store() + print(f"Created setting ID: {setting.id}") + + Example: Updating an existing project setting + Retrieve and then update the storage locations on an existing setting: + + from synapseclient.models import ProjectSetting + + import synapseclient + synapseclient.login() + + setting = ProjectSetting(project_id="syn123", settings_type="upload").get() + setting.locations = [12345, 67890] + setting.store() + print(f"Updated setting ID: {setting.id}") + """ + return self + + def delete( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> None: + """Delete this project setting from Synapse. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + None + + Raises: + ValueError: If `id` is not set. + + Example: Using this function + Delete a project setting: + + from synapseclient.models import ProjectSetting + + import synapseclient + synapseclient.login() + + setting = ProjectSetting(project_id="syn123", settings_type="upload").get() + if setting: + setting.delete() + """ + return self diff --git a/synapseclient/models/protocols/storable_container_protocol.py b/synapseclient/models/protocols/storable_container_protocol.py index 0352132d1..79d50c788 100644 --- a/synapseclient/models/protocols/storable_container_protocol.py +++ b/synapseclient/models/protocols/storable_container_protocol.py @@ -2,13 +2,21 @@ generated at runtime.""" import asyncio -from typing import List, Optional, Protocol +from typing import TYPE_CHECKING, List, Literal, Optional, Protocol from typing_extensions import Self from synapseclient import Synapse from synapseclient.core.constants.method_flags import COLLISION_OVERWRITE_LOCAL -from synapseclient.models.services.storable_entity_components import FailureStrategy +from synapseclient.models.services.storable_entity_components import ( + MANIFEST_UPLOAD_MAX_RETRIES, + FailureStrategy, +) + +if TYPE_CHECKING: + from synapseclient.models.file import File + +ManifestSetting = Literal["all", "suppress", "root"] class StorableContainerSynchronousProtocol(Protocol): @@ -29,6 +37,7 @@ def sync_from_synapse( link_hops: int = 1, queue: asyncio.Queue = None, include_types: Optional[List[str]] = None, + manifest: ManifestSetting = "all", *, synapse_client: Optional[Synapse] = None, ) -> Self: @@ -40,9 +49,10 @@ def sync_from_synapse( If you only want to retrieve the full tree of metadata about your container specify `download_file` as False. - This works similar to [synapseutils.syncFromSynapse][], however, this does not - currently support the writing of data to a manifest TSV file. This will be a - future enhancement. + This works similar to [synapseutils.syncFromSynapse][], and generates a + `manifest.csv` file in each synced directory. The manifest uses CSV format + with `parentId` and `ID` columns, interoperable with the Synapse UI download + cart and `synapse get-download-list` CLI output. Supports syncing Files, Folders, Tables, EntityViews, SubmissionViews, Datasets, DatasetCollections, MaterializedViews, and VirtualTables from Synapse. The @@ -74,6 +84,11 @@ def sync_from_synapse( include_types: Must be a list of entity types (ie. ["folder","file"]) which can be found [here](https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/model/EntityType.html) + manifest: Determines whether to generate a manifest CSV file. Options are: + + - `all` (default): generate `manifest.csv` in every synced directory + - `root`: generate `manifest.csv` only in the root `path` directory + - `suppress`: do not generate any manifest file synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. @@ -224,3 +239,120 @@ def sync_from_synapse( """ return self + + def sync_to_synapse( + self: Self, + manifest_path: str, + dry_run: bool = False, + send_messages: bool = True, + retries: int = MANIFEST_UPLOAD_MAX_RETRIES, + merge_existing_annotations: bool = True, + associate_activity_to_new_version: bool = False, + *, + synapse_client: Synapse | None = None, + ) -> list["File"]: + """Upload files to Synapse using a manifest CSV file. + + Accepts manifests produced by sync_from_synapse, the + synapse get-download-list CLI, or the Synapse UI download cart. + The manifest must have at minimum a path and parentId column. + All other columns that are not part of the standard manifest column set + are treated as file annotations. + + Standard manifest columns: + [ID, name, parentId, contentType, path, synapseStore, activityName, + activityDescription, forceVersion, used, executed] + + Arguments: + manifest_path: Path to the CSV manifest file. + dry_run: If True, perform full validation of the manifest + (including verifying that all parent containers exist in + Synapse) but skip the actual file upload. + send_messages: If True, send a Synapse notification message on + completion. + retries: Number of notification retries (only relevant when + send_messages=True). + merge_existing_annotations: If True, merge manifest annotations + with existing annotations on Synapse. If False, overwrite them. + associate_activity_to_new_version: If True and a version update + occurs, the existing Synapse activity is associated with the new + version. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last + created instance from the Synapse class constructor. + + Returns: + List of File entities that were created or updated. Returns an + empty list if dry_run=True or if no rows were eligible for + upload. + + Example: Using this function + + ```python + from synapseclient import Synapse + from synapseclient.models import Project + + syn = Synapse() + syn.login() + + project = Project(id="syn12345") + uploaded = project.sync_to_synapse(manifest_path="/path/to/manifest.csv") + ``` + """ + return [] + + def generate_sync_manifest( + self: Self, + directory_path: str, + manifest_path: str, + *, + synapse_client: Synapse | None = None, + ) -> None: + """Walk a local directory, mirror its folder hierarchy under this + container in Synapse, and write a CSV manifest ready for + [sync_to_synapse][synapseclient.models.mixins.StorableContainer.sync_to_synapse]. + + The manifest has two columns: path (absolute, symlink-resolved) and + parentId (the Synapse ID of the file's containing folder). Existing + Synapse folders with matching names and parents are reused. Directory + symlinks inside directory_path are not followed; file symlinks record + the symlink path and upload the target's contents. Zero-byte files are + skipped with a warning — Synapse rejects empty files. I/O errors during + walk are logged and skipped; an empty source directory produces a + warning and a header-only manifest. + + Arguments: + directory_path: Path to the local directory to be pushed to + Synapse. + manifest_path: Path where the generated manifest CSV will be + written. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last + created instance from the Synapse class constructor. + + Raises: + ValueError: If this container's id is None, or if directory_path + does not exist or is not a directory, or if this container's + id exists in Synapse but is not a Folder or Project. + SynapseHTTPError: If this container's id does not exist in + Synapse. + + Example: Generate a manifest and upload the files + Mirror ./my_data under a Synapse project and then upload it. + + ```python + from synapseclient import Synapse + from synapseclient.models import Project + + syn = Synapse() + syn.login() + + project = Project(id="syn12345") + project.generate_sync_manifest( + directory_path="./my_data", + manifest_path="manifest.csv", + ) + project.sync_to_synapse(manifest_path="manifest.csv") + ``` + """ + return None diff --git a/synapseclient/models/protocols/storage_location_mixin_protocol.py b/synapseclient/models/protocols/storage_location_mixin_protocol.py new file mode 100644 index 000000000..bcac3a491 --- /dev/null +++ b/synapseclient/models/protocols/storage_location_mixin_protocol.py @@ -0,0 +1,279 @@ +"""Protocol for the specific methods of StorageLocationConfigurable mixin that have +synchronous counterparts generated at runtime.""" + +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Protocol, Union + +from synapseclient import Synapse + +if TYPE_CHECKING: + from synapseclient.models.services.migration_types import MigrationResult + + +class StorageLocationConfigurableSynchronousProtocol(Protocol): + """ + The protocol for methods that are asynchronous but also + have a synchronous counterpart that may also be called. + """ + + def set_storage_location( + self, + storage_location_id: Optional[Union[int, List[int]]] = None, + *, + synapse_client: Optional[Synapse] = None, + ) -> Dict[str, Any]: + """Set the upload storage location for this entity. This configures where + files uploaded to this entity will be stored. + + Arguments: + storage_location_id: The storage location ID(s) to set. Can be a single + ID, a list of IDs (first is default, max 10), or None to use + Synapse default storage. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The project setting dict returned from Synapse. + + Raises: + ValueError: If the entity does not have an id set. + + Example: Setting storage location on a folder + Set storage location on a folder: + + from synapseclient.models import Folder + + import synapseclient + synapseclient.login() + + folder = Folder(id="syn123").get() + setting = folder.set_storage_location(storage_location_id=12345) + print(setting) + """ + return {} + + def get_project_setting( + self, + setting_type: str = "upload", + *, + synapse_client: Optional[Synapse] = None, + ) -> Optional[Dict[str, Any]]: + """Get the project setting for this entity. + + Arguments: + setting_type: The type of setting to retrieve. One of: + 'upload', 'external_sync', 'requester_pays'. Default: 'upload'. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The project setting as a dictionary, or None if no setting exists. + + Raises: + ValueError: If the entity does not have an id set. + + Example: Getting project settings + Get the upload settings for a folder: + + from synapseclient.models import Folder + + import synapseclient + synapseclient.login() + + folder = Folder(id="syn123").get() + setting = folder.get_project_setting(setting_type="upload") + if setting: + print(f"Storage locations: {setting.locations}") + """ + return {} + + def delete_project_setting( + self, + setting_id: str, + *, + synapse_client: Optional[Synapse] = None, + ) -> None: + """Delete a project setting by its setting ID. + + Arguments: + setting_id: The ID of the project setting to delete. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + None + + Raises: + ValueError: If the entity does not have an id set. + + Example: Deleting a project setting + Delete the upload settings for a folder: + + from synapseclient.models import Folder + + import synapseclient + synapseclient.login() + + folder = Folder(id="syn123").get() + setting = folder.get_project_setting(setting_type="upload") + if setting: + folder.delete_project_setting(setting_id=setting['id']) + """ + return None + + def get_sts_storage_token( + self, + permission: str, + *, + output_format: str = "json", + min_remaining_life: Optional[int] = None, + synapse_client: Optional[Synapse] = None, + ) -> Any: + """Get STS (AWS Security Token Service) credentials for direct access to + the storage location backing this entity. These credentials can be used + with AWS tools like awscli and boto3. + + Arguments: + permission: The permission level for the token. Must be 'read_only' + or 'read_write'. + output_format: The output format for the credentials. Options: + 'json' (default), 'boto', 'shell', 'bash', 'cmd', 'powershell'. + min_remaining_life: The minimum remaining life (in seconds) for a + cached token before a new one is fetched. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The STS credentials in the requested format. + + Raises: + ValueError: If the entity does not have an id set. + + Example: Using credentials with boto3 + Get STS credentials for an STS-enabled folder and use with boto3: + + import boto3 + from synapseclient.models import Folder + + import synapseclient + synapseclient.login() + + folder = Folder(id="syn123").get() + credentials = folder.get_sts_storage_token( + permission="read_write", + output_format="boto", + ) + s3_client = boto3.client('s3', **credentials) + """ + return {} + + def index_files_for_migration( + self, + dest_storage_location_id: int, + db_path: Optional[str] = None, + *, + source_storage_location_ids: Optional[List[int]] = None, + file_version_strategy: str = "new", + include_table_files: bool = False, + continue_on_error: bool = False, + synapse_client: Optional[Synapse] = None, + ) -> "MigrationResult": + """Index files in this entity for migration to a new storage location. + + This is the first step in migrating files to a new storage location. + After indexing, use `migrate_indexed_files` to perform the actual migration. + + Arguments: + dest_storage_location_id: The destination storage location ID. + db_path: Path to the SQLite database file for tracking migration state. + If not provided, a temporary directory will be used. The path + can be retrieved from the returned MigrationResult.db_path. + source_storage_location_ids: Optional list of source storage location IDs + to filter which files to migrate. If None, all files are indexed. + file_version_strategy: Strategy for handling file versions. Options: + 'new' (default) - create new versions, 'all' - migrate all versions, + 'latest' - only migrate latest version, 'skip' - skip if file exists. + include_table_files: Whether to include files attached to tables. + continue_on_error: Whether to continue indexing if an error occurs. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + A MigrationResult object containing indexing statistics and the database + path (accessible via result.db_path). + + Example: Indexing files for migration + Index files in a project for migration: + + from synapseclient.models import Project + + import synapseclient + synapseclient.login() + + project = Project(id="syn123").get() + result = project.index_files_for_migration( + dest_storage_location_id=12345, + ) + print(f"Database path: {result.db_path}") + print(f"Indexed {result.counts_by_status}") + """ + return None + + def migrate_indexed_files( + self, + db_path: str, + *, + create_table_snapshots: bool = True, + continue_on_error: bool = False, + force: bool = False, + synapse_client: Optional[Synapse] = None, + ) -> Optional["MigrationResult"]: + """Migrate files that have been indexed with `index_files_for_migration`. + + This is the second step in migrating files to a new storage location. + Files must first be indexed using `index_files_for_migration`. + + Arguments: + db_path: Path to the SQLite database file created by + `index_files_for_migration`. You can get this from the + MigrationResult.db_path returned by index_files_for_migration. + create_table_snapshots: Whether to create table snapshots before + migrating table files. + continue_on_error: Whether to continue migration if an error occurs. + force: Whether to force migration of files that have already been + migrated. Also bypasses interactive confirmation. + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + A MigrationResult object containing migration statistics, or None + if the user declined the confirmation prompt. + + Example: Migrating indexed files + Migrate previously indexed files: + + from synapseclient.models import Project + + import synapseclient + synapseclient.login() + + project = Project(id="syn123").get() + + # Index first + index_result = project.index_files_for_migration( + dest_storage_location_id=12345, + ) + + # Then migrate using the db_path from index result + result = project.migrate_indexed_files( + db_path=index_result.db_path, + force=True, # Skip interactive confirmation + ) + print(f"Migrated {result.counts_by_status}") + """ + return None diff --git a/synapseclient/models/protocols/storage_location_protocol.py b/synapseclient/models/protocols/storage_location_protocol.py new file mode 100644 index 000000000..79cb6fb38 --- /dev/null +++ b/synapseclient/models/protocols/storage_location_protocol.py @@ -0,0 +1,86 @@ +"""Protocol for the specific methods of StorageLocation that have synchronous counterparts +generated at runtime.""" + +from typing import TYPE_CHECKING, Optional, Protocol + +from synapseclient import Synapse + +if TYPE_CHECKING: + from synapseclient.models.storage_location import StorageLocation + + +class StorageLocationSynchronousProtocol(Protocol): + """ + The protocol for methods that are asynchronous but also + have a synchronous counterpart that may also be called. + """ + + def store( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> "StorageLocation": + """Create this storage location in Synapse. Storage locations are immutable; + this always creates a new one. If a storage location with identical properties + already exists for this user, the existing one is returned (idempotent). + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The StorageLocation object with server-assigned fields populated. + + Raises: + ValueError: If `storage_type` is not set. + + Example: Creating an external S3 storage location + Create a storage location backed by your own S3 bucket: + + from synapseclient.models import StorageLocation, StorageLocationType + + import synapseclient + synapseclient.login() + + storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket="my-external-synapse-bucket", + base_key="path/within/bucket", + ).store() + + print(f"Storage location ID: {storage.storage_location_id}") + """ + return self + + def get( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> "StorageLocation": + """Retrieve this storage location from Synapse by its ID. Only the creator of + a StorageLocationSetting can retrieve it by its id. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The StorageLocation object populated with data from Synapse. + + Raises: + ValueError: If `storage_location_id` is not set. + + Example: Retrieving a storage location + Retrieve a storage location by ID: + + from synapseclient.models import StorageLocation + + import synapseclient + synapseclient.login() + + storage = StorageLocation(storage_location_id=12345).get() + print(f"Type: {storage.storage_type}, Bucket: {storage.bucket}") + """ + return self diff --git a/synapseclient/models/recordset.py b/synapseclient/models/recordset.py index 854b5cd43..9185d464d 100644 --- a/synapseclient/models/recordset.py +++ b/synapseclient/models/recordset.py @@ -1031,19 +1031,23 @@ def to_synapse_request(self) -> Dict[str, Any]: "isLatestVersion": self.is_latest_version, "dataFileHandleId": self.data_file_handle_id, "upsertKey": self.upsert_keys, - "csvDescriptor": self.csv_descriptor.to_synapse_request() - if self.csv_descriptor - else None, - "validationSummary": { - "containerId": self.validation_summary.container_id, - "totalNumberOfChildren": self.validation_summary.total_number_of_children, - "numberOfValidChildren": self.validation_summary.number_of_valid_children, - "numberOfInvalidChildren": self.validation_summary.number_of_invalid_children, - "numberOfUnknownChildren": self.validation_summary.number_of_unknown_children, - "generatedOn": self.validation_summary.generated_on, - } - if self.validation_summary - else None, + "csvDescriptor": ( + self.csv_descriptor.to_synapse_request() + if self.csv_descriptor + else None + ), + "validationSummary": ( + { + "containerId": self.validation_summary.container_id, + "totalNumberOfChildren": self.validation_summary.total_number_of_children, + "numberOfValidChildren": self.validation_summary.number_of_valid_children, + "numberOfInvalidChildren": self.validation_summary.number_of_invalid_children, + "numberOfUnknownChildren": self.validation_summary.number_of_unknown_children, + "generatedOn": self.validation_summary.generated_on, + } + if self.validation_summary + else None + ), "fileNameOverride": self.file_name_override, } delete_none_keys(entity) @@ -1280,9 +1284,11 @@ async def main(): if_collision=self.if_collision, limit_search=self.synapse_container_limit or self.parent_id, download_file=self.download_file, - download_location=os.path.dirname(self.path) - if self.path and os.path.isfile(self.path) - else self.path, + download_location=( + os.path.dirname(self.path) + if self.path and os.path.isfile(self.path) + else self.path + ), md5=self.content_md5, synapse_client=syn, ) diff --git a/synapseclient/models/schema_organization.py b/synapseclient/models/schema_organization.py index 91922a4d5..cfa3f30df 100644 --- a/synapseclient/models/schema_organization.py +++ b/synapseclient/models/schema_organization.py @@ -1228,7 +1228,7 @@ def from_uri(cls, uri: str) -> "JSONSchema": @staticmethod def _create_json_schema_version_from_response( - response: dict[str, Any] + response: dict[str, Any], ) -> JSONSchemaVersionInfo: """ Creates a JSONSchemaVersionInfo object from a Synapse API response @@ -1370,7 +1370,7 @@ def _check_semantic_version(self, version: str) -> None: @staticmethod def _create_json_schema_version_from_response( - response: dict[str, Any] + response: dict[str, Any], ) -> JSONSchemaVersionInfo: """ Creates a JSONSchemaVersionInfo object from a Synapse API response diff --git a/synapseclient/models/services/CLAUDE.md b/synapseclient/models/services/CLAUDE.md new file mode 100644 index 000000000..cd2a479c9 --- /dev/null +++ b/synapseclient/models/services/CLAUDE.md @@ -0,0 +1,20 @@ + + +## Project + +Business logic extracted from model classes to keep models thin. Internal-only — not part of the public API. + +## Conventions + +### storable_entity.py +`store_entity()` async function orchestrates entity POST/PUT to Synapse. Handles version numbering: if `version_label` changed or `force_version=True`, increments version. Note: this function has an explicit TODO marking it as incomplete/WIP. + +### storable_entity_components.py +`store_entity_components()` orchestrates storing annotations, activity, and ACL as separate API calls after the entity itself is stored. `FailureStrategy` enum (LOG_EXCEPTION, RAISE_EXCEPTION) controls error handling. `wrap_coroutine()` helper wraps individual component store operations. + +### search.py +`get_id()` utility resolves an entity by name+parent or by Synapse ID. Has a TODO for deprecated code replacement (SYNPY-1623) — uses `asyncio.get_event_loop().run_in_executor()` as a legacy pattern for blocking operations. + +## Constraints + +- These are internal service functions — do not expose in `models/__init__.py` or import from user-facing code. diff --git a/synapseclient/models/services/__init__.py b/synapseclient/models/services/__init__.py index d1e7227ca..5ff746bab 100644 --- a/synapseclient/models/services/__init__.py +++ b/synapseclient/models/services/__init__.py @@ -1,3 +1,15 @@ +from synapseclient.models.services.migration import ( + index_files_for_migration_async, + migrate_indexed_files_async, +) +from synapseclient.models.services.migration_types import ( + MigrationError, + MigrationKey, + MigrationResult, + MigrationSettings, + MigrationStatus, + MigrationType, +) from synapseclient.models.services.search import get_id from synapseclient.models.services.storable_entity import store_entity from synapseclient.models.services.storable_entity_components import ( @@ -5,4 +17,17 @@ store_entity_components, ) -__all__ = ["store_entity_components", "store_entity", "FailureStrategy", "get_id"] +__all__ = [ + "store_entity_components", + "store_entity", + "FailureStrategy", + "get_id", + "index_files_for_migration_async", + "migrate_indexed_files_async", + "MigrationResult", + "MigrationStatus", + "MigrationType", + "MigrationKey", + "MigrationSettings", + "MigrationError", +] diff --git a/synapseclient/models/services/manifest.py b/synapseclient/models/services/manifest.py new file mode 100644 index 000000000..e0b9dbcc6 --- /dev/null +++ b/synapseclient/models/services/manifest.py @@ -0,0 +1,1907 @@ +"""Services for reading and generating Synapse manifest CSV files used to +drive bulk upload via Project.sync_to_synapse / Folder.sync_to_synapse. + +This includes reading a manifest CSV file and preparing it for upload, as well +as writing a manifest CSV file from a list of File entities. +""" + +from __future__ import annotations + +import ast +import asyncio +import csv +import datetime +import functools +import io +import os +import re +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Iterable, NamedTuple, TypedDict, Union + +from synapseclient import Synapse +from synapseclient.core import utils +from synapseclient.core.exceptions import ( + SynapseFileNotFoundError, + SynapseHTTPError, + SynapseProvenanceError, +) +from synapseclient.core.utils import ( + bool_or_none, + datetime_or_none, + get_synid_and_version, + is_synapse_id_str, + is_url, + test_import_pandas, + topolgical_sort, +) +from synapseclient.operations.factory_operations import FileOptions, get_async + +if TYPE_CHECKING: + from pandas import DataFrame, Series + + from synapseclient.models import UsedEntity, UsedURL + from synapseclient.models.file import File + from synapseclient.models.folder import Folder + + +MANIFEST_CSV_FILENAME = "manifest.csv" +DEFAULT_GENERATED_MANIFEST_CSV_KEYS = [ + "path", + "parentId", + "name", + "ID", + "synapseStore", + "contentType", + "used", + "executed", + "activityName", + "activityDescription", +] +#: Scalar types that Synapse supports as annotation values. +SynapseAnnotationType = datetime.datetime | float | int | bool | str + +# Columns that are NOT annotations — stripped before building File.annotations. +# Covers the standard manifest columns plus the extra metadata columns produced +# by the Synapse UI download cart and synapse get-download-list CLI. +NON_ANNOTATION_COLUMNS = frozenset( + [ + # Standard manifest columns used directly during upload + "path", + "parentId", + "ID", + "name", + "synapseStore", + "contentType", + "activityName", + "activityDescription", + "forceVersion", + "used", + "executed", + # Download-list / Synapse UI informational columns — ignore for upload + "error", + "versionNumber", + "dataFileSizeBytes", + "createdBy", + "createdOn", + "modifiedBy", + "modifiedOn", + "synapseURL", + "dataFileMD5Hex", + ] +) + +# Regex patterns used when parsing annotation cell values. +# Matches a cell that is a bracket-delimited list, e.g. "[a, b, c]". +# Disallows ']' inside to avoid matching adjacent lists like "[a][b]". +_ARRAY_BRACKET_PATTERN = re.compile(r"^\[[^\]]*\]$") +# https://stackoverflow.com/questions/18893390/splitting-on-comma-outside-quotes +_COMMAS_OUTSIDE_DOUBLE_QUOTES_PATTERN = re.compile(r",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)") +# Valid Synapse file name characters (1–256 chars). +_FILE_NAME_PATTERN = re.compile(r"^[`\w \-\+\.\(\)]{1,256}$") + + +def _manifest_csv_filename(path: str) -> str: + return os.path.join(os.path.expanduser(path), MANIFEST_CSV_FILENAME) + + +def _get_entity_provenance_dict_for_manifest(entity: File) -> dict[str, str]: + """ + Gets the provenance metadata for the entity. + + Arguments: + entity: A File entity object + + Returns: + dict[str, str]: a dictionary with a subset of the provenance metadata for the entity. + An empty dictionary is returned if the metadata does not have a provenance record. + """ + if not entity.activity: + return {} + used = [a.format_for_manifest() for a in entity.activity.used] + executed = [a.format_for_manifest() for a in entity.activity.executed] + return { + "used": ";".join(used), + "executed": ";".join(executed), + "activityName": entity.activity.name or "", + "activityDescription": entity.activity.description or "", + } + + +def _convert_manifest_data_items_to_string_list( + items: list[Union[str, datetime.datetime, bool, int, float]], +) -> str: + """ + Handle coverting an individual key that contains a possible list of data into a + string representation of the items that can be written to the manifest file. + + This has specific logic around how to handle datetime fields. + + When working with datetime fields we are printing the ISO 8601 UTC representation of + the datetime. + + When working with non strings we are printing the non-quoted version of the object. + + Example: Single-element lists are unwrapped + A list with one item returns the item directly, not wrapped in brackets. + ```python + _convert_manifest_data_items_to_string_list(["string,with,commas"]) # 'string,with,commas' + _convert_manifest_data_items_to_string_list([True]) # 'True' + _convert_manifest_data_items_to_string_list([1]) # '1' + _convert_manifest_data_items_to_string_list([1.0]) # '1.0' + _convert_manifest_data_items_to_string_list( + [datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)] + ) # '2020-01-01T00:00:00Z' + ``` + + Example: Multi-element lists are bracket-wrapped + Multiple items are joined with commas inside `[...]`. String items containing + commas are individually quoted. + ```python + _convert_manifest_data_items_to_string_list(["a", "b", "c"]) + # '[a,b,c]' + _convert_manifest_data_items_to_string_list([True, False]) + # '[True,False]' + _convert_manifest_data_items_to_string_list(["string,with,commas", "string without commas"]) + # '["string,with,commas",string without commas]' + _convert_manifest_data_items_to_string_list( + [datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc), + datetime.datetime(2021, 1, 1, tzinfo=datetime.timezone.utc)] + ) + # '[2020-01-01T00:00:00Z,2021-01-01T00:00:00Z]' + ``` + + Args: + items: The list of items to convert. + + Returns: + The string representation of the items. + """ + items_to_write = [] + for item in items: + if isinstance(item, datetime.datetime): + items_to_write.append( + utils.datetime_to_iso(dt=item, include_milliseconds_if_zero=False) + ) + else: + # If a string based annotation has a comma in it + # this will wrap the string in quotes so it won't be parsed + # as multiple values. For example this is an annotation with 2 values: + # [my first annotation, "my, second, annotation"] + # This is an annotation with 4 value: + # [my first annotation, my, second, annotation] + if isinstance(item, str): + if len(items) > 1 and "," in item: + items_to_write.append(f'"{item}"') + else: + items_to_write.append(item) + else: + items_to_write.append(repr(item)) + + if len(items_to_write) > 1: + return f'[{",".join(items_to_write)}]' + elif len(items_to_write) == 1: + return items_to_write[0] + else: + return "" + + +def _extract_entity_metadata_for_manifest_csv( + all_files: list[File], +) -> tuple[list[str], list[dict[str, Any]]]: + """Extracts metadata from a list of File entities into a form usable by csv.DictWriter. + + Builds the column header list starting from DEFAULT_GENERATED_MANIFEST_CSV_KEYS, then + appends any annotation keys discovered across all files. Each row dict contains the + standard fields plus annotation values (serialized via + _convert_manifest_data_items_to_string_list) and provenance fields from + _get_entity_provenance_dict_for_manifest. + + Arguments: + all_files: A list of File model objects to extract metadata from. + + Returns: + A tuple of (keys, data) where keys is the ordered list of column headers and + data is a list of row dicts, one per file. + """ + keys = list(DEFAULT_GENERATED_MANIFEST_CSV_KEYS) + annotation_keys: set = set() + data = [] + for entity in all_files: + row: dict = { + "path": entity.path, + "parentId": entity.parent_id, + "name": entity.name, + "ID": entity.id, + "synapseStore": entity.synapse_store, + "contentType": entity.content_type, + } + if entity.annotations: + for key, val in entity.annotations.items(): + annotation_keys.add(key) + row[key] = ( + _convert_manifest_data_items_to_string_list(val) + if isinstance(val, list) + else val + ) + row.update(_get_entity_provenance_dict_for_manifest(entity=entity)) + data.append(row) + keys.extend(annotation_keys) + return keys, data + + +def _convert_manifest_data_row_to_dict(row: dict, keys: list[str]) -> dict: + """ + Convert a row of data to a dict that can be written to a manifest file. + + Args: + row: The row of data to convert. + keys: The keys of the manifest. Used to select the rows of data. + + Returns: + The dict representation of the row. + """ + data_to_write = {} + for key in keys: + data_for_key = row.get(key, "") + if isinstance(data_for_key, list): + items_to_write = _convert_manifest_data_items_to_string_list(data_for_key) + data_to_write[key] = items_to_write + else: + data_to_write[key] = data_for_key + return data_to_write + + +def _write_manifest_data_csv( + path: str, keys: list[str], data: list[dict], syn: Synapse +) -> None: + """Writes manifest data to a CSV file using csv.DictWriter with QUOTE_MINIMAL that automatically quotes any cell containing a comma, newline, or the quote character. + + Each row dict is normalized via _convert_manifest_data_row_to_dict so that + list-valued annotation fields are serialized to strings before writing. Missing + fields default to an empty string; extra keys not in fieldnames are silently ignored. + + Arguments: + path: Absolute path of the CSV file to create or overwrite. + keys: Ordered list of column headers used as DictWriter fieldnames. + data: List of row dicts, one per file. Keys absent from a row are written as + empty strings; keys not in fieldnames are ignored. + """ + with io.open(path, "w", encoding="utf8", newline="") as fp: + writer = csv.DictWriter( + fp, + fieldnames=keys, + restval="", + extrasaction="ignore", + quoting=csv.QUOTE_MINIMAL, + ) + writer.writeheader() + for row in data: + writer.writerow(_convert_manifest_data_row_to_dict(row, keys)) + + syn.logger.info(f"Manifest file {path} has been generated.") + + +def generate_manifest_csv(all_files: list[File], path: str, syn: Synapse) -> None: + """Generates a manifest.csv file based on a list of File entities. + + The generated file uses CSV format with comma delimiter and is interoperable + with the Synapse UI download cart. Column names follow the new convention: + `parentId` (instead of `parent`) and `ID` (instead of `id`). + If all_files is empty, a manifest.csv with only the header row will be generated. + If path is None, a ValueError will be raised. + + Args: + all_files: A list of File model objects. + path: The directory path where manifest.csv will be written. + syn: The Synapse client. + raises: + ValueError: If path is None. + """ + if not path: + raise ValueError( + "The path argument is required to generate a manifest.csv file." + ) + filename = _manifest_csv_filename(path=path) + keys, data = _extract_entity_metadata_for_manifest_csv(all_files=all_files) + _write_manifest_data_csv(path=filename, keys=keys, data=data, syn=syn) + + +class UploadSyncFile(NamedTuple): + """Represents a single file being uploaded. + + Attributes: + entity: The file that is going through the sync process. + used: Resolved provenance references — absolute local paths for files in + the upload batch, or File objects for files already in Synapse. + executed: Same as used but for executed provenance references. + activity_name: The name of the activity that is being performed. + activity_description: The description of the activity that is being performed. + """ + + entity: File + used: list[str | File] + executed: list[str | File] + activity_name: str | None + activity_description: str | None + + +async def read_manifest_for_upload( + manifest_path: str, + syn: Synapse, + merge_existing_annotations: bool, + associate_activity_to_new_version: bool, +) -> tuple[list[UploadSyncFile], int]: + """Read and validate a manifest CSV file, returning items ready for upload. + + Accepts manifests produced by StorableContainer.sync_from_synapse, + the synapse get-download-list CLI, or the Synapse UI download cart. + Rows with a non-empty error column (added by get-download-list) are + silently skipped. + + Arguments: + manifest_path: Path to the CSV manifest file. + syn: Authenticated Synapse client. + merge_existing_annotations: If True, merge manifest annotations with + existing annotations on Synapse. If False, overwrite them. + associate_activity_to_new_version: If True and a version update occurs, + the existing Synapse activity will be associated with the new version. + + Returns: + A tuple of (items, total_bytes) where items is a list of + UploadSyncFile objects ready for upload and total_bytes is the + combined size of all local files. + + Raises: + ValueError: If required columns are missing, paths are not unique, + files are empty, file names are invalid, or a parentId is not a + Folder or Project. + OSError: If a non-URL path does not exist on disk. + SynapseProvenanceError: If a provenance item is neither a local file + path, a URL, nor a valid Synapse ID. + SynapseHTTPError: If a parentId does not exist in Synapse. + """ + syn.logger.info(f"Validating manifest: {manifest_path}") + df = _clean_manifest(manifest_path) + + if df.empty: + return [], 0 + + syn.logger.info("Validating manifest contents...") + total_size = _validate_manifest(df) + + syn.logger.info("Validating provenance and parent containers...") + df, _ = await asyncio.gather( + _sort_and_fix_provenance(syn, df), + _check_parent_containers_async(df["parentId"].unique(), syn=syn), + ) + + items = _build_upload_files( + df, + merge_existing_annotations=merge_existing_annotations, + associate_activity_to_new_version=associate_activity_to_new_version, + ) + + return items, total_size + + +def _clean_manifest(manifest_path: str) -> DataFrame: + """Read a manifest CSV and return a cleaned DataFrame ready for validation. + + Arguments: + manifest_path: Path to the CSV manifest file. + + Returns: + A cleaned DataFrame. May be empty if all rows were filtered out. + + Raises: + ValueError: If required columns (path, parentId) are missing, or if + file paths are not unique. + OSError: If a non-URL path does not exist on disk. + """ + df = _read_and_filter_errors(manifest_path) + if df.empty: + return df + + _check_required_columns(df) + _apply_synapse_store_defaults(df) + df = df.fillna("") + df["path"] = df["path"].apply(_check_path_and_normalize) + _check_unique_paths(df) + _default_name_column(df) + return df + + +def _read_and_filter_errors(manifest_path: str) -> DataFrame: + """Read a manifest CSV and drop rows with a non-empty error column. + + The error column is added by the Synapse get-download-list CLI and the + Synapse UI download cart to mark rows that failed to download. + + Arguments: + manifest_path: Path to the CSV manifest file. + + Returns: + A DataFrame with error rows removed. May be empty. + """ + test_import_pandas() + import pandas as pd + + df = pd.read_csv(manifest_path) + if "error" in df.columns: + df = df[df["error"].fillna("") == ""] + return df + + +def _check_required_columns(df: DataFrame) -> None: + """Raise ValueError if the manifest is missing required columns. + + Arguments: + df: A non-empty manifest DataFrame. + + Raises: + ValueError: If path or parentId columns are missing. + """ + for col in ("path", "parentId"): + if col not in df.columns: + raise ValueError(f"Manifest must contain a '{col}' column") + + +def _check_unique_paths(df: DataFrame) -> None: + """Raise ValueError if any file path appears more than once. + + Arguments: + df: Manifest DataFrame with a normalized path column. + + Raises: + ValueError: If duplicate paths are found. + """ + if df["path"].duplicated().any(): + raise ValueError( + "All rows in manifest must contain a unique file path to upload" + ) + + +def _default_name_column(df: DataFrame) -> None: + """Ensure every row has a name, defaulting to the basename of the path. + + Creates the name column if it does not exist. For rows where the name + is blank, fills it from the path column. Mutates df in place. + + Arguments: + df: Manifest DataFrame with a path column. + """ + if "name" not in df.columns: + df["name"] = df["path"].apply(os.path.basename) + empty_names = df["name"] == "" + if empty_names.any(): + df.loc[empty_names, "name"] = df.loc[empty_names, "path"].apply( + os.path.basename + ) + + +def _validate_manifest(df: DataFrame) -> int: + """Run pure validation checks on a cleaned manifest DataFrame. + + Arguments: + df: A non-empty, cleaned manifest DataFrame as returned by + _clean_manifest. + + Returns: + Combined size in bytes of all local (non-URL) files in the manifest. + + Raises: + ValueError: If any file is empty (0 bytes) or has an invalid name, + or if (name, parentId) pairs are not unique. + """ + total_size = _check_size_each_file(df) + _check_file_names(df) + return total_size + + +def _apply_synapse_store_defaults(df: "DataFrame") -> None: + """Set synapseStore column defaults on the manifest DataFrame in place. + + Steps: + 1. Creates the synapseStore column if the manifest CSV didn't include + one (defaults to None/NaN). + 2. Sets URL rows to False -- files referenced by URL should not be + uploaded to Synapse storage. + 3. Sets all remaining nulls to True -- local file paths should be + uploaded by default. + 4. Casts the column to bool for consistent downstream usage. + + Arguments: + df: Manifest DataFrame with at least a path column. + """ + if "synapseStore" not in df.columns: + df["synapseStore"] = None + df.loc[df["path"].apply(is_url), "synapseStore"] = False + df.loc[df["synapseStore"].isnull(), "synapseStore"] = True + df["synapseStore"] = df["synapseStore"].astype(bool) + + +def _check_path_and_normalize(f: str) -> str: + """Return the normalized absolute path for f, or f unchanged if it is a URL. + + Arguments: + f: A file path or URL as read from the manifest path column. + + Returns: + The input unchanged if it is a URL, otherwise the resolved absolute + path after expanding ~ and environment variables. + + Raises: + OSError: If f is not a URL and the resolved path does not point to + an existing file on disk. + """ + if is_url(f): + return f + path_normalized = _expand_path(f) + if not os.path.isfile(path_normalized): + raise OSError(f"The path {f} is not a file or does not exist") + return path_normalized + + +def _expand_path(path: str) -> str: + """Expand ~ and environment variables, then return the absolute path.""" + return os.path.abspath(os.path.expandvars(os.path.expanduser(path))) + + +def _check_size_each_file(df: DataFrame) -> int: + """Raise ValueError if any non-URL file in the manifest is empty (0 bytes). + + Arguments: + df: Manifest DataFrame containing a path column. Rows whose + path is a URL are skipped. + + Returns: + Combined size in bytes of all local (non-URL) files in the manifest. + + Raises: + ValueError: If any local file referenced by the manifest has a size of + zero bytes. + """ + total = 0 + for _, row in df.iterrows(): + file_path = row["path"] + if not is_url(file_path): + size = os.stat(file_path).st_size + if size == 0: + raise ValueError( + f"File {file_path} is empty, empty files cannot be uploaded to Synapse" + ) + total += size + return total + + +def _check_file_names(df: DataFrame) -> None: + """Validate that each file name is acceptable for Synapse and that all + (name, parentId) pairs are unique. + + Arguments: + df: Manifest DataFrame containing name and parentId columns. + All name cells must already be populated (empty names should be + defaulted before calling this function). + + Raises: + ValueError: If any file name contains characters not permitted by + Synapse, or if two rows share the same name and parentId. + """ + for _, row in df.iterrows(): + file_name = row["name"] + if not _FILE_NAME_PATTERN.match(file_name): + raise ValueError( + f"File name {file_name} cannot be stored to Synapse. Names may contain" + " letters, numbers, spaces, underscores, hyphens, periods, plus signs," + " backticks, and parentheses" + ) + if df[["name", "parentId"]].duplicated().any(): + raise ValueError( + "All rows in manifest must contain a path with a unique file name and" + " parent to upload. Files uploaded to the same folder/project (parentId)" + " must have unique file names." + ) + + +async def _sort_and_fix_provenance(syn: Synapse, df: DataFrame) -> DataFrame: + """Validate and normalize provenance references, then topologically sort the + manifest rows so that files are uploaded before any file that depends on them. + + Each used and executed cell is split on ;, and each item is + resolved to an absolute path (if it is a local file being uploaded), a + Synapse entity (if the local file already exists in Synapse), or left as-is + if it is a URL or Synapse ID. + + Arguments: + syn: Authenticated Synapse client, used to look up local files that are + not in the upload manifest but may already exist in Synapse. + df: Manifest DataFrame indexed or containing a path column, with + optional used and executed columns holding ;-delimited + provenance strings. + + Returns: + A new DataFrame with the same rows reordered so that provenance + dependencies are uploaded before the files that reference them, and with + used and executed columns replaced by lists of resolved + references. + + Raises: + SynapseProvenanceError: If a provenance item is neither a local file + path, a URL, nor a valid Synapse ID. + """ + df = df.set_index("path").copy() + + results = await asyncio.gather( + *[_resolve_row(str(path), row, df, syn) for path, row in df.iterrows()] + ) + + deps: dict[str, list[str]] = {} + for path, resolved in results: + # Write resolved provenance back into the DataFrame + for col, values in resolved.items(): + df.at[path, col] = values + + # Local file paths (str) are upload-order dependencies; + # File objects (already in Synapse) are not. + deps[path] = _local_path_refs(resolved) + + sorted_order = [path for path, _deps in topolgical_sort(deps)] + df = df.reindex(sorted_order) + return df.reset_index() + + +def _local_path_refs( + resolved: dict[str, list[str | File]], +) -> list[str]: + """Extract local file path references from resolved provenance columns. + + Local paths (str) represent files in the current upload batch that must be + uploaded first. File objects are already in Synapse and do not create + upload-order dependencies. + + Arguments: + resolved: A dict mapping provenance column names (used, executed) to + their resolved reference lists. + + Returns: + A flat list of local file path strings found across all columns. + """ + return [ + ref for values in resolved.values() for ref in values if isinstance(ref, str) + ] + + +async def _check_parent_containers_async(parent_ids: list[str], syn: Synapse) -> None: + """Verify that every parentId in the manifest is a valid Synapse container. + + All parent IDs are validated concurrently. + + Arguments: + parent_ids: Iterable of Synapse IDs taken from the manifest + parentId column. Empty strings are silently skipped. + syn: Authenticated Synapse client used to fetch each entity. + + Raises: + SynapseHTTPError: If a parentId does not exist in Synapse. + ValueError: If a parentId exists but is not a Project or Folder. + """ + + async def _check_one(syn_id: str) -> None: + if not syn_id: + return + try: + container = await get_async( + synapse_id=syn_id, + file_options=FileOptions(download_file=False), + synapse_client=syn, + ) + except SynapseHTTPError: + syn.logger.warning( + f"\n{syn_id} in the parentId column is not a valid Synapse Id\n" + ) + raise + + from synapseclient.models.folder import Folder + from synapseclient.models.project import Project + + if not isinstance(container, (Folder, Project)): + raise ValueError( + f"{syn_id} in the parentId column is not a Folder or Project" + ) + + await asyncio.gather(*[_check_one(syn_id) for syn_id in parent_ids]) + + +def _build_upload_files( + df: DataFrame, + merge_existing_annotations: bool, + associate_activity_to_new_version: bool, +) -> list[UploadSyncFile]: + """Convert a validated manifest DataFrame into a list of UploadSyncFile objects, + one per manifest row. + + All columns not in NON_ANNOTATION_COLUMNS are treated as annotations. + + Arguments: + df: Validated manifest DataFrame (after provenance sort). + merge_existing_annotations: If True, manifest annotations are merged with + existing annotations on each File in Synapse. If False, manifest + annotations replace them entirely. + associate_activity_to_new_version: If True and a version update occurs, + the existing Synapse activity will be associated with the new version. + + Returns: + List of UploadSyncFile objects ready for upload, one per manifest row. + """ + from synapseclient.models.file import ( + File, # lazy import to avoid circular dependency + ) + + items = [] + for _, row in df.iterrows(): + file_entity = File( + path=row["path"], + parent_id=row["parentId"], + name=row.get("name") or None, + id=row.get("ID") or None, + synapse_store=row.get("synapseStore", True), + content_type=row.get("contentType") or None, + force_version=_parse_force_version(row.get("forceVersion", "")), + merge_existing_annotations=merge_existing_annotations, + associate_activity_to_new_version=associate_activity_to_new_version, + _present_manifest_fields=list(row.index), + ) + + annotation_cols: dict[str, object] = { + str(k): v for k, v in row.items() if k not in NON_ANNOTATION_COLUMNS + } + file_entity.annotations = _build_annotations_for_file(annotation_cols) + + item = UploadSyncFile( + file_entity, + used=row.get("used", []) or [], + executed=row.get("executed", []) or [], + activity_name=row.get("activityName") or None, + activity_description=row.get("activityDescription") or None, + ) + items.append(item) + + return items + + +def _parse_force_version(raw: object) -> bool: + """Parse a forceVersion cell into a bool, defaulting to True. + + The input comes from a CSV cell which can arrive in several forms + depending on whether the column existed, was blank, or had a value. + The conversion cascade is: + + 1. Missing or blank (empty string / None) -- the manifest did not + include a forceVersion column, or the cell was left empty. Defaults + to True (force a new version), the safe default for uploads. + 2. Already a bool -- pandas infers the column type as bool when every + row contains True/False. Used as-is. + 3. Parseable string -- CSV strings like "True"/"False" that pandas + read as str. bool_or_none handles case-insensitive conversion. + 4. Anything else -- unrecognizable values (e.g. "yes", "1", garbage) + fall back to True. + + Arguments: + raw: The raw cell value from the forceVersion manifest column. + + Returns: + True if a new version should be forced, False otherwise. + """ + if raw == "" or raw is None: + return True + if isinstance(raw, bool): + return raw + parsed = bool_or_none(str(raw)) + return parsed if parsed is not None else True + + +def _build_annotations_for_file( + manifest_annotations: dict[str, object], +) -> dict[str, list[SynapseAnnotationType]]: + """Pull annotations out of the manifest format into the client's internal format. + + Annotation values that are empty strings or None are omitted. All values + are returned as lists to match the Synapse annotation storage model, where + every annotation key maps to a list of values. + + Arguments: + manifest_annotations: A dict mapping annotation key to raw cell value as + read from the manifest DataFrame row. + + Returns: + A dict mapping annotation key to a list of converted Python values. + String values are passed through _parse_annotation_cell. + Non-string values (e.g. an int or float that pandas inferred from the + CSV) are wrapped in a single-element list. + """ + file_annotations = {} + for annotation_key, annotation_value in manifest_annotations.items(): + if annotation_value is None or annotation_value == "": + continue + if isinstance(annotation_value, str): + file_annotations[annotation_key] = _parse_annotation_cell( + cell=annotation_value + ) + else: + file_annotations[annotation_key] = [annotation_value] + return file_annotations + + +def _parse_annotation_cell( + cell: str, +) -> list[SynapseAnnotationType]: + """Convert a raw manifest CSV cell string into a typed list of annotation values. + + pandas.read_csv returns every cell as a string, but Synapse annotations + are typed (int, float, datetime, bool, str). This function parses a single + cell and returns the correctly-typed Python values so that annotations + round-trip faithfully through a manifest file. + + Multi-value cells are expressed in the manifest as a bracket-delimited, + comma-separated string (e.g. "[a, b, c]"). Single-value cells are + treated as a one-element list. The return is always a list so callers + never need to branch on scalar-vs-list. Empty values (e.g. from + "[a, , c]") are silently dropped. + + Arguments: + cell: A single cell string read from a manifest CSV. + + Returns: + A list of converted values. A plain scalar cell returns a one-element + list; a bracket-delimited cell returns one element per non-empty value. + """ + cell = cell.strip() + raw_values = ( + _split_csv_cell(cell[1:-1]) if _ARRAY_BRACKET_PATTERN.match(cell) else [cell] + ) + return [_convert_value(value) for value in raw_values if value.strip()] + + +def _convert_value(value: str) -> SynapseAnnotationType: + """Convert a single non-empty string token to its most specific Python type. + + The conversion order matters: each step is tried only if the previous one + returned no match. + + 1. datetime — tried first because date strings like "2024-01-01" + are also valid ast.literal_eval strings (they parse as subtraction + expressions), so datetime must win. + 2. bool — tried before ast.literal_eval because "true" and + "false" (lowercase) are not valid Python literals and would fall + through to a raw string if literal_eval ran first, giving inconsistent + results for "true" vs "True". + 3. int / float — parsed via ast.literal_eval. bool results are + excluded here because step 2 already handled them (bool is a subclass + of int, so without the exclusion "True" would come back as 1). + 4. raw string — returned unchanged when no conversion matched. + + Arguments: + value: A non-empty string token from a manifest cell. + + Returns: + The token as a datetime.datetime, bool, int, float, or + the original str if no conversion matched. + """ + datetime_ = datetime_or_none(value) + if datetime_ is not None: + return datetime_ + + bool_ = bool_or_none(value) + if bool_ is not None: + return bool_ + + literal_ = _parse_literal(value) + if literal_ is not None: + return literal_ + + return value + + +def _parse_literal(value: str) -> int | float | str | None: + """Try to parse value as a scalar Python literal via ast.literal_eval. + + Why str is accepted: bracket-array cells like ["foo bar", "baz"] + are split into tokens such as '"foo bar"' — a string that is itself a + quoted Python string literal. ast.literal_eval strips the outer quotes + and returns "foo bar". Plain unquoted strings (e.g. "hello") are + not valid Python literals, so literal_eval raises ValueError and + the raw string is returned by _convert_value instead. + + Why bool is excluded: bool is a subclass of int, so + ast.literal_eval("True") returns True and passes the + isinstance(parsed, int) check. Without the exclusion, "True" would + come back as the bool True from this function, but "true" (lowercase) + would not be a valid literal and would fall through to a raw string — an + inconsistency. bool_or_none in _convert_value handles both cases + uniformly before this function is ever called. + + Why complex types are rejected: tuples, lists, and dicts are valid Python + literals but are not valid Synapse annotation value types. + + Arguments: + value: A string token to parse. + + Returns: + An int, float, or str if the token is a recognized scalar + literal; None if parsing fails or produces an unsupported type. + """ + try: + parsed = ast.literal_eval(value) + if isinstance(parsed, (int, float, str)) and not isinstance(parsed, bool): + return parsed + except (ValueError, SyntaxError): + pass + return None + + +async def upload_sync_files(files: list[UploadSyncFile], syn: Synapse) -> list[File]: + """Upload files to Synapse concurrently in an order that honours + interdependent provenance dependencies. + + Arguments: + files: The list of UploadSyncFile items to upload. + syn: Authenticated Synapse client. + + Returns: + List of File entities that were created or updated, in the same + order as the dependency-graph task execution. + + Raises: + ValueError: If a provenance reference points to a local file not + in the upload batch, or if a provenance item is not a valid + Synapse ID. + RuntimeError: If prerequisite upload tasks fail to complete. + """ + plan = _build_upload_plan(items=list(files)) + tasks = _create_upload_tasks(upload_plan=plan, syn=syn) + results = await asyncio.gather(*tasks) + return list(results) + + +@dataclass +class _UploadPlan: + """Topologically sorted upload plan built from manifest provenance dependencies. + + Attributes: + path_to_dependencies: Maps each file path to the list of file paths that + must be uploaded before it (i.e. its provenance dependencies). + path_to_upload_item: Maps each file path to its UploadSyncFile, ordered + by the resolved dependency sort. + path_to_file_check: Cache of os.path.isfile results for provenance + references encountered during dependency resolution. + """ + + path_to_dependencies: dict[str, list[str]] + path_to_upload_item: dict[str, UploadSyncFile] + path_to_file_check: dict[str, bool] + + +def _build_upload_plan( + items: list[UploadSyncFile], +) -> _UploadPlan: + """Determine the order in which files should be uploaded, given that some + files depend on others via provenance. + + A manifest CSV can declare that file B was derived from file A + (provenance). If B is uploaded before A, B's provenance record cannot + reference A's Synapse ID because A does not have one yet. This + function ensures the upload order respects those constraints. + + Steps: + 1. Build a dependency graph. For each file in the upload batch, + scan its used and executed provenance references. If a + reference points to a local file that is also being uploaded, + that is a dependency edge. If it points to a local file that + is not in the batch, that is an error. + 2. Topologically sort the graph. This produces an ordering where + every file comes after its dependencies, guaranteeing that by + the time B is uploaded, A already has a Synapse ID. + 3. Package the result into an _UploadPlan. + + Arguments: + items: The list of items to upload. + + Returns: + An _UploadPlan containing the topologically sorted dependency map + and the upload items keyed by file path. + + Raises: + ValueError: If a provenance reference points to a local file + that is not part of the upload batch. + """ + items_by_path = {i.entity.path: i for i in items} + file_check_cache: dict[str, bool] = {} + + graph: dict[str, list[str]] = {} + for item in items: + graph[item.entity.path] = _resolve_file_dependencies( + item, items_by_path, file_check_cache + ) + + graph_sorted = topolgical_sort(graph) + path_to_dependencies_sorted = {path: deps for path, deps in graph_sorted} + path_to_upload_items_sorted = { + path: items_by_path[path] for path in path_to_dependencies_sorted + } + + return _UploadPlan( + path_to_dependencies=path_to_dependencies_sorted, + path_to_upload_item=path_to_upload_items_sorted, + path_to_file_check=file_check_cache, + ) + + +def _resolve_file_dependencies( + item: UploadSyncFile, + items_by_path: dict[str, UploadSyncFile], + file_check_cache: dict[str, bool], +) -> list[str]: + """Return local-file provenance paths that this item depends on. + + Arguments: + item: The upload item whose provenance references to resolve. + items_by_path: All items in the upload batch keyed by file path. + file_check_cache: Mutable cache of os.path.isfile results, + updated in place for any new paths encountered. + + Returns: + A list of absolute file paths from the upload batch that this + item depends on via provenance. + + Raises: + ValueError: If a provenance reference points to a local file + that is not part of the upload batch. + """ + deps: list[str] = [] + for ref in item.used + item.executed: + # File objects (already in Synapse) are not local-path + # dependencies — skip them in the dependency graph. + if not isinstance(ref, str): + continue + if ref not in file_check_cache: + file_check_cache[ref] = os.path.isfile(ref) + if file_check_cache[ref]: + if ref not in items_by_path: + raise ValueError( + f"{item.entity.path} depends on" + f" {ref} which is not being uploaded" + ) + deps.append(ref) + return deps + + +def _create_upload_tasks( + upload_plan: _UploadPlan, + syn: Synapse, +) -> list[asyncio.Task]: + """Build an asyncio task graph that uploads files concurrently while + honouring provenance dependencies. + + The manifest may declare that file B was derived from file A + (provenance). A must be uploaded before B so that B's provenance + record can reference A's Synapse ID. Files with no dependency + relationship upload concurrently. + + The function iterates over the upload plan's dependency map (already + topologically sorted, so dependencies always appear before + dependents). For each file it: + + 1. Collects the already-created asyncio.Task objects for its + prerequisites -- these are guaranteed to exist because of the + topological ordering. + 2. Creates a new asyncio.Task wrapping _upload_file_async, passing + the prerequisite tasks in. That function calls asyncio.wait() on + them before uploading, so the file will not start uploading until + its dependencies finish. + 3. Stores the new task so later files can reference it as a + prerequisite. + + The returned list can be passed directly to asyncio.gather(). The + concurrency constraints are encoded inside the tasks themselves + (each one awaits its own prerequisites), so gather fires them all + off but they naturally serialize where needed. + + Example: if A has no deps, B depends on A, and C has no deps: + - A and C start uploading immediately (in parallel). + - B's task starts but immediately awaits A's task. + - Once A finishes, B proceeds with A's Synapse ID available for + its provenance record. + + Arguments: + upload_plan: The topologically sorted upload plan produced by + _build_upload_plan. + syn: Authenticated Synapse client. + + Returns: + A list of asyncio tasks, one per file, that can be passed to + asyncio.gather for concurrent execution. + """ + created_tasks_by_path: dict[str, asyncio.Task] = {} + + for file_path, prerequisite_paths in upload_plan.path_to_dependencies.items(): + # Topological sort guarantees every prerequisite was already created. + prerequisite_tasks = [created_tasks_by_path[p] for p in prerequisite_paths] + + upload_item = upload_plan.path_to_upload_item[file_path] + file_task = asyncio.create_task( + _upload_file_async( + file_entity=upload_item.entity, + used=upload_item.used, + executed=upload_item.executed, + activity_name=upload_item.activity_name, + activity_description=upload_item.activity_description, + prerequisite_tasks=prerequisite_tasks, + syn=syn, + ) + ) + created_tasks_by_path[file_path] = file_task + + return list(created_tasks_by_path.values()) + + +def _build_activity_linkage( + used_or_executed: Iterable[str | File], + resolved_file_ids: dict[str, str], +) -> list[UsedEntity | UsedURL]: + """Convert raw provenance references into typed Synapse objects (UsedEntity + or UsedURL) that the Activity model expects. + + Each item in the input list is one of two things: + + 1. A File object -- already resolved from a prior provenance pass. Its + id is extracted and wrapped in a UsedEntity. + 2. A string -- delegated to _resolve_linkage_item, which checks in + priority order: + - A local file path that was just uploaded (found in + resolved_file_ids, mapped to a Synapse ID, returned as UsedEntity). + - A URL (returned as UsedURL). + - A Synapse ID like syn123 or syn123.4 (parsed into ID + optional + version, returned as UsedEntity). + - If none match, raises ValueError. + + The resolved_file_ids dict is built by _upload_file_async after + prerequisite uploads finish, mapping each uploaded file's local path to + the Synapse ID it received. This is how provenance references between + files in the same manifest batch get wired up: file A uploads first and + gets syn111, then when file B (which declares used: /path/to/A) uploads, + resolved_file_ids maps /path/to/A to syn111. + + Arguments: + used_or_executed: The list of used or executed items. Each item is either + a File object (already resolved from provenance), or a string that is + a local file path, URL, or Synapse ID. + resolved_file_ids: A dictionary that maps the local path of a file to the + Synapse ID it received after upload. Populated by _upload_file_async + once prerequisite uploads complete. + + Returns: + A list of UsedEntity or UsedURL objects. + + Raises: + ValueError: If a string item is not a resolved file path, a URL, or a + valid Synapse ID. + """ + from synapseclient.models import UsedEntity + + return [ + ( + UsedEntity(target_id=item.id) + if not isinstance(item, str) + else _resolve_linkage_item(item, resolved_file_ids) + ) + for item in used_or_executed + ] + + +def _resolve_linkage_item( + item: str, + resolved_file_ids: dict[str, str], +) -> UsedEntity | UsedURL: + """Resolve a single string provenance reference to a UsedEntity or UsedURL. + + Arguments: + item: A string provenance reference — a local file path present in + resolved_file_ids, a URL, or a Synapse ID. + resolved_file_ids: Maps local file paths to their Synapse IDs (populated + after prerequisite uploads complete). + + Returns: + A UsedEntity if the item resolves to a Synapse ID (either via + resolved_file_ids or directly), or a UsedURL if the item is a URL. + + Raises: + ValueError: If the item is not a resolved file path, a URL, or a + valid Synapse ID. + """ + from synapseclient.models import UsedEntity, UsedURL + + resolved_file_id = resolved_file_ids.get(item) + if resolved_file_id: + return UsedEntity(target_id=resolved_file_id) + if is_url(item): + return UsedURL(url=item) + if not is_synapse_id_str(item): + raise ValueError(f"{item} is not a valid Synapse id") + syn_id, version = get_synid_and_version(item) + return UsedEntity( + target_id=syn_id, + target_version_number=int(version) if version else None, + ) + + +async def _upload_file_async( + file_entity: File, + used: Iterable[str | File], + executed: Iterable[str | File], + activity_name: str, + activity_description: str, + prerequisite_tasks: list[asyncio.Task], + syn: Synapse, +) -> File: + """Upload a single file, waiting for any provenance dependencies to finish first. + + This function is invoked as an asyncio.Task by _create_upload_tasks. Many + instances run concurrently, but each one self-serializes by awaiting only + its specific prerequisites. Files with no dependencies start uploading + immediately in parallel. + + The flow: + + 1. Wait for prerequisites -- if this file declares provenance on other + files in the same manifest batch (e.g. "file B was derived from + file A"), those files must be uploaded first so they have Synapse IDs. + asyncio.wait blocks until all prerequisite upload tasks finish, then + a path-to-Synapse-ID mapping is collected from their results. + 2. Build provenance linkages -- converts the raw used and executed + references (local paths, URLs, Synapse IDs, or File objects) into + typed UsedEntity/UsedURL objects. Local paths are resolved to Synapse + IDs using the mapping from step 1. + 3. Attach Activity -- if any provenance references exist, creates an + Activity with the name, description, and linkages, and attaches it + to the file. + 4. Store -- calls file_entity.store_async() to perform the actual upload. + 5. Return -- the returned File (now with a Synapse ID) becomes available + to downstream tasks that depend on it via the resolved mapping. + + Arguments: + file_entity: The File entity to upload. + used: Provenance used references (paths, URLs, Synapse IDs, or File + objects). + executed: Provenance executed references. + activity_name: Name for the provenance Activity. + activity_description: Description for the provenance Activity. + prerequisite_tasks: Tasks for files that must be uploaded before this one. + syn: Authenticated Synapse client. + + Returns: + The stored File entity. + + Raises: + RuntimeError: If prerequisite tasks have not all completed. + ValueError: If a provenance item is not a resolved file ID, a URL, + or a valid Synapse ID. + """ + from synapseclient.models import Activity + + # Step 1: Wait for prerequisite uploads to finish and collect their + # Synapse IDs so provenance references can point to them. + resolved_file_ids: dict[str, str] = {} + if prerequisite_tasks: + finished_dependencies, pending = await asyncio.wait( + prerequisite_tasks, return_when=asyncio.ALL_COMPLETED + ) + # Defensive check: ALL_COMPLETED guarantees pending is empty, but + # guard against unexpected asyncio behavior or future refactors. + if pending: + raise RuntimeError( + f"There were {len(pending)} dependencies left when storing {file_entity}" + ) + for finished_dependency in finished_dependencies: + result: File = finished_dependency.result() + resolved_file_ids[result.path] = result.id + + # Step 2: Convert raw provenance references (local paths, URLs, Synapse + # IDs, File objects) into typed UsedEntity/UsedURL objects. Local paths + # are resolved to Synapse IDs using the mapping built in step 1. + used_activity = _build_activity_linkage( + used_or_executed=used, resolved_file_ids=resolved_file_ids + ) + executed_activity = _build_activity_linkage( + used_or_executed=executed, resolved_file_ids=resolved_file_ids + ) + + # Step 3: Attach an Activity to the file if provenance was declared. + if used_activity or executed_activity: + file_entity.activity = Activity( + name=activity_name, + description=activity_description, + used=used_activity, + executed=executed_activity, + ) + + # Step 4: Upload and return the file (now with a Synapse ID). + await file_entity.store_async(synapse_client=syn) + return file_entity + + +def _split_csv_cell(input_string: str) -> list[str]: + """Split a string on commas that are not inside double quotes. + + Arguments: + input_string: A string to split apart. + + Returns: + The list of split items as strings. + """ + parts = _COMMAS_OUTSIDE_DOUBLE_QUOTES_PATTERN.split(input_string) + return [item.strip() for item in parts] + + +async def _resolve_row( + path: str, row: Series, frame: DataFrame, client: Synapse +) -> tuple[str, dict[str, list[str | File]]]: + """Resolve provenance columns for a single manifest row. + + Arguments: + path: The file path for this row (used as its manifest key). + row: The pandas Series for this row. + frame: The full manifest DataFrame (path-indexed), passed through to + _resolve_provenance_column for cross-row lookups. + client: Authenticated Synapse client. + + Returns: + A (path, resolved) tuple where resolved maps column names + (used and/or executed) to their resolved reference lists. + + Raises: + SynapseProvenanceError: If a provenance item is neither a local file + path, a URL, nor a valid Synapse ID. + """ + resolved: dict[str, list[str | File]] = {} + for col in ("used", "executed"): + if col in row: + resolved[col] = await _resolve_provenance_column( + row[col], path, client, frame + ) + return path, resolved + + +async def _resolve_provenance_column( + cell: str | list[str | File], + path: str, + syn: Synapse, + df: DataFrame, +) -> list[str | File]: + """Parse and resolve all provenance references in a single manifest cell. + + Handles cells that are already Python lists (converted by + _parse_annotation_cell) as well as raw + semicolon-delimited strings from the CSV. Each item is validated and + resolved via _check_provenance. + + Arguments: + cell: The raw cell value from the used or executed column — + either a semicolon-delimited string or an already-parsed list. + path: The manifest file path of the row that owns this cell. Used only + for error messages. + syn: Authenticated Synapse client. + df: The manifest DataFrame (path-indexed), used to check whether a + local file path is part of the current upload batch. + + Returns: + A list of resolved provenance references. + + Raises: + SynapseProvenanceError: If a provenance item is neither a local file + path, a URL, nor a valid Synapse ID. + """ + items: list[str | File] + if isinstance(cell, list): + items = cell + else: + items = list(cell.split(";")) if cell.strip() != "" else [] + + resolved = await asyncio.gather( + *[ + _resolve_provenance_item( + item.strip() if isinstance(item, str) else item, + owner_path=path, + syn=syn, + df=df, + ) + for item in items + ] + ) + return [item for item in resolved if item is not None] + + +async def _resolve_provenance_item( + item: str | File | None, owner_path: str, syn: Synapse, df: DataFrame +) -> str | File | None: + """Resolve a single provenance reference to its normalized form. + + Items that are already resolved (None, non-string File objects, URLs, + Synapse IDs) are returned as-is. Local file paths are handed off to + _resolve_local_file_provenance for batch/Synapse lookup. + + Arguments: + item: A provenance reference — a local file path, a URL, a Synapse + ID, a File object from a prior resolution pass, or None. + owner_path: The manifest file path of the file that declares this + provenance reference. Used only for error messages. + syn: Authenticated Synapse client. + df: The manifest DataFrame (path-indexed), used to check whether a + local file path is part of the current upload batch. + + Returns: + The resolved reference: a str (absolute local path, URL, or Synapse + ID), a File (looked up via MD5), or None. + + Raises: + SynapseProvenanceError: If the item is a local file that is neither + being uploaded nor found in Synapse. + """ + if item is None or not isinstance(item, str): + return item + + if is_url(item) or is_synapse_id_str(item) is not None: + return item + + return await _resolve_local_file_provenance(item, owner_path, syn, df) + + +async def _resolve_local_file_provenance( + raw_path: str, owner_path: str, syn: Synapse, manifest_by_path: DataFrame +) -> str | File: + """Resolve a local file path to either an in-batch path or a Synapse File. + + Given a manifest row that declares provenance on a local file, this + function determines where that file is: + + 1. If the file does not exist on disk, the provenance reference is + broken and a SynapseProvenanceError is raised. + 2. If the file is in the current upload batch (present in + manifest_by_path.index), its absolute path is returned as a string. + The Synapse ID will be resolved later, after that file is uploaded. + 3. If the file exists on disk but is not being uploaded, it is looked + up in Synapse by MD5 hash. If found, the File object is returned + so its Synapse ID can be used for provenance. If not found, a + SynapseProvenanceError is raised because the reference cannot be + linked. + + Arguments: + raw_path: A local file path string from a provenance cell, not yet + expanded or normalized. + owner_path: The manifest file path of the file that declares this + provenance reference. Used only for error messages. + syn: Authenticated Synapse client. + manifest_by_path: The manifest DataFrame indexed by absolute file path. + + Returns: + str — the absolute path if the file is in the upload batch. + File — a Synapse File model if the file already exists in Synapse. + + Raises: + SynapseProvenanceError: If the path does not exist on disk, or the + file is neither in the upload batch nor found in Synapse. + """ + from synapseclient.models.file import File + + absolute_path = _expand_path(raw_path) + + if not os.path.isfile(absolute_path): + raise SynapseProvenanceError( + f"The provenance record for file: {owner_path} is incorrect.\n" + f"Specifically {raw_path} is not an existing file path, a valid URL, or a Synapse ID." + ) + + if absolute_path in manifest_by_path.index: + return absolute_path + + try: + return await File.from_path_async(path=absolute_path, synapse_client=syn) + except SynapseFileNotFoundError as e: + raise SynapseProvenanceError( + f"The provenance record for file: {owner_path} is incorrect.\n" + f"Specifically {absolute_path} is not being uploaded and is not in Synapse." + ) from e + + +GENERATED_MANIFEST_COLUMNS = ["path", "parentId"] + + +class ManifestRow(TypedDict): + """Shape of a single row in the generated manifest CSV. + + Keys mirror GENERATED_MANIFEST_COLUMNS so the dict can be passed + directly to csv.DictWriter without conversion. + """ + + path: str + parentId: str + + +async def generate_sync_manifest( + directory_path: str, + parent_id: str, + manifest_path: str, + *, + synapse_client: Synapse | None = None, +) -> None: + """Walk a local directory, mirror its folder hierarchy under parent_id in + Synapse, and write a CSV manifest ready for sync_to_synapse_async. + + The generated manifest has two columns: path (always an absolute, + symlink-resolved path — directory_path is resolved via os.path.realpath + before walking, so the manifest can be consumed from any working + directory and remains valid even if the original symlink is later + removed or re-pointed) and parentId (the Synapse ID of the file's + containing folder). Folders that already exist in Synapse with the same + name and parent are reused rather than re-created. + + Sibling folders at the same depth are created concurrently to reduce + latency on wide trees. Directories and files are traversed in sorted + order so manifest output is deterministic across runs and platforms. + Directory symlinks encountered inside directory_path are not followed. + File symlinks are not pruned: the symlink's path is recorded in the + manifest, and the underlying target's contents are uploaded when the + manifest is consumed. The root directory_path itself may be a symlink; + if so, it is resolved to its target and the target is walked. Zero-byte + files are skipped with a warning, since Synapse rejects empty files. + I/O errors raised by os.walk (for example, unreadable subdirectories) + are logged and skipped. If no uploadable files are found under + directory_path, a warning is logged and a header-only manifest is + written. If a folder store call fails, the exception propagates and no + manifest is written; any folders already created remain in Synapse and + will be reused on a retry. + + Arguments: + directory_path: Path to the local directory to be pushed to Synapse. + parent_id: Synapse ID of the Folder or Project to mirror the + directory hierarchy under. + manifest_path: Path where the generated manifest CSV will be written. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last + created instance from the Synapse class constructor. + + Returns: + None + + Raises: + ValueError: If the parent directory of manifest_path does not exist, + if manifest_path is an existing directory, if directory_path + does not exist or is not a directory, or if parent_id exists in + Synapse but is not a Folder or Project. + SynapseHTTPError: If parent_id does not exist in Synapse. + """ + client = Synapse.get_client(synapse_client=synapse_client) + + manifest_path = _expand_path(manifest_path) + manifest_parent = os.path.dirname(manifest_path) or "." + if not os.path.isdir(manifest_parent): + raise ValueError(f"Manifest output directory does not exist: {manifest_parent}") + if os.path.isdir(manifest_path): + raise ValueError( + f"Manifest output path is an existing directory, not a file: {manifest_path}" + ) + directory_path = _resolve_and_validate_directory_path(directory_path) + await _validate_target_container_async(parent_id, client=client) + + rows = await _collect_manifest_rows_async(directory_path, parent_id, client) + + if not rows: + client.logger.warning( + f"No uploadable files found under {directory_path};" + " generated manifest contains only the header row." + ) + _write_manifest_csv(manifest_path, rows) + + +def _resolve_and_validate_directory_path(directory_path: str) -> str: + """Resolve symlinks on directory_path and verify it is an existing + directory. + + Uses realpath (not abspath) so that if directory_path is itself a + symlink, the manifest records paths under the resolved target. That + keeps the manifest valid if the original symlink is later removed or + re-pointed. + + Arguments: + directory_path: Path to validate. + + Returns: + The realpath-resolved absolute directory path. + + Raises: + ValueError: If the resolved path does not exist or is not a + directory. + """ + directory_path = os.path.realpath(_expand_path(directory_path)) + if not os.path.isdir(directory_path): + raise ValueError(f"{directory_path} is not a directory or does not exist") + return directory_path + + +async def _validate_target_container_async(parent_id: str, client: Synapse) -> None: + """Verify that parent_id resolves to a Folder or Project in Synapse. + + Raises: + ValueError: If parent_id exists but is not a Folder or Project. + """ + container = await get_async( + synapse_id=parent_id, + file_options=FileOptions(download_file=False), + synapse_client=client, + ) + + from synapseclient.models import Folder, Project + + if not isinstance(container, (Folder, Project)): + raise ValueError(f"Container {parent_id} is not a Folder or Project") + + +async def _collect_manifest_rows_async( + directory_path: str, parent_id: str, client: Synapse +) -> list[ManifestRow]: + """Walk directory_path and produce manifest rows for every uploadable file. + + Orchestrates the layer between os.walk (filesystem traversal) and the + Synapse folder/file model. Called once by generate_sync_manifest, after + path validation and before _write_manifest_csv. + + Algorithm: + + 1. Initialize a local-to-Synapse parent map seeded with + directory_path → parent_id. Subfolder IDs are added as they are + created so descendants can look up their Synapse parent. + 2. Walk top-down with os.walk. Top-down order matters — a directory's + parent is always yielded (and registered) before the directory + itself is visited. Walk errors (permission denied, broken paths) + are logged via _log_walk_error rather than aborting traversal. + + For each directory: + + 3. Prune symlinks and sort dirnames in place via + _prune_symlinks_and_sort_dirnames so Synapse folders aren't created + for directories os.walk(followlinks=False) won't visit, and + descent is deterministic. + 4. Look up current_parent_id from the map (guaranteed present by the + top-down invariant). + 5. Create sibling folders concurrently via _create_child_folders_async + (asyncio.gather over Folder.store_async). Existing Synapse folders + with the same name and parent are reused, not duplicated. + 6. Register each new folder's Synapse ID in the map keyed by its + absolute local path so step 4 succeeds when os.walk descends into + it on a later iteration. + 7. Build manifest rows for files in the current directory via + _build_manifest_rows. Unreadable and zero-byte files are filtered + out with warnings inside that helper. + + Side effect vs return value: the return value is the flat list of + ManifestRow entries, but the side effect — creating the Synapse + folder hierarchy under parent_id to mirror the local tree — is what + makes the rows usable. Without it the parentId values in the rows + would point at folders that don't exist yet. + + Async because _create_child_folders_async hits the Synapse API and + parallelizes sibling-folder creation. The rest (walking, sorting, + building rows) is local I/O and would otherwise be sync. + + Arguments: + directory_path: Realpath-resolved local directory to walk. + parent_id: Synapse ID of the container that maps to directory_path. + client: Authenticated Synapse client. + + Returns: + A list of ManifestRow entries, one per uploadable file. + """ + rows: list[ManifestRow] = [] + # Step 1: seed the local-to-Synapse parent map with the root mapping. + local_to_synapse_parent: dict[str, str] = {directory_path: parent_id} + + # Step 2: walk the input dir. + for dirpath, dirnames, filenames in os.walk( + directory_path, onerror=functools.partial(_log_walk_error, client) + ): + # Step 3: prune symlinked dirs and sort in place + _prune_symlinks_and_sort_dirnames(dirnames, dirpath) + + # Step 4: look up the Synapse parent for the current local dir + current_parent_id = local_to_synapse_parent[dirpath] + + # Step 5: create sibling folders concurrently. + created = await _create_child_folders_async( + parent_id=current_parent_id, dirnames=dirnames, client=client + ) + # Step 6: register each new folder's Synapse ID. + for dirname, folder in created.items(): + local_to_synapse_parent[os.path.join(dirpath, dirname)] = folder.id + + # Step 7: build rows for files in this directory and accumulate. + rows.extend(_build_manifest_rows(dirpath, filenames, current_parent_id, client)) + + return rows + + +def _log_walk_error(client: Synapse, err: OSError) -> None: + """Turn an os.walk I/O error from fatal into logged-and-skipped. + + By default os.walk silently ignores any OSError it hits while listing a + directory (e.g., permission denied, vanished symlink target, dead mount + point). Silent skipping is dangerous during manifest generation because + the user would get an incomplete manifest with no indication that some + subtree was missed. os.walk accepts an onerror callback that, if + provided, is invoked with the OSError instead — which is what + _collect_manifest_rows_async wires up via functools.partial. + + This callback logs a warning through the Synapse client's logger naming + the offending path (err.filename) and the underlying error message, then + returns. Returning normally (rather than raising) is the contract that + tells os.walk to keep going, so traversal continues into the rest of the + tree. + + The client argument is first only so the caller can bind it via + functools.partial, leaving the (err: OSError) signature os.walk requires. + The client is needed solely to reach client.logger. + + Net effect: unreadable directories produce a visible warning but do not + abort manifest generation; readable parts of the tree still produce + manifest rows. This mirrors _build_manifest_rows, which also + warns-and-skips on unreadable or zero-byte files rather than failing the + whole run. + + Arguments: + client: Authenticated Synapse client, used only for its logger. + err: The OSError raised by os.walk while listing a directory. + """ + client.logger.warning( + f"Skipping unreadable path during manifest generation:" + f" {err.filename} ({err})" + ) + + +def _prune_symlinks_and_sort_dirnames(dirnames: list[str], dirpath: str) -> None: + """Prune symlinked subdirectories and sort the rest in place. + + Mutates dirnames in place — the documented os.walk hook for both + pruning the traversal and forcing deterministic descent order. + Symlinked subdirectories are dropped so we don't create Synapse + folders for directories whose contents os.walk (followlinks=False) + won't visit. + + Arguments: + dirnames: The dirnames list yielded by os.walk for the current dirpath. + dirpath: The current directory being walked. + """ + dirnames[:] = [d for d in dirnames if not os.path.islink(os.path.join(dirpath, d))] + dirnames.sort() + + +async def _create_child_folders_async( + parent_id: str, dirnames: list[str], client: Synapse +) -> dict[str, Folder]: + """Create sibling folders concurrently under a shared Synapse parent. + + Sibling folders have no ordering dependency on each other, so they are + gathered in a single batch rather than awaited one at a time. Each task + returns its own dirname alongside the resulting Folder so the caller's + name-to-Folder mapping does not depend on asyncio.gather preserving + submission order. + + Returns: + A dict mapping each input dirname to the Folder that was created or + reused for it. + """ + from synapseclient.models.folder import Folder + + semaphore = asyncio.Semaphore(max(client.max_threads * 2, 1)) + + # Each task carries its dirname through to the result so the caller can + # build a name-to-Folder mapping without relying on asyncio.gather's + # input-order guarantee. + # Pair-tagged results remain correct under any completion order. + async def _store(dirname: str) -> tuple[str, Folder]: + async with semaphore: + folder = await Folder(name=dirname, parent_id=parent_id).store_async( + synapse_client=client + ) + return dirname, folder + + pairs = await asyncio.gather(*[_store(d) for d in dirnames]) + return dict(pairs) + + +def _build_manifest_rows( + dirpath: str, + filenames: Iterable[str], + parent_id: str, + client: Synapse, +) -> list[ManifestRow]: + """Build manifest rows for the uploadable files in a single directory. + + Called once per directory by _collect_manifest_rows_async during the + os.walk traversal. All files in a single call share the same parent_id + (the Synapse folder corresponding to dirpath). Sync because everything + it does is local I/O with no Synapse API calls; the async parent only + awaits when creating Synapse folders, not when scanning files. + + Filenames are sorted before iteration so output is deterministic across + runs and platforms (os.walk does not guarantee filename order). Each + name is joined with dirpath to form an absolute filepath, then filtered + through _is_uploadable_file, which logs and drops unreadable files + (broken symlinks, permission errors) and zero-byte files. + + Arguments: + dirpath: Absolute directory path currently being walked. + filenames: Filenames yielded by os.walk for dirpath. + parent_id: Synapse ID of the folder that maps to dirpath. + client: Authenticated Synapse client, used only for logging. + + Returns: + A list of ManifestRow entries (path, parentId) ready to be written + into the manifest CSV (matching GENERATED_MANIFEST_COLUMNS), one + per uploadable file. + """ + rows: list[ManifestRow] = [] + for filename in sorted(filenames): + filepath = os.path.join(dirpath, filename) + if _is_uploadable_file(filepath, client): + rows.append({"path": filepath, "parentId": parent_id}) + return rows + + +def _write_manifest_csv(manifest_path: str, rows: list[ManifestRow]) -> None: + """Write generated manifest rows to a CSV at manifest_path.""" + with open(manifest_path, "w", encoding="utf-8", newline="") as fp: + writer = csv.DictWriter(fp, fieldnames=GENERATED_MANIFEST_COLUMNS) + writer.writeheader() + writer.writerows(rows) + + +def _is_uploadable_file(filepath: str, client: Synapse) -> bool: + """Return True if filepath can be included in a generated manifest. + + Logs a warning and returns False for files that cannot be uploaded: + unreadable files (broken symlinks, permission errors, races) and + zero-byte files (rejected by Synapse). + """ + try: + size = os.stat(filepath).st_size + except OSError as err: + client.logger.warning( + f"Skipping unreadable file during manifest generation:" + f" {filepath} ({err})" + ) + return False + if size == 0: + client.logger.warning( + f"Skipping zero-byte file (empty files cannot be" + f" uploaded to Synapse): {filepath}" + ) + return False + return True diff --git a/synapseclient/models/services/migration.py b/synapseclient/models/services/migration.py new file mode 100644 index 000000000..60f880248 --- /dev/null +++ b/synapseclient/models/services/migration.py @@ -0,0 +1,1564 @@ +""" +Asynchronous service for indexing, and migrating entities between storage locations. + +This module provides native async implementations of the indexing and migration functionality +""" + +import asyncio +import collections.abc +import json +import os +import sqlite3 +import sys +import tempfile +import traceback +from typing import ( + TYPE_CHECKING, + Any, + AsyncGenerator, + Dict, + List, + Optional, + Set, + Tuple, + Union, +) + +from synapseclient import Synapse +from synapseclient.api import get_entity_type, rest_get_paginated_async +from synapseclient.api.entity_services import ( + get_children, + update_entity_file_handle_version, +) +from synapseclient.api.file_services import get_file_handle_for_download_async +from synapseclient.api.storage_location_services import get_storage_location_setting +from synapseclient.api.table_services import get_columns +from synapseclient.core import utils +from synapseclient.core.constants import concrete_types +from synapseclient.core.exceptions import SynapseError +from synapseclient.core.upload.multipart_upload import MAX_NUMBER_OF_PARTS +from synapseclient.core.upload.multipart_upload_async import multipart_copy_async +from synapseclient.core.utils import test_import_sqlite3 +from synapseclient.entity import Entity + +if TYPE_CHECKING: + from synapseclient.models import Table + +from synapseclient.models.table_components import ( + AppendableRowSetRequest, + PartialRow, + PartialRowSet, + TableUpdateTransaction, +) + +from .migration_types import ( + IndexingError, + MigrationError, + MigrationKey, + MigrationResult, + MigrationSettings, + MigrationStatus, + MigrationType, +) + +# Default part size for multipart copy (100 MB) +# we use a much larger default part size for part copies than we would for part uploads. +# with part copies the data transfer is within AWS so don't need to concern ourselves +# with upload failures of the actual bytes. +# this value aligns with what some AWS client libraries use e.g. +# https://github.com/aws/aws-sdk-java/blob/57ed2e4bd57e08f316bf5c6c71f6fd82a27fa240/aws-java-sdk-s3/src/main/java/com/amazonaws/services/s3/transfer/TransferManagerConfiguration.java#L46 +DEFAULT_PART_SIZE = 100 * utils.MB + +# Batch size for database operations so the batch operations are chunked. +BATCH_SIZE = 500 + + +# ============================================================================= +# Indexing Helper Functions +# ============================================================================= +async def _verify_storage_location_ownership_async( + storage_location_id: int, + *, + synapse_client: Optional[Synapse] = None, +) -> None: + """Verify the user owns the destination storage location. + Only the creator of the storage location can can retrieve it by its id. + + Arguments: + storage_location_id: The storage location ID to verify. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + + Raises: + ValueError: If the user does not own the storage location. + """ + try: + await get_storage_location_setting( + storage_location_id=storage_location_id, + synapse_client=synapse_client, + ) + except SynapseError: + raise ValueError( + f"Unable to verify ownership of storage location {storage_location_id}. " + f"You must be the creator of the destination storage location." + ) + + +def _get_default_db_path(entity_id: str) -> str: + """Generate a default temp database path for migration tracking. + + Arguments: + entity_id: The Synapse entity ID being migrated. + + Returns: + Path to a SQLite database file in a temp directory. + """ + temp_dir = tempfile.mkdtemp(prefix="synapse_migration_") + return os.path.join(temp_dir, f"migration_{entity_id}.db") + + +async def _get_version_numbers_async( + entity_id: str, + synapse_client: Optional[Synapse] = None, +) -> AsyncGenerator[int, None]: + """Get all version numbers for an entity. + + Arguments: + entity_id: The entity ID. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + + Yields: + Version numbers. + """ + async for version_info in rest_get_paginated_async( + f"/entity/{entity_id}/version", synapse_client=synapse_client + ): + yield version_info["versionNumber"] + + +def _escape_column_name(column: Union[str, collections.abc.Mapping]) -> str: + """Escape a column name for use in a Synapse table query statement. + Arguments: + column: A string column name or a dictionary with a 'name' key. + Returns: + Escaped column name wrapped in double quotes. + """ + from synapseclient.models import Column + + col_name = ( + column["name"] + if isinstance(column, collections.abc.Mapping) + else column.name if isinstance(column, Column) else str(column) + ) + escaped_name = col_name.replace('"', '""') + return f'"{escaped_name}"' + + +def _join_column_names(columns: List[Any]) -> str: + """Join column names into a comma-delimited list for table queries. + Arguments: + columns: A list of column names or column objects with 'name' keys. + Returns: + Comma-separated string of escaped column names. + """ + return ",".join(_escape_column_name(c) for c in columns) + + +def _check_indexed( + cursor: sqlite3.Cursor, + entity_id: str, + synapse_client: Optional[Synapse] = None, +) -> bool: + """Check if an entity has already been indexed. + If so, it can skip reindexing it. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + entity_id: The entity ID to check. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + + Returns: + True if the entity is already indexed. + """ + indexed_row = cursor.execute( + "select 1 from migrations where id = ?", (entity_id,) + ).fetchone() + + if indexed_row: + synapse_client.logger.debug(f"{entity_id} already indexed, skipping") + return True + + synapse_client.logger.debug(f"{entity_id} not yet indexed, indexing now") + return False + + +# ============================================================================= +# Database Helper Functions +# ============================================================================= +def _ensure_schema(cursor: sqlite3.Cursor) -> None: + """Ensure the SQLite database has the required schema. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + """ + # migration_settings table + # A table to store parameters used to create the index. + cursor.execute( + "CREATE TABLE IF NOT EXISTS migration_settings (settings TEXT NOT NULL)" + ) + + # Migrations table + # The representation of migratable file handles is flat including both file entities + # and table attached files, so not all columns are applicable to both. row id and col id + # are only used by table attached files. + cursor.execute(""" + CREATE TABLE IF NOT EXISTS migrations ( + id TEXT NOT NULL, + type INTEGER NOT NULL, + version INTEGER NULL, + row_id INTEGER NULL, + col_id INTEGER NULL, + parent_id NULL, + status INTEGER NOT NULL, + exception TEXT NULL, + from_storage_location_id NULL, + from_file_handle_id TEXT NULL, + to_file_handle_id TEXT NULL, + file_size INTEGER NULL, + PRIMARY KEY (id, type, row_id, col_id, version) + ) + """) + + # Index the status column for faster status-based lookups + cursor.execute("CREATE INDEX IF NOT EXISTS ix_status ON migrations(status)") + # Index the from_file_handle_id and to_file_handle_id columns for faster file handle-based lookups + # This is used to see if there is already a migrated copy of a file handle before doing a copy + cursor.execute( + "CREATE INDEX IF NOT EXISTS ix_file_handle_ids " + "ON migrations(from_file_handle_id, to_file_handle_id)" + ) + + +def _prepare_migration_db( + conn: sqlite3.Connection, + cursor: sqlite3.Cursor, + db_path: str, + root_id: str, + dest_storage_location_id: str, + source_storage_location_ids: List[str], + file_version_strategy: str, + include_table_files: bool, +) -> None: + """Prepare the migration database by checking the migration settings for the given parameters. + This is a guardrail: it binds a given SQLite index settings to the specific entity and migration options it was created with, enabling safe resumption and preventing mismatched reuse. + + Arguments: + conn: The connection to the SQLite database. + cursor: The cursor to the SQLite database. + db_path: Path to the SQLite database file. + root_id: The root entity ID being migrated. + dest_storage_location_id: Destination storage location ID. + source_storage_location_ids: List of source storage location IDs to filter. + file_version_strategy: Strategy for handling file versions. + include_table_files: Whether to include table-attached files. + """ + current_settings = MigrationSettings( + root_id=root_id, + dest_storage_location_id=dest_storage_location_id, + source_storage_location_ids=source_storage_location_ids, + file_version_strategy=file_version_strategy, + include_table_files=include_table_files, + ) + existing_settings = _retrieve_index_settings(cursor) + + if existing_settings: + current_settings.verify_migration_settings(existing_settings, db_path) + else: + cursor.execute( + "INSERT INTO migration_settings (settings) VALUES (?)", + (json.dumps(current_settings.to_dict()),), + ) + + conn.commit() + + +def _retrieve_index_settings(cursor: sqlite3.Cursor) -> Optional[MigrationSettings]: + """Retrieve index settings from the database as a MigrationSettings instance. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + + Returns: + MigrationSettings if a row exists, None otherwise. + """ + row = cursor.execute("SELECT settings FROM migration_settings").fetchone() + if row: + return MigrationSettings.from_dict(json.loads(row[0])) + return None + + +def _insert_file_migration( + cursor: sqlite3.Cursor, + insert_values: List[ + Tuple[str, str, Optional[int], Optional[str], int, str, int, MigrationStatus] + ], +) -> None: + """Insert a file migration entry to the migrations database. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + insert_values: List of tuples containing the file migration data. + """ + cursor.executemany( + """ + insert into migrations ( + id, + type, + version, + parent_id, + from_storage_location_id, + from_file_handle_id, + file_size, + status + ) values (?, ?, ?, ?, ?, ?, ?, ?) + """, + insert_values, + ) + + +def _insert_table_file_migration( + cursor: sqlite3.Cursor, + insert_values: List[ + Tuple[str, str, Optional[int], Optional[str], int, str, int, MigrationStatus] + ], +) -> None: + """Insert a table-attached file migration entry. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + insert_values: List of tuples containing the table-attached file migration data. + """ + cursor.executemany( + """ + INSERT OR IGNORE INTO migrations ( + id, type, row_id, col_id, version, parent_id, + from_storage_location_id, from_file_handle_id, + file_size, status + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + insert_values, + ) + + +def _mark_container_indexed( + cursor: sqlite3.Cursor, + entity_id: str, + migration_type: MigrationType, + parent_id: Optional[str], +) -> None: + """Mark a container (Project or Folder) as indexed. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + entity_id: The Synapse ID of the container entity. + migration_type: The MigrationType of the container. + parent_id: The Synapse ID of the parent entity. + """ + cursor.execute( + "INSERT OR IGNORE INTO migrations (id, type, parent_id, status) VALUES (?, ?, ?, ?)", + [entity_id, migration_type, parent_id, MigrationStatus.INDEXED.value], + ) + + +def _record_indexing_error( + cursor: sqlite3.Cursor, + entity_id: str, + migration_type: MigrationType, + parent_id: Optional[str], + tb_str: str, +) -> None: + """Record an indexing error in the database. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + entity_id: The Synapse ID of the entity that failed. + migration_type: The MigrationType of the entity. + parent_id: The Synapse ID of the parent entity. + tb_str: The traceback string. + """ + cursor.execute( + """ + insert into migrations ( + id, + type, + parent_id, + status, + exception + ) values (?, ?, ?, ?, ?) + """, + ( + entity_id, + migration_type, + parent_id, + MigrationStatus.ERRORED.value, + tb_str, + ), + ) + + +# ============================================================================= +# Migration Helper Functions +# ============================================================================= +def _check_file_handle_exists( + cursor: sqlite3.Cursor, from_file_handle_id: str +) -> Optional[str]: + """Check if a file handle has already been copied. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + from_file_handle_id: The source file handle ID. + + Returns: + The destination file handle ID if found, None otherwise. + """ + row = cursor.execute( + "SELECT to_file_handle_id FROM migrations WHERE from_file_handle_id = ? AND to_file_handle_id IS NOT NULL", + (from_file_handle_id,), + ).fetchone() + return row[0] if row else None + + +def _query_migration_batch( + cursor: sqlite3.Cursor, + last_key: MigrationKey, + pending_file_handle_ids: Set[str], + completed_file_handle_ids: Set[str], + limit: int, +) -> List[Dict[str, Any]]: + """Query the next batch of items to migrate. + + This matches the original synapseutils query logic: + - Forward progress through entities ordered by id, type, row_id, col_id, version + - Backtracking to pick up files with completed file handles that were skipped + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + last_key: The last processed MigrationKey. + pending_file_handle_ids: Set of file handle IDs currently being processed. + completed_file_handles: Set of file handles already completed. + limit: Maximum number of items to return. + + Returns: + List of migration entries as dictionaries. + """ + query_kwargs = { + "indexed_status": MigrationStatus.INDEXED.value, + "id": last_key.id, + "file_type": MigrationType.FILE.value, + "table_type": MigrationType.TABLE_ATTACHED_FILE.value, + "version": last_key.version, + "row_id": last_key.row_id, + "col_id": last_key.col_id, + "limit": limit, + } + + # Build the IN clauses for file handles + pending = "('" + "','".join(pending_file_handle_ids) + "')" + completed = "('" + "','".join(completed_file_handle_ids) + "')" + + # Query the next batch of items to migrate. + # 1. Forward progress: entities after the current position + # 2. Backtracking: entities before current position that share completed file handles + results = cursor.execute( + f""" + SELECT + id, + type, + version, + row_id, + col_id, + from_file_handle_id, + file_size + FROM migrations + WHERE + status = :indexed_status + AND ( + ( + ((id > :id AND type IN (:file_type, :table_type)) + OR (id = :id AND type = :file_type AND version IS NOT NULL AND version > :version) + OR (id = :id AND type = :table_type AND (row_id > :row_id OR (row_id = :row_id AND col_id > :col_id)))) + AND from_file_handle_id NOT IN {pending} + ) OR + ( + id <= :id + AND from_file_handle_id IN {completed} + ) + ) + ORDER BY + id, + type, + row_id, + col_id, + version + LIMIT :limit + """, # noqa + query_kwargs, + ) + + batch = [] + for row in results: + batch.append( + { + "id": row[0], + "type": row[1], + "version": row[2], + "row_id": row[3], + "col_id": row[4], + "from_file_handle_id": row[5], + "file_size": row[6], + } + ) + return batch + + +def _update_migration_database( + conn: sqlite3.Connection, + cursor: sqlite3.Cursor, + key: MigrationKey, + to_file_handle_id: str, + status: MigrationStatus, + exception: Optional[Exception] = None, +) -> None: + """Update a migration database record as successful or errored. + + Arguments: + conn: The connection to the SQLite database. + cursor: The cursor object from the connection to the SQLite database. + key: The migration key. + to_file_handle_id: The destination file handle ID. + status: The migration status. + exception: The exception that occurred. + """ + tb_str = ( + "".join( + traceback.format_exception( + type(exception), exception, exception.__traceback__ + ) + ) + if exception + else None + ) + + update_sql = """ + UPDATE migrations SET + status = ?, + to_file_handle_id = ?, + exception = ? + WHERE + id = ? + AND type = ? + """ + update_args = [status, to_file_handle_id, tb_str, key.id, key.type.value] + for arg in ("version", "row_id", "col_id"): + arg_value = getattr(key, arg) + if arg_value is not None: + update_sql += "and {} = ?\n".format(arg) + update_args.append(arg_value) + else: + update_sql += "and {} is null\n".format(arg) + + cursor.execute(update_sql, tuple(update_args)) + + +def _confirm_migration( + cursor: sqlite3.Cursor, + dest_storage_location_id: str, + force: bool = False, + *, + synapse_client: Optional[Synapse] = None, +) -> bool: + """Confirm migration with user if in interactive mode. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + dest_storage_location_id: Destination storage location ID. + force: If running in an interactive shell, migration requires an interactice confirmation. + This can be bypassed by using the force=True option. Defaults to False. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + + Returns: + True if migration should proceed, False otherwise. + """ + + if force: + return True + + count = cursor.execute( + "SELECT count(*) FROM migrations WHERE status = ?", + (MigrationStatus.INDEXED.value,), + ).fetchone()[0] + + if count == 0: + synapse_client.logger.info("No items for migration.") + return False + + if sys.stdout.isatty(): + user_input = input( + f"{count} items for migration to {dest_storage_location_id}. Proceed? (y/n)? " + ) + return user_input.strip().lower() == "y" + else: + synapse_client.logger.info( + f"{count} items for migration. " + "force option not used, and console input not available to confirm migration, aborting. " + "Use the force option or run from an interactive shell to proceed with migration." + ) + return False + + +def _get_part_size(file_size: int) -> int: + """Calculate the part size for multipart copy. + + Arguments: + file_size: The file size in bytes. + + Returns: + The part size in bytes. + """ + import math + + # Ensure we don't exceed max parts + min_part_size = math.ceil(file_size / MAX_NUMBER_OF_PARTS) + return max(DEFAULT_PART_SIZE, min_part_size) + + +def _get_file_migration_status( + file_handle: Dict[str, Any], + source_storage_location_ids: List[str], + dest_storage_location_id: str, +) -> Optional[MigrationStatus]: + """ + Determine whether a file should be included in the migrations database + and return its migration status. + + Only S3 file handles are considered for migration. Other handle types + (e.g., external URLs) are ignored. + + A file is included according to the following rules: + - If the file is already stored in the destination location, it is included + and marked as ALREADY_MIGRATED. + - If `source_storage_location_ids` is provided, the file's current storage + location must be in that list to be included. + - If `source_storage_location_ids` is empty, all files not already at the + destination are included. + + Args: + file_handle: File handle metadata. + source_storage_location_ids: Storage location IDs that qualify as + migration sources. If empty, all source locations are considered. + dest_storage_location_id: Destination storage location ID. + + Returns: + MigrationStatus enum (ALREADY_MIGRATED, INDEXED) if the file should be included in the migrations database, or + None if the file should not be included in the migrations database. + """ + # Only S3 file handles can be migrated + if file_handle.concrete_type != concrete_types.S3_FILE_HANDLE: + return None + + current_storage_location_id = str(file_handle.storage_location_id) + + if current_storage_location_id == dest_storage_location_id: + return MigrationStatus.ALREADY_MIGRATED.value + + if source_storage_location_ids: + if current_storage_location_id not in source_storage_location_ids: + return None + + return MigrationStatus.INDEXED.value + + +# ============================================================================= +# Indexing Functions +# ============================================================================= +async def index_files_for_migration_async( + entity: Entity, + dest_storage_location_id: str, + db_path: Optional[str] = None, + *, + source_storage_location_ids: Optional[List[str]] = [], + file_version_strategy: str = "new", + include_table_files: bool = False, + continue_on_error: bool = False, + synapse_client: Optional[Synapse] = None, +) -> MigrationResult: + """Index files for migration to a new storage location. + + This is the first step in migrating files to a new storage location. This function itself does not modify the given entity but only update the migrations and migration_settings tables in the SQLite database. + After indexing, use `migrate_indexed_files_async` to perform the actual migration. + + Arguments: + entity: The Synapse entity to migrate (Project, Folder, File, or Table). If it is a container (a Project or Folder), its contents will be recursively indexed. + dest_storage_location_id: The destination storage location ID. + db_path: A path on disk where the SQLite index database will be created. Must be on a volume with enough space for metadata of all indexed contents. If not provided, a temporary directory will be created and the path will be returned in the MigrationResult object. + source_storage_location_ids: Optional list of source storage location IDs that will be migrated. If provided, files outside of one of the listed storage locations will not be indexed for migration. If not provided, then all files not already in the destination storage location will be indexed for migrated. + file_version_strategy: Strategy to migrate file versions: "new", "all", "latest", "skip". + - `new`: will create a new version of file entities in the new storage location, leaving existing versions unchanged + - `all`: all existing versions will be migrated in place to the new storage location + - `latest`: the latest version will be migrated in place to the new storage location + - `skip`: skip migrating file entities. use this e.g. if wanting to e.g. migrate table attached files in a container while leaving the files unchanged + + include_table_files: Whether to include files attached to tables. If False (default) then e.g. only + file entities in the container will be migrated and tables will be untouched. + continue_on_error: Whether any errors encountered while indexing an entity will be raised + or instead just recorded in the index while allowing the index creation + to continue. Defaults to False. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + + Returns: + A MigrationResult object that can be used to inspect the contents of the index or output the index to a CSV for manual inspection. + + Raises: + ValueError: If the file_version_strategy is invalid or if skipping both file entities and table attached files. + """ + test_import_sqlite3() + client = Synapse.get_client(synapse_client=synapse_client) + + # Validate parameters + valid_file_version_strategy = {"new", "all", "latest", "skip"} + if file_version_strategy not in valid_file_version_strategy: + raise ValueError( + f"Invalid file_version_strategy: {file_version_strategy}, " + f"must be one of {valid_file_version_strategy}" + ) + + if file_version_strategy == "skip" and not include_table_files: + raise ValueError( + "Skipping both file entities and table attached files, nothing to migrate" + ) + + # Verify ownership + await _verify_storage_location_ownership_async( + storage_location_id=dest_storage_location_id, + synapse_client=client, + ) + + entity_id = entity.id + + # Create database path if not provided + if db_path is None: + db_path = _get_default_db_path(entity_id) + + # Initialize database + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + try: + _ensure_schema(cursor) + _prepare_migration_db( + conn=conn, + cursor=cursor, + db_path=db_path, + root_id=entity_id, + dest_storage_location_id=dest_storage_location_id, + source_storage_location_ids=source_storage_location_ids, + file_version_strategy=file_version_strategy, + include_table_files=include_table_files, + ) + try: + await _index_entity_async( + conn=conn, + cursor=cursor, + entity=entity, + parent_id=None, + dest_storage_location_id=dest_storage_location_id, + source_storage_location_ids=source_storage_location_ids, + file_version_strategy=file_version_strategy, + include_table_files=include_table_files, + continue_on_error=continue_on_error, + synapse_client=client, + ) + except IndexingError as ex: + client.logger.exception( + f"Aborted due to failure to index entity {ex.entity_id} of type {ex.concrete_type}. " + "Use continue_on_error=True to skip individual failures." + ) + raise ex.__cause__ + finally: + conn.close() + + return MigrationResult(db_path=db_path, synapse_client=client) + + +# ============================================================================= +# Indexing Implementation +# ============================================================================= +async def _index_entity_async( + conn: sqlite3.Connection, + cursor: sqlite3.Cursor, + entity: Entity, + parent_id: Optional[str], + dest_storage_location_id: str, + source_storage_location_ids: List[str], + file_version_strategy: str, + include_table_files: bool, + continue_on_error: bool, + *, + synapse_client: Optional[Synapse] = None, +) -> None: + """Recursively index an entity and its children into migrations database. + + Arguments: + conn: The connection to the SQLite database. + cursor: The cursor object from the connection to the SQLite database. + entity: The Synapse entity object. + parent_id: The parent entity Synapse ID. + dest_storage_location_id: Destination storage location ID. + source_storage_location_ids: List of source storage locations. + file_version_strategy: Strategy for file versions. + include_table_files: Whether to include table-attached files. + continue_on_error: Whether to continue on errors. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + """ + entity_id = utils.id_of(entity) + retrieved_entity = await get_entity_type( + entity_id=entity_id, synapse_client=synapse_client + ) + concrete_type = retrieved_entity.type + + # Check if already indexed + is_indexed = _check_indexed(cursor, entity_id, synapse_client) + try: + if not is_indexed: + if concrete_type in ( + concrete_types.FILE_ENTITY, + concrete_types.RECORD_SET_ENTITY, + ): + if file_version_strategy != "skip": + await _index_file_entity_async( + cursor=cursor, + entity=entity, + parent_id=parent_id, + dest_storage_location_id=dest_storage_location_id, + source_storage_location_ids=source_storage_location_ids, + file_version_strategy=file_version_strategy, + synapse_client=synapse_client, + ) + + elif concrete_type == concrete_types.TABLE_ENTITY: + if include_table_files: + await _index_table_entity_async( + cursor=cursor, + entity_id=entity_id, + parent_id=parent_id, + dest_storage_location_id=dest_storage_location_id, + source_storage_location_ids=source_storage_location_ids, + synapse_client=synapse_client, + ) + + elif concrete_type in ( + concrete_types.FOLDER_ENTITY, + concrete_types.PROJECT_ENTITY, + ): + await _index_container_async( + conn=conn, + cursor=cursor, + entity_id=entity_id, + parent_id=parent_id, + dest_storage_location_id=dest_storage_location_id, + source_storage_location_ids=source_storage_location_ids, + file_version_strategy=file_version_strategy, + include_table_files=include_table_files, + continue_on_error=continue_on_error, + synapse_client=synapse_client, + ) + conn.commit() + + except IndexingError: + # this is a recursive function, we don't need to log the error at every level so just + # pass up exceptions of this type that wrap the underlying exception and indicate + # that they were already logged + raise + except Exception as ex: + if continue_on_error: + synapse_client.logger.warning(f"Error indexing entity {entity_id}: {ex}") + tb_str = "".join(traceback.format_exception(type(ex), ex, ex.__traceback__)) + migration_type = MigrationType.from_concrete_type(concrete_type).value + _record_indexing_error(cursor, entity_id, migration_type, parent_id, tb_str) + else: + raise IndexingError(entity_id, concrete_type) from ex + + +async def _index_file_entity_async( + cursor: sqlite3.Cursor, + entity: Entity, + parent_id: Optional[str], + dest_storage_location_id: str, + source_storage_location_ids: List[str], + file_version_strategy: str, + *, + synapse_client: Optional[Synapse] = None, +) -> None: + """Index a file entity for migration. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + entity: The Synapse entity object, a File. + parent_id: The parent entity Synapse ID. + dest_storage_location_id: Destination storage location ID. + source_storage_location_ids: List of source storage locations. + file_version_strategy: Strategy for file versions. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + """ + entity_id = utils.id_of(entity) + synapse_client.logger.info(f"Indexing file entity {entity_id}") + + entity_versions: List[Tuple[Any, Optional[int]]] = [] + + if file_version_strategy == "new": + entity_versions.append((entity, None)) + + elif file_version_strategy == "all": + from synapseclient.operations import FileOptions, get_async + + async for version in _get_version_numbers_async(entity_id, synapse_client): + entity = await get_async( + synapse_id=entity_id, + file_options=FileOptions(download_file=False), + synapse_client=synapse_client, + ) + entity_versions.append((entity, version)) + + elif file_version_strategy == "latest": + entity_versions.append((entity, entity.version_number)) + + insert_values = [] + for entity, version in entity_versions: + status = _get_file_migration_status( + entity.file_handle, source_storage_location_ids, dest_storage_location_id + ) + if status: + insert_values.append( + ( + entity_id, + MigrationType.FILE.value, + version, + parent_id, + entity.file_handle.storage_location_id, + entity.data_file_handle_id, + entity.file_handle.content_size, + status, + ) + ) + if insert_values: + _insert_file_migration(cursor, insert_values) + + +async def _get_table_file_handle_rows_async( + entity_id: str, + *, + synapse_client: Optional[Synapse] = None, +) -> List[Tuple[int, int, Dict[str, Any]]]: + """Get the table file handle rows for a given entity. + + Arguments: + entity_id: The table entity ID. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + + Returns: + A list of tuples containing the row ID, row version, and file handles. + """ + from synapseclient.models import Table + from synapseclient.models.file import FileHandle + + columns = await get_columns(table_id=entity_id, synapse_client=synapse_client) + file_handle_columns = [c for c in columns if c.column_type == "FILEHANDLEID"] + + if file_handle_columns: + file_column_select = _join_column_names(file_handle_columns) + results = await Table(id=entity_id).query_async( + query=f"select {file_column_select} from {entity_id}", + include_row_id_and_row_version=True, + synapse_client=synapse_client, + ) + for _, row in results.iterrows(): + file_handles = {} + # first two cols are row id and row version, rest are file handle ids from our query + row_id, row_version = row[:2] + + file_handle_ids = row[2:] + for i, file_handle_id in enumerate(file_handle_ids): + if file_handle_id: + col_id = file_handle_columns[i].id + + response = await get_file_handle_for_download_async( + file_handle_id=file_handle_id, + synapse_id=entity_id, + entity_type="TableEntity", + synapse_client=synapse_client, + ) + file_handle = FileHandle().fill_from_dict(response["fileHandle"]) + file_handles[col_id] = file_handle + yield row_id, row_version, file_handles + + +async def _index_table_entity_async( + cursor: sqlite3.Cursor, + entity_id: str, + parent_id: Optional[str], + dest_storage_location_id: str, + source_storage_location_ids: List[str], + *, + synapse_client: Optional[Synapse] = None, +) -> None: + """Index a table entity's file attachments for migration. + + Arguments: + cursor: The cursor object from the connection to the SQLite database. + entity_id: The Synapse ID of the table entity. + parent_id: The parent entity Synapse ID. + dest_storage_location_id: Destination storage location ID. + source_storage_location_ids: List of source storage locations to filter. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + """ + synapse_client.logger.info(f"Indexing table entity {entity_id}") + insert_values = [] + async for row_id, row_version, file_handles in _get_table_file_handle_rows_async( + entity_id=entity_id, synapse_client=synapse_client + ): + for col_id, file_handle in file_handles.items(): + status = _get_file_migration_status( + file_handle, source_storage_location_ids, dest_storage_location_id + ) + if status: + insert_values.append( + ( + entity_id, + MigrationType.TABLE_ATTACHED_FILE.value, + row_id, + col_id, + row_version, + parent_id, + file_handle.storage_location_id, + file_handle.id, + file_handle.content_size, + status, + ) + ) + if len(insert_values) % BATCH_SIZE == 0: + _insert_table_file_migration(cursor, insert_values) + insert_values.clear() + if insert_values: + _insert_table_file_migration(cursor, insert_values) + + +async def _index_container_async( + conn: sqlite3.Connection, + cursor: sqlite3.Cursor, + entity_id: str, + parent_id: Optional[str], + dest_storage_location_id: str, + source_storage_location_ids: List[str], + file_version_strategy: str, + include_table_files: bool, + continue_on_error: bool, + *, + synapse_client: Optional[Synapse] = None, +) -> None: + """Index a container (Project or Folder) and its children. + + Arguments: + conn: The connection to the SQLite database. + cursor: The cursor object from the connection to the SQLite database. + entity_id: The Synapse ID of the entity, a Project or Folder. + parent_id: The Synapse ID of the parent entity. + dest_storage_location_id: Destination storage location ID. + source_storage_location_ids: List of source storage locations to filter. + file_version_strategy: Strategy for file versions. + include_table_files: Whether to include table-attached files. + continue_on_error: Whether to continue on errors. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + """ + retrieved_entity = await get_entity_type( + entity_id=entity_id, synapse_client=synapse_client + ) + concrete_type = retrieved_entity.type + synapse_client.logger.info( + f'Indexing {concrete_type[concrete_type.rindex(".") + 1 :]} {entity_id}' + ) + + # Determine included types + include_types = [] + if file_version_strategy != "skip": + include_types.extend(["folder", "file"]) + if include_table_files: + include_types.append("table") + + # Get children using the async API + children = [] + async for child in get_children( + parent=entity_id, + include_types=include_types, + synapse_client=synapse_client, + ): + children.append(child) + + async def index_child(child: Dict[str, Any]) -> None: + from synapseclient.operations import get_async + + async with synapse_client._get_parallel_file_transfer_semaphore( + asyncio_event_loop=asyncio.get_running_loop() + ): + child_entity = await get_async( + synapse_id=child["id"], synapse_client=synapse_client + ) + + await _index_entity_async( + conn=conn, + cursor=cursor, + entity=child_entity, + parent_id=entity_id, + dest_storage_location_id=dest_storage_location_id, + source_storage_location_ids=source_storage_location_ids, + file_version_strategy=file_version_strategy, + include_table_files=include_table_files, + continue_on_error=continue_on_error, + synapse_client=synapse_client, + ) + + # Process children with as_completed for progress tracking + tasks = [asyncio.create_task(index_child(child)) for child in children] + for task in asyncio.as_completed(tasks): + await task + + # Mark container as indexed + migration_type = ( + MigrationType.PROJECT.value + if concrete_type == concrete_types.PROJECT_ENTITY + else MigrationType.FOLDER.value + ) + _mark_container_indexed(cursor, entity_id, migration_type, parent_id) + + +# ============================================================================= +# Migration Functions +# ============================================================================= +async def _migrate_item_async( + key: MigrationKey, + from_file_handle_id: str, + to_file_handle_id: Optional[str], + file_size: int, + dest_storage_location_id: str, + semaphore: asyncio.Semaphore, + *, + synapse_client: Optional[Synapse] = None, +) -> Dict[str, Any]: + """Migrate a single item. + + Arguments: + key: The migration key. + from_file_handle_id: The source file handle ID. + to_file_handle_id: The destination file handle ID (if already copied). + file_size: File size in bytes. + dest_storage_location_id: The destination storage location ID. + semaphore: The concurrency semaphore. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + + Returns: + Dictionary with the key, from_file_handle_id, and to_file_handle_id. + """ + async with semaphore: + try: + # copy to a new file handle if we haven't already + if not to_file_handle_id: + source_association = { + "fileHandleId": from_file_handle_id, + "associateObjectId": key.id, + "associateObjectType": ( + "FileEntity" + if key.type == MigrationType.FILE + else "TableEntity" + ), + } + + to_file_handle_id = await multipart_copy_async( + synapse_client, + source_association, + storage_location_id=dest_storage_location_id, + part_size=_get_part_size(file_size), + ) + # Update entity with new file handle + if key.type == MigrationType.FILE: + if key.version is None: + await _create_new_file_version_async( + entity_id=key.id, + to_file_handle_id=to_file_handle_id, + synapse_client=synapse_client, + ) + else: + await _migrate_file_version_async( + entity_id=key.id, + version=key.version, + from_file_handle_id=from_file_handle_id, + to_file_handle_id=to_file_handle_id, + synapse_client=synapse_client, + ) + elif key.type == MigrationType.TABLE_ATTACHED_FILE: + await _migrate_table_attached_file_async( + key=key, + to_file_handle_id=to_file_handle_id, + synapse_client=synapse_client, + ) + + return { + "key": key, + "from_file_handle_id": from_file_handle_id, + "to_file_handle_id": to_file_handle_id, + } + + except Exception as ex: + raise MigrationError( + key, from_file_handle_id, to_file_handle_id, cause=ex + ) from ex + + +async def _create_new_file_version_async( + entity_id: str, + to_file_handle_id: str, + *, + synapse_client: Optional[Synapse] = None, +) -> None: + """Create a new version of a file entity with the new file handle. + + Arguments: + entity_id: The file entity ID. + to_file_handle_id: The new file handle ID. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + """ + from synapseclient.operations import FileOptions, get_async + + synapse_client.logger.info(f"Creating new version for file entity {entity_id}") + + entity = await get_async( + synapse_id=entity_id, + file_options=FileOptions(download_file=False), + synapse_client=synapse_client, + ) + entity.data_file_handle_id = to_file_handle_id + await entity.store_async(synapse_client=synapse_client) + + +async def _migrate_file_version_async( + entity_id: str, + version: int, + from_file_handle_id: str, + to_file_handle_id: str, + *, + synapse_client: Optional[Synapse] = None, +) -> None: + """Migrate/update an existing file version with a new file handle. + + Arguments: + entity_id: The Synapse ID of the entity. + version: The version number of the entity. + from_file_handle_id: The original file handle ID. + to_file_handle_id: The new file handle ID. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + """ + await update_entity_file_handle_version( + entity_id=entity_id, + version=version, + old_file_handle_id=from_file_handle_id, + new_file_handle_id=to_file_handle_id, + synapse_client=synapse_client, + ) + + +async def _migrate_table_attached_file_async( + key: MigrationKey, + to_file_handle_id: str, + *, + synapse_client: Optional[Synapse] = None, +) -> None: + """Migrate/update a table attached file with a new file handle. + + Arguments: + key: The migration key. + to_file_handle_id: The new file handle ID. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + """ + partial_row = PartialRow( + row_id=str(key.row_id), + values=[{"key": str(key.col_id), "value": to_file_handle_id}], + ) + partial_row_set = PartialRowSet( + table_id=key.id, + rows=[partial_row], + ) + appendable_request = AppendableRowSetRequest( + entity_id=key.id, + to_append=partial_row_set, + ) + transaction = TableUpdateTransaction( + entity_id=key.id, + changes=[appendable_request], + ) + await transaction.send_job_and_wait_async(synapse_client=synapse_client) + + +async def track_migration_results_async( + conn: sqlite3.Connection, + cursor: sqlite3.Cursor, + active_tasks: Set[asyncio.Task], + pending_file_handles: Set[str], + completed_file_handles: Set[str], + pending_keys: Set[MigrationKey], + return_when: str, + continue_on_error: bool, +) -> None: + """Track the results of the migration tasks. + + Arguments: + conn: The connection to the SQLite database. + cursor: The cursor object from the connection to the SQLite database. + pending_file_handles: The set of pending file handles. + completed_file_handles: The set of completed file handles. + active_tasks: The set of active migration tasks. + pending_keys: The set of pending migration keys. + return_when: The return when condition for the asyncio.wait. + continue_on_error: Whether to continue on errors. + + Returns: + None + """ + done, _ = await asyncio.wait( + active_tasks, + return_when=return_when, + ) + active_tasks -= done + for completed_task in done: + to_file_handle_id = None + ex = None + try: + result = completed_task.result() + key = result["key"] + from_file_handle_id = result["from_file_handle_id"] + to_file_handle_id = result["to_file_handle_id"] + status = MigrationStatus.MIGRATED.value + completed_file_handles.add(from_file_handle_id) + + except MigrationError as migration_error: + key = migration_error.key + from_file_handle_id = migration_error.from_file_handle_id + ex = migration_error.__cause__ + status = MigrationStatus.ERRORED.value + completed_file_handles.add(from_file_handle_id) + + await asyncio.to_thread( + _update_migration_database, conn, cursor, key, to_file_handle_id, status, ex + ) + pending_keys.discard(key) + pending_file_handles.discard(from_file_handle_id) + + if not continue_on_error and ex: + # Commit whatever updates completed before raising so the DB is consistent. + await asyncio.to_thread(conn.commit) + raise ex from None + + # Single commit for the entire batch of completed tasks. + await asyncio.to_thread(conn.commit) + + +# ============================================================================= +# Migration Implementation +# ============================================================================= +async def migrate_indexed_files_async( + db_path: str, + *, + create_table_snapshots: bool = True, + continue_on_error: bool = False, + force: bool = False, + synapse_client: Optional["Synapse"] = None, +) -> MigrationResult: + """Migrate files that have been indexed. + + This is the second step in migrating files to a new storage location. + Files must first be indexed using `index_files_for_migration_async`. + + **Interactive confirmation:** When called from an interactive shell and + `force=False` (the default), this function will print the number of items + queued for migration and prompt the user to confirm before proceeding + (``"N items for migration to . Proceed? (y/n)?``). If standard + output is not connected to an interactive terminal (e.g. a script or CI + environment), migration is aborted unless ``force=True`` is set. + + Arguments: + db_path: Path to SQLite database created by index_files_for_migration_async. + create_table_snapshots: Whether to create table snapshots before migrating. Defaults to True. + continue_on_error: Whether to continue on individual migration errors. Defaults to False. + force: Skip the interactive confirmation prompt and proceed with migration + automatically. Set to ``True`` when running non-interactively (scripts, + CI, automated pipelines). Defaults to False. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + + Returns: + MigrationResult object, or None if migration was aborted (user declined + the confirmation prompt, or the session is non-interactive and force=False). + """ + test_import_sqlite3() + client = Synapse.get_client(synapse_client=synapse_client) + + # Retrieve settings + conn = sqlite3.connect(db_path, check_same_thread=False) + cursor = conn.cursor() + try: + _ensure_schema(cursor) + existing_settings = _retrieve_index_settings(cursor) + if existing_settings is None: + raise ValueError( + f"Unable to retrieve existing index settings from '{db_path}'. " + "Either this path does not represent a previously created migration index " + "or the file is corrupt." + ) + dest_storage_location_id = existing_settings.dest_storage_location_id + + # Confirm migration + confirmed = _confirm_migration( + cursor, dest_storage_location_id, force, synapse_client=client + ) + if not confirmed: + client.logger.info("Migration aborted.") + return None + + # Execute migration + await _execute_migration_async( + conn=conn, + cursor=cursor, + dest_storage_location_id=dest_storage_location_id, + create_table_snapshots=create_table_snapshots, + continue_on_error=continue_on_error, + synapse_client=client, + ) + return MigrationResult(db_path=db_path, synapse_client=client) + finally: + conn.close() + + +async def _execute_migration_async( + conn: sqlite3.Connection, + cursor: sqlite3.Cursor, + dest_storage_location_id: str, + create_table_snapshots: bool, + continue_on_error: bool, + *, + synapse_client: Optional[Synapse] = None, +) -> None: + """Execute the actual file migration. + + Arguments: + conn: The connection to the SQLite database. + cursor: The cursor object from the connection to the SQLite database. + dest_storage_location_id: Destination storage location ID. + create_table_snapshots: Whether to create table snapshots. + continue_on_error: Whether to continue on errors. + max_concurrent: Maximum concurrent operations. + synapse_client: If not passed in and caching was not disabled by `Synapse.allow_client_caching(False)` this will use the last created instance from the Synapse class constructor. + """ + pending_file_handles: Set[str] = set() + completed_file_handles: Set[str] = set() + pending_keys: Set[MigrationKey] = set() + + semaphore = synapse_client._get_parallel_file_transfer_semaphore( + asyncio_event_loop=asyncio.get_running_loop() + ) + active_tasks: Set[asyncio.Task] = set() + + # Initialize last key to an empty key so the first iteration can proceed. + key = MigrationKey(id="", type=None, row_id=-1, col_id=-1, version=-1) + while True: + # Query next batch — run in a thread to avoid blocking the event loop + # while SQLite performs the ORDER BY scan. + batch = await asyncio.to_thread( + _query_migration_batch, + cursor, + key, + pending_file_handles, + completed_file_handles, + min(BATCH_SIZE, semaphore._value - len(active_tasks)), + ) + row_count = 0 + for item in batch: + row_count += 1 + last_key = key + key = MigrationKey( + id=item["id"], + type=MigrationType(item["type"]), + version=item["version"], + row_id=item["row_id"], + col_id=item["col_id"], + ) + from_file_handle_id = item["from_file_handle_id"] + if key in pending_keys or from_file_handle_id in pending_file_handles: + # if this record is already being migrated or it shares a file handle + # with a record that is being migrated then skip this. + # if it the record shares a file handle it will be picked up later + # when its file handle is completed. + continue + + pending_keys.add(key) + + # Check for existing copy — run in a thread to avoid blocking the event loop. + to_file_handle_id = _check_file_handle_exists(cursor, from_file_handle_id) + + if not to_file_handle_id: + pending_file_handles.add(from_file_handle_id) + + # Create table snapshot if needed using the async API + if ( + key.type == MigrationType.TABLE_ATTACHED_FILE.value + and create_table_snapshots + and last_key.id != key.id + ): + await Table(id=key.id).snapshot_async(synapse_client=synapse_client) + + # Create migration task + task = asyncio.create_task( + _migrate_item_async( + key=key, + from_file_handle_id=from_file_handle_id, + to_file_handle_id=to_file_handle_id, + file_size=item["file_size"] or 0, + dest_storage_location_id=dest_storage_location_id, + semaphore=semaphore, + synapse_client=synapse_client, + ) + ) + active_tasks.add(task) + + if row_count == 0 and not pending_file_handles: + # we've run out of migratable sqlite rows, we have nothing else + # to submit, so we break out and wait for all remaining + # tasks to conclude. + break + + await track_migration_results_async( + conn, + cursor, + active_tasks, + pending_file_handles, + completed_file_handles, + pending_keys, + asyncio.FIRST_COMPLETED, + continue_on_error, + ) + + # Wait for any remaining tasks + if active_tasks: + await track_migration_results_async( + conn, + cursor, + active_tasks, + pending_file_handles, + completed_file_handles, + pending_keys, + asyncio.ALL_COMPLETED, + continue_on_error, + ) diff --git a/synapseclient/models/services/migration_types.py b/synapseclient/models/services/migration_types.py new file mode 100644 index 000000000..2a0af75ec --- /dev/null +++ b/synapseclient/models/services/migration_types.py @@ -0,0 +1,382 @@ +""" +Data classes and enums for the async migration service. + +These types are used to track the state of file migrations between storage locations. +""" + +import asyncio +import csv +from dataclasses import dataclass, field, fields +from enum import Enum +from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional + +from synapseclient.core.constants import concrete_types + +if TYPE_CHECKING: + from synapseclient import Synapse + + +class MigrationStatus(Enum): + """Internal enum used by the SQLite database to track the state of entities during indexing and migration.""" + + INDEXED = 1 + """The file has been indexed and is ready to be migrated.""" + + MIGRATED = 2 + """The file has been successfully migrated to the new storage location.""" + + ALREADY_MIGRATED = 3 + """The file was already at the destination storage location and no migration is needed.""" + + ERRORED = 4 + """An error occurred during indexing or migration for this entity.""" + + +class MigrationType(Enum): + """Type of entity being tracked in the migration database. + Container types (projects and folders) are only used during the indexing phase. + we record the containers we've indexed so we don't reindex them on a subsequent + run using the same db file (or reindex them after an indexing dry run)""" + + PROJECT = 1 + """A project entity.""" + + FOLDER = 2 + """A folder entity.""" + + FILE = 3 + """A file entity.""" + + TABLE_ATTACHED_FILE = 4 + """A file handle that is attached to a table column.""" + + @classmethod + def from_concrete_type(cls, concrete_type: str) -> "MigrationType": + """Convert a Synapse concrete type string to a MigrationType. + + Arguments: + concrete_type: The concrete type of the entity. + + Returns: + The corresponding MigrationType enum value. + + Raises: + ValueError: If the concrete type is not recognized. + """ + if concrete_type == concrete_types.PROJECT_ENTITY: + return cls.PROJECT + elif concrete_type == concrete_types.FOLDER_ENTITY: + return cls.FOLDER + elif concrete_type == concrete_types.FILE_ENTITY: + return cls.FILE + elif concrete_type == concrete_types.TABLE_ENTITY: + return cls.TABLE_ATTACHED_FILE + + raise ValueError(f"Unhandled concrete type: {concrete_type}") + + +@dataclass(frozen=True) +class MigrationKey: + """Unique identifier for a entry in the migrations database. + + Attributes: + id: The Synapse entity ID. + type: The migration type of entity being migrated. + version: The file version number (None for new versions or containers). #TODO double check if versions are NONE for containers + row_id: The table row ID (for table attached files). + col_id: The table column ID (for table attached files). + """ + + id: str + type: MigrationType + version: Optional[int] = None + row_id: Optional[int] = None + col_id: Optional[int] = None + + +@dataclass +class MigrationSettings: + """Settings for a migration index stored in the database. + + Attributes: + root_id: The root entity ID being migrated. + dest_storage_location_id: The destination storage location ID. + source_storage_location_ids: List of of storage location ids that will be migrated. + file_version_strategy: Strategy for handling file versions. + include_table_files: Whether to include files attached to tables. + """ + + root_id: str + dest_storage_location_id: str + source_storage_location_ids: List[str] = field(default_factory=list) + file_version_strategy: str = "new" + include_table_files: bool = False + + def to_dict(self) -> Dict[str, Any]: + """Return a dict suitable for JSON serialization in the database.""" + return { + "root_id": self.root_id, + "dest_storage_location_id": self.dest_storage_location_id, + "source_storage_location_ids": self.source_storage_location_ids, + "file_version_strategy": self.file_version_strategy, + "include_table_files": 1 if self.include_table_files else 0, + } + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> "MigrationSettings": + """Build MigrationSettings from a dict (e.g. from JSON in the database).""" + include = d.get("include_table_files", False) + if isinstance(include, int): + include = bool(include) + return cls( + root_id=d["root_id"], + dest_storage_location_id=d["dest_storage_location_id"], + source_storage_location_ids=d.get("source_storage_location_ids") or [], + file_version_strategy=d.get("file_version_strategy", "new"), + include_table_files=include, + ) + + def verify_migration_settings( + self, existing_settings: "MigrationSettings", db_path: str + ) -> None: + """Raise ValueError if the migration settings do not match the existing settings""" + # compare all fields + for f in fields(self): + if getattr(self, f.name) != getattr(existing_settings, f.name): + # we can't resume indexing with an existing index file using a different setting. + raise ValueError( + "Index parameter does not match the setting recorded in the existing index file. " + "To change the index settings start over by deleting the file or using a different path. " + f"Expected {f.name} {getattr(existing_settings, f.name)}, found {getattr(self, f.name)} in index file {db_path}" + ) + + +class IndexingError(Exception): + """Error during an indexing operation. + + Attributes: + entity_id: The entity ID that failed to index. + concrete_type: The concrete type of the entity. + """ + + def __init__(self, entity_id: str, concrete_type: str): + self.entity_id = entity_id + self.concrete_type = concrete_type + + +@dataclass +class MigrationResult: + """Result of a migration operation - proxy to the SQLite tracking database. + + This class provides methods to query the migration database for status counts, + individual migration entries, and CSV export. + + Attributes: + db_path: Path to the SQLite database file. + synapse_client: Optional Synapse client for column name lookups. + """ + + db_path: str + synapse_client: Optional["Synapse"] = None + + @property + def counts_by_status(self) -> Dict[str, int]: + """Get counts by migration status (synchronous). + + Returns: + Dictionary mapping status names to counts. + """ + return self.get_counts_by_status() + + def get_counts_by_status(self) -> Dict[str, int]: + """Get counts by migration status (synchronous). + + Returns: + Dictionary mapping status names to counts. + """ + import sqlite3 + + conn = sqlite3.connect(self.db_path) + try: + cursor = conn.cursor() + + # Only count FILE and TABLE_ATTACHED_FILE entries + result = cursor.execute( + "SELECT status, count(*) FROM migrations " + "WHERE type IN (?, ?) GROUP BY status", + (MigrationType.FILE.value, MigrationType.TABLE_ATTACHED_FILE.value), + ) + + counts = {status.name: 0 for status in MigrationStatus} + for row in result: + status_value = row[0] + count = row[1] + counts[MigrationStatus(status_value).name] = count + + return counts + finally: + conn.close() + + async def get_counts_by_status_async(self) -> Dict[str, int]: + """Get counts by migration status (asynchronous). + + Returns: + Dictionary mapping status names to counts. + """ + return await asyncio.to_thread(self.get_counts_by_status) + + def get_migrations(self) -> Iterator[Dict[str, Any]]: + """Iterate over all migration entries (synchronous). + + Yields: + Dictionary for each migration entry with keys: + id, type, version, row_id, col_name, from_storage_location_id, + from_file_handle_id, to_file_handle_id, file_size, status, exception. + """ + import sqlite3 + + conn = sqlite3.connect(self.db_path) + try: + cursor = conn.cursor() + + batch_size = 500 + rowid = -1 + column_names_cache: Dict[int, str] = {} + + while True: + results = cursor.execute( + """ + SELECT + rowid, + id, + type, + version, + row_id, + col_id, + from_storage_location_id, + from_file_handle_id, + to_file_handle_id, + file_size, + status, + exception + FROM migrations + WHERE + rowid > ? + AND type IN (?, ?) + ORDER BY rowid + LIMIT ? + """, + ( + rowid, + MigrationType.FILE.value, + MigrationType.TABLE_ATTACHED_FILE.value, + batch_size, + ), + ) + + rows = results.fetchall() + if not rows: + break + + for row in rows: + rowid = row[0] + col_id = row[5] + + # Resolve column name if needed + col_name = None + if col_id is not None and self.synapse_client: + if col_id not in column_names_cache: + try: + col_info = self.synapse_client.restGET( + f"/column/{col_id}" + ) + column_names_cache[col_id] = col_info.get("name", "") + except Exception: + column_names_cache[col_id] = "" + col_name = column_names_cache[col_id] + + yield { + "id": row[1], + "type": ( + "file" if row[2] == MigrationType.FILE.value else "table" + ), + "version": row[3], + "row_id": row[4], + "col_name": col_name, + "from_storage_location_id": row[6], + "from_file_handle_id": row[7], + "to_file_handle_id": row[8], + "file_size": row[9], + "status": MigrationStatus(row[10]).name, + "exception": row[11], + } + finally: + conn.close() + + async def get_migrations_async(self) -> List[Dict[str, Any]]: + """Get all migration entries (asynchronous). + + Returns: + List of dictionaries for each migration entry. + """ + # Convert to list since generators can't be returned from to_thread + return await asyncio.to_thread(lambda: list(self.get_migrations())) + + def as_csv(self, path: str) -> None: + """Export migration results to a CSV file (synchronous). + + Arguments: + path: Path to write the CSV file. + """ + fieldnames = [ + "id", + "type", + "version", + "row_id", + "col_name", + "from_storage_location_id", + "from_file_handle_id", + "to_file_handle_id", + "file_size", + "status", + "exception", + ] + + with open(path, "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + for migration in self.get_migrations(): + writer.writerow(migration) + + async def as_csv_async(self, path: str) -> None: + """Export migration results to a CSV file (asynchronous). + + Arguments: + path: Path to write the CSV file. + """ + await asyncio.to_thread(self.as_csv, path) + + +class MigrationError(Exception): + """Error during a migration operation. + + Attributes: + key: The migration key that failed. + from_file_handle_id: The source file handle ID. + to_file_handle_id: The destination file handle ID (if partially complete). + """ + + def __init__( + self, + key: MigrationKey, + from_file_handle_id: str, + to_file_handle_id: Optional[str] = None, + cause: Optional[Exception] = None, + ): + self.key = key + self.from_file_handle_id = from_file_handle_id + self.to_file_handle_id = to_file_handle_id + message = f"Migration failed for {key.id}" + if cause is not None: + message += f": {cause}" + super().__init__(message) diff --git a/synapseclient/models/services/storable_entity_components.py b/synapseclient/models/services/storable_entity_components.py index 0c918f9f0..6af377c79 100644 --- a/synapseclient/models/services/storable_entity_components.py +++ b/synapseclient/models/services/storable_entity_components.py @@ -22,6 +22,10 @@ ) +MANIFEST_UPLOAD_MAX_RETRIES: int = 4 +"""Maximum number of notification retries for manifest-based uploads.""" + + class FailureStrategy(Enum): """ When storing a large number of items through bulk actions like diff --git a/synapseclient/models/storage_location.py b/synapseclient/models/storage_location.py new file mode 100644 index 000000000..7a6ae14d6 --- /dev/null +++ b/synapseclient/models/storage_location.py @@ -0,0 +1,509 @@ +"""StorageLocation model for managing storage location settings in Synapse.""" + +from dataclasses import dataclass, field +from enum import Enum +from typing import Any, Dict, Optional + +from synapseclient import Synapse +from synapseclient.api.storage_location_services import ( + create_storage_location_setting, + get_storage_location_setting, +) +from synapseclient.core.async_utils import async_to_sync, otel_trace_method +from synapseclient.models.mixins.enum_coercion import EnumCoercionMixin +from synapseclient.models.protocols.storage_location_protocol import ( + StorageLocationSynchronousProtocol, +) + + +@dataclass(frozen=True) +class StorageLocationType: + """Describes a Synapse storage location type. + + Each instance is a distinct object identified by its ``name``, so SFTP and + HTTPS remain separate even though they share the same backend + ``concreteType`` (``ExternalStorageLocationSetting``). + + Attributes: + name: Human-readable identifier (e.g. ``"EXTERNAL_SFTP"``). + concrete_type: The ``concreteType`` suffix sent to the Synapse REST API. + """ + + name: str + concrete_type: str = field(repr=False) + + +StorageLocationType.SYNAPSE_S3 = StorageLocationType( + "SYNAPSE_S3", "S3StorageLocationSetting" +) +StorageLocationType.EXTERNAL_S3 = StorageLocationType( + "EXTERNAL_S3", "ExternalS3StorageLocationSetting" +) +StorageLocationType.EXTERNAL_GOOGLE_CLOUD = StorageLocationType( + "EXTERNAL_GOOGLE_CLOUD", "ExternalGoogleCloudStorageLocationSetting" +) +StorageLocationType.EXTERNAL_SFTP = StorageLocationType( + "EXTERNAL_SFTP", "ExternalStorageLocationSetting" +) +StorageLocationType.EXTERNAL_HTTPS = StorageLocationType( + "EXTERNAL_HTTPS", "ExternalStorageLocationSetting" +) +StorageLocationType.EXTERNAL_OBJECT_STORE = StorageLocationType( + "EXTERNAL_OBJECT_STORE", "ExternalObjectStorageLocationSetting" +) +StorageLocationType.PROXY = StorageLocationType("PROXY", "ProxyStorageLocationSettings") + + +class UploadType(str, Enum): + """Enumeration of upload types for storage locations. + + Attributes: + S3: Amazon S3 compatible upload. + GOOGLE_CLOUD_STORAGE: Google Cloud Storage upload. + SFTP: SFTP upload. + HTTPS: HTTPS upload (typically used with proxy storage). + NONE: No upload type specified. + """ + + S3 = "S3" + GOOGLE_CLOUD_STORAGE = "GOOGLECLOUDSTORAGE" + SFTP = "SFTP" + HTTPS = "HTTPS" + PROXYLOCAL = "PROXYLOCAL" + NONE = "NONE" + + +# Mapping from StorageLocationType to default UploadType +_STORAGE_TYPE_TO_UPLOAD_TYPE: Dict[StorageLocationType, UploadType] = { + StorageLocationType.SYNAPSE_S3: UploadType.S3, + StorageLocationType.EXTERNAL_S3: UploadType.S3, + StorageLocationType.EXTERNAL_GOOGLE_CLOUD: UploadType.GOOGLE_CLOUD_STORAGE, + StorageLocationType.EXTERNAL_SFTP: UploadType.SFTP, + StorageLocationType.EXTERNAL_HTTPS: UploadType.HTTPS, + StorageLocationType.EXTERNAL_OBJECT_STORE: UploadType.S3, + StorageLocationType.PROXY: UploadType.PROXYLOCAL, +} + +# Mapping from (concreteType suffix, uploadType value) -> StorageLocationType. +# The tuple key is required because EXTERNAL_SFTP and EXTERNAL_HTTPS share the +# same concreteType and are disambiguated by uploadType. +_CONCRETE_UPLOAD_TO_STORAGE_TYPE: Dict[tuple, StorageLocationType] = { + (storage_type.concrete_type, upload_type.value): storage_type + for storage_type, upload_type in _STORAGE_TYPE_TO_UPLOAD_TYPE.items() +} + +# Mapping from StorageLocationType to its type-specific (field_name, api_key) pairs. +# Only fields listed here are populated by fill_from_dict for a given type. +_STORAGE_TYPE_SPECIFIC_FIELDS: Dict[StorageLocationType, Dict[str, str]] = { + StorageLocationType.SYNAPSE_S3: { + "base_key": "baseKey", + "sts_enabled": "stsEnabled", + }, + StorageLocationType.EXTERNAL_S3: { + "bucket": "bucket", + "base_key": "baseKey", + "sts_enabled": "stsEnabled", + "endpoint_url": "endpointUrl", + }, + StorageLocationType.EXTERNAL_GOOGLE_CLOUD: { + "bucket": "bucket", + "base_key": "baseKey", + }, + StorageLocationType.EXTERNAL_OBJECT_STORE: { + "bucket": "bucket", + "endpoint_url": "endpointUrl", + }, + StorageLocationType.EXTERNAL_SFTP: { + "url": "url", + "supports_subfolders": "supportsSubfolders", + }, + StorageLocationType.EXTERNAL_HTTPS: { + "url": "url", + "supports_subfolders": "supportsSubfolders", + }, + StorageLocationType.PROXY: { + "proxy_url": "proxyUrl", + "secret_key": "secretKey", + "benefactor_id": "benefactorId", + }, +} + + +@dataclass() +@async_to_sync +class StorageLocation(EnumCoercionMixin, StorageLocationSynchronousProtocol): + """A storage location setting describes where files are uploaded to and + downloaded from via Synapse. Storage location settings may be created for + external locations, such as user-owned Amazon S3 buckets, Google Cloud + Storage buckets, SFTP servers, or proxy storage. + + Attributes: + storage_location_id: (Read Only) The unique ID for this storage location, + assigned by the server on creation. + storage_type: The type of storage location. Required when creating a new + storage location via `store()`. Determines the `concreteType` sent to + the Synapse REST API. + banner: The banner text to display to a user every time a file is uploaded. + This field is optional. + description: A description of the storage location. This description is + shown when a user has to choose which upload destination to use. + + Attributes: + bucket: The name of the S3 or Google Cloud Storage bucket. Applicable to + `SYNAPSE_S3`, `EXTERNAL_S3`, `EXTERNAL_GOOGLE_CLOUD`, and + `EXTERNAL_OBJECT_STORE` types. + base_key: The optional base key (prefix/folder) within the bucket. + Applicable to `SYNAPSE_S3`, `EXTERNAL_S3`, and `EXTERNAL_GOOGLE_CLOUD` + types. + sts_enabled: Whether STS (AWS Security Token Service) is enabled on this + storage location. Applicable to `SYNAPSE_S3` and `EXTERNAL_S3` types. + endpoint_url: The endpoint URL of the S3 service. Applicable to + `EXTERNAL_S3` (default: https://s3.amazonaws.com) and + `EXTERNAL_OBJECT_STORE` types. + + Attributes: + url: The base URL for uploading to the external destination. Applicable to + `EXTERNAL_SFTP` type. + supports_subfolders: Whether the destination supports creating subfolders + under the base url. Applicable to `EXTERNAL_SFTP` type. Default: False. + + Attributes: + proxy_url: The HTTPS URL of the proxy used for upload and download. + Applicable to `PROXY` type. + secret_key: The encryption key used to sign all pre-signed URLs used to + communicate with the proxy. Applicable to `PROXY` type. + benefactor_id: An Entity ID (such as a Project ID). When set, any user with + the 'create' permission on the given benefactorId will be allowed to + create ProxyFileHandle using its storage location ID. Applicable to + `PROXY` type. + + Attributes: + upload_type: (Read Only) The upload type for this storage location. + Automatically derived from `storage_type`. + etag: (Read Only) Synapse employs an Optimistic Concurrency Control (OCC) + scheme. The E-Tag changes every time the setting is updated. + created_on: (Read Only) The date this storage location setting was created. + created_by: (Read Only) The ID of the user that created this storage + location setting. + + Example: Creating an external S3 storage location + Create a storage location backed by your own S3 bucket: + + from synapseclient.models import StorageLocation, StorageLocationType + + import synapseclient + synapseclient.login() + + storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket="my-external-synapse-bucket", + base_key="path/within/bucket", + ).store() + + print(f"Storage location ID: {storage.storage_location_id}") + + Example: Creating a Google Cloud storage location + Create a storage location backed by your own GCS bucket: + + from synapseclient.models import StorageLocation, StorageLocationType + + import synapseclient + synapseclient.login() + + storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + bucket="my-gcs-bucket", + base_key="path/within/bucket", + ).store() + """ + + _ENUM_FIELDS = { + "upload_type": UploadType, + } + + # REQUIRED fields + _REQUIRED_FIELDS = { + StorageLocationType.EXTERNAL_S3: {"bucket"}, + StorageLocationType.EXTERNAL_GOOGLE_CLOUD: {"bucket"}, + StorageLocationType.EXTERNAL_OBJECT_STORE: {"bucket", "endpoint_url"}, + StorageLocationType.EXTERNAL_SFTP: {"url"}, + StorageLocationType.EXTERNAL_HTTPS: {"url"}, + StorageLocationType.PROXY: {"proxy_url", "secret_key", "benefactor_id"}, + } + # Core fields - present on all storage locations + storage_location_id: Optional[int] = None + """(Read Only) The unique ID for this storage location, assigned by the server + on creation.""" + + storage_type: Optional[StorageLocationType] = None + """The type of storage location. Required when creating a new storage location + via `store()`. Determines the `concreteType` sent to the Synapse REST API.""" + + concrete_type: Optional[str] = field(default=None, compare=False) + """The concrete type of the storage location indicating which implementation this object represents. """ + + banner: Optional[str] = None + """The banner text to display to a user every time a file is uploaded.""" + + description: Optional[str] = None + """A description of the storage location. This description is shown when a user + has to choose which upload destination to use.""" + + # S3/GCS specific fields + bucket: Optional[str] = None + """The name of the S3 or Google Cloud Storage bucket. Applicable to `SYNAPSE_S3`, + `EXTERNAL_S3`, `EXTERNAL_GOOGLE_CLOUD`, and `EXTERNAL_OBJECT_STORE` types.""" + + base_key: Optional[str] = None + """The optional base key (prefix/folder) within the bucket. Applicable to + `SYNAPSE_S3`, `EXTERNAL_S3`, and `EXTERNAL_GOOGLE_CLOUD` types.""" + + sts_enabled: Optional[bool] = False + """Whether STS (AWS Security Token Service) is enabled on this storage location. + Applicable to `SYNAPSE_S3` and `EXTERNAL_S3` types. Default: False.""" + + endpoint_url: Optional[str] = "https://s3.amazonaws.com" + """The endpoint URL of the S3 service. Applicable to `EXTERNAL_S3` + (default: https://s3.amazonaws.com) and `EXTERNAL_OBJECT_STORE` types.""" + + # SFTP specific fields + url: Optional[str] = None + """The base URL for uploading to the external destination. Applicable to + `EXTERNAL_SFTP` type.""" + + supports_subfolders: Optional[bool] = False + """Whether the destination supports creating subfolders under the base url. + Applicable to `EXTERNAL_SFTP` type. Default: False.""" + + # Proxy specific fields + proxy_url: Optional[str] = None + """The HTTPS URL of the proxy used for upload and download. Applicable to + `PROXY` type.""" + + secret_key: Optional[str] = None + """The encryption key used to sign all pre-signed URLs used to communicate + with the proxy. Applicable to `PROXY` type.""" + + benefactor_id: Optional[str] = None + """An Entity ID (such as a Project ID). When set, any user with the 'create' + permission on the given benefactorId will be allowed to create ProxyFileHandle + using its storage location ID. Applicable to `PROXY` type.""" + + # Read-only fields + upload_type: Optional[UploadType] = field(default=None, compare=False) + """(Read Only) The upload type for this storage location. Automatically derived + from `storage_type`.""" + + etag: Optional[str] = field(default=None, compare=False) + """(Read Only) Synapse employs an Optimistic Concurrency Control (OCC) scheme. + The E-Tag changes every time the setting is updated.""" + + created_on: Optional[str] = field(default=None, compare=False) + """(Read Only) The date this storage location setting was created.""" + + created_by: Optional[int] = field(default=None, compare=False) + """(Read Only) The ID of the user that created this storage location setting.""" + + def __repr__(self) -> str: + common = { + "concrete_type": self.concrete_type, + "storage_location_id": self.storage_location_id, + "storage_type": self.storage_type, + "upload_type": self.upload_type, + "banner": self.banner, + "description": self.description, + "etag": self.etag, + "created_on": self.created_on, + "created_by": self.created_by, + } + type_specific = { + field_name: getattr(self, field_name) + for field_name in _STORAGE_TYPE_SPECIFIC_FIELDS.get(self.storage_type, {}) + } + parts = [f"{k}={v!r}" for k, v in {**common, **type_specific}.items()] + return f"StorageLocation({', '.join(parts)})" + + def fill_from_dict(self, synapse_response: Dict[str, Any]) -> "StorageLocation": + """Converts a response from the REST API into this dataclass. + + Arguments: + synapse_response: The response from the REST API. + + Returns: + The StorageLocation object. + """ + self.storage_location_id = synapse_response.get("storageLocationId", None) + self.banner = synapse_response.get("banner", None) + self.description = synapse_response.get("description", None) + self.etag = synapse_response.get("etag", None) + self.created_on = synapse_response.get("createdOn", None) + self.created_by = synapse_response.get("createdBy", None) + + self.upload_type = synapse_response.get("uploadType", None) + + # Parse storage type from concreteType + uploadType. + # Both are needed to distinguish EXTERNAL_SFTP from EXTERNAL_HTTPS. + self.concrete_type = synapse_response.get("concreteType", "") + if self.concrete_type: + type_suffix = ( + self.concrete_type.split(".")[-1] if "." in self.concrete_type else "" + ) + key = (type_suffix, self.upload_type) + if key in _CONCRETE_UPLOAD_TO_STORAGE_TYPE: + self.storage_type = _CONCRETE_UPLOAD_TO_STORAGE_TYPE[key] + # Type-specific fields — only populate attributes relevant to this storage type + for field_name, api_key in _STORAGE_TYPE_SPECIFIC_FIELDS.get( + self.storage_type, {} + ).items(): + setattr(self, field_name, synapse_response.get(api_key, None)) + else: + Synapse.get_client().logger.warning( + f"Unrecognized concreteType/uploadType pair " + f"({self.concrete_type}, {self.upload_type.value}); " + "storage_type will not be set and type-specific fields will be empty." + ) + return self + + def _to_synapse_request(self) -> Dict[str, Any]: + """Convert this dataclass to a request body for the REST API. + + Returns: + A dictionary suitable for the REST API. + """ + if not self.storage_type: + raise ValueError( + "storage_type is required when creating a storage location" + ) + + # Build the concrete type + concrete_type = ( + f"org.sagebionetworks.repo.model.project.{self.storage_type.concrete_type}" + ) + # Determine upload type + upload_type = self.upload_type or _STORAGE_TYPE_TO_UPLOAD_TYPE.get( + self.storage_type + ) + + body: Dict[str, Any] = { + "concreteType": concrete_type, + "uploadType": upload_type.value, + } + + # Add optional common fields + body["banner"] = self.banner if self.banner is not None else None + body["description"] = self.description if self.description is not None else None + # Add type-specific fields using the same mapping used by fill_from_dict + for field_name, api_key in _STORAGE_TYPE_SPECIFIC_FIELDS.get( + self.storage_type, {} + ).items(): + value = getattr(self, field_name, None) + if value is not None: + body[api_key] = value + return body + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"StorageLocation_Store: {self.storage_type}" + ) + async def store_async( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> "StorageLocation": + """Create this storage location in Synapse. Storage locations are immutable; + this always creates a new one. If a storage location with identical properties + already exists for this user, the existing one is returned (idempotent). + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The StorageLocation object with server-assigned fields populated. + + Raises: + ValueError: If `storage_type` is not set. + + Example: Using this function + Create an external S3 storage location: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import StorageLocation, StorageLocationType + + syn = Synapse() + syn.login() + + async def main(): + storage = await StorageLocation( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket="my-bucket", + base_key="my/prefix", + ).store_async() + print(f"Created storage location: {storage.storage_location_id}") + + asyncio.run(main()) + """ + # check if the attributes without default values for a specific storage type are present + for field_name in self._REQUIRED_FIELDS.get(self.storage_type, {}): + if getattr(self, field_name, None) is None: + raise ValueError( + f"missing the '{field_name}' attribute for {self.storage_type}" + ) + request = self._to_synapse_request() + response = await create_storage_location_setting( + request=request, + synapse_client=synapse_client, + ) + self.fill_from_dict(response) + return self + + @otel_trace_method( + method_to_trace_name=lambda self, **kwargs: f"StorageLocation_Get: {self.storage_location_id}" + ) + async def get_async( + self, + *, + synapse_client: Optional[Synapse] = None, + ) -> "StorageLocation": + """Retrieve this storage location from Synapse by its ID. Only the creator of + a StorageLocationSetting can retrieve it by its id. + + Arguments: + synapse_client: If not passed in and caching was not disabled by + `Synapse.allow_client_caching(False)` this will use the last created + instance from the Synapse class constructor. + + Returns: + The StorageLocation object populated with data from Synapse. + + Raises: + ValueError: If `storage_location_id` is not set. + + Example: Using this function + Retrieve a storage location by ID: + + import asyncio + from synapseclient import Synapse + from synapseclient.models import StorageLocation + + syn = Synapse() + syn.login() + + async def main(): + storage = await StorageLocation(storage_location_id=12345).get_async() + print(f"Type: {storage.storage_type}, Bucket: {storage.bucket}") + + asyncio.run(main()) + """ + if not self.storage_location_id: + raise ValueError( + "storage_location_id is required to retrieve a storage location" + ) + + response = await get_storage_location_setting( + storage_location_id=self.storage_location_id, + synapse_client=synapse_client, + ) + self.fill_from_dict(response) + return self diff --git a/synapseclient/operations/CLAUDE.md b/synapseclient/operations/CLAUDE.md new file mode 100644 index 000000000..a09c3a436 --- /dev/null +++ b/synapseclient/operations/CLAUDE.md @@ -0,0 +1,39 @@ + + +## Project + +High-level CRUD factory methods (`get`, `store`, `delete`) that dispatch to the correct entity-type-specific handler. Entry point for users who want a simpler interface than calling model methods directly. + +## Conventions + +### Sync wrapper pattern +Uses `wrap_async_to_sync()` on standalone async functions — NOT the `@async_to_sync` class decorator (which only works on classes). Every public async function has a sync counterpart generated this way. + +### Factory dispatch via isinstance() +`store_async()` routes to entity-specific handlers via `isinstance()` checks: +- File/RecordSet → `_handle_store_file_entity()` +- Project/Folder → `_handle_store_container_entity()` +- Table-like entities → `_handle_store_table_entity()` +- Link → `_handle_store_link_entity()` +- Team → if has `id`: `.store_async()`, else: `.create_async()` +- AgentSession → `.update_async()` (not `.store_async()`) + +### Options dataclasses +Type-specific configuration bundled in dataclass objects: +- **Store**: `StoreFileOptions`, `StoreContainerOptions`, `StoreTableOptions`, `StoreGridOptions`, `StoreJSONSchemaOptions` +- **Get**: `FileOptions`, `ActivityOptions`, `TableOptions`, `LinkOptions` + +`LinkOptions.follow_link=True` returns the target entity, not the Link itself. + +### Delete version precedence +Version resolution order: explicit `version` parameter > entity's `version_number` attribute > version parsed from ID string (e.g., "syn123.4"). Only warns on conflict if both explicit param and attribute are set and differ. + +### FailureStrategy +`FailureStrategy` enum controls child entity error handling in container store operations: +- `LOG_EXCEPTION` — log error, continue with remaining children +- `RAISE_EXCEPTION` — raise immediately on first child failure + +### Adding new operations +1. Add async function in the appropriate file +2. Create sync wrapper with `wrap_async_to_sync()` +3. Export both in `operations/__init__.py` and `__all__` diff --git a/synapseclient/operations/__init__.py b/synapseclient/operations/__init__.py index ab112f612..5134b03a4 100644 --- a/synapseclient/operations/__init__.py +++ b/synapseclient/operations/__init__.py @@ -1,4 +1,17 @@ from synapseclient.operations.delete_operations import delete, delete_async +from synapseclient.operations.download_list_operations import ( + DownloadListItem, + download_list_add, + download_list_add_async, + download_list_clear, + download_list_clear_async, + download_list_files, + download_list_files_async, + download_list_manifest, + download_list_manifest_async, + download_list_remove, + download_list_remove_async, +) from synapseclient.operations.factory_operations import ( ActivityOptions, FileOptions, @@ -47,6 +60,18 @@ # Delete operations "delete", "delete_async", + # Download list operations + "DownloadListItem", + "download_list_files", + "download_list_files_async", + "download_list_manifest", + "download_list_manifest_async", + "download_list_add", + "download_list_add_async", + "download_list_remove", + "download_list_remove_async", + "download_list_clear", + "download_list_clear_async", # Utility operations "find_entity_id", "find_entity_id_async", diff --git a/synapseclient/operations/download_list_operations.py b/synapseclient/operations/download_list_operations.py new file mode 100644 index 000000000..f2b20a3a4 --- /dev/null +++ b/synapseclient/operations/download_list_operations.py @@ -0,0 +1,943 @@ +"""Operations for the user's Synapse Download List (cart). + +The download list is a user-scoped cart of files queued for bulk download. +Files can be added via the Synapse web UI or API and downloaded in batch. + +Files are not packaged into a zip because download lists can exceed 100 GB. +Instead, files are downloaded individually and removed from the list after +successful download, so interrupted runs are safely resumable. +""" + +import asyncio +import csv +import os +import time +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any, Optional + +from synapseclient.api.download_list_services import ( + add_to_download_list_async, + clear_download_list_async, + remove_from_download_list_async, +) +from synapseclient.core.async_utils import wrap_async_to_sync +from synapseclient.core.constants.concrete_types import DOWNLOAD_LIST_MANIFEST_REQUEST +from synapseclient.core.exceptions import SynapseError +from synapseclient.models.mixins.asynchronous_job import AsynchronousCommunicator +from synapseclient.models.table_components import CsvTableDescriptor + +if TYPE_CHECKING: + from synapseclient import Synapse + +_ID_COLUMN = "ID" +_VERSION_COLUMN = "versionNumber" +_PATH_COLUMN = "path" +_ERROR_COLUMN = "error" + + +@dataclass +class DownloadListItem: + """A single item for a user's download list. + + + + Attributes: + file_entity_id: Synapse ID of the file entity (e.g. "syn123"). + version_number: Version of the file to target. + """ + + file_entity_id: str + """Synapse ID of the file entity (e.g. "syn123").""" + + version_number: Optional[int] = None + """Version of the file to target.""" + + +@dataclass +class _DownloadListManifestRequest(AsynchronousCommunicator): + """Drives the full lifecycle of a Synapse async manifest job in one object. + + Calling send_job_and_wait_async() executes four phases automatically: + + **Phase 1 — Submit** (to_synapse_request) + Builds the POST body and submits it to + POST /download/list/manifest/async/start. Synapse starts a + background job and returns a token. + + **Phase 2 — Poll** (AsynchronousCommunicator base class) + Polls GET /download/list/manifest/async/get/{token} until the job + state is COMPLETE (or the timeout is reached). No code needed here + — the base class handles this using the endpoint registered in + ASYNC_JOB_URIS for this class's concrete_type. + + **Phase 3 — Parse response** (fill_from_dict) + Extracts resultFileHandleId from the completed job response and + stores it in self.result_file_handle_id. + + **Phase 4 — Download** (_post_exchange_async) + Retrieves file handle metadata via get_file_handle() and a + pre-signed S3 URL via get_file_handle_presigned_url(), then + streams the CSV to disk via download_from_url() (run in a thread + pool via asyncio.to_thread since it is a blocking sync method). + Stores the local path in self.manifest_path. + + After send_job_and_wait_async() returns, manifest_path holds the + local path to the downloaded CSV and is ready to use. + + + + """ + + concrete_type: str = field( + init=False, + default=DOWNLOAD_LIST_MANIFEST_REQUEST, + ) + """The concreteType string sent in the request body. Set automatically; + registered in ASYNC_JOB_URIS to resolve the REST endpoint.""" + + result_file_handle_id: Optional[str] = field(init=False, default=None) + """File handle ID of the generated manifest CSV. Populated by + fill_from_dict() from the resultFileHandleId field of the job + response. None until the job completes.""" + + manifest_path: Optional[str] = field(init=False, default=None) + """Absolute local path of the downloaded manifest CSV. Populated by + _post_exchange_async() after the file is downloaded. None until + send_job_and_wait_async() returns.""" + + csv_table_descriptor: CsvTableDescriptor = field( + default_factory=CsvTableDescriptor, + ) + """Describes the format of the generated CSV manifest.""" + + def to_synapse_request(self) -> dict[str, Any]: + """Build the request body for the manifest async job. + + Constructs the POST body for + POST /download/list/manifest/async/start including the concrete type + and CSV descriptor. + + Returns: + A dictionary containing the request body expected by the Synapse + REST API. + """ + return { + "concreteType": self.concrete_type, + "csvTableDescriptor": self.csv_table_descriptor.to_synapse_request(), + } + + def fill_from_dict( + self, synapse_response: dict[str, Any] + ) -> "_DownloadListManifestRequest": + """Converts the data coming from the Synapse async job response into + this data class. + + Extracts the resultFileHandleId from the completed job response and + stores it in result_file_handle_id. + + Arguments: + synapse_response: The response dict from the completed Synapse + async manifest job. + + Returns: + The _DownloadListManifestRequest object instance. + """ + self.result_file_handle_id = synapse_response.get("resultFileHandleId") + return self + + async def _post_exchange_async( + self, synapse_client: Optional["Synapse"] = None, **kwargs + ) -> None: + """Download the manifest CSV from Synapse after the async job completes. + + Retrieves the file handle metadata and a pre-signed S3 URL using + creator-based endpoints (no entity association required), then + streams the CSV to disk using download_from_url (run in a thread + pool to avoid blocking the event loop). On success, sets + self.manifest_path to the local path of the downloaded file. + + Arguments: + synapse_client: The Synapse client to use for the request. Uses + the cached singleton if omitted. + **kwargs: Additional arguments. Supports destination (str) to + control the download directory; defaults to the current + working directory. + """ + from synapseclient import Synapse + from synapseclient.api.file_services import ( + get_file_handle, + get_file_handle_presigned_url, + ) + from synapseclient.core.download.download_functions import download_from_url + + destination = kwargs.get("destination", ".") + client = Synapse.get_client(synapse_client=synapse_client) + file_handle = await get_file_handle( + file_handle_id=self.result_file_handle_id, + synapse_client=client, + ) + presigned_url = await get_file_handle_presigned_url( + file_handle_id=self.result_file_handle_id, + synapse_client=client, + ) + self.manifest_path = await asyncio.to_thread( + download_from_url, + url=presigned_url, + destination=destination, + file_handle_id=file_handle["id"], + expected_md5=file_handle.get("contentMd5"), + url_is_presigned=True, + synapse_client=client, + ) + + +def download_list_files( + download_location: Optional[str] = None, + *, + parallel: bool = False, + max_concurrent: int = 10, + synapse_client: Optional["Synapse"] = None, +) -> str: + """Download all files in the Synapse download list (cart) to a local directory. + + Files are downloaded individually. The cart is not packaged into a zip because + download lists can exceed 100 GB. Only successfully downloaded files are removed + from the cart after the full pass completes, so interrupted runs are safely + resumable. + + Files that cannot be accessed or fail to download are left in the cart and + recorded with an error value in the result manifest. + + Arguments: + download_location: Directory to download files to. Defaults to the + current working directory. + parallel: If True, files are downloaded concurrently up to + max_concurrent at a time using asyncio.gather. If False + (default), files are downloaded sequentially. + max_concurrent: Maximum number of files to download concurrently when + parallel=True. Defaults to 10. Has no effect when + parallel=False. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + Path to the result manifest CSV, which contains all original manifest + columns plus path (local file path) and error (error message or + empty string) columns. + + Raises: + SynapseHTTPError: If the manifest async job fails or the cart is empty + ("No files available for download"). + SynapseError: If the manifest job completes but produces no local file, + or if the downloaded CSV has no headers or contains reserved column + names ("path" or "error"). + + Example: Download all files in the cart +   + Download all files in the user's download list to a local directory. + ```python + from synapseclient import Synapse + from synapseclient.operations import download_list_files + + syn = Synapse() + syn.login() + + manifest_path = download_list_files(download_location="./data") + ``` + """ + return wrap_async_to_sync( + coroutine=download_list_files_async( + download_location=download_location, + parallel=parallel, + max_concurrent=max_concurrent, + synapse_client=synapse_client, + ) + ) + + +async def download_list_files_async( + download_location: Optional[str] = None, + *, + parallel: bool = False, + max_concurrent: int = 10, + synapse_client: Optional["Synapse"] = None, +) -> str: + """Download all files in the Synapse download list (cart) to a local directory. + + Files are downloaded individually. The cart is not packaged into a zip because + download lists can exceed 100 GB. Only successfully downloaded files are removed + from the cart after the full pass completes, so interrupted runs are safely + resumable. + + Files that cannot be accessed or fail to download are left in the cart and + recorded with an error value in the result manifest. + + Arguments: + download_location: Directory to download files to. Defaults to the + current working directory. + parallel: If True, files are downloaded concurrently up to + max_concurrent at a time using asyncio.gather. If False + (default), files are downloaded sequentially. + max_concurrent: Maximum number of files to download concurrently when + parallel=True. Defaults to 10. Has no effect when + parallel=False. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + Path to the result manifest CSV, which contains all original manifest + columns plus path (local file path) and error (error message or + empty string) columns. + + Raises: + SynapseHTTPError: If the manifest async job fails or the cart is empty + ("No files available for download"). + SynapseError: If the manifest job completes but produces no local file, + or if the downloaded CSV has no headers or contains reserved column + names ("path" or "error"). + + Example: Download all files in the cart +   + Download all files in the user's download list to a local directory. + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.operations import download_list_files_async + + async def main(): + syn = Synapse() + syn.login() + + manifest_path = await download_list_files_async(download_location="./data") + + asyncio.run(main()) + ``` + """ + from synapseclient import Synapse + + client = Synapse.get_client(synapse_client=synapse_client) + + if download_location is not None: + download_location = os.path.expandvars(os.path.expanduser(download_location)) + + # 1. Fetch the server-generated manifest and read it into memory + manifest_path = await download_list_manifest_async(synapse_client=client) + try: + columns, rows = await asyncio.to_thread(_read_manifest_rows, manifest_path) + finally: + os.remove(manifest_path) + + # 2. Validate manifest columns and append result columns + columns = _validate_and_extend_columns(columns) + + # 3. Download each file in the manifest + downloaded_files = await _download_all_manifest_files( + rows=rows, + download_location=download_location, + parallel=parallel, + max_concurrent=max_concurrent, + synapse_client=client, + ) + + # 4. Write the result manifest with path/error columns + new_manifest_path = await _save_result_manifest( + rows=rows, + columns=columns, + download_location=download_location, + ) + + # 5. Remove successfully downloaded files from the cart. The Synapse API + # requires the (fileEntityId, versionNumber) pair at removal to match + # exactly what was used at add time -- a no-version add is only matched + # by a no-version remove. The manifest always carries a resolved + # versionNumber, so a versioned remove silently fails (returns 0) for + # entries that were added without a version. When that happens, retry + # the same entity with version_number=None to match the no-version add. + if downloaded_files: + for item in downloaded_files: + removed = await remove_from_download_list_async( + files=[item], + synapse_client=client, + ) + if removed == 0: + await remove_from_download_list_async( + files=[DownloadListItem(file_entity_id=item.file_entity_id)], + synapse_client=client, + ) + else: + client.logger.warning("A manifest was created, but no files were downloaded") + + return new_manifest_path + + +def download_list_manifest( + *, + csv_table_descriptor: Optional[CsvTableDescriptor] = None, + destination: str = ".", + synapse_client: Optional["Synapse"] = None, +) -> str: + """Generate and download the manifest CSV for the current cart contents. + + Submits an async job to Synapse to generate the manifest, then downloads + the resulting CSV. The manifest contains the same columns as the zip + manifest downloaded from the Synapse web UI. + + Arguments: + csv_table_descriptor: Optional CsvTableDescriptor controlling the + format of the generated CSV (separator, quote character, escape + character, line ending, and whether the first line is a header). + When omitted the Synapse defaults are used. + destination: Directory to download the manifest CSV to. Defaults to + the current working directory. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Raises: + SynapseError: If the async job completes without producing a manifest. + + Returns: + Path to the downloaded manifest CSV. + + Example: Get the download list manifest +   + Inspect the cart contents before downloading. + ```python + from synapseclient import Synapse + from synapseclient.operations import download_list_manifest + + syn = Synapse() + syn.login() + + manifest_path = download_list_manifest() + ``` + """ + return wrap_async_to_sync( + coroutine=download_list_manifest_async( + csv_table_descriptor=csv_table_descriptor, + destination=destination, + synapse_client=synapse_client, + ) + ) + + +async def download_list_manifest_async( + *, + csv_table_descriptor: Optional[CsvTableDescriptor] = None, + destination: str = ".", + synapse_client: Optional["Synapse"] = None, +) -> str: + """Generate and download the manifest CSV for the current cart contents. + + Submits an async job to Synapse to generate the manifest, then downloads + the resulting CSV. The manifest contains the same columns as the zip + manifest downloaded from the Synapse web UI. + + Arguments: + csv_table_descriptor: Optional CsvTableDescriptor controlling the + format of the generated CSV (separator, quote character, escape + character, line ending, and whether the first line is a header). + When omitted the Synapse defaults are used. + destination: Directory to download the manifest CSV to. Defaults to + the current working directory. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Raises: + SynapseError: If the async job completes without producing a manifest. + + Returns: + Path to the downloaded manifest CSV. + + Example: Get the download list manifest +   + Inspect the cart contents before downloading. + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.operations import download_list_manifest_async + + async def main(): + syn = Synapse() + syn.login() + + manifest_path = await download_list_manifest_async() + + asyncio.run(main()) + ``` + """ + manifest_request = _DownloadListManifestRequest( + csv_table_descriptor=csv_table_descriptor or CsvTableDescriptor(), + ) + await manifest_request.send_job_and_wait_async( + post_exchange_args={"destination": destination}, + synapse_client=synapse_client, + ) + if manifest_request.manifest_path is None: + raise SynapseError( + "Manifest job completed but no local file was produced. " + "The download from Synapse may have failed silently." + ) + return manifest_request.manifest_path + + +def download_list_add( + files: list[DownloadListItem], + *, + synapse_client: Optional["Synapse"] = None, +) -> int: + """Add files to the Synapse download list. + + If a file is added with no version specified, the latest version will be downloaded. + + Arguments: + files: List of DownloadListItem objects identifying the file + versions to add. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The number of files added. + + Example: Add files to the download list +   + Add specific file versions to the cart. + ```python + from synapseclient import Synapse + from synapseclient.operations import download_list_add, DownloadListItem + + syn = Synapse() + syn.login() + + count = download_list_add([ + DownloadListItem(file_entity_id="syn123", version_number=1), + DownloadListItem(file_entity_id="syn456", version_number=2), + ]) + ``` + """ + return wrap_async_to_sync( + coroutine=download_list_add_async(files=files, synapse_client=synapse_client) + ) + + +async def download_list_add_async( + files: list[DownloadListItem], + *, + synapse_client: Optional["Synapse"] = None, +) -> int: + """Add files to the Synapse download list. + + If a file is added with no version specified, the latest version will be downloaded. + + Arguments: + files: List of DownloadListItem objects identifying the file + versions to add. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The number of files added. + + Example: Add files to the download list +   + Add specific file versions to the cart. + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.operations import download_list_add_async, DownloadListItem + + async def main(): + syn = Synapse() + syn.login() + + count = await download_list_add_async([ + DownloadListItem(file_entity_id="syn123", version_number=1), + DownloadListItem(file_entity_id="syn456", version_number=2), + ]) + + asyncio.run(main()) + ``` + """ + return await add_to_download_list_async( + files=files, + synapse_client=synapse_client, + ) + + +def download_list_remove( + files: list[DownloadListItem], + *, + synapse_client: Optional["Synapse"] = None, +) -> int: + """Remove files from the Synapse download list. + + If a file was added with a version specified, then that version must be specified to remove it. + If a file was added with no version specified, then no version must be specified to remove it. + + Arguments: + files: List of DownloadListItem objects identifying the file versions to remove. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The number of files removed. + + Example: Remove files from the download list +   + Remove specific file versions from the cart. + ```python + from synapseclient import Synapse + from synapseclient.operations import download_list_remove, DownloadListItem + + syn = Synapse() + syn.login() + + count = download_list_remove([ + DownloadListItem(file_entity_id="syn123", version_number=1), + ]) + ``` + """ + return wrap_async_to_sync( + coroutine=download_list_remove_async(files=files, synapse_client=synapse_client) + ) + + +async def download_list_remove_async( + files: list[DownloadListItem], + *, + synapse_client: Optional["Synapse"] = None, +) -> int: + """Remove files from the Synapse download list. + + If a file was added with a version specified, then that version must be specified to remove it. + If a file was added with no version specified, then no version must be specified to remove it. + + Arguments: + files: List of DownloadListItem objects identifying the file versions to remove. + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Returns: + The number of files removed. + + Example: Remove files from the download list +   + Remove specific file versions from the cart. + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.operations import download_list_remove_async, DownloadListItem + + async def main(): + syn = Synapse() + syn.login() + + count = await download_list_remove_async([ + DownloadListItem(file_entity_id="syn123", version_number=1), + ]) + + asyncio.run(main()) + ``` + """ + return await remove_from_download_list_async( + files=files, + synapse_client=synapse_client, + ) + + +def download_list_clear( + *, + synapse_client: Optional["Synapse"] = None, +) -> None: + """Clear all files from the Synapse download list (cart). + + Arguments: + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Example: Clear the download list +   + Remove all files from the cart. + ```python + from synapseclient import Synapse + from synapseclient.operations import download_list_clear + + syn = Synapse() + syn.login() + + download_list_clear() + ``` + """ + return wrap_async_to_sync( + coroutine=download_list_clear_async(synapse_client=synapse_client) + ) + + +async def download_list_clear_async( + *, + synapse_client: Optional["Synapse"] = None, +) -> None: + """Clear all files from the Synapse download list (cart). + + Arguments: + synapse_client: If not passed in and caching was not disabled by + Synapse.allow_client_caching(False) this will use the last created + instance from the Synapse class constructor. + + Example: Clear the download list +   + Remove all files from the cart. + ```python + import asyncio + from synapseclient import Synapse + from synapseclient.operations import download_list_clear_async + + async def main(): + syn = Synapse() + syn.login() + + await download_list_clear_async() + + asyncio.run(main()) + ``` + """ + await clear_download_list_async(synapse_client=synapse_client) + + +def _read_manifest_rows( + path: str, +) -> tuple[Optional[list[str]], list[dict[str, Any]]]: + """Read the server-generated manifest CSV into memory. + + Arguments: + path: Local path to the server-generated manifest CSV. + + Returns: + (columns, rows) where columns is the list of field names and + rows is a list of row dicts (possibly empty). Returns + (None, []) if the CSV file has no column headers. + """ + with open(path, newline="") as f: + reader = csv.DictReader(f) + columns = reader.fieldnames + rows = list(reader) + if not columns: + return None, [] + return list(columns), rows + + +def _validate_and_extend_columns( + columns: Optional[list[str]], +) -> list[str]: + """Validate server manifest columns and append the result columns. + + Ensures the server-generated manifest has headers and does not already + contain the reserved "path" or "error" column names that are appended + to the output manifest. + + Arguments: + columns: Column names from the server manifest, or None if the + CSV had no headers. + + Raises: + SynapseError: If columns is None (empty manifest) or contains + reserved column names. + + Returns: + The original columns with "path" and "error" appended. + """ + if columns is None: + raise SynapseError( + "Manifest job succeeded but the downloaded CSV has no headers. " + "This is unexpected — the Synapse server may have returned an empty file." + ) + + if _PATH_COLUMN in columns or _ERROR_COLUMN in columns: + raise SynapseError( + "The downloaded manifest CSV contains reserved column names 'path' or 'error'. " + "This is unexpected and may indicate a malformed manifest from the server, " + "or Synapse has added these columns." + ) + + return list(columns) + [_PATH_COLUMN, _ERROR_COLUMN] + + +async def _download_all_manifest_files( + rows: list[dict[str, Any]], + download_location: Optional[str], + parallel: bool = False, + max_concurrent: int = 10, + *, + synapse_client: Optional["Synapse"] = None, +) -> list[DownloadListItem]: + """Download all files from the manifest, either sequentially or concurrently. + + Arguments: + rows: List of row dicts from the manifest. Each row is mutated in + place by _download_manifest_file to include "path" and + "error" values. + download_location: Directory to download files to. + parallel: If True, rows are downloaded concurrently (bounded by + max_concurrent) via asyncio.gather. If False, rows are + downloaded one at a time. + max_concurrent: Maximum number of concurrent downloads when + parallel=True. Defaults to 10. Must be at least 1. Has no + effect when parallel=False. + synapse_client: Optional Synapse client. + + Raises: + ValueError: If max_concurrent is less than 1. + + Returns: + List of DownloadListItem for each successfully downloaded file. + """ + if max_concurrent < 1: + raise ValueError(f"max_concurrent must be at least 1, got {max_concurrent}.") + if parallel: + # asyncio.gather schedules all coroutines immediately, so without a + # semaphore a large cart would fire hundreds of concurrent HTTP requests + # at once — risking rate-limiting from Synapse and exhausting local + # file-descriptor / memory limits. The semaphore lets all coroutines + # be created (preserving gather's result ordering) while ensuring that + # at most max_concurrent are actually running at any given time. + sem = asyncio.Semaphore(max_concurrent) + + async def bounded_download( + row: dict[str, Any], + ) -> Optional[DownloadListItem]: + async with sem: + return await _download_manifest_file( + row, + download_location=download_location, + synapse_client=synapse_client, + ) + + items = await asyncio.gather(*[bounded_download(row) for row in rows]) + return [item for item in items if item is not None] + else: + downloaded: list[DownloadListItem] = [] + for row in rows: + item = await _download_manifest_file( + row, + download_location=download_location, + synapse_client=synapse_client, + ) + if item is not None: + downloaded.append(item) + return downloaded + + +async def _download_manifest_file( + row: dict[str, Any], + download_location: Optional[str] = None, + *, + synapse_client: Optional["Synapse"] = None, +) -> Optional[DownloadListItem]: + """Download the file described by a manifest row and record the result in place. + + On success, sets row["path"] to the local file path and row["error"] + to "". On failure, sets row["path"] to "" and row["error"] to the + error message. Failures are logged but never raised, so one bad file + does not abort the entire batch. + + Arguments: + row: A manifest row dict. Must contain an "ID" key; "versionNumber" + is optional and defaults to the latest version when missing or + blank. Modified in place to add "path" and "error" entries. + download_location: Directory to download the file to. Defaults to + the Synapse cache location if None. + synapse_client: Optional Synapse client. Uses cached singleton if omitted. + + Returns: + A DownloadListItem on success, or None on failure. + """ + from synapseclient import Synapse + from synapseclient.models.file import File + + client = Synapse.get_client(synapse_client=synapse_client) + entity_id = row[_ID_COLUMN] + version_str = row.get(_VERSION_COLUMN) + version_number = int(version_str) if version_str else None + + try: + file = await File( + id=entity_id, + version_number=version_number, + path=download_location, + ).get_async(synapse_client=client) + row[_PATH_COLUMN] = file.path or "" + row[_ERROR_COLUMN] = "" + return DownloadListItem( + file_entity_id=entity_id, + version_number=version_number, + ) + except Exception as e: + row[_PATH_COLUMN] = "" + row[_ERROR_COLUMN] = str(e) + client.logger.exception(f"Unable to download {entity_id} v{version_number}") + return None + + +async def _save_result_manifest( + rows: list[dict[str, Any]], + columns: list[str], + download_location: Optional[str], +) -> str: + """Write the annotated rows to a new result manifest CSV and return its path. + + Arguments: + rows: List of row dicts, each mutated by _download_manifest_file to + include "path" and "error" values. + columns: Field names for the CSV header, including "path" and + "error". + download_location: Directory to write the manifest to. Defaults to + the current working directory if None. + + Returns: + Absolute path to the written manifest CSV. + """ + directory = download_location or "." + os.makedirs(directory, exist_ok=True) + path = os.path.join(directory, f"manifest_{time.time_ns()}.csv") + # Run the synchronous CSV write in a thread pool so it does not block + # the event loop. Blocking the event loop here would stall all other + # pending coroutines (network requests, timeouts, etc.) for the + # duration of the file write. + await asyncio.to_thread( + _write_result_manifest, + path=path, + columns=columns, + rows=rows, + ) + return path + + +def _write_result_manifest( + path: str, + columns: list[str], + rows: list[dict[str, Any]], +) -> None: + """Write the annotated result rows to the output manifest CSV. + + Intended to be called via asyncio.to_thread to avoid blocking the + event loop on synchronous file I/O. + + Arguments: + path: Destination path for the output manifest CSV. + columns: Field names for the CSV header, including "path" and + "error". + rows: List of row dicts, each mutated by _download_manifest_file to + include "path" and "error" values. + """ + with open(path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=columns, extrasaction="ignore") + writer.writeheader() + writer.writerows(rows) diff --git a/synapseclient/table.py b/synapseclient/table.py index 685070838..c78d51fb1 100644 --- a/synapseclient/table.py +++ b/synapseclient/table.py @@ -84,7 +84,7 @@ DEFAULT_QUOTE_CHARACTER = '"' DEFAULT_SEPARATOR = "," -DEFAULT_ESCAPSE_CHAR = "\\" +DEFAULT_ESCAPE_CHAR = "\\" # This Enum is used to help users determine which Entity types they want in their view @@ -338,6 +338,7 @@ def cast_row_set(rowset): return rowset +@deprecated(version="4.12.0", reason="To be removed in 5.0.0. ") def escape_column_name(column: Union[str, collections.abc.Mapping]) -> str: """ Escape the name of the given column for use in a Synapse table query statement @@ -355,6 +356,7 @@ def escape_column_name(column: Union[str, collections.abc.Mapping]) -> str: return f'"{escaped_name}"' +@deprecated(version="4.12.0", reason="To be removed in 5.0.0. ") def join_column_names(columns: Union[List, Dict[str, str]]): """ Join the names of the given columns into a comma delimited list suitable for use in a Synapse table query @@ -402,7 +404,7 @@ def _csv_to_pandas_df( filepath: str, separator: str = DEFAULT_SEPARATOR, quote_char: str = DEFAULT_QUOTE_CHARACTER, - escape_char: str = DEFAULT_ESCAPSE_CHAR, + escape_char: str = DEFAULT_ESCAPE_CHAR, contain_headers: bool = True, lines_to_skip: int = 0, date_columns: Optional[List[str]] = None, @@ -424,7 +426,7 @@ def _csv_to_pandas_df( Passed as `quotechar` to pandas. If `quotechar` is supplied as a `kwarg` it will be used instead of this `quote_char` argument. escape_char: The escape character for the file, - Defaults to `DEFAULT_ESCAPSE_CHAR`. + Defaults to `DEFAULT_ESCAPE_CHAR`. contain_headers: Whether the file contains headers, Defaults to `True`. lines_to_skip: The number of lines to skip at the beginning of the file, @@ -2498,7 +2500,7 @@ def __init__( filepath, etag=None, quoteCharacter=DEFAULT_QUOTE_CHARACTER, - escapeCharacter=DEFAULT_ESCAPSE_CHAR, + escapeCharacter=DEFAULT_ESCAPE_CHAR, lineEnd=str(os.linesep), separator=DEFAULT_SEPARATOR, header=True, diff --git a/synapseutils/CLAUDE.md b/synapseutils/CLAUDE.md new file mode 100644 index 000000000..12722a2a0 --- /dev/null +++ b/synapseutils/CLAUDE.md @@ -0,0 +1,34 @@ + + +## Project + +Legacy bulk utility functions for copy, sync, migrate, walk, describe, and monitor operations. Pre-OOP code using legacy `requests` HTTP and old-style Entity classes (not modern dataclass models). + +## Conventions + +### Naming convention +Functions use camelCase (legacy convention) — e.g., `syncFromSynapse()`, `copyFileHandles()`, `notifyMe()`. Do NOT convert to snake_case — this is the public API. + +### migrate_functions.py +Uses SQLite database for migration state persistence. `MigrationResult` proxy object iterates results without loading all into memory — avoids memory issues for repos with millions of files. Two-phase pattern: `index_files_for_migration()` then `migrate_indexed_files()`. Uses concurrent.futures thread pool with configurable part size (default 100 MB). + +### sync.py +`syncFromSynapse()` / `syncToSynapse()` for bulk folder transfer. Generates manifest files for tracking. Known issue: TODO at line 967 notes "absence of a raise here appears to be a bug and yet tests fail if this is raised" — `SynapseFileNotFoundError` handling may be incorrect. + +### copy_functions.py +`copyFileHandles()` batches by `MAX_FILE_HANDLE_PER_COPY_REQUEST`. Returns list with potential `failureCodes` (UNAUTHORIZED, NOT_FOUND). `copyWiki()` and `changeFileMetaData()` for metadata operations. + +### monitor.py +`notifyMe()` — decorator for sync functions that sends email notification on completion/failure. `notify_me_async()` — async variant. Both retry on failure with configurable retry count. Uses `syn.sendMessage()` with user's owner ID. + +### walk_functions.py +`walk()` — recursive entity tree traversal similar to `os.walk()`. Returns generator of (dirpath, dirnames, filenames) tuples. + +### describe_functions.py +Opens CSV/TSV entities as pandas DataFrames. Calculates per-column stats: mode, min/max (numeric), mean, dtype. + +## Constraints + +- ALL functions use legacy sync `requests` library, NOT httpx. Do NOT add async methods here — new async equivalents go in `synapseclient/models/` or `synapseclient/operations/`. +- Uses legacy Entity classes (`from synapseclient import Entity, File, Folder`) — NOT modern dataclass models. +- Do not refactor to modern patterns without a migration plan — these are public APIs with external consumers. diff --git a/synapseutils/migrate_functions.py b/synapseutils/migrate_functions.py index c0828ad9e..9fed381b9 100644 --- a/synapseutils/migrate_functions.py +++ b/synapseutils/migrate_functions.py @@ -348,8 +348,7 @@ def _ensure_schema(cursor): # our representation of migratable file handles is flat including both file entities # and table attached files, so not all columns are applicable to both. row id and col id # are only used by table attached files, for example. - cursor.execute( - """ + cursor.execute(""" create table if not exists migrations ( id text not null, type integer not null, @@ -368,8 +367,7 @@ def _ensure_schema(cursor): primary key (id, type, row_id, col_id, version) ) - """ - ) + """) # we get counts grouping on status cursor.execute("create index if not exists ix_status on migrations(status)") diff --git a/synapseutils/monitor.py b/synapseutils/monitor.py index ea62a6711..7728e9e3a 100644 --- a/synapseutils/monitor.py +++ b/synapseutils/monitor.py @@ -135,7 +135,7 @@ async def with_retry_and_messaging(*args, **kwargs): while attempt <= retries: try: output = await func(*args, **kwargs) - syn.sendMessage( + await syn.sendMessage_async( [destination], messageSubject, messageBody="Call to %s completed successfully!" @@ -146,7 +146,7 @@ async def with_retry_and_messaging(*args, **kwargs): syn.logger.exception( f"Encountered a temporary Failure during execution. Will retry {retries - attempt} more times." ) - syn.sendMessage( + await syn.sendMessage_async( [destination], messageSubject, messageBody=( diff --git a/synapseutils/sync.py b/synapseutils/sync.py index 6a6c21631..b8f5b5a74 100644 --- a/synapseutils/sync.py +++ b/synapseutils/sync.py @@ -78,6 +78,13 @@ COMMAS_OUTSIDE_DOUBLE_QUOTES_PATTERN = re.compile(r",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)") +@deprecated( + version="4.12.0", + reason=( + "To be removed in 5.0.0. Use StorableContainer.sync_from_synapse instead, " + "which generates a manifest.csv file interoperable with the Synapse UI download cart." + ), +) def syncFromSynapse( syn: Synapse, entity: Union[str, SynapseFile, SynapseProject, SynapseFolder], @@ -364,6 +371,12 @@ async def _sync( return root_entity +@deprecated( + version="4.13.0", + reason=( + "To be removed in 5.0.0. " "Moved to synapseclient.models.services.manifest.py" + ), +) class _SyncUploadItem(NamedTuple): """Represents a single file being uploaded. @@ -383,6 +396,12 @@ class _SyncUploadItem(NamedTuple): activity_description: str +@deprecated( + version="4.13.0", + reason=( + "To be removed in 5.0.0. " "Moved to synapseclient.models.services.manifest.py" + ), +) @dataclass class _SyncUploader: """ @@ -1056,12 +1075,12 @@ async def readManifestFile_async(syn: Synapse, manifestFile: str) -> DATA_FRAME_ df = pd.read_csv(manifestFile, sep="\t") if "synapseStore" not in df: df = df.assign(synapseStore=None) - df.loc[ - df["path"].apply(is_url), "synapseStore" - ] = False # override synapseStore values to False when path is a url - df.loc[ - df["synapseStore"].isnull(), "synapseStore" - ] = True # remaining unset values default to True + df.loc[df["path"].apply(is_url), "synapseStore"] = ( + False # override synapseStore values to False when path is a url + ) + df.loc[df["synapseStore"].isnull(), "synapseStore"] = ( + True # remaining unset values default to True + ) df.synapseStore = df.synapseStore.astype(bool) df = df.fillna("") @@ -1113,6 +1132,13 @@ async def readManifestFile_async(syn: Synapse, manifestFile: str) -> DATA_FRAME_ return df +@deprecated( + version="4.13.0", + reason=( + "To be removed in 5.0.0. Use Project.sync_to_synapse or " + "Folder.sync_to_synapse from synapseclient.models instead." + ), +) def syncToSynapse( syn: Synapse, manifestFile, @@ -1163,6 +1189,21 @@ def syncToSynapse( Returns: None + + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # import synapseutils + # synapseutils.syncToSynapse(syn, manifestFile="path/to/manifest.tsv") + + # New approach (RECOMMENDED) + from synapseclient.models import Project + + project = Project(id="syn1234") + project.sync_to_synapse(manifest_path="path/to/manifest.csv") + ``` """ wrap_async_to_sync( coroutine=syncToSynapse_async( @@ -1177,6 +1218,13 @@ def syncToSynapse( ) +@deprecated( + version="4.13.0", + reason=( + "To be removed in 5.0.0. Use Project.sync_to_synapse_async or " + "Folder.sync_to_synapse_async from synapseclient.models instead." + ), +) async def syncToSynapse_async( syn: Synapse, manifestFile, @@ -1186,7 +1234,70 @@ async def syncToSynapse_async( merge_existing_annotations: bool = True, associate_activity_to_new_version: bool = False, ) -> None: - """Async version of syncToSynapse.""" + """Synchronizes files specified in the manifest file to Synapse. + + !!! warning "Deprecated since 4.13.0" + Use + [Project.sync_to_synapse][synapseclient.models.mixins.StorableContainer.sync_to_synapse] + or + [Folder.sync_to_synapse][synapseclient.models.mixins.StorableContainer.sync_to_synapse] + instead. This function will be removed in v5.0.0. + + Given a file describing all of the uploads, this uploads the content to Synapse and + optionally notifies you via Synapse messaging (email) at specific intervals, on + errors and on completion. + + [Read more about the manifest file format](../../explanations/manifest_tsv/) + + There are a few conversions around annotations to call out here. + + ## Conversion of annotations from the TSV file to Python native objects + + The first annotation conversion is from the TSV file into a Python native object. For + example Pandas will read a TSV file and convert the string "True" into a boolean True, + however, Pandas will NOT convert our comma delimited and bracket wrapped list of + annotations into their Python native objects. This means that we need to do that + conversion here after splitting them apart. + + ## Conversion of Python native objects for the REST API + + The second annotation conversion occurs when we are taking the Python native objects + and converting them into a string that can be sent to the REST API. For example + the datetime objects which may have timezone information are converted to milliseconds + since epoch. + + Arguments: + syn: A Synapse object with user's login, e.g. syn = synapseclient.login() + manifestFile: A tsv file with file locations and metadata to be pushed to Synapse. + dryRun: Performs validation without uploading if set to True. + sendMessages: Sends out messages on completion if set to True. + retries: Number of retries to attempt if an error occurs. + merge_existing_annotations: If True, will merge the annotations in the manifest + file with the existing annotations on Synapse. If False, will overwrite the + existing annotations on Synapse with the annotations in the manifest file. + associate_activity_to_new_version: If True, and a version update occurs, the + existing activity in Synapse will be associated with the new version. The + exception is if you are specifying new values to be used/executed, it will + create a new activity for the new version of the entity. + + Returns: + None + + Example: Migration to new method +   + + ```python + # Old approach (DEPRECATED) + # import synapseutils + # await synapseutils.syncToSynapse_async(syn, manifestFile="path/to/manifest.tsv") + + # New approach (RECOMMENDED) + from synapseclient.models import Project + + project = Project(id="syn1234") + await project.sync_to_synapse_async(manifest_path="path/to/manifest.csv") + ``` + """ df = await readManifestFile_async(syn, manifestFile) sizes = [ @@ -1335,9 +1446,9 @@ def _build_annotations_for_file( if annotation_value is None or annotation_value == "": continue if isinstance(annotation_value, str): - file_annotations[ - annotation_key - ] = _convert_cell_in_manifest_to_python_types(cell=annotation_value) + file_annotations[annotation_key] = ( + _convert_cell_in_manifest_to_python_types(cell=annotation_value) + ) else: file_annotations[annotation_key] = annotation_value return file_annotations @@ -1450,11 +1561,28 @@ def _check_size_each_file(df): ) +@deprecated( + version="4.13.0", + reason=( + "To be removed in 5.0.0. Use Project.generate_sync_manifest or " + "Folder.generate_sync_manifest instead, which write a CSV manifest " + "compatible with the OOP Project.sync_to_synapse / " + "Folder.sync_to_synapse methods." + ), +) def generate_sync_manifest(syn, directory_path, parent_id, manifest_path) -> None: """Generate manifest for [syncToSynapse][synapseutils.sync.syncToSynapse] from a local directory. [Read more about the manifest file format](../../explanations/manifest_tsv/) + !!! warning "Deprecated since 4.13.0" + To be removed in 5.0.0. Use + [Project.generate_sync_manifest][synapseclient.models.mixins.StorableContainer.generate_sync_manifest] + or + [Folder.generate_sync_manifest][synapseclient.models.mixins.StorableContainer.generate_sync_manifest] + instead, which produce a CSV manifest (with a parentId column) that + works directly with Project.sync_to_synapse and Folder.sync_to_synapse. + Arguments: syn: A Synapse object with user's login, e.g. syn = synapseclient.login() directory_path: Path to local directory to be pushed to Synapse. diff --git a/tests/CLAUDE.md b/tests/CLAUDE.md new file mode 100644 index 000000000..39e0e459e --- /dev/null +++ b/tests/CLAUDE.md @@ -0,0 +1,63 @@ + + +## Project + +Test suite for the Synapse Python Client. Unit tests run without network access; integration tests hit the live Synapse API. + +## Conventions + +### Prefer async tests for new code +For new or significantly refactored tests, write async tests only and avoid adding new synchronous test modules. The `@async_to_sync` decorator is validated by a dedicated smoke test (`tests/integration/synapseclient/models/synchronous/test_sync_wrapper_smoke.py`). Legacy synchronous unit tests (under `tests/unit/synapseclient/`) still exist and are maintained, but should not be expanded. + +Use `pytest.mark.parametrize` when possible to merge similar tests into one test. + +### Unit tests (`tests/unit/`) +- `pytest-socket` blocks all network calls (unix sockets allowed on non-Windows for async event loop). On Windows, socket disabling is skipped entirely — tests still run but are not network-isolated. +- Session-scoped `syn` fixture: `Synapse(skip_checks=True, cache_client=False)` with silent logger +- Autouse `set_timezone` fixture forces `TZ=UTC` for deterministic timestamps +- Client caching disabled via `Synapse.allow_client_caching(False)` +- Use `AsyncMock` for async method mocking, `create_autospec` for type-safe mocks +- Class-based test organization with `@pytest.fixture(scope="function", autouse=True)` for setup +- Test file naming: `unit_test_*.py` (legacy) or `test_*.py` (newer) — both patterns are discovered by pytest +- Mock isolation: when mocking fixture/instance attributes (e.g., `self.syn.rest_post_async`), always wrap in `patch.object()` context managers instead of direct assignment. This prevents the mock from leaking to other tests: `with patch.object(self.syn, "method_name", new_callable=AsyncMock, return_value=...):`. Direct assignment leaves the mock in place after the test, polluting subsequent tests in the class. + +### Integration tests (`tests/integration/`) +- All async tests share one event loop: `asyncio_default_fixture_loop_scope = session` +- `schedule_for_cleanup(item)` — defer entity/file cleanup to session teardown. Always use this instead of inline deletion. Cleanup list is reversed before execution for dependency ordering (children deleted before parents). +- Use shared resources when possible via fixtures in `conftest.py` files (e.g., `project_model`, `project`). Refer to existing integration tests for the pattern. +- Per-worker project fixtures (`project_model`, `project`) created during session setup +- `--reruns 3` for flaky retry, `-n 8 --dist loadscope` for parallelism +- OpenTelemetry tracing opt-in via `SYNAPSE_INTEGRATION_TEST_OTEL_ENABLED` env var +- Two client fixtures: `syn` (silent logger) and `syn_with_logger` (verbose) +- conftest.py locations: `tests/unit/conftest.py` (session client, socket blocking, UTC timezone), `tests/integration/conftest.py` (logged-in client, per-worker projects, cleanup fixture) + +### Test utilities +- `tests/test_utils.py`: `spy_for_async_function(original_func)` — wraps async function for pytest-mock spying while preserving async behavior. `spy_for_function(original_func)` — sync variant. +- `tests/integration/helpers.py`: `wait_for_condition(condition_fn, timeout_seconds=60)` — async polling helper with exponential backoff. Accepts sync or async condition functions. +- `tests/integration/__init__.py`: `QUERY_TIMEOUT_SEC = 600`, `ASYNC_JOB_TIMEOUT_SEC = 600` +- Test data generators in production code: `core/utils.py` has `make_bogus_data_file()`, `make_bogus_binary_file(n)`, `make_bogus_uuid_file()` + +### No `@pytest.mark.asyncio` needed +`asyncio_mode = auto` in pytest.ini — all async test functions are auto-detected. + +### Python 3.14+ limitation +Sync wrapper smoke tests are skipped on Python 3.14+ — `@async_to_sync` raises `RuntimeError` when an event loop is already active (pytest-asyncio runs one). Users on 3.14+ must call async methods directly. + +### Integration test fixture best practices +- Request fixtures explicitly in test function signatures — do not use `autouse=True` with `scope="function"` to create Synapse resources, as this creates them for every test in the class even when not needed. +- Use `autouse=True` only for side-effect fixtures (e.g., timezone setup) or at class/module scope when truly needed by all tests. +- Prefer getting `syn` and `schedule_for_cleanup` directly from conftest fixtures instead of assigning them via `self.syn` in an `init` fixture. +- Scope resource fixtures carefully: `scope="function"` ensures test isolation but increases API calls. Consider `scope="module"` or `scope="class"` for read-only resources, but be cautious — one test must not influence another. +- Clean up in the fixture itself rather than having each test call `schedule_for_cleanup` individually: + ```python + @pytest.fixture(scope="module") + def project(syn, schedule_for_cleanup): + project = Project(name=str(uuid.uuid4())).store(synapse_client=syn) + schedule_for_cleanup(project) + return project + ``` + +## Constraints + +- Unit tests must never make network calls — `pytest-socket` will fail them. Mock all HTTP interactions. +- Integration test cleanup is mandatory — use `schedule_for_cleanup()` for every created resource to avoid orphaned Synapse entities. diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 6a3c5adc3..9a139156c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -181,6 +181,11 @@ async def _cleanup(syn: Synapse, items): os.remove(item) except Exception as ex: print(ex) + else: + sys.stderr.write( + "Don't know how to clean: %s (type: %s)" + % (str(item), type(item).__name__) + ) elif isinstance( item, ( diff --git a/tests/integration/synapseclient/core/test_external_storage.py b/tests/integration/synapseclient/core/test_external_storage.py index c3a4c9c7f..6518b7a7b 100644 --- a/tests/integration/synapseclient/core/test_external_storage.py +++ b/tests/integration/synapseclient/core/test_external_storage.py @@ -267,6 +267,10 @@ async def test_set_external_storage_location(self) -> None: finally: self._teardown_bucket_location(key_prefix=folder_in_s3_to_cleanup) + @pytest.mark.skip( + reason="Server-side error: STS storage token endpoint is returning errors. " + "Re-enable once PLFM-9604 is resolved on the server side." + ) async def test_sts_external_storage_location(self) -> None: """Test creating and using an external STS storage location. A custom storage location is created with sts enabled, @@ -348,6 +352,10 @@ async def test_sts_external_storage_location(self) -> None: finally: self._teardown_bucket_location(key_prefix=folder_in_s3_to_cleanup) + @pytest.mark.skip( + reason="Server-side error: STS storage token endpoint is returning errors. " + "Re-enable once PLFM-9604 is resolved on the server side." + ) async def test_boto_upload_acl(self) -> None: """Verify when we store a Synapse object using boto we apply a bucket-owner-full-control ACL to the object""" @@ -423,12 +431,15 @@ async def test_external_object_store(self) -> None: ) = await self._configure_storage_location(external_object_store=True) try: - with mock.patch( - "synapseclient.core.upload.upload_functions_async._get_aws_credentials", - return_value=get_aws_env()[1], - ), mock.patch( - "synapseclient.core.download.download_functions._get_aws_credentials", - return_value=get_aws_env()[1], + with ( + mock.patch( + "synapseclient.core.upload.upload_functions_async._get_aws_credentials", + return_value=get_aws_env()[1], + ), + mock.patch( + "synapseclient.core.download.download_functions._get_aws_credentials", + return_value=get_aws_env()[1], + ), ): # WHEN we save a file to that location upload_file = utils.make_bogus_uuid_file() diff --git a/tests/integration/synapseclient/core/upload/test_multipart_upload.py b/tests/integration/synapseclient/core/upload/test_multipart_upload.py index 189471eff..c79a1271d 100644 --- a/tests/integration/synapseclient/core/upload/test_multipart_upload.py +++ b/tests/integration/synapseclient/core/upload/test_multipart_upload.py @@ -31,7 +31,7 @@ async def test_round_trip(syn: Synapse, project: Project, schedule_for_cleanup): # Download the file and compare it with the original junk = File(parent=project, dataFileHandleId=fhid) junk.properties.update(syn._createEntity(junk.properties)) - (_, tmp_path) = tempfile.mkstemp() + _, tmp_path = tempfile.mkstemp() schedule_for_cleanup(tmp_path) junk["path"] = await download_by_file_handle( @@ -107,7 +107,7 @@ def _put_chunk_or_fail_randomly(url, *args, **kwargs): # Download the file and compare it with the original junk = File(parent=project, dataFileHandleId=fhid) junk.properties.update(syn._createEntity(junk.properties)) - (tmp_f, tmp_path) = tempfile.mkstemp() + tmp_f, tmp_path = tempfile.mkstemp() schedule_for_cleanup(tmp_path) junk["path"] = await download_by_file_handle( @@ -181,7 +181,7 @@ async def test_multipart_upload_big_string( # Download the file and compare it with the original junk = File(parent=project, dataFileHandleId=fhid) junk.properties.update(syn._createEntity(junk.properties)) - (_, tmp_path) = tempfile.mkstemp() + _, tmp_path = tempfile.mkstemp() schedule_for_cleanup(tmp_path) junk["path"] = await download_by_file_handle( diff --git a/tests/integration/synapseclient/core/upload/test_multipart_upload_async.py b/tests/integration/synapseclient/core/upload/test_multipart_upload_async.py index 9a4351fd1..2270739d6 100644 --- a/tests/integration/synapseclient/core/upload/test_multipart_upload_async.py +++ b/tests/integration/synapseclient/core/upload/test_multipart_upload_async.py @@ -41,7 +41,7 @@ async def test_round_trip( parent_id=project_model.id, data_file_handle_id=file_handle_id ).store_async(synapse_client=syn) - (_, tmp_path) = tempfile.mkstemp() + _, tmp_path = tempfile.mkstemp() schedule_for_cleanup(tmp_path) # AND I download the file from Synapse @@ -128,7 +128,7 @@ def _put_chunk_or_fail_randomly(self, url, *args, **kwargs): parent_id=project_model.id, data_file_handle_id=file_handle_id ).store_async(synapse_client=syn) - (_, tmp_path) = tempfile.mkstemp() + _, tmp_path = tempfile.mkstemp() schedule_for_cleanup(tmp_path) # AND I download the file from Synapse @@ -215,7 +215,7 @@ async def test_multipart_upload_big_string( parent_id=project_model.id, data_file_handle_id=file_handle_id ).store_async(synapse_client=syn) - (_, tmp_path) = tempfile.mkstemp() + _, tmp_path = tempfile.mkstemp() schedule_for_cleanup(tmp_path) # AND I download the file from Synapse diff --git a/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py b/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py new file mode 100644 index 000000000..efc46981c --- /dev/null +++ b/tests/integration/synapseclient/extensions/curator/test_record_based_metadata_task.py @@ -0,0 +1,130 @@ +"""Integration tests for create_record_based_metadata_task.""" + +import uuid + +import pytest + +from synapseclient import Synapse +from synapseclient.extensions.curator.record_based_metadata_task import ( + create_record_based_metadata_task, +) +from synapseclient.models import Folder, JSONSchema, Project, SchemaOrganization + + +def _test_name() -> str: + random_string = "".join(c for c in str(uuid.uuid4()) if c.isalpha()) + return f"SYNPY.TEST.{random_string}" + + +@pytest.fixture(scope="module") +def patient_schema_uri(syn: Synapse, request: pytest.FixtureRequest) -> str: + """ + Create a SchemaOrganization and a Patient JSON schema for the module. + Returns the schema URI. + """ + org_name = _test_name() + schema_name = "test.schematic.Patient" + + org = SchemaOrganization(name=org_name) + org.store(synapse_client=syn) + + schema = JSONSchema(name=schema_name, organization_name=org_name) + schema_body = { + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": f"https://example.com/schema/{org_name}-{schema_name}.json", + "title": "Patient", + "type": "object", + "properties": { + "PatientID": {"type": "string"}, + "Sex": {"type": "string", "enum": ["Male", "Female", "Other"]}, + "Age": {"type": "integer", "minimum": 0}, + }, + "required": ["PatientID"], + } + schema.store(schema_body=schema_body, synapse_client=syn) + + def cleanup(): + for js in org.get_json_schemas(synapse_client=syn): + js.delete(synapse_client=syn) + org.delete(synapse_client=syn) + + request.addfinalizer(cleanup) + + return schema.uri + + +@pytest.fixture(scope="function") +def folder(syn: Synapse, project: Project, request: pytest.FixtureRequest) -> Folder: + """Create a Folder for the test and tear it down on completion. + + The finalizer unbinds any JSON schema from the folder before deletion so + that the module-scoped schema cleanup in patient_schema_uri can succeed + (the server refuses to delete a schema that is still bound to an entity). + """ + folder = Folder(name=_test_name(), parent_id=project.id).store(synapse_client=syn) + + def cleanup(): + folder.unbind_schema(synapse_client=syn) + folder.delete(synapse_client=syn) + + request.addfinalizer(cleanup) + + return folder + + +class TestCreateRecordBasedMetadataTask: + """Integration tests for create_record_based_metadata_task.""" + + def test_creates_single_record_set_version( + self, + syn: Synapse, + project: Project, + request: pytest.FixtureRequest, + patient_schema_uri: str, + folder: Folder, + ): + """ + The Grid created during bootstrap is initialized from the RecordSet's + CSV with no edits, so exporting that Grid back to the RecordSet (the + reported bug) writes the same content as a duplicate v2. + """ + test_name = _test_name() + upsert_keys = ["PatientID"] + instructions = "Contribute Patient data." + + record_set, curation_task, grid = create_record_based_metadata_task( + folder_id=folder.id, + record_set_name=test_name, + record_set_description=test_name, + curation_task_name=test_name, + upsert_keys=upsert_keys, + instructions=instructions, + schema_uri=patient_schema_uri, + synapse_client=syn, + ) + + def cleanup(): + curation_task.delete(synapse_client=syn) + grid.delete(synapse_client=syn) + record_set.unbind_schema(synapse_client=syn) + record_set.delete(synapse_client=syn) + + request.addfinalizer(cleanup) + + from synapseclient.models import RecordSet + + record_set = RecordSet(id=record_set.id).get(synapse_client=syn) + + assert grid.record_set_id == record_set.id + assert grid.grid_json_schema_id == patient_schema_uri + + assert record_set.upsert_keys == upsert_keys + assert record_set.version_number == 1 + assert record_set.parent_id == folder.id + assert record_set.name == test_name + assert record_set.description == test_name + + assert curation_task.data_type == test_name + assert curation_task.project_id == project.id + assert curation_task.instructions == instructions + assert curation_task.task_properties.record_set_id == record_set.id diff --git a/tests/integration/synapseclient/extensions/curator/test_schema_management.py b/tests/integration/synapseclient/extensions/curator/test_schema_management.py index e7b2cf0a1..ccbe1c31b 100644 --- a/tests/integration/synapseclient/extensions/curator/test_schema_management.py +++ b/tests/integration/synapseclient/extensions/curator/test_schema_management.py @@ -1,4 +1,5 @@ """Integration tests for schema management wrapper functions (register and bind)""" + import json import os import tempfile diff --git a/tests/integration/synapseclient/integration_test.py b/tests/integration/synapseclient/integration_test.py index 4bdce1d5f..9bfdb6680 100644 --- a/tests/integration/synapseclient/integration_test.py +++ b/tests/integration/synapseclient/integration_test.py @@ -360,17 +360,13 @@ def test_provenance(syn, project, schedule_for_cleanup): # Create a File Entity of Code fd, path = tempfile.mkstemp(suffix=".py") with os.fdopen(fd, "w") as f: - f.write( - utils.normalize_lines( - """ + f.write(utils.normalize_lines(""" ## Chris's fabulous random data generator ############################################################ import random random.seed(12345) data = [random.gauss(mu=0.0, sigma=1.0) for i in range(100)] - """ - ) - ) + """)) schedule_for_cleanup(path) code_entity = syn.store(File(path, parent=project["id"])) diff --git a/tests/integration/synapseclient/integration_test_Entity.py b/tests/integration/synapseclient/integration_test_Entity.py index 9ab0c9902..b6603f5d7 100644 --- a/tests/integration/synapseclient/integration_test_Entity.py +++ b/tests/integration/synapseclient/integration_test_Entity.py @@ -479,7 +479,7 @@ def test_store_activity( # Store another Entity with the same Activity entity = File( - "http://en.wikipedia.org/wiki/File:Nettlebed_cave.jpg", + "https://www.synapse.org/Portal/clear.cache.gif", name="Nettlebed Cave" + str(uuid.uuid4()), parent=project, synapseStore=False, @@ -510,8 +510,8 @@ def test_store_is_restricted_flag( # @skip("Skip integration tests for soon to be removed code") def test_external_file_handle(syn: Synapse, project: Project) -> None: - # Tests shouldn't have external dependencies, but this is a pretty picture of Singapore - singapore_url = "http://upload.wikimedia.org/wikipedia/commons/thumb/3/3e/1_singapore_city_skyline_dusk_panorama_2011.jpg/1280px-1_singapore_city_skyline_dusk_panorama_2011.jpg" # noqa + # Use a stable Sage-hosted asset to avoid external rate limiting (e.g. 429s) + singapore_url = "https://www.synapse.org/Portal/clear.cache.gif" singapore = File(singapore_url, parent=project, synapseStore=False) singapore = syn.store(singapore) @@ -535,10 +535,8 @@ def test_external_file_handle(syn: Synapse, project: Project) -> None: assert singapore.externalURL == singapore_url assert os.path.exists(singapore.path) - # Update external URL - singapore_2_url = ( - "https://upload.wikimedia.org/wikipedia/commons/a/a2/Singapore_Panorama_v2.jpg" - ) + # Update external URL (metadata only, not downloaded) + singapore_2_url = "https://www.synapse.org/Portal/clear.cache.gif?v=2" singapore.externalURL = singapore_2_url singapore = syn.store(singapore) s2 = syn.get(singapore, downloadFile=False) diff --git a/tests/integration/synapseclient/models/async/test_curation_async.py b/tests/integration/synapseclient/models/async/test_curation_async.py index 14111aa13..9fd72cfa4 100644 --- a/tests/integration/synapseclient/models/async/test_curation_async.py +++ b/tests/integration/synapseclient/models/async/test_curation_async.py @@ -12,17 +12,45 @@ from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.core.utils import make_bogus_uuid_file from synapseclient.models import ( - Column, - ColumnType, CurationTask, + CurationTaskStatus, EntityView, FileBasedMetadataTaskProperties, Folder, + Grid, + GridExecutionDetails, Project, RecordBasedMetadataTaskProperties, RecordSet, + TaskState, ViewTypeMask, ) +from synapseclient.models.table_components import Query +from tests.integration import ASYNC_JOB_TIMEOUT_SEC + + +@pytest.fixture(scope="function") +async def folder_with_view( + project_model: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], +) -> tuple[Folder, EntityView]: + """Create a folder with an associated EntityView for file-based testing.""" + folder = await Folder( + name=str(uuid.uuid4()), + parent_id=project_model.id, + ).store_async(synapse_client=syn) + schedule_for_cleanup(folder.id) + + entity_view = await EntityView( + name=str(uuid.uuid4()), + parent_id=project_model.id, + scope_ids=[folder.id], + view_type_mask=ViewTypeMask.FILE.value, + ).store_async(synapse_client=syn) + schedule_for_cleanup(entity_view.id) + + return folder, entity_view class TestCurationTaskStoreAsync: @@ -33,48 +61,6 @@ def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: self.syn = syn self.schedule_for_cleanup = schedule_for_cleanup - @pytest.fixture(scope="class") - async def folder_with_view( - self, - project_model: Project, - syn: Synapse, - schedule_for_cleanup: Callable[..., None], - ) -> tuple[Folder, EntityView]: - """Create a folder with an associated EntityView for file-based testing.""" - # Create a folder - folder = await Folder( - name=str(uuid.uuid4()), - parent_id=project_model.id, - ).store_async(synapse_client=syn) - schedule_for_cleanup(folder.id) - - # Create an EntityView for the folder - columns = [ - Column(name="id", column_type=ColumnType.ENTITYID), - Column(name="name", column_type=ColumnType.STRING, maximum_size=256), - Column(name="createdOn", column_type=ColumnType.DATE), - Column(name="createdBy", column_type=ColumnType.USERID), - Column(name="etag", column_type=ColumnType.STRING, maximum_size=64), - Column(name="type", column_type=ColumnType.STRING, maximum_size=64), - Column(name="parentId", column_type=ColumnType.ENTITYID), - Column(name="benefactorId", column_type=ColumnType.ENTITYID), - Column(name="projectId", column_type=ColumnType.ENTITYID), - Column(name="modifiedOn", column_type=ColumnType.DATE), - Column(name="modifiedBy", column_type=ColumnType.USERID), - Column(name="dataFileHandleId", column_type=ColumnType.FILEHANDLEID), - ] - - entity_view = await EntityView( - name=str(uuid.uuid4()), - parent_id=project_model.id, - scope_ids=[folder.id], - view_type_mask=ViewTypeMask.FILE.value, - columns=columns, - ).store_async(synapse_client=syn) - schedule_for_cleanup(entity_view.id) - - return folder, entity_view - @pytest.fixture(scope="class") async def record_set( self, @@ -262,48 +248,6 @@ def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: self.syn = syn self.schedule_for_cleanup = schedule_for_cleanup - @pytest.fixture(scope="class") - async def folder_with_view( - self, - project_model: Project, - syn: Synapse, - schedule_for_cleanup: Callable[..., None], - ) -> tuple[Folder, EntityView]: - """Create a folder with an associated EntityView for file-based testing.""" - # Create a folder - folder = await Folder( - name=str(uuid.uuid4()), - parent_id=project_model.id, - ).store_async(synapse_client=syn) - schedule_for_cleanup(folder.id) - - # Create required columns for the EntityView - columns = [ - Column(name="id", column_type=ColumnType.ENTITYID), - Column(name="name", column_type=ColumnType.STRING, maximum_size=256), - Column(name="createdOn", column_type=ColumnType.DATE), - Column(name="createdBy", column_type=ColumnType.USERID), - Column(name="etag", column_type=ColumnType.STRING, maximum_size=64), - Column(name="type", column_type=ColumnType.STRING, maximum_size=64), - Column(name="parentId", column_type=ColumnType.ENTITYID), - Column(name="benefactorId", column_type=ColumnType.ENTITYID), - Column(name="projectId", column_type=ColumnType.ENTITYID), - Column(name="modifiedOn", column_type=ColumnType.DATE), - Column(name="modifiedBy", column_type=ColumnType.USERID), - Column(name="dataFileHandleId", column_type=ColumnType.FILEHANDLEID), - ] - - entity_view = await EntityView( - name=str(uuid.uuid4()), - parent_id=project_model.id, - scope_ids=[folder.id], - view_type_mask=ViewTypeMask.FILE.value, - columns=columns, - ).store_async(synapse_client=syn) - schedule_for_cleanup(entity_view.id) - - return folder, entity_view - async def test_get_curation_task_async( self, project_model: Project, folder_with_view: tuple[Folder, EntityView] ) -> None: @@ -371,48 +315,6 @@ def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: self.syn = syn self.schedule_for_cleanup = schedule_for_cleanup - @pytest.fixture(scope="function") - async def folder_with_view( - self, - project_model: Project, - syn: Synapse, - schedule_for_cleanup: Callable[..., None], - ) -> tuple[Folder, EntityView]: - """Create a folder with an associated EntityView for file-based testing.""" - # Create a folder - folder = await Folder( - name=str(uuid.uuid4()), - parent_id=project_model.id, - ).store_async(synapse_client=syn) - schedule_for_cleanup(folder.id) - - # Create required columns for the EntityView - columns = [ - Column(name="id", column_type=ColumnType.ENTITYID), - Column(name="name", column_type=ColumnType.STRING, maximum_size=256), - Column(name="createdOn", column_type=ColumnType.DATE), - Column(name="createdBy", column_type=ColumnType.USERID), - Column(name="etag", column_type=ColumnType.STRING, maximum_size=64), - Column(name="type", column_type=ColumnType.STRING, maximum_size=64), - Column(name="parentId", column_type=ColumnType.ENTITYID), - Column(name="benefactorId", column_type=ColumnType.ENTITYID), - Column(name="projectId", column_type=ColumnType.ENTITYID), - Column(name="modifiedOn", column_type=ColumnType.DATE), - Column(name="modifiedBy", column_type=ColumnType.USERID), - Column(name="dataFileHandleId", column_type=ColumnType.FILEHANDLEID), - ] - - entity_view = await EntityView( - name=str(uuid.uuid4()), - parent_id=project_model.id, - scope_ids=[folder.id], - view_type_mask=ViewTypeMask.FILE.value, - columns=columns, - ).store_async(synapse_client=syn) - schedule_for_cleanup(entity_view.id) - - return folder, entity_view - @pytest.fixture(scope="function") async def folder_with_record_set( self, @@ -589,50 +491,43 @@ def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: self.syn = syn self.schedule_for_cleanup = schedule_for_cleanup - @pytest.fixture(scope="class") + @pytest.fixture(scope="function") + async def project_for_test( + self, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + ) -> Project: + """Create a fresh project per test so tasks from other tests don't appear in listings.""" + project = await Project(name=str(uuid.uuid4())).store_async(synapse_client=syn) + schedule_for_cleanup(project.id) + return project + + @pytest.fixture(scope="function") async def folder_with_view( self, - project_model: Project, + project_for_test: Project, syn: Synapse, schedule_for_cleanup: Callable[..., None], ) -> tuple[Folder, EntityView]: """Create a folder with an associated EntityView for file-based testing.""" - # Create a folder folder = await Folder( name=str(uuid.uuid4()), - parent_id=project_model.id, + parent_id=project_for_test.id, ).store_async(synapse_client=syn) schedule_for_cleanup(folder.id) - # Create required columns for the EntityView - columns = [ - Column(name="id", column_type=ColumnType.ENTITYID), - Column(name="name", column_type=ColumnType.STRING, maximum_size=256), - Column(name="createdOn", column_type=ColumnType.DATE), - Column(name="createdBy", column_type=ColumnType.USERID), - Column(name="etag", column_type=ColumnType.STRING, maximum_size=64), - Column(name="type", column_type=ColumnType.STRING, maximum_size=64), - Column(name="parentId", column_type=ColumnType.ENTITYID), - Column(name="benefactorId", column_type=ColumnType.ENTITYID), - Column(name="projectId", column_type=ColumnType.ENTITYID), - Column(name="modifiedOn", column_type=ColumnType.DATE), - Column(name="modifiedBy", column_type=ColumnType.USERID), - Column(name="dataFileHandleId", column_type=ColumnType.FILEHANDLEID), - ] - entity_view = await EntityView( name=str(uuid.uuid4()), - parent_id=project_model.id, + parent_id=project_for_test.id, scope_ids=[folder.id], view_type_mask=ViewTypeMask.FILE.value, - columns=columns, ).store_async(synapse_client=syn) schedule_for_cleanup(entity_view.id) return folder, entity_view async def test_list_curation_tasks_async( - self, project_model: Project, folder_with_view: tuple[Folder, EntityView] + self, project_for_test: Project, folder_with_view: tuple[Folder, EntityView] ) -> None: # GIVEN a project, folder, and entity view folder, entity_view = folder_with_view @@ -647,7 +542,7 @@ async def test_list_curation_tasks_async( ) task = await CurationTask( data_type=data_type, - project_id=project_model.id, + project_id=project_for_test.id, instructions=f"Instructions for task {i}", task_properties=task_properties, ).store_async(synapse_client=self.syn) @@ -656,12 +551,12 @@ async def test_list_curation_tasks_async( # WHEN I list all curation tasks for the project asynchronously listed_tasks = [] async for task in CurationTask.list_async( - project_id=project_model.id, synapse_client=self.syn + project_id=project_for_test.id, synapse_client=self.syn ): listed_tasks.append(task) - # THEN I should get all the created tasks - assert len(listed_tasks) >= 3 # There might be other tasks from other tests + # THEN I should get exactly the 3 created tasks (isolated project) + assert len(listed_tasks) == 3 # Check that our created tasks are in the list listed_task_ids = [task.task_id for task in listed_tasks] @@ -673,12 +568,11 @@ async def test_list_curation_tasks_async( # Verify the structure of retrieved tasks for task in listed_tasks: - if task.task_id in [t[1] for t in tasks_data]: - assert task.project_id == project_model.id - assert task.task_properties is not None - assert task.etag is not None - assert task.created_on is not None - assert task.created_by is not None + assert task.project_id == project_for_test.id + assert task.task_properties is not None + assert task.etag is not None + assert task.created_on is not None + assert task.created_by is not None async def test_list_empty_project_async(self) -> None: # GIVEN a project with no curation tasks @@ -696,3 +590,343 @@ async def test_list_empty_project_async(self) -> None: # THEN I should get an empty list assert len(listed_tasks) == 0 + + async def test_list_filters_async( + self, project_for_test: Project, folder_with_view: tuple[Folder, EntityView] + ) -> None: + # GIVEN a newly created curation task (default state is NOT_STARTED) + folder, entity_view = folder_with_view + data_type = f"test_data_type_{str(uuid.uuid4()).replace('-', '_')}" + task = await CurationTask( + data_type=data_type, + project_id=project_for_test.id, + instructions="Test instructions", + task_properties=FileBasedMetadataTaskProperties( + upload_folder_id=folder.id, + file_view_id=entity_view.id, + ), + ).store_async(synapse_client=self.syn) + + # WHEN I list tasks filtered to NOT_STARTED + listed_task_ids = [ + t.task_id + async for t in CurationTask.list_async( + project_id=project_for_test.id, + state_filter=[TaskState.NOT_STARTED], + synapse_client=self.syn, + ) + ] + + # THEN exactly 1 task should appear and it should be our task + assert len(listed_task_ids) == 1 + assert task.task_id in listed_task_ids + + # WHEN I list tasks filtered to COMPLETED + listed_task_ids = [ + t.task_id + async for t in CurationTask.list_async( + project_id=project_for_test.id, + state_filter=[TaskState.COMPLETED], + synapse_client=self.syn, + ) + ] + + # THEN no tasks should appear (the only task is NOT_STARTED) + assert len(listed_task_ids) == 0 + + +class TestCurationTaskStatusAsync: + """Tests for the CurationTask.get_status_async and CurationTask.update_status_async methods.""" + + @pytest.fixture(scope="function") + async def grid( + self, + syn: Synapse, + folder_with_view: tuple[Folder, EntityView], + request: pytest.FixtureRequest, + ) -> Grid: + """Create a Grid backed by the entity view; delete it after the test.""" + _, entity_view = folder_with_view + grid = await Grid( + initial_query=Query(sql=f"SELECT * FROM {entity_view.id}") + ).create_async(timeout=ASYNC_JOB_TIMEOUT_SEC, synapse_client=syn) + + def delete_grid() -> None: + grid.delete(synapse_client=syn) + + request.addfinalizer(delete_grid) + return grid + + async def test_get_and_update_curation_task_status_async( + self, + syn: Synapse, + project_model: Project, + folder_with_view: tuple[Folder, EntityView], + grid: Grid, + ) -> None: + # GIVEN a project, folder, and entity view + folder, entity_view = folder_with_view + + # AND a stored curation task + data_type = f"test_data_type_{str(uuid.uuid4()).replace('-', '_')}" + task_properties = FileBasedMetadataTaskProperties( + upload_folder_id=folder.id, + file_view_id=entity_view.id, + ) + stored_task = await CurationTask( + data_type=data_type, + project_id=project_model.id, + instructions="Test instructions for status flow.", + task_properties=task_properties, + ).store_async(synapse_client=syn) + + # WHEN I get the initial status of the task + initial_status = await stored_task.get_status_async(synapse_client=syn) + + # THEN it should be parsed into a CurationTaskStatus tied to this task + assert isinstance(initial_status, CurationTaskStatus) + assert initial_status.task_id == stored_task.task_id + assert initial_status.state == TaskState.NOT_STARTED + # AND it should not yet reference an active grid session + assert initial_status.execution_details is None + + # AND WHEN I modify the state to IN_PROGRESS, attach a GridExecutionDetails + # pointing to the active grid session, and store the status + initial_status.state = TaskState.IN_PROGRESS + initial_status.execution_details = GridExecutionDetails( + active_session_id=grid.session_id + ) + updated_status = await stored_task.update_status_async( + curation_task_status=initial_status, synapse_client=syn + ) + + # THEN the update response should reflect the new state and execution details + assert isinstance(updated_status, CurationTaskStatus) + assert updated_status.task_id == stored_task.task_id + assert updated_status.state == TaskState.IN_PROGRESS + assert isinstance(updated_status.execution_details, GridExecutionDetails) + assert updated_status.execution_details.active_session_id == grid.session_id + + # AND WHEN I get the status again + refetched_status = await stored_task.get_status_async(synapse_client=syn) + + # THEN the modification should have persisted on the server + assert refetched_status.task_id == stored_task.task_id + assert refetched_status.state == TaskState.IN_PROGRESS + assert isinstance(refetched_status.execution_details, GridExecutionDetails) + assert refetched_status.execution_details.active_session_id == grid.session_id + + +class TestCurationTaskCreateGridSessionAsync: + """Tests for the CurationTask.create_grid_session_async method.""" + + async def test_create_grid_session_async( + self, + syn: Synapse, + project_model: Project, + folder_with_view: tuple[Folder, EntityView], + request: pytest.FixtureRequest, + ) -> None: + # GIVEN a project, folder, and entity view + folder, entity_view = folder_with_view + + # AND a stored file-based curation task + data_type = f"test_data_type_{str(uuid.uuid4()).replace('-', '_')}" + task_properties = FileBasedMetadataTaskProperties( + upload_folder_id=folder.id, + file_view_id=entity_view.id, + ) + stored_task = await CurationTask( + data_type=data_type, + project_id=project_model.id, + instructions="Create a grid session for this task.", + task_properties=task_properties, + ).store_async(synapse_client=syn) + + # WHEN I create a grid session for the task asynchronously + grid = await stored_task.create_grid_session_async( + timeout=ASYNC_JOB_TIMEOUT_SEC, synapse_client=syn + ) + request.addfinalizer(lambda: grid.delete(synapse_client=syn)) + + # THEN a Grid is returned with a populated session_id + assert isinstance(grid, Grid) + assert grid.session_id is not None + + # AND the curation task status now references the new grid session + status = await stored_task.get_status_async(synapse_client=syn) + assert isinstance(status, CurationTaskStatus) + assert isinstance(status.execution_details, GridExecutionDetails) + assert status.execution_details.active_session_id == grid.session_id + + +class TestCurationTaskSetActiveGridSessionAsync: + """Tests for the CurationTask.set_active_grid_session_async method.""" + + @pytest.fixture(scope="function") + async def grid( + self, + syn: Synapse, + folder_with_view: tuple[Folder, EntityView], + request: pytest.FixtureRequest, + ) -> Grid: + """Create a Grid backed by the entity view; delete it after the test.""" + _, entity_view = folder_with_view + grid = await Grid( + initial_query=Query(sql=f"SELECT * FROM {entity_view.id}") + ).create_async(timeout=ASYNC_JOB_TIMEOUT_SEC, synapse_client=syn) + + def delete_grid() -> None: + grid.delete(synapse_client=syn) + + request.addfinalizer(delete_grid) + return grid + + async def test_set_active_grid_session_async( + self, + syn: Synapse, + project_model: Project, + folder_with_view: tuple[Folder, EntityView], + grid: Grid, + ) -> None: + # GIVEN a project, folder, and entity view + folder, entity_view = folder_with_view + + # AND a stored file-based curation task + data_type = f"test_data_type_{str(uuid.uuid4()).replace('-', '_')}" + task_properties = FileBasedMetadataTaskProperties( + upload_folder_id=folder.id, + file_view_id=entity_view.id, + ) + stored_task = await CurationTask( + data_type=data_type, + project_id=project_model.id, + instructions="Attach an existing grid session to this task.", + task_properties=task_properties, + ).store_async(synapse_client=syn) + + # AND the task's initial status has no execution details + initial_status = await stored_task.get_status_async(synapse_client=syn) + assert initial_status.execution_details is None + + # WHEN I attach the existing grid session to the task + updated_status = await stored_task.set_active_grid_session_async( + active_session_id=grid.session_id, synapse_client=syn + ) + + # THEN the returned status references the grid session + assert isinstance(updated_status, CurationTaskStatus) + assert updated_status.task_id == stored_task.task_id + assert isinstance(updated_status.execution_details, GridExecutionDetails) + assert updated_status.execution_details.active_session_id == grid.session_id + # AND the task state is not transitioned by this call + assert updated_status.state == initial_status.state + + # AND the change persists on the server + refetched_status = await stored_task.get_status_async(synapse_client=syn) + assert isinstance(refetched_status.execution_details, GridExecutionDetails) + assert refetched_status.execution_details.active_session_id == grid.session_id + + async def test_set_active_grid_session_async_replaces_existing_session( + self, + syn: Synapse, + project_model: Project, + folder_with_view: tuple[Folder, EntityView], + grid: Grid, + request: pytest.FixtureRequest, + ) -> None: + # GIVEN a project, folder, and entity view + folder, entity_view = folder_with_view + + # AND a stored file-based curation task that already has an active + # grid session linked via create_grid_session_async + data_type = f"test_data_type_{str(uuid.uuid4()).replace('-', '_')}" + task_properties = FileBasedMetadataTaskProperties( + upload_folder_id=folder.id, + file_view_id=entity_view.id, + ) + stored_task = await CurationTask( + data_type=data_type, + project_id=project_model.id, + instructions="Replace the active grid session on this task.", + task_properties=task_properties, + ).store_async(synapse_client=syn) + + original_grid = await stored_task.create_grid_session_async( + timeout=ASYNC_JOB_TIMEOUT_SEC, synapse_client=syn + ) + request.addfinalizer(lambda: original_grid.delete(synapse_client=syn)) + assert original_grid.session_id is not None + assert original_grid.session_id != grid.session_id + + # WHEN I point the task at a different existing grid session + updated_status = await stored_task.set_active_grid_session_async( + active_session_id=grid.session_id, synapse_client=syn + ) + + # THEN the status now references the new session, not the original + assert isinstance(updated_status.execution_details, GridExecutionDetails) + assert updated_status.execution_details.active_session_id == grid.session_id + + # AND the change persists on the server + refetched_status = await stored_task.get_status_async(synapse_client=syn) + assert refetched_status.execution_details.active_session_id == grid.session_id + + async def test_set_active_grid_session_async_validation_error( + self, syn: Synapse + ) -> None: + # GIVEN a CurationTask without a task_id + curation_task = CurationTask() + + # WHEN I try to set an active grid session + # THEN it should raise a ValueError from the underlying get_status call + with pytest.raises( + ValueError, match="task_id is required to get a CurationTask status" + ): + await curation_task.set_active_grid_session_async( + active_session_id="some-session-id", synapse_client=syn + ) + + +class TestCurationTaskSetTaskStateAsync: + """Test for the CurationTask.set_task_state_async method.""" + + async def test_set_task_state_async( + self, + syn: Synapse, + project_model: Project, + folder_with_view: tuple[Folder, EntityView], + ) -> None: + # GIVEN a project, folder, and entity view + folder, entity_view = folder_with_view + + # AND a stored file-based curation task + data_type = f"test_data_type_{str(uuid.uuid4()).replace('-', '_')}" + task_properties = FileBasedMetadataTaskProperties( + upload_folder_id=folder.id, + file_view_id=entity_view.id, + ) + stored_task = await CurationTask( + data_type=data_type, + project_id=project_model.id, + instructions="Set the task state on this curation task.", + task_properties=task_properties, + ).store_async(synapse_client=syn) + + # AND the task's status starts at NOT_STARTED + initial_status = await stored_task.get_status_async(synapse_client=syn) + assert initial_status.state == TaskState.NOT_STARTED + + # WHEN I transition the state to IN_PROGRESS + updated_status = await stored_task.set_task_state_async( + state=TaskState.IN_PROGRESS, synapse_client=syn + ) + + # THEN the returned status reflects the new state + assert isinstance(updated_status, CurationTaskStatus) + assert updated_status.task_id == stored_task.task_id + assert updated_status.state == TaskState.IN_PROGRESS + + # AND the change persists on the server + refetched_status = await stored_task.get_status_async(synapse_client=syn) + assert refetched_status.state == TaskState.IN_PROGRESS diff --git a/tests/integration/synapseclient/models/async/test_folder_async.py b/tests/integration/synapseclient/models/async/test_folder_async.py index fc6075594..067ceb989 100644 --- a/tests/integration/synapseclient/models/async/test_folder_async.py +++ b/tests/integration/synapseclient/models/async/test_folder_async.py @@ -1,8 +1,12 @@ """Integration tests for the synapseclient.models.Folder class.""" +import csv +import datetime import os +import tempfile import uuid from typing import Callable, List +from unittest.mock import patch import pytest @@ -10,6 +14,7 @@ from synapseclient.core import utils from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.models import ( + Activity, Column, ColumnType, Dataset, @@ -25,6 +30,7 @@ ViewTypeMask, VirtualTable, ) +from synapseclient.models.activity import UsedURL DESCRIPTION_FOLDER = "This is an example folder." DESCRIPTION_FILE = "This is an example file." @@ -811,3 +817,280 @@ async def test_walk_async_recursive_false(self, project_model: Project) -> None: assert hasattr(nondirs[0], "name") assert hasattr(nondirs[0], "id") assert hasattr(nondirs[0], "type") + + +class TestFolderManifestCSV: + """Integration tests for manifest CSV generation during sync_from_synapse_async.""" + + BOGUS_URL = "https://example.com" + + @pytest.fixture(autouse=True, scope="function") + def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: + self.syn = syn + self.schedule_for_cleanup = schedule_for_cleanup + + def create_file_instance(self) -> File: + filename = utils.make_bogus_uuid_file() + self.schedule_for_cleanup(filename) + return File( + path=filename, + content_type="text/plain", + ) + + async def test_manifest_all_creates_csv_per_directory( + self, project_model: Project + ) -> None: + # GIVEN a root folder with a file and a nested subfolder with its own file + root_folder = Folder(name=str(uuid.uuid4()), parent_id=project_model.id) + root_folder = await root_folder.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(root_folder.id) + + root_file = self.create_file_instance() + root_file.parent_id = root_folder.id + root_file = await root_file.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(root_file.id) + + sub_folder = Folder(name=str(uuid.uuid4()), parent_id=root_folder.id) + sub_folder = await sub_folder.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(sub_folder.id) + + sub_file = self.create_file_instance() + sub_file.parent_id = sub_folder.id + sub_file = await sub_file.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(sub_file.id) + + # WHEN I sync the root folder with manifest="all" + with tempfile.TemporaryDirectory() as tmpdir: + await root_folder.sync_from_synapse_async( + path=tmpdir, + manifest="all", + synapse_client=self.syn, + ) + + root_manifest = os.path.join(tmpdir, "manifest.csv") + sub_manifest = os.path.join(tmpdir, sub_folder.name, "manifest.csv") + + assert os.path.isfile(root_manifest) + assert os.path.isfile(sub_manifest) + + with open(root_manifest, newline="", encoding="utf8") as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 2 + rows_by_id = {row["ID"]: row for row in rows} + root_row = rows_by_id[root_file.id] + assert root_row["name"] == root_file.name + assert root_row["parentId"] == root_folder.id + sub_row = rows_by_id[sub_file.id] + assert sub_row["name"] == sub_file.name + assert sub_row["parentId"] == sub_folder.id + + with open(sub_manifest, newline="", encoding="utf8") as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 1 + sub_row = rows[0] + assert sub_row["name"] == sub_file.name + assert sub_row["parentId"] == sub_folder.id + + async def test_manifest_root_creates_csv_only_at_root( + self, project_model: Project + ) -> None: + # GIVEN a root folder with a file and a nested subfolder with its own file + root_folder = Folder(name=str(uuid.uuid4()), parent_id=project_model.id) + root_folder = await root_folder.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(root_folder.id) + + root_file = self.create_file_instance() + root_file.parent_id = root_folder.id + root_file = await root_file.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(root_file.id) + + sub_folder = Folder(name=str(uuid.uuid4()), parent_id=root_folder.id) + sub_folder = await sub_folder.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(sub_folder.id) + + sub_file = self.create_file_instance() + sub_file.parent_id = sub_folder.id + sub_file = await sub_file.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(sub_file.id) + + # WHEN I sync with manifest="root" + with tempfile.TemporaryDirectory() as tmpdir: + await root_folder.sync_from_synapse_async( + path=tmpdir, + manifest="root", + synapse_client=self.syn, + ) + + root_manifest = os.path.join(tmpdir, "manifest.csv") + sub_manifest = os.path.join(tmpdir, sub_folder.name, "manifest.csv") + + # THEN manifest.csv exists only at the root + assert os.path.isfile(root_manifest) + assert not os.path.isfile(sub_manifest) + with open(root_manifest, newline="", encoding="utf8") as f: + reader = csv.DictReader(f) + rows = list(reader) + assert len(rows) == 2 + rows_by_id = {row["ID"]: row for row in rows} + root_row = rows[0] + assert root_row["name"] == root_file.name + assert root_row["parentId"] == root_folder.id + sub_row = rows_by_id[sub_file.id] + assert sub_row["name"] == sub_file.name + assert sub_row["parentId"] == sub_folder.id + + async def test_manifest_suppress_creates_no_csv( + self, project_model: Project + ) -> None: + # GIVEN a folder with a file + folder = Folder(name=str(uuid.uuid4()), parent_id=project_model.id) + folder = await folder.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(folder.id) + + f = self.create_file_instance() + f.parent_id = folder.id + f = await f.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(f.id) + + # WHEN I sync with manifest="suppress" + with tempfile.TemporaryDirectory() as tmpdir: + await folder.sync_from_synapse_async( + path=tmpdir, + manifest="suppress", + synapse_client=self.syn, + ) + + # THEN no manifest.csv is created + assert not os.path.isfile(os.path.join(tmpdir, "manifest.csv")) + + async def test_manifest_includes_annotations(self, project_model: Project) -> None: + # GIVEN a file with mixed-type annotations + folder = Folder(name=str(uuid.uuid4()), parent_id=project_model.id) + folder = await folder.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(folder.id) + + f = self.create_file_instance() + f.parent_id = folder.id + f.annotations = { + "single_str": ["hello"], + "multi_str": ["a", "b", "c"], + "str_with_comma": ["hello,world", "plain text"], + "booleans": [True, False], + "integers": [1, 2, 3], + "floats": [1.0], + "datetimes": [ + datetime.datetime(2020, 1, 1, 0, 0, 0, 0, tzinfo=datetime.timezone.utc) + ], + } + f = await f.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(f.id) + + # WHEN I sync with manifest generation + with tempfile.TemporaryDirectory() as tmpdir: + await folder.sync_from_synapse_async( + path=tmpdir, + manifest="root", + synapse_client=self.syn, + ) + + manifest_path = os.path.join(tmpdir, "manifest.csv") + assert os.path.isfile(manifest_path) + + with open(manifest_path, newline="", encoding="utf8") as mf: + reader = csv.DictReader(mf) + rows = list(reader) + + # THEN annotation columns are present and correctly serialized + assert len(rows) == 1 + row = rows[0] + assert row["single_str"] == "hello" + assert row["multi_str"] == "[a,b,c]" + assert row["str_with_comma"] == '["hello,world",plain text]' + assert row["booleans"] == "[True,False]" + assert row["integers"] == "[1,2,3]" + assert row["floats"] == "1.0" + assert row["datetimes"] == "2020-01-01T00:00:00Z" + + async def test_manifest_includes_provenance(self, project_model: Project) -> None: + # GIVEN a file with activity (provenance) + folder = Folder(name=str(uuid.uuid4()), parent_id=project_model.id) + folder = await folder.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(folder.id) + + f = self.create_file_instance() + f.parent_id = folder.id + f.activity = Activity( + name="my_activity", + description="my_description", + used=[UsedURL(name="my_source", url=self.BOGUS_URL)], + ) + f = await f.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(f.id) + + # WHEN I sync with manifest generation and include_activity=True + with tempfile.TemporaryDirectory() as tmpdir: + await folder.sync_from_synapse_async( + path=tmpdir, + manifest="root", + include_activity=True, + synapse_client=self.syn, + ) + + manifest_path = os.path.join(tmpdir, "manifest.csv") + assert os.path.isfile(manifest_path) + + with open(manifest_path, newline="", encoding="utf8") as mf: + reader = csv.DictReader(mf) + rows = list(reader) + + # THEN provenance columns are populated + assert len(rows) == 1 + row = rows[0] + assert row["activityName"] == "my_activity" + assert row["activityDescription"] == "my_description" + assert row["used"] == "my_source" + + async def test_manifest_generation_logs_info_per_directory( + self, project_model: Project + ) -> None: + # GIVEN a root folder with a file and a nested subfolder with its own file + root_folder = Folder(name=str(uuid.uuid4()), parent_id=project_model.id) + root_folder = await root_folder.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(root_folder.id) + + root_file = self.create_file_instance() + root_file.parent_id = root_folder.id + root_file = await root_file.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(root_file.id) + + sub_folder = Folder(name=str(uuid.uuid4()), parent_id=root_folder.id) + sub_folder = await sub_folder.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(sub_folder.id) + + sub_file = self.create_file_instance() + sub_file.parent_id = sub_folder.id + sub_file = await sub_file.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(sub_file.id) + + with tempfile.TemporaryDirectory() as tmpdir: + root_manifest = os.path.join(tmpdir, "manifest.csv") + sub_manifest = os.path.join(tmpdir, sub_folder.name, "manifest.csv") + + with patch.object(self.syn.logger, "info") as mock_logger_info: + # WHEN I sync with manifest="all" + await root_folder.sync_from_synapse_async( + path=tmpdir, + manifest="all", + synapse_client=self.syn, + ) + + # THEN _write_manifest_data_csv logs one message per generated manifest + logged_messages = [call.args[0] for call in mock_logger_info.call_args_list] + assert ( + f"Manifest file {root_manifest} has been generated." in logged_messages + ) + assert ( + f"Manifest file {sub_manifest} has been generated." in logged_messages + ) diff --git a/tests/integration/synapseclient/models/async/test_form_async.py b/tests/integration/synapseclient/models/async/test_form_async.py index 00507eea4..9812ab56c 100644 --- a/tests/integration/synapseclient/models/async/test_form_async.py +++ b/tests/integration/synapseclient/models/async/test_form_async.py @@ -1,6 +1,7 @@ """ Integration tests for the synapseclient.models.Form class. """ + import tempfile import uuid from typing import Callable diff --git a/tests/integration/synapseclient/models/async/test_grid_async.py b/tests/integration/synapseclient/models/async/test_grid_async.py index cd16a0cf0..75b297333 100644 --- a/tests/integration/synapseclient/models/async/test_grid_async.py +++ b/tests/integration/synapseclient/models/async/test_grid_async.py @@ -9,8 +9,20 @@ import pytest from synapseclient import Synapse -from synapseclient.models import Grid, Project, RecordSet -from tests.integration import ASYNC_JOB_TIMEOUT_SEC +from synapseclient.core.utils import make_bogus_data_file +from synapseclient.models import ( + EntityView, + File, + Folder, + Grid, + Project, + RecordSet, + ViewTypeMask, + query_async, +) +from synapseclient.models.table_components import Query +from tests.integration import ASYNC_JOB_TIMEOUT_SEC, QUERY_TIMEOUT_SEC +from tests.integration.helpers import wait_for_condition class TestGridAsync: @@ -21,6 +33,31 @@ def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: self.syn = syn self.schedule_for_cleanup = schedule_for_cleanup + @pytest.fixture(scope="function") + async def entity_view( + self, + project_model: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + ) -> tuple[Folder, EntityView]: + """Create a folder with an associated EntityView for file-based testing.""" + # Create a folder + folder = await Folder( + name=str(uuid.uuid4()), + parent_id=project_model.id, + ).store_async(synapse_client=syn) + schedule_for_cleanup(folder.id) + + entity_view = await EntityView( + name=str(uuid.uuid4()), + parent_id=project_model.id, + scope_ids=[folder.id], + view_type_mask=ViewTypeMask.FILE.value, + ).store_async(synapse_client=syn) + schedule_for_cleanup(entity_view.id) + + return folder, entity_view + @pytest.fixture(scope="function") async def record_set_fixture(self, project_model: Project) -> RecordSet: """Create a RecordSet fixture for Grid testing.""" @@ -183,3 +220,155 @@ async def test_delete_grid_session_validation_error_async(self) -> None: match="session_id is required to delete a GridSession", ): await grid.delete_async(synapse_client=self.syn) + + async def test_synchronize_grid_async( + self, + entity_view: tuple[Folder, EntityView], + ) -> None: + folder, ev = entity_view + + # GIVEN: A Grid session created at T0 from an empty EntityView + query = Query(sql=f"SELECT * FROM {ev.id}") + grid = Grid(initial_query=query) + created_grid = await grid.create_async(synapse_client=self.syn) + + try: + # AND: A file uploaded into the scoped folder + bogus_file = make_bogus_data_file() + self.schedule_for_cleanup(bogus_file) + uploaded_file = await File( + path=bogus_file, + parent_id=folder.id, + ).store_async(synapse_client=self.syn) + self.schedule_for_cleanup(uploaded_file.id) + + # Wait for the EntityView to index the new file + async def file_indexed() -> bool: + df = await query_async( + query=f"SELECT id FROM {ev.id} WHERE id = '{uploaded_file.id}'", + include_row_id_and_row_version=False, + synapse_client=self.syn, + ) + return not df.empty + + await wait_for_condition( + condition_fn=file_indexed, + timeout_seconds=QUERY_TIMEOUT_SEC, + ) + + # WHEN: Synchronizing the same session + synced_grid = await created_grid.synchronize_async(synapse_client=self.syn) + + # THEN: The session ID is unchanged + assert synced_grid.session_id == created_grid.session_id + assert synced_grid.source_entity_id == ev.id + + # AND: The downloaded CSV reflects the newly uploaded file + dest = tempfile.mkdtemp() + self.schedule_for_cleanup(dest) + csv_path = await synced_grid.download_csv_async( + destination=dest, + timeout=ASYNC_JOB_TIMEOUT_SEC, + synapse_client=self.syn, + ) + df = pd.read_csv(csv_path) + assert uploaded_file.id in df["id"].tolist() + finally: + if created_grid.session_id: + await created_grid.delete_async(synapse_client=self.syn) + + async def test_import_csv_to_grid_session_async( + self, + record_set_fixture: RecordSet, + ) -> None: + """Test importing a CSV file into a grid session.""" + + # GIVEN: Create a grid session first + grid = Grid(record_set_id=record_set_fixture.id) + created_grid = None + try: + created_grid = await grid.create_async( + timeout=ASYNC_JOB_TIMEOUT_SEC, synapse_client=self.syn + ) + + assert created_grid.session_id is not None + + # AND a CSV file uploaded to Synapse + test_data = pd.DataFrame( + { + "id": [6, 7, 8, 9, 10], + "name": ["Alpha", "Beta", "Gamma", "Delta", "Epsilon"], + "value": [10.5, 20.3, 30.7, 40.1, 50.9], + "category": ["A", "B", "A", "C", "B"], + "active": [True, False, False, True, True], + } + ) + + # Create a temporary CSV file. + with tempfile.NamedTemporaryFile( + "w", suffix=".csv", delete=False + ) as temp_csv: + temp_csv_path = temp_csv.name + + test_data.to_csv(temp_csv_path, index=False) + self.schedule_for_cleanup(temp_csv_path) + + # WHEN: Importing the CSV into the grid session + imported_grid = await created_grid.import_csv_async( + path=temp_csv_path, + timeout=ASYNC_JOB_TIMEOUT_SEC, + synapse_client=self.syn, + ) + + # THEN: The import should complete and return the Grid with the same session + assert imported_grid.session_id == created_grid.session_id + + # WHEN: Exporting the grid back to the record set + exported_grid = await imported_grid.export_to_record_set_async( + timeout=ASYNC_JOB_TIMEOUT_SEC, synapse_client=self.syn + ) + + # THEN: The export should contain 10 total rows + # (5 from the original record set + 5 imported) + assert exported_grid.validation_summary_statistics is not None + assert ( + exported_grid.validation_summary_statistics.total_number_of_children + == 10 + ) + finally: + if created_grid is not None and created_grid.session_id: + await created_grid.delete_async(synapse_client=self.syn) + + async def test_download_csv_async(self, record_set_fixture: RecordSet) -> None: + # GIVEN: Create a grid session first + grid = Grid(record_set_id=record_set_fixture.id) + try: + created_grid = await grid.create_async( + timeout=ASYNC_JOB_TIMEOUT_SEC, synapse_client=self.syn + ) + + # WHEN: Downloading the grid results as CSV + temp_dir = tempfile.mkdtemp() + self.schedule_for_cleanup(temp_dir) + csv_path = await created_grid.download_csv_async( + synapse_client=self.syn, + timeout=ASYNC_JOB_TIMEOUT_SEC, + destination=temp_dir, + ) + + # THEN: The CSV content should be returned and match the original data + assert os.path.exists(csv_path) + df = pd.read_csv(csv_path) + expected_df = pd.DataFrame( + { + "id": [1, 2, 3, 4, 5], + "name": ["Alpha", "Beta", "Gamma", "Delta", "Epsilon"], + "value": [10.5, 20.3, 30.7, 40.1, 50.9], + "category": ["A", "B", "A", "C", "B"], + "active": [True, False, True, True, False], + } + ) + pd.testing.assert_frame_equal(df, expected_df, check_dtype=False) + finally: + if created_grid is not None and created_grid.session_id: + await created_grid.delete_async(synapse_client=self.syn) diff --git a/tests/integration/synapseclient/models/async/test_migration_async.py b/tests/integration/synapseclient/models/async/test_migration_async.py new file mode 100644 index 000000000..7f78cec8c --- /dev/null +++ b/tests/integration/synapseclient/models/async/test_migration_async.py @@ -0,0 +1,218 @@ +"""Integration tests for storage location migration using the StorageLocation model.""" + +import os +import tempfile +import uuid + +import pandas as pd +import pytest +import pytest_asyncio + +import synapseclient.core.utils as syn_utils +from synapseclient import Synapse +from synapseclient.api.file_services import get_file_handle_for_download_async +from synapseclient.core.upload import upload_file_handle +from synapseclient.models import ( + Column, + File, + FileHandle, + Folder, + Project, + StorageLocation, + StorageLocationType, + Table, +) + + +@pytest_asyncio.fixture(loop_scope="session", scope="session") +async def migration_storage_location(syn: Synapse) -> StorageLocation: + """Create a EXTERNAL_S3 storage location to migrate files into.""" + storage_location = await StorageLocation( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket="test-storage-location-python-client-us-east-1", + ).store_async(synapse_client=syn) + return storage_location + + +def _assert_storage_location(file_handles, storage_location_id): + for fh in file_handles: + assert fh.storage_location_id == storage_location_id + + +@pytest.mark.skipif( + os.getenv("GITHUB_ACTIONS") == "true", + reason="This test runs only locally, not in CI/CD environments.", +) +class TestMigrateProjectWithStorageLocation: + """Tests migrating a project's files to a storage location created via StorageLocation model.""" + + @pytest.fixture(autouse=True) + def setup_method( + self, + syn: Synapse, + schedule_for_cleanup, + migration_storage_location: StorageLocation, + project_model: Project, + ) -> None: + self.syn = syn + self.schedule_for_cleanup = schedule_for_cleanup + self.dest_storage_location_id = migration_storage_location.storage_location_id + self.project = project_model + + async def test_migrate_project(self) -> None: + """Test migrating a project's files and table file handles to a new EXTERNAL_S3 + storage location created via the StorageLocation model.""" + # Create files to migrate + file_0_path = syn_utils.make_bogus_uuid_file() + self.schedule_for_cleanup(file_0_path) + file_0 = File( + name=os.path.basename(file_0_path), + path=file_0_path, + parent_id=self.project.id, + ) + file_0_entity = await file_0.store_async(synapse_client=self.syn) + default_storage_location_id = file_0_entity.file_handle.storage_location_id + + folder_1 = await Folder( + parent_id=self.project.id, name=str(uuid.uuid4()) + ).store_async(synapse_client=self.syn) + + file_1_path = syn_utils.make_bogus_uuid_file() + self.schedule_for_cleanup(file_1_path) + file_1_entity = await File( + name=os.path.basename(file_1_path), path=file_1_path, parent_id=folder_1.id + ).store_async(synapse_client=self.syn) + + file_2_path = syn_utils.make_bogus_uuid_file() + self.schedule_for_cleanup(file_2_path) + file_2_entity = await File( + name=os.path.basename(file_2_path), path=file_2_path, parent_id=folder_1.id + ).store_async(synapse_client=self.syn) + + # file_3 shares the same file handle as file_1 + file_3_entity = File( + name=f"{os.path.basename(file_1_path)}_copy", + path=file_1_path, + parent_id=folder_1.id, + ) + file_3_entity.data_file_handle_id = file_1_entity.data_file_handle_id + file_3_entity = await file_3_entity.store_async(synapse_client=self.syn) + + # filehandles + file_0_fh = file_0_entity.file_handle + file_1_fh = file_1_entity.file_handle + file_2_fh = file_2_entity.file_handle + file_3_fh = file_3_entity.file_handle + + # Create a table with file handle columns + table_cols = [ + Column(name="file_col_1", column_type="FILEHANDLEID"), + Column(name="num", column_type="INTEGER"), + Column(name="file_col_2", column_type="FILEHANDLEID"), + ] + table_entity = await Table( + columns=table_cols, name=str(uuid.uuid4()), parent_id=self.project.id + ).store_async(synapse_client=self.syn) + fh_1_table = upload_file_handle( + syn=self.syn, + path=syn_utils.make_bogus_uuid_file(), + parent_entity=table_entity.id, + ) + fh_2_table = upload_file_handle( + syn=self.syn, + path=syn_utils.make_bogus_uuid_file(), + parent_entity=table_entity.id, + ) + fh_3_table = upload_file_handle( + syn=self.syn, + path=syn_utils.make_bogus_uuid_file(), + parent_entity=table_entity.id, + ) + fh_4_table = upload_file_handle( + syn=self.syn, + path=syn_utils.make_bogus_uuid_file(), + parent_entity=table_entity.id, + ) + df = pd.DataFrame( + { + "file_col_1": [fh_1_table["id"], fh_3_table["id"]], + "num": [1, 2], + "file_col_2": [fh_2_table["id"], fh_4_table["id"]], + } + ) + await table_entity.store_rows_async(values=df, synapse_client=self.syn) + db_path = tempfile.NamedTemporaryFile(delete=False).name + self.schedule_for_cleanup(db_path) + # GIVEN files indexed for migration to the new storage location + index_result = await self.project.index_files_for_migration_async( + dest_storage_location_id=self.dest_storage_location_id, + db_path=db_path, + include_table_files=True, + synapse_client=self.syn, + ) + counts_by_status = await index_result.get_counts_by_status_async() + assert counts_by_status["INDEXED"] == 8 + assert counts_by_status["ERRORED"] == 0 + assert counts_by_status["ALREADY_MIGRATED"] == 0 + # WHEN we migrate the indexed files + migration_result = await self.project.migrate_indexed_files_async( + db_path=db_path, + force=True, + synapse_client=self.syn, + ) + # AND migration status should show all as MIGRATED + counts_by_status = await migration_result.get_counts_by_status_async() + assert counts_by_status["INDEXED"] == 0 + assert counts_by_status["ERRORED"] == 0 + assert counts_by_status["MIGRATED"] == 8 + + file_0_updated = await File(id=file_0_entity.id).get_async( + synapse_client=self.syn + ) + file_1_updated = await File(id=file_1_entity.id).get_async( + synapse_client=self.syn + ) + file_2_updated = await File(id=file_2_entity.id).get_async( + synapse_client=self.syn + ) + file_3_updated = await File(id=file_3_entity.id).get_async( + synapse_client=self.syn + ) + file_handles = [ + file_0_updated.file_handle, + file_1_updated.file_handle, + file_2_updated.file_handle, + file_3_updated.file_handle, + ] + + # file handles for files should be updated + assert file_0_updated.file_handle != file_0_fh + assert file_1_updated.file_handle != file_1_fh + assert file_2_updated.file_handle != file_2_fh + assert file_3_updated.file_handle != file_3_fh + + table_id = table_entity.id + results = await table_entity.query_async( + "select file_col_1, file_col_2 from {}".format(table_id), + synapse_client=self.syn, + ) + # assert that the table file handles are updated + assert results.iloc[0]["file_col_1"] != fh_1_table["id"] + assert results.iloc[0]["file_col_2"] != fh_2_table["id"] + assert results.iloc[1]["file_col_1"] != fh_3_table["id"] + assert results.iloc[1]["file_col_2"] != fh_4_table["id"] + + table_file_handles = [] + for _, row in results.iterrows(): + for file_handle_id in row[2:]: + response = await get_file_handle_for_download_async( + file_handle_id=file_handle_id, + synapse_id=table_id, + entity_type="TableEntity", + synapse_client=self.syn, + ) + file_handle = FileHandle().fill_from_dict(response["fileHandle"]) + table_file_handles.append(file_handle) + file_handles.extend(table_file_handles) + # THEN all file handles should be migrated to the new storage location + _assert_storage_location(file_handles, self.dest_storage_location_id) diff --git a/tests/integration/synapseclient/models/async/test_schema_organization_async.py b/tests/integration/synapseclient/models/async/test_schema_organization_async.py index f5ec1ba66..f7461c732 100644 --- a/tests/integration/synapseclient/models/async/test_schema_organization_async.py +++ b/tests/integration/synapseclient/models/async/test_schema_organization_async.py @@ -1,4 +1,5 @@ """Integration tests for SchemaOrganization and JSONSchema classes""" + import asyncio import uuid from typing import Any, Optional diff --git a/tests/integration/synapseclient/models/async/test_storable_container_async.py b/tests/integration/synapseclient/models/async/test_storable_container_async.py new file mode 100644 index 000000000..cd3adac64 --- /dev/null +++ b/tests/integration/synapseclient/models/async/test_storable_container_async.py @@ -0,0 +1,570 @@ +"""Integration tests for StorableContainer""" + +import csv +import os +import platform +import uuid +from pathlib import Path +from typing import Callable + +import pandas as pd +import pytest +import pytest_asyncio + +import synapseclient.core.utils as utils +from synapseclient import Synapse +from synapseclient.core.exceptions import SynapseHTTPError +from synapseclient.models import File, Folder, Project +from synapseclient.models.activity import UsedURL + +BOGUS_URL = "https://www.synapse.org/" + + +def _write_manifest(rows: list[dict], tmp_path: Path) -> Path: + """Write a minimal CSV manifest to a unique path under *tmp_path*. + + Returns: + Path to the written manifest file. + """ + path = tmp_path / f"{uuid.uuid4()}_manifest.csv" + if not rows: + return path + fieldnames = list(rows[0].keys()) + with open(path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter( + f, fieldnames=fieldnames, restval="", extrasaction="ignore" + ) + writer.writeheader() + writer.writerows(rows) + return path + + +def _create_local_test_file(content: str, tmp_path: Path) -> Path: + """Write content to a unique file under *tmp_path*. + + Returns: + Path to the written file. + """ + path = tmp_path / f"{uuid.uuid4()}_local_test_file.txt" + path.write_text(content, encoding="utf-8") + return path + + +class TestSyncToSynapse: + """Integration tests for Project.sync_to_synapse / Folder.sync_to_synapse. + + Tests: + - Upload new files from a CSV manifest + - Annotation columns in the manifest are stored as file annotations + - Updating an existing file by ID creates a new version + - Provenance (used/executed) columns are recorded as activity + - dry_run=True validates without uploading + - Files can target a subfolder as parentId + - A non-container parentId (e.g. a File) raises ValueError + - Rows with a non-empty error column are skipped + """ + + @pytest.fixture(autouse=True, scope="function") + def init(self, syn: Synapse, schedule_for_cleanup: Callable[..., None]) -> None: + self.syn = syn + self.schedule_for_cleanup = schedule_for_cleanup + + async def _create_test_file(self, project: Project, **kwargs) -> File: + """Upload a small test file to Synapse and return the File model.""" + path = utils.make_bogus_uuid_file() + self.schedule_for_cleanup(path) + file = File( + parent_id=project.id, + path=path, + name=f"test_file_{uuid.uuid4()}", + **kwargs, + ) + await file.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(file.id) + return file + + async def test_upload_new_files_from_manifest( + self, project_model: Project, tmp_path: Path + ) -> None: + """Files listed in the manifest that don't yet exist in Synapse are created.""" + # GIVEN two local files and a manifest that points them at the project + file_a = _create_local_test_file("content of file A", tmp_path) + name_a = file_a.name + file_b = _create_local_test_file("content of file B", tmp_path) + name_b = file_b.name + + manifest_path = _write_manifest( + [ + { + "path": str(file_a), + "parentId": project_model.id, + "name": name_a, + }, + { + "path": str(file_b), + "parentId": project_model.id, + "name": name_b, + }, + ], + tmp_path, + ) + + # WHEN I sync to Synapse + uploaded_files = await project_model.sync_to_synapse_async( + manifest_path=str(manifest_path), + send_messages=False, + synapse_client=self.syn, + ) + + for f in uploaded_files: + self.schedule_for_cleanup(f.id) + + # THEN both files are returned and exist in Synapse + assert len(uploaded_files) == 2 + uploaded_names = {f.name for f in uploaded_files} + assert name_a in uploaded_names + assert name_b in uploaded_names + + for f in uploaded_files: + file_entity = await File(id=f.id).get_async(synapse_client=self.syn) + assert file_entity.id is not None + assert file_entity.parent_id == project_model.id + + async def test_annotations_written_to_synapse( + self, project_model: Project, tmp_path: Path + ) -> None: + """Annotation columns in the manifest are stored as file annotations.""" + # GIVEN a local file with annotation columns in the manifest + local_file = _create_local_test_file("annotated content", tmp_path) + + unique_name = f"annotated_{uuid.uuid4()}.txt" + manifest_path = _write_manifest( + [ + { + "path": str(local_file), + "parentId": project_model.id, + "name": unique_name, + "my_string": "hello", + "my_number": "42", + }, + ], + tmp_path, + ) + + # WHEN I sync to Synapse + uploaded_files = await project_model.sync_to_synapse_async( + manifest_path=str(manifest_path), + send_messages=False, + synapse_client=self.syn, + ) + + for f in uploaded_files: + self.schedule_for_cleanup(f.id) + + # THEN the file is returned and exists in Synapse + # AND the file exists with the correct annotations + assert len(uploaded_files) == 1 + uploaded_names = {f.name for f in uploaded_files} + assert unique_name in uploaded_names + + for f in uploaded_files: + file_entity = await File(id=f.id).get_async(synapse_client=self.syn) + assert file_entity.id is not None + assert file_entity.parent_id == project_model.id + assert file_entity.annotations.get("my_string") == ["hello"] + # "42" in the CSV is parsed by ast.literal_eval into the integer 42 + assert file_entity.annotations.get("my_number") == [42] + + async def test_update_existing_file_creates_new_version( + self, project_model: Project, tmp_path: Path + ) -> None: + """Referencing an existing file by ID in the manifest creates a new version.""" + # GIVEN a file already in Synapse + stored_file = await self._create_test_file( + project_model, annotations={"status": "original"} + ) + assert stored_file.version_number == 1 + + # WHEN I update the local file content and change the annotation in the manifest + updated_path = _create_local_test_file("updated content — v2", tmp_path) + + manifest_path = _write_manifest( + [ + { + "path": str(updated_path), + "parentId": project_model.id, + "ID": stored_file.id, + "name": stored_file.name, + "status": "updated", + }, + ], + tmp_path, + ) + + await project_model.sync_to_synapse_async( + manifest_path=str(manifest_path), + send_messages=False, + synapse_client=self.syn, + ) + + # THEN a new version exists with the updated annotation + refreshed = await File(id=stored_file.id).get_async(synapse_client=self.syn) + assert refreshed.version_number == 2 + assert refreshed.annotations.get("status") == ["updated"] + + async def test_provenance_recorded_from_manifest( + self, project_model: Project, tmp_path: Path + ) -> None: + """used and executed columns in the manifest are stored as provenance.""" + # GIVEN a local file with provenance entries in the manifest + local_file = _create_local_test_file("data", tmp_path) + + unique_name = f"prov_{uuid.uuid4()}.txt" + manifest_path = _write_manifest( + [ + { + "path": str(local_file), + "parentId": project_model.id, + "name": unique_name, + "used": BOGUS_URL, + "executed": BOGUS_URL, + "activityName": "my_activity", + "activityDescription": "test provenance", + }, + ], + tmp_path, + ) + + # WHEN I sync to Synapse + uploaded_files = await project_model.sync_to_synapse_async( + manifest_path=str(manifest_path), + send_messages=False, + synapse_client=self.syn, + ) + + for f in uploaded_files: + self.schedule_for_cleanup(f.id) + + # THEN the file is returned with the expected provenance + assert len(uploaded_files) == 1 + uploaded = uploaded_files[0] + assert uploaded.name == unique_name + + refreshed = await File(id=uploaded.id).get_async( + include_activity=True, synapse_client=self.syn + ) + assert refreshed.activity is not None + assert refreshed.activity.name == "my_activity" + assert refreshed.activity.description == "test provenance" + used_urls = [u.url for u in refreshed.activity.used if isinstance(u, UsedURL)] + assert BOGUS_URL in used_urls + + async def test_dry_run_does_not_upload( + self, project_model: Project, tmp_path: Path + ) -> None: + """dry_run=True validates the manifest but does not create any entities.""" + # GIVEN a unique file name we can look for after the dry run + local_file = _create_local_test_file("should not appear in Synapse", tmp_path) + unique_name = f"dry_run_{uuid.uuid4()}.txt" + + manifest_path = _write_manifest( + [ + { + "path": str(local_file), + "parentId": project_model.id, + "name": unique_name, + }, + ], + tmp_path, + ) + + # WHEN I sync with dry_run=True + uploaded_files = await project_model.sync_to_synapse_async( + manifest_path=str(manifest_path), + dry_run=True, + send_messages=False, + synapse_client=self.syn, + ) + + # THEN no files were uploaded + assert uploaded_files == [] + + async def test_upload_into_subfolder( + self, project_model: Project, tmp_path: Path + ) -> None: + """Files can be targeted at a Folder (not just the Project root).""" + # GIVEN a folder in the project + folder = await Folder( + name=f"sub_{uuid.uuid4()}", parent_id=project_model.id + ).store_async(synapse_client=self.syn) + self.schedule_for_cleanup(folder.id) + + local_file = _create_local_test_file("goes into the folder", tmp_path) + + manifest_path = _write_manifest( + [ + { + "path": str(local_file), + "parentId": folder.id, + "name": "in_folder.txt", + }, + ], + tmp_path, + ) + + # WHEN I sync to Synapse + uploaded_files = await project_model.sync_to_synapse_async( + manifest_path=str(manifest_path), + send_messages=False, + synapse_client=self.syn, + ) + + for f in uploaded_files: + self.schedule_for_cleanup(f.id) + + # THEN the file exists inside the folder, not at the project root + assert len(uploaded_files) == 1 + assert uploaded_files[0].name == "in_folder.txt" + + file_entity = await File(id=uploaded_files[0].id).get_async( + synapse_client=self.syn + ) + assert file_entity.parent_id == folder.id + + async def test_non_container_parent_id_raises( + self, project_model: Project, tmp_path: Path + ) -> None: + """A parentId pointing at a File (not a container) raises during validation.""" + # GIVEN a File entity stored in Synapse (not a container) + file_entity = await self._create_test_file(project_model) + + # AND a manifest that points another file at that File entity as parent + upload_file = _create_local_test_file("upload", tmp_path) + + manifest_path = _write_manifest( + [ + { + "path": str(upload_file), + "parentId": file_entity.id, + "name": "upload.txt", + } + ], + tmp_path, + ) + + # THEN sync_to_synapse should raise because the parent is not a container + with pytest.raises(ValueError, match="not a Folder or Project"): + await project_model.sync_to_synapse_async( + manifest_path=str(manifest_path), + send_messages=False, + synapse_client=self.syn, + ) + + async def test_error_column_rows_skipped( + self, project_model: Project, tmp_path: Path + ) -> None: + """Rows with a non-empty 'error' column (from get-download-list) are ignored.""" + # GIVEN a manifest where one row has an error and one is valid + good_file = _create_local_test_file("good content", tmp_path) + unique_name = f"error_skip_{uuid.uuid4()}.txt" + + manifest_path = _write_manifest( + [ + # Row with an error — should be skipped + { + "path": "/nonexistent/file.txt", + "parentId": project_model.id, + "name": "should_not_appear.txt", + "error": "Download failed", + }, + # Valid row — should be uploaded + { + "path": str(good_file), + "parentId": project_model.id, + "name": unique_name, + "error": "", + }, + ], + tmp_path, + ) + + # WHEN I sync to Synapse + uploaded_files = await project_model.sync_to_synapse_async( + manifest_path=str(manifest_path), + send_messages=False, + synapse_client=self.syn, + ) + + for f in uploaded_files: + self.schedule_for_cleanup(f.id) + + # THEN only the valid row was uploaded + assert len(uploaded_files) == 1 + assert uploaded_files[0].name == unique_name + + +class TestGenerateSyncManifest: + """Integration tests for StorableContainer.generate_sync_manifest_async + against the live Synapse API.""" + + @pytest_asyncio.fixture(loop_scope="session", scope="function") + async def scope_folder( + self, + syn: Synapse, + project_model: Project, + schedule_for_cleanup: Callable[..., None], + request: pytest.FixtureRequest, + ) -> Folder: + """A fresh Folder under the worker-scoped project per test, so + assertions can reference the folder's full child state without + interference from sibling tests. A Folder is cheaper to create than + a Project while providing equivalent isolation for these tests. + """ + folder = await Folder( + name=f"{request.node.name}_{uuid.uuid4()}", + parent_id=project_model.id, + ).store_async(synapse_client=syn) + schedule_for_cleanup(folder.id) + return folder + + async def test_flat_directory_uses_parent_id( + self, + syn: Synapse, + scope_folder: Folder, + tmp_path: Path, + ) -> None: + """Flat directories should produce a manifest where every file points + directly at the container's id, and empty files should be skipped. + No Synapse folders should be created when there are no subdirectories + to mirror. + """ + # GIVEN a flat directory of non-empty files plus one empty file + src = tmp_path / "flat" + src.mkdir() + (src / "a.txt").write_text("alpha") + (src / "b.txt").write_text("bravo") + (src / "empty.txt").write_text("") + manifest = tmp_path / "manifest.csv" + + # WHEN I generate a sync manifest on the scope folder + await scope_folder.generate_sync_manifest_async( + directory_path=str(src), + manifest_path=str(manifest), + synapse_client=syn, + ) + + # THEN the manifest only contains the non-empty files, all pointing + # at the scope folder as their parent, and paths are absolute + df = pd.read_csv(manifest) + assert list(df.columns) == ["path", "parentId"] + assert sorted(os.path.basename(p) for p in df["path"]) == ["a.txt", "b.txt"] + assert (df["parentId"] == scope_folder.id).all() + for path in df["path"]: + assert os.path.isabs(path) + + # AND no folders were created under the scope folder + await scope_folder.sync_from_synapse_async( + download_file=False, recursive=False, synapse_client=syn + ) + assert scope_folder.folders == [] + + async def test_nested_directory_creates_folders( + self, + syn: Synapse, + scope_folder: Folder, + tmp_path: Path, + ) -> None: + """Nested directory trees should create matching Synapse folders at + each level, and the manifest parentId for each file should be the ID + of the Synapse folder corresponding to the file's on-disk directory. + """ + # GIVEN a nested directory tree with sibling folders at the root and a + # deeper leaf folder + src = tmp_path / "root" + sibling_a = src / "sibling_a" + sibling_b = src / "sibling_b" + deep = sibling_a / "deep" + deep.mkdir(parents=True) + sibling_b.mkdir() + + (src / "root.txt").write_text("at root") + (sibling_a / "a.txt").write_text("sibling a") + (sibling_b / "b.txt").write_text("sibling b") + (deep / "deep.txt").write_text("deep file") + manifest = tmp_path / "manifest.csv" + + # WHEN I generate a sync manifest on the scope folder + await scope_folder.generate_sync_manifest_async( + directory_path=str(src), + manifest_path=str(manifest), + synapse_client=syn, + ) + + # THEN each manifest row's parentId identifies the Synapse folder that + # contains the file on disk + df = pd.read_csv(manifest) + by_basename = { + os.path.basename(p): pid for p, pid in zip(df["path"], df["parentId"]) + } + assert by_basename["root.txt"] == scope_folder.id + sibling_a_id = by_basename["a.txt"] + sibling_b_id = by_basename["b.txt"] + deep_id = by_basename["deep.txt"] + + for path in df["path"]: + assert os.path.isabs(path) + + # AND the Synapse tree matches the local layout + await scope_folder.sync_from_synapse_async( + download_file=False, recursive=True, synapse_client=syn + ) + top_level = {f.name: f for f in scope_folder.folders} + assert sorted(top_level) == ["sibling_a", "sibling_b"] + assert top_level["sibling_a"].id == sibling_a_id + assert top_level["sibling_b"].id == sibling_b_id + assert [(f.name, f.id) for f in top_level["sibling_a"].folders] == [ + ("deep", deep_id) + ] + assert top_level["sibling_b"].folders == [] + + async def test_existing_folders_are_reused( + self, + syn: Synapse, + scope_folder: Folder, + tmp_path: Path, + ) -> None: + """When a Synapse folder with a matching name already exists under + the container, the method should reuse its ID instead of creating a + new folder or raising a conflict. + """ + # GIVEN a folder that already exists in Synapse under the scope folder + folder_name = "preexisting" + existing = await Folder( + name=folder_name, parent_id=scope_folder.id + ).store_async(synapse_client=syn) + + # AND a local directory that mirrors that folder's name with a file inside + src = tmp_path / "root" + child = src / folder_name + child.mkdir(parents=True) + (child / "payload.txt").write_text("payload") + manifest = tmp_path / "manifest.csv" + + # WHEN I generate a sync manifest + await scope_folder.generate_sync_manifest_async( + directory_path=str(src), + manifest_path=str(manifest), + synapse_client=syn, + ) + + # THEN the manifest reuses the existing folder's Synapse ID + df = pd.read_csv(manifest) + assert len(df) == 1 + assert df["parentId"].iloc[0] == existing.id + + # AND the scope folder has exactly the one pre-existing child folder + await scope_folder.sync_from_synapse_async( + download_file=False, recursive=False, synapse_client=syn + ) + assert len(scope_folder.folders) == 1 + assert scope_folder.folders[0].id == existing.id diff --git a/tests/integration/synapseclient/models/async/test_storage_location_async.py b/tests/integration/synapseclient/models/async/test_storage_location_async.py new file mode 100644 index 000000000..6a93a224d --- /dev/null +++ b/tests/integration/synapseclient/models/async/test_storage_location_async.py @@ -0,0 +1,376 @@ +"""Integration tests for the synapseclient.models.StorageLocation class.""" + +from __future__ import annotations + +import importlib +import os +import uuid +from typing import Callable + +import boto3 +import pytest +from botocore.exceptions import ClientError + +try: + gcs_storage = importlib.import_module("google.cloud.storage") +except ImportError: + gcs_storage = None + +from synapseclient import Synapse +from synapseclient.core import utils as syn_utils +from synapseclient.models import ( + File, + Folder, + Project, + StorageLocation, + StorageLocationType, +) + +# External S3 bucket for integration tests +EXTERNAL_S3_BUCKET = "test-storage-location-python-client-us-east-1" + +# External GCS bucket for integration tests +EXTERNAL_GCS_BUCKET = "test_storage_location_dl" +GCS_PROJECT = "sagebio-integration-testing" + + +class TestSynapseS3StorageLocation: + """Integration tests for SYNAPSE_S3 storage location. + + These tests do not require external bucket credentials. + """ + + @pytest.fixture(autouse=True) + def setup_method(self, syn: Synapse) -> None: + self.syn = syn + + async def test_store_and_get_synapse_s3(self) -> None: + """Test creating and retrieving a SYNAPSE_S3 storage location.""" + storage = StorageLocation( + storage_type=StorageLocationType.SYNAPSE_S3, + description=f"Integration test SYNAPSE_S3 {uuid.uuid4()}", + ) + + stored = await storage.store_async(synapse_client=self.syn) + + assert stored.storage_location_id is not None + assert stored.storage_type == StorageLocationType.SYNAPSE_S3 + assert stored.etag is not None + assert stored.created_by is not None + + retrieved = await StorageLocation( + storage_location_id=stored.storage_location_id + ).get_async(synapse_client=self.syn) + + assert retrieved.storage_location_id == stored.storage_location_id + assert retrieved.storage_type == StorageLocationType.SYNAPSE_S3 + + async def test_store_synapse_s3_with_sts_enabled(self) -> None: + """Test creating a SYNAPSE_S3 storage location with STS enabled.""" + storage = StorageLocation( + storage_type=StorageLocationType.SYNAPSE_S3, + sts_enabled=True, + ) + + stored = await storage.store_async(synapse_client=self.syn) + + assert stored.storage_location_id is not None + assert stored.sts_enabled is True + + retrieved = await StorageLocation( + storage_location_id=stored.storage_location_id + ).get_async(synapse_client=self.syn) + + assert retrieved.sts_enabled is True + + async def test_store_is_idempotent(self) -> None: + """Test that storing the same SYNAPSE_S3 storage location twice returns the same ID.""" + description = f"Idempotent test {uuid.uuid4()}" + storage1 = StorageLocation( + storage_type=StorageLocationType.SYNAPSE_S3, + description=description, + ) + stored1 = await storage1.store_async(synapse_client=self.syn) + + storage2 = StorageLocation( + storage_type=StorageLocationType.SYNAPSE_S3, + description=description, + ) + stored2 = await storage2.store_async(synapse_client=self.syn) + + assert stored1.storage_location_id == stored2.storage_location_id + + +@pytest.mark.skipif( + os.getenv("GITHUB_ACTIONS") == "true", + reason="This test runs only locally, not in CI/CD environments.", +) +class TestExternalS3StorageLocation: + """Integration tests for EXTERNAL_S3 storage location. + + Requires environment variables: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + + Before each test, the bucket root object ``owner.txt`` is updated so it includes the + current Synapse ``owner_id`` as a line if not already present (shared bucket hygiene). + """ + + @pytest.fixture(autouse=True) + def setup_method(self, syn: Synapse, project_model: Project) -> None: + self.syn = syn + self.project = project_model + self._ensure_bucket_root_owner_txt_includes_owner_id() + + def _get_s3_client(self): + creds = { + "aws_access_key_id": os.environ["AWS_ACCESS_KEY_ID"], + "aws_secret_access_key": os.environ["AWS_SECRET_ACCESS_KEY"], + "aws_session_token": os.environ.get("AWS_SESSION_TOKEN"), + } + return boto3.client("s3", **creds) + + def _ensure_bucket_root_owner_txt_includes_owner_id(self) -> None: + """Ensure bucket root ``owner.txt`` lists this Synapse principal (one id per line).""" + s3 = self._get_s3_client() + key = "owner.txt" + owner_id = str(self.syn.credentials.owner_id) + try: + response = s3.get_object(Bucket=EXTERNAL_S3_BUCKET, Key=key) + text = response["Body"].read().decode("utf-8") + lines = [ln.strip() for ln in text.splitlines() if ln.strip()] + if owner_id in lines: + return + lines.append(owner_id) + body = "\n".join(lines) + "\n" + except ClientError as err: + code = err.response.get("Error", {}).get("Code", "") + if code not in ("NoSuchKey", "404"): + raise + body = owner_id + s3.put_object( + Bucket=EXTERNAL_S3_BUCKET, + Key=key, + Body=body.encode("utf-8"), + ) + + def _put_owner_txt(self, base_key: str) -> None: + """Upload owner.txt required by Synapse to validate bucket ownership.""" + s3 = self._get_s3_client() + s3.put_object( + Body=self.syn.credentials.owner_id, + Bucket=EXTERNAL_S3_BUCKET, + Key=f"{base_key}/owner.txt", + ) + + def _cleanup_prefix(self, base_key: str) -> None: + """Delete all objects under the given prefix from the S3 bucket.""" + s3 = self._get_s3_client() + response = s3.list_objects_v2(Bucket=EXTERNAL_S3_BUCKET, Prefix=f"{base_key}/") + if "Contents" in response: + s3.delete_objects( + Bucket=EXTERNAL_S3_BUCKET, + Delete={"Objects": [{"Key": o["Key"]} for o in response["Contents"]]}, + ) + + async def test_store_and_get_external_s3(self) -> None: + """Test creating and retrieving an EXTERNAL_S3 storage location.""" + base_key = str(uuid.uuid4()) + self._put_owner_txt(base_key) + try: + # GIVEN an EXTERNAL_S3 storage location + storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket=EXTERNAL_S3_BUCKET, + base_key=base_key, + description="Integration test EXTERNAL_S3", + ) + + # WHEN we store it + stored = await storage.store_async(synapse_client=self.syn) + # THEN it should have the correct fields + assert stored.storage_location_id is not None + assert stored.storage_type == StorageLocationType.EXTERNAL_S3 + assert stored.bucket == EXTERNAL_S3_BUCKET + assert stored.base_key == base_key + + # AND we can retrieve it by ID + retrieved = await StorageLocation( + storage_location_id=stored.storage_location_id + ).get_async(synapse_client=self.syn) + assert retrieved.storage_location_id == stored.storage_location_id + assert retrieved.storage_type == StorageLocationType.EXTERNAL_S3 + assert retrieved.bucket == EXTERNAL_S3_BUCKET + assert retrieved.base_key == base_key + finally: + self._cleanup_prefix(base_key) + + async def test_store_external_s3_and_upload_file(self) -> None: + """Test uploading a file to an EXTERNAL_S3 storage location via Synapse.""" + base_key = str(uuid.uuid4()) + self._put_owner_txt(base_key) + try: + # GIVEN an EXTERNAL_S3 storage location + stored_location = await StorageLocation( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket=EXTERNAL_S3_BUCKET, + base_key=base_key, + ).store_async(synapse_client=self.syn) + + # AND a folder associated with it + folder = await Folder( + name=str(uuid.uuid4()), + parent_id=self.project.id, + ).store_async(synapse_client=self.syn) + + self.syn.setStorageLocation( + entity=folder.id, + storage_location_id=stored_location.storage_location_id, + ) + + # WHEN we upload a file to the folder + upload_file = syn_utils.make_bogus_uuid_file() + with open(upload_file, "r", encoding="utf-8") as f: + file_contents = f.read() + + file = await File(path=upload_file, parent_id=folder.id).store_async( + synapse_client=self.syn + ) + + # THEN the file should be downloadable and its contents match + os.remove(upload_file) + file = await File(id=file.id).get_async(synapse_client=self.syn) + with open(file.path, "r", encoding="utf-8") as f: + assert f.read() == file_contents + finally: + self._cleanup_prefix(base_key) + + +@pytest.mark.skipif( + os.getenv("GITHUB_ACTIONS") == "true", + reason="This test runs only locally, not in CI/CD environments.", +) +class TestExternalGCSStorageLocation: + """Integration tests for EXTERNAL_GOOGLE_CLOUD storage location. + + Requires google-cloud-storage to be installed. + + Google Cloud auth (one of): + - Application Default Credentials: ``gcloud auth application-default login`` + - Or ``GOOGLE_APPLICATION_CREDENTIALS`` pointing at a service account JSON key + + The Storage client also needs a **project ID**. Set one of: + - ``GOOGLE_CLOUD_PROJECT`` or ``GCLOUD_PROJECT`` (recommended for tests), or + - ``gcloud config set project YOUR_PROJECT_ID`` so ADC resolves a default project, or + - Use a service account key JSON that includes ``project_id`` (often inferred automatically). + + Uses bucket: test_storage_location_dl. + + Before each test, the bucket root object ``owner.txt`` is updated so it includes the + current Synapse ``owner_id`` as a line if not already present (shared bucket hygiene). + """ + + @pytest.fixture(autouse=True) + def setup_method(self, syn: Synapse, project_model: Project) -> None: + self.syn = syn + self.project = project_model + self._ensure_bucket_root_owner_txt_includes_owner_id() + + def _gcs_client(self, project: str) -> gcs_storage.Client: + return gcs_storage.Client(project=project) + + def _ensure_bucket_root_owner_txt_includes_owner_id(self) -> None: + """Ensure bucket root ``owner.txt`` lists this Synapse principal (one id per line).""" + bucket = self._gcs_client(GCS_PROJECT).bucket(EXTERNAL_GCS_BUCKET) + blob = bucket.blob("owner.txt") + owner_id = str(self.syn.credentials.owner_id) + if blob.exists(): + text = blob.download_as_text(encoding="utf-8") + lines = [ln.strip() for ln in text.splitlines() if ln.strip()] + if owner_id in lines: + return + lines.append(owner_id) + blob.upload_from_string("\n".join(lines) + "\n") + else: + blob.upload_from_string(owner_id) + + def _put_owner_txt(self, base_key: str) -> None: + """Upload owner.txt to GCS at the base key path for Synapse bucket validation.""" + bucket = self._gcs_client(GCS_PROJECT).bucket(EXTERNAL_GCS_BUCKET) + blob = bucket.blob(f"{base_key}/owner.txt") + blob.upload_from_string(self.syn.credentials.owner_id) + + def _cleanup_prefix(self, base_key: str) -> None: + bucket = self._gcs_client(GCS_PROJECT).bucket(EXTERNAL_GCS_BUCKET) + blobs = bucket.list_blobs(prefix=f"{base_key}/") + for blob in blobs: + blob.delete() + + async def test_store_and_get_external_gcs(self) -> None: + """Test creating and retrieving an EXTERNAL_GOOGLE_CLOUD storage location.""" + base_key = str(uuid.uuid4()) + self._put_owner_txt(base_key) + try: + storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + bucket=EXTERNAL_GCS_BUCKET, + base_key=base_key, + description="Integration test EXTERNAL_GOOGLE_CLOUD", + ) + + stored = await storage.store_async(synapse_client=self.syn) + + assert stored.storage_location_id is not None + assert stored.storage_type == StorageLocationType.EXTERNAL_GOOGLE_CLOUD + assert stored.bucket == EXTERNAL_GCS_BUCKET + assert stored.base_key == base_key + + retrieved = await StorageLocation( + storage_location_id=stored.storage_location_id + ).get_async(synapse_client=self.syn) + + assert retrieved.storage_location_id == stored.storage_location_id + assert retrieved.storage_type == StorageLocationType.EXTERNAL_GOOGLE_CLOUD + assert retrieved.bucket == EXTERNAL_GCS_BUCKET + assert retrieved.base_key == base_key + finally: + self._cleanup_prefix(base_key) + + async def test_store_external_gcs_and_upload_file(self) -> None: + """Test uploading a file to an EXTERNAL_GOOGLE_CLOUD storage location via Synapse.""" + base_key = str(uuid.uuid4()) + self._put_owner_txt(base_key) + try: + storage = StorageLocation( + storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + bucket=EXTERNAL_GCS_BUCKET, + base_key=base_key, + ) + stored = await storage.store_async(synapse_client=self.syn) + assert stored.storage_location_id is not None + + folder = await Folder( + name=str(uuid.uuid4()), + parent_id=self.project.id, + ).store_async(synapse_client=self.syn) + + self.syn.setStorageLocation( + entity=folder.id, + storage_location_id=stored.storage_location_id, + ) + + upload_file = syn_utils.make_bogus_uuid_file() + with open(upload_file, "r", encoding="utf-8") as f: + file_contents = f.read() + + file = await File(path=upload_file, parent_id=folder.id).store_async( + synapse_client=self.syn + ) + + os.remove(upload_file) + file = await File(id=file.id).get_async(synapse_client=self.syn) + with open(file.path, "r", encoding="utf-8") as f: + assert f.read() == file_contents + finally: + self._cleanup_prefix(base_key) diff --git a/tests/integration/synapseclient/models/async/test_submission_bundle_async.py b/tests/integration/synapseclient/models/async/test_submission_bundle_async.py index b186d4ea6..cd41f4717 100644 --- a/tests/integration/synapseclient/models/async/test_submission_bundle_async.py +++ b/tests/integration/synapseclient/models/async/test_submission_bundle_async.py @@ -184,7 +184,9 @@ async def test_get_evaluation_submission_bundles_with_status_filter_async( # WHEN I attempt to get submission bundles with an invalid status with pytest.raises(SynapseHTTPError) as exc_info: bundles = [] - async for bundle in SubmissionBundle.get_evaluation_submission_bundles_async( + async for ( + bundle + ) in SubmissionBundle.get_evaluation_submission_bundles_async( evaluation_id=test_evaluation.id, status="NONEXISTENT_STATUS", synapse_client=self.syn, @@ -233,7 +235,9 @@ async def test_get_evaluation_submission_bundles_invalid_evaluation_async(self): # WHEN I try to get submission bundles for a non-existent evaluation with pytest.raises(SynapseHTTPError) as exc_info: bundles = [] - async for bundle in SubmissionBundle.get_evaluation_submission_bundles_async( + async for ( + bundle + ) in SubmissionBundle.get_evaluation_submission_bundles_async( evaluation_id="syn999999999999", synapse_client=self.syn, ): diff --git a/tests/integration/synapseclient/models/async/test_table_async.py b/tests/integration/synapseclient/models/async/test_table_async.py index db88116ac..3dbb82bb7 100644 --- a/tests/integration/synapseclient/models/async/test_table_async.py +++ b/tests/integration/synapseclient/models/async/test_table_async.py @@ -10,6 +10,7 @@ import pandas as pd import pytest +from pandas.api.types import is_object_dtype from pytest_mock import MockerFixture import synapseclient.models.mixins.asynchronous_job as asynchronous_job_module @@ -351,6 +352,8 @@ async def test_store_rows_from_csv_infer_columns( "float_string": [1.1, 2.2, 3.3, None], } ) + data_for_table = data_for_table.convert_dtypes() + data_for_table = data_for_table.replace({pd.NA: None}) filepath = f"{tempfile.mkdtemp()}/upload_{uuid.uuid4()}.csv" self.schedule_for_cleanup(filepath) data_for_table.to_csv(filepath, index=False, float_format="%.12g") @@ -512,6 +515,8 @@ async def test_store_rows_from_manually_defined_columns( "float_column": [1.1, 2.2, 3.3, None], } ) + data_for_table = data_for_table.convert_dtypes() + data_for_table = data_for_table.replace({pd.NA: None}) filepath = f"{tempfile.mkdtemp()}/upload_{uuid.uuid4()}.csv" self.schedule_for_cleanup(filepath) data_for_table.to_csv(filepath, index=False, float_format="%.12g") @@ -977,6 +982,179 @@ async def test_store_rows_as_large_df_being_split_and_uploaded( # AND The spy should have been called in multiple batches assert spy_send_job.call_count == 1 + async def test_store_rows_with_quotes_and_apostrophes_ellipses( + self, project_model: Project + ) -> None: + """Test columns with quotes, apostrophes, and ellipses (in lists, dicts, and standalone) in values are properly stored and retrieved in the tables""" + # GIVEN a table with a JSON column + table_name = str(uuid.uuid4()) + table = Table( + name=table_name, + parent_id=project_model.id, + columns=[ + Column(name="id", column_type=ColumnType.INTEGER), + Column(name="json_data", column_type=ColumnType.JSON), + Column( + name="string_list_with_ellipses", column_type=ColumnType.STRING_LIST + ), + Column(name="string_col_with_ellipses", column_type=ColumnType.STRING), + Column(name="int_list_with_pa_na", column_type=ColumnType.INTEGER_LIST), + Column(name="nullable_int", column_type=ColumnType.INTEGER), + Column(name="nullable_float", column_type=ColumnType.DOUBLE), + ], + ) + table = await table.store_async(synapse_client=self.syn) + self.schedule_for_cleanup(table.id) + + # AND data with quotes in JSON values + data_for_table = pd.DataFrame( + { + "id": [1, 2, 3, 4, 5, 6, 7], + "json_data": [ + {"description": 'Text with "quotes" here', "value": 100}, + { + "description": 'Multiple "quoted" "words" here', + "value": 300, + }, + { + "description": ..., + "value": 200, + }, # standalone ellipses in the json value + { + "description": [1, 2, ...], + "value": 400, + }, # list with ellipses in the json value + { + "description": {"inner": ...}, + "value": 500, + }, # dict with ellipses in the json value + { + "description": "single apostrophe's", + "author": "D'Angelo", + }, # single apostrophe in the json value + { + "description": "Multiple's apostrophe's", + "author": "McDonald's", + }, # multiple apostrophe's in the json value + ], + "string_list_with_ellipses": [ + ["a", "b", ...], + ["d", ..., "f"], + ["g", "h", "i"], + [...], + ["m", "n", "..."], + ["p", "q", "r"], + ["s", "t", "u"], + ], + "string_col_with_ellipses": [ + "value1", + ..., + "value3", + ..., + "value6", + ..., + "value8", + ], + "int_list_with_pa_na": [ + [1, 2, 3], + pd.NA, + [7, 8, 9], + pd.NA, + [11, 12, 13], + pd.NA, + [15, 16, 17], + ], + "nullable_int": pd.array([10, pd.NA, 30, pd.NA, 31, pd.NA, 32]), + "nullable_float": pd.array([1.1, pd.NA, 3.3, pd.NA, 3.4, pd.NA, 3.5]), + } + ) + # WHEN I store the rows + await table.store_rows_async( + values=data_for_table, + synapse_client=self.syn, + ) + # THEN I can query the table and retrieve the data correctly + results = await query_async( + f"SELECT * FROM {table.id}", + synapse_client=self.syn, + timeout=QUERY_TIMEOUT_SEC, + ) + # AND the JSON data should be properly preserved with quotes + assert len(results) == 7 + expected_result = pd.DataFrame( + { + "id": [1, 2, 3, 4, 5, 6, 7], + "json_data": [ + {"description": 'Text with "quotes" here', "value": 100}, + { + "description": 'Multiple "quoted" "words" here', + "value": 300, + }, + { + "description": "...", + "value": 200, + }, # standalone ellipses in the json value + { + "description": [1, 2, "..."], + "value": 400, + }, # list with ellipses in the json value + { + "description": {"inner": "..."}, + "value": 500, + }, # dict with ellipses in the json value + { + "description": "single apostrophe's", + "author": "D'Angelo", + }, # single apostrophe in the json value + { + "description": "Multiple's apostrophe's", + "author": "McDonald's", + }, # multiple apostrophe's in the json value + ], + "string_list_with_ellipses": [ + ["a", "b", "..."], + ["d", "...", "f"], + ["g", "h", "i"], + ["..."], + ["m", "n", "..."], + ["p", "q", "r"], + ["s", "t", "u"], + ], + "string_col_with_ellipses": [ + "value1", + "...", + "value3", + "...", + "value6", + "...", + "value8", + ], + "int_list_with_pa_na": [ + [1, 2, 3], + [], + [7, 8, 9], + [], + [11, 12, 13], + [], + [15, 16, 17], + ], + "nullable_int": pd.array([10, None, 30, None, 31, None, 32]), + "nullable_float": pd.array([1.1, None, 3.3, None, 3.4, None, 3.5]), + } + ) + assert is_object_dtype(results.json_data) + assert is_object_dtype(results.int_list_with_pa_na) + assert is_object_dtype(results.nullable_int) + assert is_object_dtype(results.nullable_float) + + expected_result = expected_result.convert_dtypes() + expected_result = expected_result.replace({pd.NA: None}) + pd.testing.assert_frame_equal( + results.drop(columns=["ROW_ID", "ROW_VERSION"]), + expected_result, + check_dtype=False, + ) + class TestUpsertRows: @pytest.fixture(autouse=True, scope="function") @@ -1549,9 +1727,13 @@ async def test_upsert_all_data_types(self, project_model: Project) -> None: ], } ) + + expected_results = expected_results.convert_dtypes() + expected_results = expected_results.replace({pd.NA: None}) pd.testing.assert_frame_equal( results_after_insert, expected_results, check_dtype=False ) + # Create a second test file to update references path2 = utils.make_bogus_data_file() self.schedule_for_cleanup(path2) @@ -1733,7 +1915,10 @@ async def test_upsert_all_data_types(self, project_model: Project) -> None: ], } ) + expected_results = expected_results.convert_dtypes() + expected_results = expected_results.replace({pd.NA: None}) pd.testing.assert_frame_equal(results, expected_results, check_dtype=False) + # WHEN I upsert with multiple primary keys and null values multi_key_data = pd.DataFrame( { diff --git a/tests/integration/synapseclient/operations/async/test_delete_operations_async.py b/tests/integration/synapseclient/operations/async/test_delete_operations_async.py index 6f24d167a..2ec6bbc88 100644 --- a/tests/integration/synapseclient/operations/async/test_delete_operations_async.py +++ b/tests/integration/synapseclient/operations/async/test_delete_operations_async.py @@ -1,4 +1,5 @@ """Integration tests for delete operations async.""" + import uuid from typing import Callable diff --git a/tests/integration/synapseclient/operations/async/test_download_list_operations_async.py b/tests/integration/synapseclient/operations/async/test_download_list_operations_async.py new file mode 100644 index 000000000..4ea6cf5dc --- /dev/null +++ b/tests/integration/synapseclient/operations/async/test_download_list_operations_async.py @@ -0,0 +1,614 @@ +"""Integration tests for download_list operation functions. + +The Synapse download list is a user-scoped resource: every test run against +the same Synapse account shares one cart. To coexist with other tests and +concurrent CI runs, these tests track the items they add and remove only +those items on teardown, instead of calling download_list_clear_async() as a global +reset. Assertions reason only about the test's own file ids, never about +the cart being globally empty. +""" + +import csv +import os +import tempfile +import uuid +from typing import Callable + +import pytest +import pytest_asyncio + +import synapseclient.core.utils as utils +from synapseclient import Project, Synapse +from synapseclient.core.exceptions import SynapseHTTPError +from synapseclient.models import File +from synapseclient.models.table_components import CsvTableDescriptor +from synapseclient.operations import ( + DownloadListItem, + download_list_add_async, + download_list_files_async, + download_list_manifest_async, + download_list_remove_async, +) + + +@pytest_asyncio.fixture +async def scheduled_for_cart_removal(syn: Synapse): + """Track items a test adds to the cart and remove only those items on teardown.""" + scheduled: list[DownloadListItem] = [] + yield scheduled + if scheduled: + try: + await download_list_remove_async(files=scheduled, synapse_client=syn) + except Exception as e: + pytest.fail( + f"Cart teardown failed — {len(scheduled)} item(s) may remain in " + f"the cart and affect subsequent tests: {e}" + ) + + +async def _create_test_file( + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], +) -> File: + """Upload a small test file to Synapse and return the File model.""" + path = utils.make_bogus_uuid_file() + schedule_for_cleanup(path) + file = File( + parent_id=project["id"], + path=path, + name=f"download_list_test_{uuid.uuid4()}", + ) + await file.store_async(synapse_client=syn) + schedule_for_cleanup(file.id) + return file + + +async def _upload_new_version( + file: File, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], +) -> int: + """Upload a new version of an existing file and return the new version number.""" + new_path = utils.make_bogus_uuid_file() + schedule_for_cleanup(new_path) + file.path = new_path + await file.store_async(synapse_client=syn) + return file.version_number + + +async def _add_to_cart( + file: File, + syn: Synapse, + scheduled_for_cart_removal: list[DownloadListItem], +) -> None: + """Add a single file to the Synapse download list cart and register it + for teardown removal.""" + item = DownloadListItem( + file_entity_id=file.id, + version_number=file.version_number, + ) + await download_list_add_async(files=[item], synapse_client=syn) + scheduled_for_cart_removal.append(item) + + +async def _cart_entries( + syn: Synapse, + schedule_for_cleanup: Callable[..., None], +) -> set[tuple[str, int]]: + """Return all (file_id, version_number) pairs currently in the user's cart. + + Returns an empty set when the cart is empty. Synapse returns HTTP 400 with + the message 'No files available for download' in that case rather than + producing an empty CSV. If this string changes server-side, update it here + and in download_list_files_async's documented 'Raises' section. + See POST /download/list/manifest/async/start in the Synapse REST docs + (DownloadListController). + """ + try: + manifest_path = await download_list_manifest_async(synapse_client=syn) + except SynapseHTTPError as e: + if "No files available for download" in str(e): + return set() + raise + schedule_for_cleanup(manifest_path) + with open(manifest_path, newline="") as f: + return {(row["ID"], int(row["versionNumber"])) for row in csv.DictReader(f)} + + +class TestDownloadListAddAsync: + """Integration tests for download_list_add_async. + + - test_adds_specific_version_of_each_file_in_one_call: multiple files and versions added in one call + - test_download_list_add_with_no_version_number: version_number=None adds latest version + """ + + async def test_adds_specific_version_of_each_file_in_one_call( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """download_list_add_async() adds multiple files with multiple versions in a single call.""" + # GIVEN two files, each with two versions; we'll select v1 of file_a and v2 of file_b + file_a = await _create_test_file(project, syn, schedule_for_cleanup) + file_a_v1 = file_a.version_number + await _upload_new_version(file_a, syn, schedule_for_cleanup) + + file_b = await _create_test_file(project, syn, schedule_for_cleanup) + await _upload_new_version(file_b, syn, schedule_for_cleanup) + file_b_v2 = file_b.version_number + + # WHEN I add file_a v1 and file_b v2 in one call + items = [ + DownloadListItem(file_entity_id=file_a.id, version_number=file_a_v1), + DownloadListItem(file_entity_id=file_b.id, version_number=file_b_v2), + ] + count = await download_list_add_async(files=items, synapse_client=syn) + scheduled_for_cart_removal.extend(items) + cart_entries = { + e + for e in await _cart_entries(syn, schedule_for_cleanup) + if e[0] in {file_a.id, file_b.id} + } + + # THEN the returned count is 2 + assert count == 2, f"Expected 2 files added, got {count}" + + # AND only the added versions appear in the manifest for these file ids + assert cart_entries == { + (file_a.id, file_a_v1), + (file_b.id, file_b_v2), + }, f"Unexpected cart contents for test files: {cart_entries}" + + async def test_download_list_add_with_no_version_number( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """download_list_add_async() with version_number=None adds the latest version.""" + # GIVEN a file with two versions + file = await _create_test_file(project, syn, schedule_for_cleanup) + v1 = file.version_number + v2 = await _upload_new_version(file, syn, schedule_for_cleanup) + assert v2 != v1, "Expected a new version number" + + # WHEN I add the file without specifying a version number + item_no_version = DownloadListItem(file_entity_id=file.id) + count = await download_list_add_async( + files=[item_no_version], synapse_client=syn + ) + scheduled_for_cart_removal.append(item_no_version) + cart_entries = { + e for e in await _cart_entries(syn, schedule_for_cleanup) if e[0] == file.id + } + + # THEN the file is added to the cart with the latest version + assert count == 1, f"Expected 1 file added, got {count}" + + # AND the file appears in the manifest at the latest version + assert cart_entries == { + (file.id, v2) + }, f"Expected one row for {file.id} at v{v2}, got {cart_entries}" + + +class TestDownloadListRemoveAsync: + """Integration tests for download_list_remove_async. + + - test_download_list_remove_removes_only_specified_files: selective version removal + - test_download_list_remove_wrong_version_leaves_file_in_cart: wrong version is a no-op + - test_download_list_remove_no_version_leaves_file_in_cart: omitted version does not match explicit version + - test_download_list_remove_no_version_matches_no_version_entry: omitted version removes no-version entry + """ + + async def test_download_list_remove_removes_only_specified_files( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """download_list_remove_async() removes only the specified file versions, not others.""" + # GIVEN two files, each with two versions + file_a = await _create_test_file(project, syn, schedule_for_cleanup) + file_a_v1 = file_a.version_number + file_a_v2 = await _upload_new_version(file_a, syn, schedule_for_cleanup) + + file_b = await _create_test_file(project, syn, schedule_for_cleanup) + file_b_v1 = file_b.version_number + file_b_v2 = await _upload_new_version(file_b, syn, schedule_for_cleanup) + + # AND all four versions are added to the cart + added = [ + DownloadListItem(file_entity_id=file_a.id, version_number=file_a_v1), + DownloadListItem(file_entity_id=file_a.id, version_number=file_a_v2), + DownloadListItem(file_entity_id=file_b.id, version_number=file_b_v1), + DownloadListItem(file_entity_id=file_b.id, version_number=file_b_v2), + ] + await download_list_add_async(files=added, synapse_client=syn) + scheduled_for_cart_removal.extend(added) + + # WHEN I remove file_a v1 and file_b v2 + removed = await download_list_remove_async( + files=[ + DownloadListItem(file_entity_id=file_a.id, version_number=file_a_v1), + DownloadListItem(file_entity_id=file_b.id, version_number=file_b_v2), + ], + synapse_client=syn, + ) + our_ids = {file_a.id, file_b.id} + cart_entries = { + e for e in await _cart_entries(syn, schedule_for_cleanup) if e[0] in our_ids + } + + # THEN exactly 2 items were removed + assert removed == 2, f"Expected 2 files removed, got {removed}" + + # AND the manifest (filtered to our file ids) contains only file_a v2 and file_b v1 + assert cart_entries == { + (file_a.id, file_a_v2), + (file_b.id, file_b_v1), + }, f"Unexpected cart contents for test files: {cart_entries}" + + async def test_download_list_remove_wrong_version_leaves_file_in_cart( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """download_list_remove_async() with a wrong version is a no-op -- the file stays in the cart.""" + # GIVEN a cart entry for a file (added with an explicit version) + file = await _create_test_file(project, syn, schedule_for_cleanup) + await _add_to_cart(file, syn, scheduled_for_cart_removal) + + # WHEN I try to remove the file with a wrong version number + removed = await download_list_remove_async( + files=[ + DownloadListItem( + file_entity_id=file.id, + version_number=(file.version_number or 1) + 99, + ) + ], + synapse_client=syn, + ) + + # THEN no files are removed and the file remains in the cart + assert removed == 0, f"Expected 0 files removed, got {removed}" + cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)} + assert file.id in cart_ids, f"Expected {file.id} to remain in the cart" + + async def test_download_list_remove_no_version_leaves_file_in_cart( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """download_list_remove_async() with no version does not match a cart entry that was + added with an explicit version -- the API requires an exact + (fileEntityId, versionNumber) pair.""" + # GIVEN a cart entry for a file (added with an explicit version) + file = await _create_test_file(project, syn, schedule_for_cleanup) + await _add_to_cart(file, syn, scheduled_for_cart_removal) + + # WHEN I try to remove the file without specifying a version + removed = await download_list_remove_async( + files=[DownloadListItem(file_entity_id=file.id)], + synapse_client=syn, + ) + + # THEN no files are removed and the file remains in the cart + assert removed == 0, f"Expected 0 files removed, got {removed}" + cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)} + assert file.id in cart_ids, f"Expected {file.id} to remain in the cart" + + async def test_download_list_remove_no_version_matches_no_version_entry( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """download_list_remove_async() with no version removes a cart entry that was also + added without a version.""" + # GIVEN a cart entry for a file added without a version number + file = await _create_test_file(project, syn, schedule_for_cleanup) + item_no_version = DownloadListItem(file_entity_id=file.id) + await download_list_add_async(files=[item_no_version], synapse_client=syn) + scheduled_for_cart_removal.append(item_no_version) + + # WHEN I remove the file without specifying a version + removed = await download_list_remove_async( + files=[DownloadListItem(file_entity_id=file.id)], + synapse_client=syn, + ) + + # THEN the file is reported as removed and no longer appears in the cart + assert removed == 1, f"Expected 1 file removed, got {removed}" + cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)} + assert file.id not in cart_ids, f"Expected {file.id} to be absent from the cart" + + +class TestDownloadListFilesAsync: + """Integration tests for download_list_files_async. + + - test_download_list_files_downloads_and_removes_from_cart: sequential and parallel download + - test_download_list_files_multiple_versions_of_same_file: two versions both download + - test_download_list_files_default_location: omitting download_location writes to CWD + - test_download_list_files_no_version_add_is_removed_from_cart: + no-version add is downloaded and removed from the cart + """ + + @pytest.mark.parametrize("parallel", [False, True]) + async def test_download_list_files_downloads_and_removes_from_cart( + self, + parallel: bool, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """Downloaded files are present in the manifest and removed from cart.""" + # GIVEN two files added to the cart + file_a = await _create_test_file(project, syn, schedule_for_cleanup) + file_b = await _create_test_file(project, syn, schedule_for_cleanup) + await _add_to_cart(file_a, syn, scheduled_for_cart_removal) + await _add_to_cart(file_b, syn, scheduled_for_cart_removal) + + # WHEN I download the files + with tempfile.TemporaryDirectory() as tmpdir: + manifest_path = await download_list_files_async( + download_location=tmpdir, + parallel=parallel, + synapse_client=syn, + ) + schedule_for_cleanup(manifest_path) + + # THEN the manifest contains both files with valid paths and no errors + assert os.path.exists(manifest_path) + with open(manifest_path, newline="") as f: + reader = csv.DictReader(f) + rows = list(reader) + + my_rows = [r for r in rows if r["ID"] in {file_a.id, file_b.id}] + ids_in_manifest = {row["ID"] for row in my_rows} + assert file_a.id in ids_in_manifest + assert file_b.id in ids_in_manifest + + for row in my_rows: + assert ( + row["error"] == "" + ), f"Unexpected error for {row['ID']}: {row['error']}" + assert os.path.exists( + row["path"] + ), f"File not downloaded: {row['path']}" + + # AND our files are no longer in the cart after successful downloads + cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)} + assert ( + file_a.id not in cart_ids + ), f"Expected {file_a.id} to be removed from cart after download" + assert ( + file_b.id not in cart_ids + ), f"Expected {file_b.id} to be removed from cart after download" + + async def test_download_list_files_multiple_versions_of_same_file( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """Cart can hold two versions of the same file and both are downloaded.""" + # GIVEN a file with two versions, both added to the cart + file = await _create_test_file(project, syn, schedule_for_cleanup) + v1_id = file.id + v1_version = file.version_number + v2_version = await _upload_new_version(file, syn, schedule_for_cleanup) + assert v2_version != v1_version, "Expected a new version number" + + items = [ + DownloadListItem(file_entity_id=v1_id, version_number=v1_version), + DownloadListItem(file_entity_id=v1_id, version_number=v2_version), + ] + await download_list_add_async(files=items, synapse_client=syn) + scheduled_for_cart_removal.extend(items) + + # WHEN I download the cart + with tempfile.TemporaryDirectory() as tmpdir: + manifest_path = await download_list_files_async( + download_location=tmpdir, + synapse_client=syn, + ) + schedule_for_cleanup(manifest_path) + + # THEN the manifest contains two rows for the same entity ID + with open(manifest_path, newline="") as f: + reader = csv.DictReader(f) + rows = [r for r in reader if r["ID"] == v1_id] + + assert len(rows) == 2, f"Expected 2 rows for {v1_id}, got {len(rows)}" + versions_in_manifest = {int(r["versionNumber"]) for r in rows} + assert versions_in_manifest == { + v1_version, + v2_version, + }, f"Expected versions {v1_version} and {v2_version}, got {versions_in_manifest}" + for row in rows: + assert ( + row["path"] != "" + ), f"Missing path for version {row['versionNumber']}" + assert ( + row["error"] == "" + ), f"Error for version {row['versionNumber']}: {row['error']}" + + # AND our file is no longer in the cart + cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)} + assert ( + v1_id not in cart_ids + ), f"Expected {v1_id} to be removed from cart after download" + + async def test_download_list_files_default_location( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """download_list_files_async() with download_location=None writes to CWD.""" + # GIVEN a cart containing one of our files + file = await _create_test_file(project, syn, schedule_for_cleanup) + await _add_to_cart(file, syn, scheduled_for_cart_removal) + + # WHEN I download with no explicit download_location (uses CWD) + with tempfile.TemporaryDirectory() as tmpdir: + original_cwd = os.getcwd() + try: + os.chdir(tmpdir) + manifest_path = await download_list_files_async( + synapse_client=syn, + ) + schedule_for_cleanup(manifest_path) + + # THEN the manifest is written under the CWD + abs_manifest = os.path.abspath(manifest_path) + assert os.path.exists(abs_manifest) + # Normalize both paths with realpath -- on macOS /var is a + # symlink to /private/var, so tmpdir and the resolved manifest + # path can differ even when the manifest is under tmpdir. + assert os.path.realpath(abs_manifest).startswith( + os.path.realpath(tmpdir) + ), f"Expected manifest under {tmpdir}, got {abs_manifest}" + + with open(manifest_path, newline="") as f: + reader = csv.DictReader(f) + rows = list(reader) + + file_row = next(r for r in rows if r["ID"] == file.id) + assert file_row["error"] == "" + finally: + os.chdir(original_cwd) + + async def test_download_list_files_no_version_add_is_removed_from_cart( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """A file added to the cart without a version is downloaded + successfully and removed from the cart. + """ + # GIVEN a file added to the cart without a version number + file = await _create_test_file(project, syn, schedule_for_cleanup) + item_no_version = DownloadListItem(file_entity_id=file.id) + await download_list_add_async(files=[item_no_version], synapse_client=syn) + scheduled_for_cart_removal.append(item_no_version) + + # WHEN I download the cart contents + with tempfile.TemporaryDirectory() as tmpdir: + manifest_path = await download_list_files_async( + download_location=tmpdir, + synapse_client=syn, + ) + schedule_for_cleanup(manifest_path) + + # THEN the file is downloaded successfully (no error in the manifest) + with open(manifest_path, newline="") as f: + rows = [r for r in csv.DictReader(f) if r["ID"] == file.id] + assert len(rows) == 1, f"Expected 1 row for {file.id}, got {len(rows)}" + assert ( + rows[0]["error"] == "" + ), f"Unexpected error for {file.id}: {rows[0]['error']}" + assert os.path.exists( + rows[0]["path"] + ), f"File not downloaded: {rows[0]['path']}" + + # AND the file is removed from the cart after a successful download, + # even though it was added without a version number + cart_ids = {id_ for id_, _ in await _cart_entries(syn, schedule_for_cleanup)} + assert ( + file.id not in cart_ids + ), f"Expected {file.id} to be removed from cart after download." + + +class TestDownloadListManifestAsync: + """Integration tests for download_list_manifest_async.""" + + async def test_download_list_manifest_with_custom_csv_descriptor( + self, + project: Project, + syn: Synapse, + schedule_for_cleanup: Callable[..., None], + scheduled_for_cart_removal: list[DownloadListItem], + ) -> None: + """download_list_manifest_async() respects a custom CsvTableDescriptor.""" + # GIVEN a cart containing a file whose name contains the quote + # character, so the writer must emit the escape character + path = utils.make_bogus_uuid_file() + schedule_for_cleanup(path) + uuid_suffix = str(uuid.uuid4()) + file_name = f"it's_{uuid_suffix}" + file = File( + parent_id=project["id"], + path=path, + name=file_name, + ) + file = await file.store_async(synapse_client=syn) + schedule_for_cleanup(file.id) + await _add_to_cart(file, syn, scheduled_for_cart_removal) + + # WHEN I request a manifest with all non-default descriptor options + descriptor = CsvTableDescriptor( + separator="\t", + quote_character="'", + escape_character="/", + line_end="\n", + is_first_line_header=False, + ) + manifest_path = await download_list_manifest_async( + csv_table_descriptor=descriptor, + synapse_client=syn, + ) + schedule_for_cleanup(manifest_path) + + with open(manifest_path, newline="") as f: + content = f.read() + + # THEN tab separator is used + assert "\t" in content, "Expected tab separators in manifest" + + # AND the escape character was used for the embedded quote in the file name + # TODO: uncomment after PLFM-9598 is resolved + # assert "/'" in content, ( + # f"Expected escape sequence /' in manifest (from escaping ' in file name), " + # f"got: {content!r}" + # ) + + # AND line endings are LF only (no CR) + assert "\r" not in content, "Expected LF-only line endings; found CR" + + # AND there is no header row -- the first non-empty line is the data row + # NOTE: The cart is per-user and shared across all parallel workers (-n 8). + # Other tests running concurrently can add items to the cart, so the manifest + # may contain more than just this test's file. + lines = [line for line in content.split("\n") if line] + assert lines, "Expected at least one row in the manifest" + assert any(file.id in line for line in lines), ( + f"Expected a data row containing {file.id} in manifest, " + f"got: {content!r}" + ) + + # AND the name field is wrapped in single quotes (the writer quoted it + # because it contains the quote character). + # Search all lines since the cart may contain other items from concurrent tests. + file_line = next((line for line in lines if file.id in line), None) + assert file_line is not None, f"No line found for {file.id} in manifest" + fields = file_line.split("\t") + name_field = next((f for f in fields if uuid_suffix in f), None) + assert ( + name_field is not None + ), f"Name field containing {uuid_suffix!r} not found in {file_line!r}" diff --git a/tests/integration/synapseclient/operations/async/test_factory_operations_async.py b/tests/integration/synapseclient/operations/async/test_factory_operations_async.py index 2eba51bbc..7062c4684 100644 --- a/tests/integration/synapseclient/operations/async/test_factory_operations_async.py +++ b/tests/integration/synapseclient/operations/async/test_factory_operations_async.py @@ -852,10 +852,11 @@ async def test_get_docker_repo_by_id( ) -> None: """Test retrieving a Docker repository using get factory function.""" # GIVEN a Docker repository exists + unique_id = str(uuid.uuid4())[:8] docker_repo = await DockerRepository( parent_id=project_model.id, - repository_name="username/test-get-factory", - name="Test Factory Repo", + repository_name=f"username/test-get-factory-{unique_id}", + name=f"Test Factory Repo {unique_id}", description="Testing get factory", ).store_async(synapse_client=self.syn) @@ -867,8 +868,8 @@ async def test_get_docker_repo_by_id( # THEN the correct DockerRepository is returned assert isinstance(retrieved, DockerRepository) assert retrieved.id == docker_repo.id - assert retrieved.repository_name == "username/test-get-factory" - assert retrieved.name == "Test Factory Repo" + assert retrieved.repository_name == f"username/test-get-factory-{unique_id}" + assert retrieved.name == f"Test Factory Repo {unique_id}" assert retrieved.description == "Testing get factory" assert retrieved.parent_id == project_model.id assert retrieved.etag is not None diff --git a/tests/integration/synapseclient/operations/async/test_utility_operations_async.py b/tests/integration/synapseclient/operations/async/test_utility_operations_async.py index 7a5e96e09..a58eb8fd9 100644 --- a/tests/integration/synapseclient/operations/async/test_utility_operations_async.py +++ b/tests/integration/synapseclient/operations/async/test_utility_operations_async.py @@ -1,4 +1,5 @@ """Integration tests for utility operations asynchronous.""" + import uuid from typing import Callable diff --git a/tests/integration/synapseclient/test_command_line_client.py b/tests/integration/synapseclient/test_command_line_client.py index 803f08cdf..975b70896 100644 --- a/tests/integration/synapseclient/test_command_line_client.py +++ b/tests/integration/synapseclient/test_command_line_client.py @@ -1,4 +1,5 @@ """Integration tests for the CLI.""" + import filecmp import json import logging @@ -230,13 +231,8 @@ def test_command_line_client(test_state): assert used["url"] == repo_url assert used["wasExecuted"] - # Note: Tests shouldn't have external dependencies - # but this is a pretty picture of Singapore - singapore_url = ( - "http://upload.wikimedia.org/wikipedia/commons/" - "thumb/3/3e/1_singapore_city_skyline_dusk_panorama_2011.jpg" - "/1280px-1_singapore_city_skyline_dusk_panorama_2011.jpg" - ) + # Use a stable Sage-hosted asset to avoid external rate limiting (e.g. 429s) + singapore_url = "https://www.synapse.org/Portal/clear.cache.gif" # Test external file handle output = run( @@ -932,11 +928,14 @@ def test_login(test_state): alt_syn = Synapse(cache_client=False) username = "username" auth_token = "my_auth_token" - with patch.object(alt_syn, "login") as mock_login, patch.object( - alt_syn, - "getUserProfile", - return_value={"userName": "test_user", "ownerId": "ownerId"}, - ) as mock_get_user_profile: + with ( + patch.object(alt_syn, "login") as mock_login, + patch.object( + alt_syn, + "getUserProfile", + return_value={"userName": "test_user", "ownerId": "ownerId"}, + ) as mock_get_user_profile, + ): run( test_state, "synapse" "--skip-checks", diff --git a/tests/integration/synapseclient/test_wikis.py b/tests/integration/synapseclient/test_wikis.py index 2575b6590..e59c3e0eb 100644 --- a/tests/integration/synapseclient/test_wikis.py +++ b/tests/integration/synapseclient/test_wikis.py @@ -5,14 +5,23 @@ import synapseclient.core.utils as utils from synapseclient import Project, Synapse, Wiki -from synapseclient.core.exceptions import SynapseHTTPError from synapseclient.core.upload.upload_functions import upload_synapse_s3 # from unittest import skip +@pytest.fixture +def wiki_project(syn: Synapse, schedule_for_cleanup) -> Project: + """Function-scoped project so each wiki test gets a clean, isolated project.""" + project = syn.store(Project(name=str(uuid.uuid4()))) + schedule_for_cleanup(project) + return project + + # @skip("Skip integration tests for soon to be removed code") -def test_wikiAttachment(syn: Synapse, project: Project, schedule_for_cleanup) -> None: +def test_wikiAttachment( + syn: Synapse, wiki_project: Project, schedule_for_cleanup +) -> None: # Upload a file to be attached to a Wiki filename = utils.make_bogus_data_file() attachname = utils.make_bogus_data_file() @@ -29,7 +38,7 @@ def test_wikiAttachment(syn: Synapse, project: Project, schedule_for_cleanup) -> Blabber jabber blah blah boo. """ wiki = Wiki( - owner=project, + owner=wiki_project, title="A Test Wiki", markdown=md, fileHandles=[fileHandle["id"]], @@ -39,7 +48,7 @@ def test_wikiAttachment(syn: Synapse, project: Project, schedule_for_cleanup) -> # Create a Wiki sub-page subwiki = Wiki( - owner=project, + owner=wiki_project, title="A sub-wiki", markdown="nothing", parentWikiId=wiki.id, @@ -47,7 +56,7 @@ def test_wikiAttachment(syn: Synapse, project: Project, schedule_for_cleanup) -> subwiki = syn.store(subwiki) # Retrieve the root Wiki from Synapse - wiki2 = syn.getWiki(project) + wiki2 = syn.getWiki(wiki_project) # due to the new wiki api, we'll get back some new properties, # namely markdownFileHandleId and markdown_path, so only compare # properties that are in the first object @@ -55,7 +64,7 @@ def test_wikiAttachment(syn: Synapse, project: Project, schedule_for_cleanup) -> assert wiki[property_name] == wiki2[property_name] # Retrieve the sub Wiki from Synapse - wiki2 = syn.getWiki(project, subpageId=subwiki.id) + wiki2 = syn.getWiki(wiki_project, subpageId=subwiki.id) for property_name in wiki: assert subwiki[property_name] == wiki2[property_name] @@ -63,12 +72,12 @@ def test_wikiAttachment(syn: Synapse, project: Project, schedule_for_cleanup) -> wiki["title"] = "A New Title" wiki["markdown"] = wiki["markdown"] + "\nNew stuff here!!!\n" syn.store(wiki) - wiki = syn.getWiki(project) + wiki = syn.getWiki(wiki_project) assert wiki["title"] == "A New Title" assert wiki["markdown"].endswith("\nNew stuff here!!!\n") # Check the Wiki's metadata - headers = syn.getWikiHeaders(project) + headers = syn.getWikiHeaders(wiki_project) assert len(headers) == 2 assert headers[0]["title"] in (wiki["title"], subwiki["title"]) @@ -77,18 +86,14 @@ def test_wikiAttachment(syn: Synapse, project: Project, schedule_for_cleanup) -> for fn in [filename, attachname]: assert os.path.basename(fn) in file_names - syn.delete(subwiki) - syn.delete(wiki) - pytest.raises(SynapseHTTPError, syn.getWiki, project) - # @skip("Skip integration tests for soon to be removed code") -def test_create_or_update_wiki(syn: Synapse, project: Project) -> None: +def test_create_or_update_wiki(syn: Synapse, wiki_project: Project) -> None: # create wiki once syn.store( Wiki( title="This is the title", - owner=project, + owner=wiki_project, markdown="#Wikis are OK\n\nBlabber jabber blah blah blither blather bonk!", ) ) @@ -98,7 +103,7 @@ def test_create_or_update_wiki(syn: Synapse, project: Project) -> None: wiki = syn.store( Wiki( title=new_title, - owner=project, + owner=wiki_project, markdown="#Wikis are awesome\n\nNew babble boo flabble gibber wiggle sproing!", ), createOrUpdate=True, @@ -107,13 +112,11 @@ def test_create_or_update_wiki(syn: Synapse, project: Project) -> None: # @skip("Skip integration tests for soon to be removed code") -def test_wiki_version(syn: Synapse, project: Project) -> None: - # create a new project to avoid artifacts from previous tests - project = syn.store(Project(name=str(uuid.uuid4()))) +def test_wiki_version(syn: Synapse, wiki_project: Project) -> None: wiki = syn.store( Wiki( title="Title version 1", - owner=project, + owner=wiki_project, markdown="##A heading\n\nThis is version 1 of the wiki page!\n", ) ) @@ -133,13 +136,15 @@ def test_wiki_version(syn: Synapse, project: Project) -> None: # @skip("Skip integration tests for soon to be removed code") -def test_wiki_with_empty_string_parent_wiki_id(syn: Synapse, project: Project) -> None: +def test_wiki_with_empty_string_parent_wiki_id( + syn: Synapse, wiki_project: Project +) -> None: # GIVEN a wiki is created with an empty string parentWikiId # WHEN it is stored wiki_stored = syn.store( Wiki( title="This is the title", - owner=project, + owner=wiki_project, markdown="#Wikis are OK\n\nBlabber jabber blah blah blither blather bonk!", parentWikiId="", ) diff --git a/tests/test_utils.py b/tests/test_utils.py index 8a1102a12..8173e479e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,7 +4,7 @@ def spy_for_async_function( - original_func: Callable[..., Any] + original_func: Callable[..., Any], ) -> Callable[..., Coroutine[Any, Any, Any]]: """This function is used to create a spy for async functions.""" diff --git a/tests/unit/synapseclient/api/unit_test_entity_services.py b/tests/unit/synapseclient/api/unit_test_entity_services.py index fb4a43dd5..57f1f021f 100644 --- a/tests/unit/synapseclient/api/unit_test_entity_services.py +++ b/tests/unit/synapseclient/api/unit_test_entity_services.py @@ -1,4 +1,5 @@ """Unit tests for entity_services utility functions.""" + from unittest.mock import AsyncMock, patch import pytest diff --git a/tests/unit/synapseclient/api/unit_test_project_setting_services.py b/tests/unit/synapseclient/api/unit_test_project_setting_services.py new file mode 100644 index 000000000..64f38bb7f --- /dev/null +++ b/tests/unit/synapseclient/api/unit_test_project_setting_services.py @@ -0,0 +1,150 @@ +"""Unit tests for project_setting_services utility functions.""" + +from unittest.mock import AsyncMock, patch + +import pytest + +import synapseclient.api.project_setting_services as project_setting_services + + +class TestGetProjectSetting: + """Tests for get_project_setting function.""" + + @pytest.mark.asyncio + @patch("synapseclient.Synapse") + async def test_get_project_setting_exists(self, mock_synapse): + """Test get_project_setting when setting exists.""" + # GIVEN a mock client that returns a project setting + mock_client = AsyncMock() + mock_synapse.get_client.return_value = mock_client + mock_client.rest_get_async.return_value = { + "id": "setting123", + "projectId": "syn456", + "settingsType": "upload", + "locations": [12345], + } + + # WHEN I call get_project_setting + result = await project_setting_services.get_project_setting( + project_id="syn456", + setting_type="upload", + synapse_client=None, + ) + + # THEN I expect the project setting to be returned + assert result["id"] == "setting123" + assert result["locations"] == [12345] + mock_client.rest_get_async.assert_awaited_once_with( + uri="/projectSettings/syn456/type/upload", + ) + + @pytest.mark.asyncio + @patch("synapseclient.Synapse") + async def test_get_project_setting_not_exists(self, mock_synapse): + """Test get_project_setting when setting does not exist.""" + # GIVEN a mock client that returns empty response + mock_client = AsyncMock() + mock_synapse.get_client.return_value = mock_client + mock_client.rest_get_async.return_value = "" + + # WHEN I call get_project_setting + result = await project_setting_services.get_project_setting( + project_id="syn456", + setting_type="upload", + synapse_client=None, + ) + + # THEN I expect None to be returned + assert result is "" + + +class TestCreateProjectSetting: + """Tests for create_project_setting function.""" + + @pytest.mark.asyncio + @patch("synapseclient.Synapse") + async def test_create_project_setting(self, mock_synapse): + """Test create_project_setting creates a project setting.""" + # GIVEN a mock client that returns a project setting + mock_client = AsyncMock() + mock_synapse.get_client.return_value = mock_client + mock_client.rest_post_async.return_value = { + "id": "setting123", + "projectId": "syn456", + "settingsType": "upload", + "locations": [12345], + } + + # WHEN I call create_project_setting + body = { + "concreteType": "org.sagebionetworks.repo.model.project.UploadDestinationListSetting", + "settingsType": "upload", + "locations": [12345], + "projectId": "syn456", + } + result = await project_setting_services.create_project_setting( + request=body, + synapse_client=None, + ) + + # THEN I expect the project setting to be returned + assert result["id"] == "setting123" + mock_client.rest_post_async.assert_awaited_once() + + +class TestUpdateProjectSetting: + """Tests for update_project_setting function.""" + + @pytest.mark.asyncio + @patch("synapseclient.Synapse") + async def test_update_project_setting(self, mock_synapse): + """Test update_project_setting updates a project setting.""" + # GIVEN a mock client that returns an updated project setting + mock_client = AsyncMock() + mock_synapse.get_client.return_value = mock_client + mock_client.rest_put_async.return_value = { + "id": "setting123", + "projectId": "syn456", + "settingsType": "upload", + "locations": [12345, 67890], + } + + # WHEN I call update_project_setting + body = { + "id": "setting123", + "projectId": "syn456", + "settingsType": "upload", + "locations": [12345, 67890], + } + result = await project_setting_services.update_project_setting( + request=body, + synapse_client=None, + ) + + # THEN I expect the updated project setting to be returned + assert result["locations"] == [12345, 67890] + mock_client.rest_put_async.assert_awaited_once() + + +class TestDeleteProjectSetting: + """Tests for delete_project_setting function.""" + + @pytest.mark.asyncio + @patch("synapseclient.Synapse") + async def test_delete_project_setting(self, mock_synapse): + """Test delete_project_setting deletes a project setting.""" + # GIVEN a mock client + mock_client = AsyncMock() + mock_synapse.get_client.return_value = mock_client + mock_client.rest_delete_async.return_value = None + + # WHEN I call delete_project_setting + await project_setting_services.delete_project_setting( + setting_id="setting123", + synapse_client=None, + ) + + # THEN I expect the delete to be called + mock_client.rest_delete_async.assert_awaited_once_with( + uri="/projectSettings/setting123", + ) diff --git a/tests/unit/synapseclient/api/unit_test_storage_location_services.py b/tests/unit/synapseclient/api/unit_test_storage_location_services.py new file mode 100644 index 000000000..692ec0d2c --- /dev/null +++ b/tests/unit/synapseclient/api/unit_test_storage_location_services.py @@ -0,0 +1,72 @@ +"""Unit tests for storage_location_services utility functions.""" + +from unittest.mock import AsyncMock, patch + +import pytest + +import synapseclient.api.storage_location_services as storage_location_services + + +class TestCreateStorageLocationSetting: + """Tests for create_storage_location_setting function.""" + + @pytest.mark.asyncio + @patch("synapseclient.Synapse") + async def test_create_storage_location_setting(self, mock_synapse): + """Test create_storage_location_setting creates a storage location.""" + # GIVEN a mock client that returns a storage location + mock_client = AsyncMock() + mock_synapse.get_client.return_value = mock_client + mock_client.rest_post_async.return_value = { + "storageLocationId": 12345, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting", + "uploadType": "S3", + "bucket": "my-bucket", + } + + # WHEN I call create_storage_location_setting + request = { + "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting", + "uploadType": "S3", + "bucket": "my-bucket", + } + result = await storage_location_services.create_storage_location_setting( + request=request, + synapse_client=None, + ) + + # THEN I expect the storage location to be returned + assert result["storageLocationId"] == 12345 + assert result["bucket"] == "my-bucket" + mock_client.rest_post_async.assert_awaited_once() + + +class TestGetStorageLocationSetting: + """Tests for get_storage_location_setting function.""" + + @pytest.mark.asyncio + @patch("synapseclient.Synapse") + async def test_get_storage_location_setting(self, mock_synapse): + """Test get_storage_location_setting retrieves a storage location.""" + # GIVEN a mock client that returns a storage location + mock_client = AsyncMock() + mock_synapse.get_client.return_value = mock_client + mock_client.rest_get_async.return_value = { + "storageLocationId": 12345, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting", + "uploadType": "S3", + "bucket": "my-bucket", + } + + # WHEN I call get_storage_location_setting + result = await storage_location_services.get_storage_location_setting( + storage_location_id=12345, + synapse_client=None, + ) + + # THEN I expect the storage location to be returned + assert result["storageLocationId"] == 12345 + assert result["bucket"] == "my-bucket" + mock_client.rest_get_async.assert_awaited_once_with( + uri="/storageLocation/12345", + ) diff --git a/tests/unit/synapseclient/api/unit_test_web_services.py b/tests/unit/synapseclient/api/unit_test_web_services.py index c5b9b5329..825d843e3 100644 --- a/tests/unit/synapseclient/api/unit_test_web_services.py +++ b/tests/unit/synapseclient/api/unit_test_web_services.py @@ -1,4 +1,5 @@ """Unit tests for web_services functions.""" + from unittest.mock import AsyncMock, MagicMock, patch import synapseclient.api.web_services as web_services diff --git a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py index df9f6e7e1..3efd7c558 100644 --- a/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py +++ b/tests/unit/synapseclient/core/credentials/unit_test_cred_provider.py @@ -117,11 +117,14 @@ def _get_auth_info( def test_get_synapse_credentials(self) -> None: auth_info = ("username", "auth_token") - with patch.object( - self.provider, "_get_auth_info", return_value=auth_info - ) as mock_get_auth_info, patch.object( - self.provider, "_create_synapse_credential" - ) as mock_create_synapse_credentials: + with ( + patch.object( + self.provider, "_get_auth_info", return_value=auth_info + ) as mock_get_auth_info, + patch.object( + self.provider, "_create_synapse_credential" + ) as mock_create_synapse_credentials, + ): self.provider.get_synapse_credentials(self.syn, self.user_login_args) mock_get_auth_info.assert_called_once_with( diff --git a/tests/unit/synapseclient/core/download/unit_test_download_async.py b/tests/unit/synapseclient/core/download/unit_test_download_async.py index 4ee1fdc00..587463baa 100644 --- a/tests/unit/synapseclient/core/download/unit_test_download_async.py +++ b/tests/unit/synapseclient/core/download/unit_test_download_async.py @@ -32,11 +32,14 @@ async def test_get_info_not_expired(self) -> None: expiration_utc=utc_now + datetime.timedelta(seconds=6), ) - with mock.patch.object( - PresignedUrlProvider, "_get_pre_signed_info", return_value=info - ) as mock_get_presigned_info, mock.patch.object( - download_async, "datetime", wraps=datetime - ) as mock_datetime: + with ( + mock.patch.object( + PresignedUrlProvider, "_get_pre_signed_info", return_value=info + ) as mock_get_presigned_info, + mock.patch.object( + download_async, "datetime", wraps=datetime + ) as mock_datetime, + ): mock_datetime.datetime.now.return_value = utc_now presigned_url_provider = PresignedUrlProvider( @@ -64,16 +67,19 @@ async def test_get_info_expired(self) -> None: expiration_utc=unexpired_date, ) - with mock.patch.object( - PresignedUrlProvider, - "_get_pre_signed_info", - side_effect=[unexpired_info], - ) as mock_get_presigned_info, mock.patch( - "synapseclient.core.download.download_async.get_file_handle_for_download", - return_value={ - "fileHandle": {"fileName": "myFile.txt"}, - "preSignedURL": f"https://synapse.org?X-Amz-Date={unexpired_date.strftime('%Y%m%dT%H%M%SZ')}&X-Amz-Expires=5&X-Amz-Signature=123456", - }, + with ( + mock.patch.object( + PresignedUrlProvider, + "_get_pre_signed_info", + side_effect=[unexpired_info], + ) as mock_get_presigned_info, + mock.patch( + "synapseclient.core.download.download_async.get_file_handle_for_download", + return_value={ + "fileHandle": {"fileName": "myFile.txt"}, + "preSignedURL": f"https://synapse.org?X-Amz-Date={unexpired_date.strftime('%Y%m%dT%H%M%SZ')}&X-Amz-Expires=5&X-Amz-Signature=123456", + }, + ), ): presigned_url_provider = PresignedUrlProvider( self.mock_synapse_client, request=self.download_request @@ -89,17 +95,20 @@ async def test_get_pre_signed_info(self) -> None: fake_url = "https://synapse.org/foo.txt" fake_file_name = "foo.txt" - with mock.patch.object( - download_async, - "_pre_signed_url_expiration_time", - return_value=fake_exp_time, - ) as mock_pre_signed_url_expiration_time, mock.patch( - "synapseclient.core.download.download_async.get_file_handle_for_download", - return_value={ - "fileHandle": {"fileName": "myFile.txt"}, - "preSignedURL": f"https://synapse.org?X-Amz-Date={fake_exp_time.strftime('%Y%m%dT%H%M%SZ')}&X-Amz-Expires=5&X-Amz-Signature=123456", - }, - ) as mock_file_handle_download: + with ( + mock.patch.object( + download_async, + "_pre_signed_url_expiration_time", + return_value=fake_exp_time, + ) as mock_pre_signed_url_expiration_time, + mock.patch( + "synapseclient.core.download.download_async.get_file_handle_for_download", + return_value={ + "fileHandle": {"fileName": "myFile.txt"}, + "preSignedURL": f"https://synapse.org?X-Amz-Date={fake_exp_time.strftime('%Y%m%dT%H%M%SZ')}&X-Amz-Expires=5&X-Amz-Signature=123456", + }, + ) as mock_file_handle_download, + ): fake_file_handle_response = { "fileHandle": {"fileName": fake_file_name}, "preSignedURL": fake_url, diff --git a/tests/unit/synapseclient/core/multithread_download/unit_test_download_threads.py b/tests/unit/synapseclient/core/multithread_download/unit_test_download_threads.py index 1c8cac7e8..b3178d429 100644 --- a/tests/unit/synapseclient/core/multithread_download/unit_test_download_threads.py +++ b/tests/unit/synapseclient/core/multithread_download/unit_test_download_threads.py @@ -37,11 +37,14 @@ def test_get_info_not_expired(self): expiration_utc=utc_now + datetime.timedelta(seconds=6), ) - with mock.patch.object( - PresignedUrlProvider, "_get_pre_signed_info", return_value=info - ) as mock_get_presigned_info, mock.patch.object( - download_threads, "datetime", wraps=datetime - ) as mock_datetime: + with ( + mock.patch.object( + PresignedUrlProvider, "_get_pre_signed_info", return_value=info + ) as mock_get_presigned_info, + mock.patch.object( + download_threads, "datetime", wraps=datetime + ) as mock_datetime, + ): mock_datetime.datetime.utcnow.return_value = utc_now presigned_url_provider = PresignedUrlProvider( @@ -68,13 +71,14 @@ def test_get_info_expired(self): expiration_utc=utc_now + datetime.timedelta(seconds=6), ) - with mock.patch.object( - PresignedUrlProvider, - "_get_pre_signed_info", - side_effect=[expired_info, unexpired_info], - ) as mock_get_presigned_info, mock.patch.object( - download_threads, "datetime" - ) as mock_datetime: + with ( + mock.patch.object( + PresignedUrlProvider, + "_get_pre_signed_info", + side_effect=[expired_info, unexpired_info], + ) as mock_get_presigned_info, + mock.patch.object(download_threads, "datetime") as mock_datetime, + ): mock_datetime.datetime.utcnow.return_value = utc_now presigned_url_provider = PresignedUrlProvider( @@ -235,25 +239,29 @@ def test_download_file(self): file_size = int(1.5 * (2**20)) request = DownloadRequest(file_handle_id, object_id, None, path) - with mock.patch.object( - download_threads, "PresignedUrlProvider" - ) as mock_url_provider_init, mock.patch.object( - download_threads, "TransferStatus" - ) as mock_transfer_status_init, mock.patch.object( - download_threads, "_get_file_size" - ) as mock_get_file_size, mock.patch.object( - download_threads, "_generate_chunk_ranges" - ) as mock_generate_chunk_ranges, mock.patch.object( - _MultithreadedDownloader, "_prep_file" - ) as mock_prep_file, mock.patch.object( - _MultithreadedDownloader, "_submit_chunks" - ) as mock_submit_chunks, mock.patch.object( - _MultithreadedDownloader, "_write_chunks" - ) as mock_write_chunks, mock.patch( - "concurrent.futures.wait" - ) as mock_futures_wait, mock.patch.object( - _MultithreadedDownloader, "_check_for_errors" - ) as mock_check_for_errors: + with ( + mock.patch.object( + download_threads, "PresignedUrlProvider" + ) as mock_url_provider_init, + mock.patch.object( + download_threads, "TransferStatus" + ) as mock_transfer_status_init, + mock.patch.object(download_threads, "_get_file_size") as mock_get_file_size, + mock.patch.object( + download_threads, "_generate_chunk_ranges" + ) as mock_generate_chunk_ranges, + mock.patch.object(_MultithreadedDownloader, "_prep_file") as mock_prep_file, + mock.patch.object( + _MultithreadedDownloader, "_submit_chunks" + ) as mock_submit_chunks, + mock.patch.object( + _MultithreadedDownloader, "_write_chunks" + ) as mock_write_chunks, + mock.patch("concurrent.futures.wait") as mock_futures_wait, + mock.patch.object( + _MultithreadedDownloader, "_check_for_errors" + ) as mock_check_for_errors, + ): mock_url_info = mock.create_autospec(PresignedUrlInfo, url=url) mock_url_provider = mock.create_autospec(PresignedUrlProvider) mock_url_provider.get_info.return_value = mock_url_info @@ -340,25 +348,25 @@ def test_download_file__error(self): file_size = int(1.5 * (2**20)) request = DownloadRequest(file_handle_id, entity_id, None, path) - with mock.patch.object( - download_threads, "PresignedUrlProvider" - ) as mock_url_provider_init, mock.patch.object( - download_threads, "TransferStatus" - ) as mock_transfer_status_init, mock.patch.object( - download_threads, "_get_file_size" - ) as mock_get_file_size, mock.patch.object( - download_threads, "_generate_chunk_ranges" - ) as mock_generate_chunk_ranges, mock.patch.object( - download_threads, "os" - ) as mock_os, mock.patch.object( - _MultithreadedDownloader, "_prep_file" - ), mock.patch.object( - _MultithreadedDownloader, "_submit_chunks" - ) as mock_submit_chunks, mock.patch.object( - _MultithreadedDownloader, "_write_chunks" - ), mock.patch( - "concurrent.futures.wait" - ) as mock_futures_wait: + with ( + mock.patch.object( + download_threads, "PresignedUrlProvider" + ) as mock_url_provider_init, + mock.patch.object( + download_threads, "TransferStatus" + ) as mock_transfer_status_init, + mock.patch.object(download_threads, "_get_file_size") as mock_get_file_size, + mock.patch.object( + download_threads, "_generate_chunk_ranges" + ) as mock_generate_chunk_ranges, + mock.patch.object(download_threads, "os") as mock_os, + mock.patch.object(_MultithreadedDownloader, "_prep_file"), + mock.patch.object( + _MultithreadedDownloader, "_submit_chunks" + ) as mock_submit_chunks, + mock.patch.object(_MultithreadedDownloader, "_write_chunks"), + mock.patch("concurrent.futures.wait") as mock_futures_wait, + ): mock_url_info = mock.create_autospec(PresignedUrlInfo, url=url) mock_url_provider = mock.create_autospec(PresignedUrlProvider) mock_url_provider.get_info.return_value = mock_url_info @@ -412,11 +420,14 @@ def test_download_file_error_in_retrieving_file_from_storage(self) -> None: request = DownloadRequest(file_handle_id, entity_id, None, path) # AND A mocked session - with mock.patch.object( - download_threads, "_get_new_session" - ) as mock_get_new_session, mock.patch.object( - download_threads, "PresignedUrlProvider" - ) as mock_url_provider_init: + with ( + mock.patch.object( + download_threads, "_get_new_session" + ) as mock_get_new_session, + mock.patch.object( + download_threads, "PresignedUrlProvider" + ) as mock_url_provider_init, + ): mock_url_info = mock.create_autospec(PresignedUrlInfo, url=url) mock_url_provider = mock.create_autospec(PresignedUrlProvider) mock_url_provider.get_info.return_value = mock_url_info diff --git a/tests/unit/synapseclient/core/unit_test_Cache.py b/tests/unit/synapseclient/core/unit_test_Cache.py index 8d654e642..9466146df 100644 --- a/tests/unit/synapseclient/core/unit_test_Cache.py +++ b/tests/unit/synapseclient/core/unit_test_Cache.py @@ -121,11 +121,10 @@ def test_subsecond_timestamps(): my_cache.add(file_handle_id=1234, path=path) - with patch.object( - cache, "_get_modified_time" - ) as _get_modified_time_mock, patch.object( - cache.Cache, "_read_cache_map" - ) as _read_cache_map_mock: + with ( + patch.object(cache, "_get_modified_time") as _get_modified_time_mock, + patch.object(cache.Cache, "_read_cache_map") as _read_cache_map_mock, + ): # this should be a match, 'cause we round microseconds to milliseconds _read_cache_map_mock.return_value = {path: "2015-05-05T21:34:55.001Z"} _get_modified_time_mock.return_value = 1430861695.001111 diff --git a/tests/unit/synapseclient/core/unit_test_doze.py b/tests/unit/synapseclient/core/unit_test_doze.py index fadeb2bbf..088e38607 100644 --- a/tests/unit/synapseclient/core/unit_test_doze.py +++ b/tests/unit/synapseclient/core/unit_test_doze.py @@ -3,6 +3,7 @@ @author: bhoff """ + import synapseclient.core.dozer as doze diff --git a/tests/unit/synapseclient/core/unit_test_remote_storage_file_wrappers.py b/tests/unit/synapseclient/core/unit_test_remote_storage_file_wrappers.py index 35f9d7678..25d4769cc 100644 --- a/tests/unit/synapseclient/core/unit_test_remote_storage_file_wrappers.py +++ b/tests/unit/synapseclient/core/unit_test_remote_storage_file_wrappers.py @@ -33,13 +33,13 @@ def _download_test(**kwargs) -> None: endpoint_url = "http://foo.s3.amazon.com" show_progress = kwargs.pop("show_progress", True) - with mock.patch( - "boto3.session.Session" - ) as mock_boto_session, mock.patch.object( - S3ClientWrapper, "_create_progress_callback_func" - ) as mock_create_progress_callback, mock.patch( - "boto3.s3.transfer.TransferConfig" - ) as mock_TransferConfig: + with ( + mock.patch("boto3.session.Session") as mock_boto_session, + mock.patch.object( + S3ClientWrapper, "_create_progress_callback_func" + ) as mock_create_progress_callback, + mock.patch("boto3.s3.transfer.TransferConfig") as mock_TransferConfig, + ): # Create a mock object for s3.Object with content_length set to an integer mock_s3_object = mock.Mock(content_length=1234) # Make resource().Object return the mock object @@ -162,15 +162,14 @@ def _upload_test(**kwargs): upload_file_path = "/tmp/upload_file" endpoint_url = "http://foo.s3.amazon.com" - with mock.patch( - "boto3.session.Session" - ) as mock_boto_session, mock.patch.object( - S3ClientWrapper, "_create_progress_callback_func" - ) as mock_create_progress_callback, mock.patch( - "boto3.s3.transfer.TransferConfig" - ) as mock_TransferConfig, mock.patch.object( - remote_file_storage_wrappers, "os" - ) as mock_os: + with ( + mock.patch("boto3.session.Session") as mock_boto_session, + mock.patch.object( + S3ClientWrapper, "_create_progress_callback_func" + ) as mock_create_progress_callback, + mock.patch("boto3.s3.transfer.TransferConfig") as mock_TransferConfig, + mock.patch.object(remote_file_storage_wrappers, "os") as mock_os, + ): mock_os.stat.return_value = mock.Mock(st_size=1234) returned_upload_path = S3ClientWrapper.upload_file( bucket_name, endpoint_url, remote_file_key, upload_file_path, **kwargs diff --git a/tests/unit/synapseclient/core/unit_test_sts_transfer.py b/tests/unit/synapseclient/core/unit_test_sts_transfer.py index fde7fbe36..d4c9e5518 100644 --- a/tests/unit/synapseclient/core/unit_test_sts_transfer.py +++ b/tests/unit/synapseclient/core/unit_test_sts_transfer.py @@ -75,12 +75,14 @@ def _command_output_test( ) ), _getDefaultUploadDestination=mock.Mock( - return_value=upload_destination - if upload_destination is not None - else { - "bucket": "bucket1", - "baseKey": "key1", - } + return_value=( + upload_destination + if upload_destination is not None + else { + "bucket": "bucket1", + "baseKey": "key1", + } + ) ), ) diff --git a/tests/unit/synapseclient/core/unit_test_utils.py b/tests/unit/synapseclient/core/unit_test_utils.py index a07cebf91..febc79ca9 100644 --- a/tests/unit/synapseclient/core/unit_test_utils.py +++ b/tests/unit/synapseclient/core/unit_test_utils.py @@ -5,12 +5,14 @@ import os import re import tempfile +from enum import Enum from shutil import rmtree from unittest.mock import MagicMock, Mock, call, mock_open, patch import pytest from synapseclient.core import constants, utils +from synapseclient.core.utils import coerce_enum_list def test_is_url() -> None: @@ -608,3 +610,41 @@ def test_print_tick_is_not_atty(self, mock_sys: MagicMock) -> None: mock_sys.stdout.write.assert_not_called() mock_sys.stdout.flush.assert_not_called() assert self.spinner._tick == 1 + + +class _Color(Enum): + RED = "RED" + BLUE = "BLUE" + + +class TestCoerceEnumList: + """Tests for coerce_enum_list.""" + + @pytest.mark.parametrize( + "input_filter,expected", + [ + ([_Color.RED], ["RED"]), + ([_Color.BLUE], ["BLUE"]), + ([_Color.RED, _Color.BLUE], ["RED", "BLUE"]), + (["RED"], ["RED"]), + (["BLUE"], ["BLUE"]), + ([_Color.RED, "BLUE"], ["RED", "BLUE"]), + ([], []), + ], + ) + def test_valid_inputs(self, input_filter, expected) -> None: + """Accepts enum members, matching strings, mixed lists, and empty lists.""" + assert coerce_enum_list(_Color, input_filter) == expected + + @pytest.mark.parametrize( + "input_filter,match", + [ + (["NOT_A_COLOR"], "Invalid value"), + ([42], "Invalid value"), + (["red"], "Invalid value"), + ], + ) + def test_invalid_inputs_raise_value_error(self, input_filter, match) -> None: + """Raises ValueError for unrecognized strings and non-string, non-enum values.""" + with pytest.raises(ValueError, match=match): + coerce_enum_list(_Color, input_filter) diff --git a/tests/unit/synapseclient/core/unit_test_version_check.py b/tests/unit/synapseclient/core/unit_test_version_check.py index 5b614808c..958304f07 100644 --- a/tests/unit/synapseclient/core/unit_test_version_check.py +++ b/tests/unit/synapseclient/core/unit_test_version_check.py @@ -1,6 +1,5 @@ """Unit tests for version check functions""" - from unittest.mock import patch import pytest diff --git a/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py b/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py index 2aa8faa62..70771c4f1 100644 --- a/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py +++ b/tests/unit/synapseclient/core/upload/unit_test_multipart_upload.py @@ -307,14 +307,11 @@ def _handle_part_success_test( md5_hex = md5_fn(chunk, None) - with mock.patch.object( - multipart_upload, "get_file_chunk" - ) as chunk_fn, mock.patch.object( - upload, "_get_thread_session" - ) as get_session, mock.patch.object( - upload, "_refresh_pre_signed_part_urls" - ) as refresh_urls, mock.patch.object( - syn, "restPUT" + with ( + mock.patch.object(multipart_upload, "get_file_chunk") as chunk_fn, + mock.patch.object(upload, "_get_thread_session") as get_session, + mock.patch.object(upload, "_refresh_pre_signed_part_urls") as refresh_urls, + mock.patch.object(syn, "restPUT"), ): get_session.return_value = mock_session chunk_fn.return_value = chunk @@ -440,13 +437,11 @@ def test_handle_part__url_expired_twice(self, syn): upload._pre_signed_part_urls = {part_number: (pre_signed_url_1, signed_headers)} mock_session = mock.Mock() - with mock.patch.object( - multipart_upload, "get_file_chunk" - ) as chunk_fn, mock.patch.object( - upload, "_get_thread_session" - ) as get_session, mock.patch.object( - upload, "_refresh_pre_signed_part_urls" - ) as refresh_urls: + with ( + mock.patch.object(multipart_upload, "get_file_chunk") as chunk_fn, + mock.patch.object(upload, "_get_thread_session") as get_session, + mock.patch.object(upload, "_refresh_pre_signed_part_urls") as refresh_urls, + ): get_session.return_value = mock_session chunk_fn.return_value = chunk refresh_urls.side_effect = [ @@ -490,17 +485,17 @@ def test_call_upload(self, syn): future.set_result((i, upload._part_size)) futures.append(future) - with mock.patch.object( - upload, "_create_synapse_upload" - ) as create_synapse_upload, mock.patch.object( - upload, "_fetch_pre_signed_part_urls" - ) as fetch_pre_signed_urls, mock.patch.object( - pool_provider, "get_executor" - ) as get_executor, mock.patch.object( - upload, "_get_thread_session" - ) as get_session, mock.patch.object( - syn, "restPUT" - ) as restPUT: + with ( + mock.patch.object( + upload, "_create_synapse_upload" + ) as create_synapse_upload, + mock.patch.object( + upload, "_fetch_pre_signed_part_urls" + ) as fetch_pre_signed_urls, + mock.patch.object(pool_provider, "get_executor") as get_executor, + mock.patch.object(upload, "_get_thread_session") as get_session, + mock.patch.object(syn, "restPUT") as restPUT, + ): mock_session = mock.Mock() get_session.return_value = mock_session @@ -539,13 +534,15 @@ def _test_call_upload__part_exception( future = Future() future.set_exception(part_exception()) - with mock.patch.object( - upload, "_create_synapse_upload" - ) as create_synapse_upload, mock.patch.object( - upload, "_fetch_pre_signed_part_urls" - ) as fetch_pre_signed_urls, mock.patch.object( - pool_provider, "get_executor" - ) as get_executor: + with ( + mock.patch.object( + upload, "_create_synapse_upload" + ) as create_synapse_upload, + mock.patch.object( + upload, "_fetch_pre_signed_part_urls" + ) as fetch_pre_signed_urls, + mock.patch.object(pool_provider, "get_executor") as get_executor, + ): create_synapse_upload.return_value = upload_status fetch_pre_signed_urls.return_value = pre_signed_urls @@ -585,13 +582,15 @@ def test_already_completed(self, syn): "state": "COMPLETED", } - with mock.patch.object( - upload, "_create_synapse_upload" - ) as create_synapse_upload, mock.patch.object( - upload, "_fetch_pre_signed_part_urls" - ) as fetch_pre_signed_urls, mock.patch.object( - pool_provider, "get_executor" - ) as get_executor: + with ( + mock.patch.object( + upload, "_create_synapse_upload" + ) as create_synapse_upload, + mock.patch.object( + upload, "_fetch_pre_signed_part_urls" + ) as fetch_pre_signed_urls, + mock.patch.object(pool_provider, "get_executor") as get_executor, + ): create_synapse_upload.return_value = upload_status_response upload_result = upload() @@ -622,15 +621,16 @@ def test_all_parts_completed(self, syn): "state": "COMPLETED", } - with mock.patch.object( - upload, "_create_synapse_upload" - ) as create_synapse_upload, mock.patch.object( - upload, "_fetch_pre_signed_part_urls" - ) as fetch_pre_signed_urls, mock.patch.object( - pool_provider, "get_executor" - ) as get_executor, mock.patch.object( - upload._syn, "restPUT" - ) as restPUT: + with ( + mock.patch.object( + upload, "_create_synapse_upload" + ) as create_synapse_upload, + mock.patch.object( + upload, "_fetch_pre_signed_part_urls" + ) as fetch_pre_signed_urls, + mock.patch.object(pool_provider, "get_executor") as get_executor, + mock.patch.object(upload._syn, "restPUT") as restPUT, + ): create_synapse_upload.return_value = create_status_response restPUT.return_value = complete_status_response @@ -664,17 +664,19 @@ def test_multipart_upload_file(self): md5_hex = "abc123" storage_location_id = 5432 - with mock.patch("os.path.exists") as os_path_exists, mock.patch( - "os.path.isdir" - ) as os_path_is_dir, mock.patch( - "os.path.getsize" - ) as os_path_getsize, mock.patch.object( - multipart_upload, - "md5_for_file", - ) as md5_for_file, mock.patch.object( - multipart_upload, - "_multipart_upload", - ) as mock_multipart_upload: + with ( + mock.patch("os.path.exists") as os_path_exists, + mock.patch("os.path.isdir") as os_path_is_dir, + mock.patch("os.path.getsize") as os_path_getsize, + mock.patch.object( + multipart_upload, + "md5_for_file", + ) as md5_for_file, + mock.patch.object( + multipart_upload, + "_multipart_upload", + ) as mock_multipart_upload, + ): os_path_getsize.return_value = file_size md5_for_file.return_value.hexdigest.return_value = md5_hex diff --git a/tests/unit/synapseclient/extensions/test_schema_management.py b/tests/unit/synapseclient/extensions/test_schema_management.py index 101b94736..1b0d1a252 100644 --- a/tests/unit/synapseclient/extensions/test_schema_management.py +++ b/tests/unit/synapseclient/extensions/test_schema_management.py @@ -33,9 +33,11 @@ async def test_register_jsonschema_async(mock_synapse_client, mock_jsonschema): m_open = mock_open(read_data=json.dumps(schema_content)) - with patch("builtins.open", m_open), patch( - "synapseclient.Synapse.get_client", return_value=mock_synapse_client - ), patch("json.load", return_value=schema_content): + with ( + patch("builtins.open", m_open), + patch("synapseclient.Synapse.get_client", return_value=mock_synapse_client), + patch("json.load", return_value=schema_content), + ): result = await register_jsonschema_async( schema_path=schema_path, organization_name=org_name, @@ -71,9 +73,11 @@ async def test_register_jsonschema_async_fix_schema_name( m_open = mock_open(read_data=json.dumps(schema_content)) - with patch("builtins.open", m_open), patch( - "synapseclient.Synapse.get_client", return_value=mock_synapse_client - ), patch("json.load", return_value=schema_content): + with ( + patch("builtins.open", m_open), + patch("synapseclient.Synapse.get_client", return_value=mock_synapse_client), + patch("json.load", return_value=schema_content), + ): result = await register_jsonschema_async( schema_path=schema_path, organization_name=org_name, diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index e8ecfd8a4..74573b04a 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -999,9 +999,10 @@ def test_create_json_schema_with_class_label( use_display_labels=False, logger=logger, ) - with open(expected_path, encoding="utf-8") as file1, open( - test_path, encoding="utf-8" - ) as file2: + with ( + open(expected_path, encoding="utf-8") as file1, + open(test_path, encoding="utf-8") as file2, + ): expected_json = json.load(file1) test_json = json.load(file2) assert expected_json == test_json @@ -1029,9 +1030,10 @@ def test_create_json_schema_with_display_names( schema_path=test_path, logger=logger, ) - with open(expected_path, encoding="utf-8") as file1, open( - test_path, encoding="utf-8" - ) as file2: + with ( + open(expected_path, encoding="utf-8") as file1, + open(test_path, encoding="utf-8") as file2, + ): expected_json = json.load(file1) test_json = json.load(file2) assert expected_json == test_json @@ -1074,9 +1076,10 @@ def test_create_json_schema_with_class_label_using_jsonld( use_display_labels=False, logger=logger, ) - with open(expected_path, encoding="utf-8") as file1, open( - test_path, encoding="utf-8" - ) as file2: + with ( + open(expected_path, encoding="utf-8") as file1, + open(test_path, encoding="utf-8") as file2, + ): expected_json = json.load(file1) test_json = json.load(file2) assert expected_json == test_json diff --git a/tests/unit/synapseclient/extensions/unit_test_curator.py b/tests/unit/synapseclient/extensions/unit_test_curator.py index caf32adcc..de0273b1c 100644 --- a/tests/unit/synapseclient/extensions/unit_test_curator.py +++ b/tests/unit/synapseclient/extensions/unit_test_curator.py @@ -52,7 +52,7 @@ SchemaRegistryColumnConfig, get_latest_schema_uri, ) -from synapseclient.models import Column, ColumnType +from synapseclient.models import Column, ColumnType, ViewTypeMask from synapseclient.models.curation import ( FileBasedMetadataTaskProperties, RecordBasedMetadataTaskProperties, @@ -150,6 +150,7 @@ def test_create_file_based_metadata_task_success_with_schema( syn=self.mock_syn, synapse_entity_id=self.folder_id, entity_view_name=self.entity_view_name, + view_type_mask=ViewTypeMask.FILE, ) mock_create_wiki.assert_called_once_with( syn=self.mock_syn, entity_view_id="syn87654321", owner_id=self.folder_id @@ -480,6 +481,7 @@ def test_create_file_based_metadata_task_with_assignee( syn=self.mock_syn, synapse_entity_id=self.folder_id, entity_view_name=self.entity_view_name, + view_type_mask=ViewTypeMask.FILE, ) @@ -565,9 +567,7 @@ def test_create_record_based_metadata_task_success( mock_curation_task.store.return_value = mock_task mock_curation_task_cls.return_value = mock_curation_task - mock_grid = Mock() mock_grid_instance = Mock() - mock_grid_instance.export_to_record_set.return_value = mock_grid mock_grid_cls.return_value = mock_grid_instance # WHEN I create the record-based metadata task @@ -590,7 +590,7 @@ def test_create_record_based_metadata_task_success( record_set, task, grid = result assert record_set == mock_record_set assert task == mock_task - assert grid == mock_grid + assert grid == mock_grid_instance mock_extract_schema.assert_called_once_with( syn=self.mock_syn, schema_uri=self.schema_uri @@ -601,6 +601,11 @@ def test_create_record_based_metadata_task_success( synapse_client=self.mock_syn, ) + # AND the Grid deprecation warning was emitted + self.mock_syn.logger.warning.assert_any_call( + "A Grid object will no longer be created by this function starting in v5.0.0." + ) + @patch( "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" ) @@ -629,7 +634,7 @@ def test_create_record_based_metadata_task_no_schema_binding( mock_get_project_id_from_entity_id, ): """Test creation without schema binding.""" - # Setup mocks + # GIVEN a record-based metadata task with schema binding disabled mock_get_client.return_value = self.mock_syn mock_get_project_id_from_entity_id.return_value = self.project_id @@ -652,12 +657,10 @@ def test_create_record_based_metadata_task_no_schema_binding( mock_curation_task.store.return_value = mock_task mock_curation_task_cls.return_value = mock_curation_task - mock_grid = Mock() mock_grid_instance = Mock() - mock_grid_instance.export_to_record_set.return_value = mock_grid mock_grid_cls.return_value = mock_grid_instance - # Call function + # WHEN I create the record-based metadata task without schema binding result = create_record_based_metadata_task( folder_id=self.folder_id, record_set_name=self.record_set_name, @@ -670,7 +673,15 @@ def test_create_record_based_metadata_task_no_schema_binding( synapse_client=self.mock_syn, ) - # Assertions + # THEN the correct items are returned + assert isinstance(result, tuple) + assert len(result) == 3 + record_set, task, grid = result + assert record_set == mock_record_set + assert task == mock_task + assert grid == mock_grid_instance + + # AND schema binding was skipped mock_record_set.bind_schema.assert_not_called() @patch( @@ -1010,9 +1021,7 @@ def test_create_record_based_metadata_task_with_assignee( mock_curation_task.store.return_value = mock_task mock_curation_task_cls.return_value = mock_curation_task - mock_grid = Mock() mock_grid_instance = Mock() - mock_grid_instance.export_to_record_set.return_value = mock_grid mock_grid_cls.return_value = mock_grid_instance # WHEN I create the record-based metadata task with assignee_principal_id @@ -1035,7 +1044,7 @@ def test_create_record_based_metadata_task_with_assignee( record_set, task, grid = result assert record_set == mock_record_set assert task == mock_task - assert grid == mock_grid + assert grid == mock_grid_instance # AND the CurationTask should be called with assignee_principal_id as string mock_curation_task_cls.assert_called_once_with( @@ -1048,6 +1057,88 @@ def test_create_record_based_metadata_task_with_assignee( ), ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.project_id_from_entity_id" + ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.Synapse.get_client" + ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.extract_schema_properties_from_web" + ) + @patch( + "synapseclient.extensions.curator.record_based_metadata_task.tempfile.NamedTemporaryFile" + ) + @patch("synapseclient.extensions.curator.record_based_metadata_task.RecordSet") + @patch("synapseclient.extensions.curator.record_based_metadata_task.CurationTask") + @patch("synapseclient.extensions.curator.record_based_metadata_task.Grid") + @patch("builtins.open") + def test_create_record_based_metadata_task_create_grid_false( + self, + mock_open, + mock_grid_cls, + mock_curation_task_cls, + mock_record_set_cls, + mock_temp_file, + mock_extract_schema, + mock_get_client, + mock_get_project_id_from_entity_id, + ): + """Test that Grid is not created and not returned when create_grid=False.""" + # GIVEN a record-based metadata task with Grid creation disabled + mock_get_client.return_value = self.mock_syn + mock_get_project_id_from_entity_id.return_value = self.project_id + + mock_df = pd.DataFrame(columns=["specimenID", "age", "diagnosis"]) + mock_extract_schema.return_value = mock_df + + mock_temp = Mock() + mock_temp.name = "/tmp/test.csv" + mock_temp_file.return_value = mock_temp + + mock_record_set = Mock() + mock_record_set.id = "syn87654321" + mock_record_set_instance = Mock() + mock_record_set_instance.store.return_value = mock_record_set + mock_record_set_cls.return_value = mock_record_set_instance + + mock_task = Mock() + mock_task.task_id = "task123" + mock_curation_task = Mock() + mock_curation_task.store.return_value = mock_task + mock_curation_task_cls.return_value = mock_curation_task + + # WHEN create_grid=False + result = create_record_based_metadata_task( + folder_id=self.folder_id, + record_set_name=self.record_set_name, + record_set_description=self.record_set_description, + curation_task_name=self.curation_task_name, + upsert_keys=self.upsert_keys, + instructions=self.instructions, + schema_uri=self.schema_uri, + synapse_client=self.mock_syn, + create_grid=False, + ) + + # THEN only RecordSet and CurationTask are returned + assert isinstance(result, tuple) + assert len(result) == 2 + record_set, task = result + assert record_set == mock_record_set + assert task == mock_task + + # AND Grid was never instantiated or created + mock_grid_cls.assert_not_called() + + # AND no deprecation warning about Grid was emitted + warning_calls = [ + str(call) + for call in self.mock_syn.logger.warning.call_args_list + if "Grid" in str(call) + ] + assert not warning_calls + class TestQuerySchemaRegistry(unittest.TestCase): """Test cases for query_schema_registry function.""" @@ -1466,6 +1557,72 @@ def test_create_json_schema_entity_view_success( mock_view.reorder_column.assert_any_call(name="name", index=0) mock_view.reorder_column.assert_any_call(name="id", index=0) + @patch("synapseclient.extensions.curator.file_based_metadata_task.isinstance") + @patch("synapseclient.extensions.curator.file_based_metadata_task.EntityView") + @patch("synapseclient.extensions.curator.file_based_metadata_task.get") + @patch("synapseclient.extensions.curator.file_based_metadata_task.JSONSchema") + def test_create_json_schema_entity_view_with_file_and_folder_view_type_mask( + self, + mock_json_schema_cls, + mock_get, + mock_entity_view_cls, + mock_isinstance, + ): + """Test that a combined FILE|FOLDER view_type_mask is forwarded to EntityView.""" + # GIVEN a valid synapse entity with a JSON schema + entity_id = "syn12345678" + entity_view_name = "Test View" + combined_mask = ViewTypeMask.FILE | ViewTypeMask.FOLDER + + mock_entity = Mock() + mock_entity.get_schema.return_value = JSONSchemaBinding( + object_id=1, + object_type="", + created_on="", + created_by="", + enable_derived_annotations=True, + json_schema_version_info=JSONSchemaVersionInfo( + organization_id="", + organization_name="org.name", + schema_id="", + id="", + schema_name="schema.name", + version_id="", + semantic_version="0.0.1", + json_sha256_hex="", + created_on="", + created_by="", + ), + ) + mock_get.return_value = mock_entity + mock_isinstance.return_value = True + + mock_json_schema = Mock() + mock_json_schema.get_body.return_value = { + "properties": {"name": {"type": "string"}, "age": {"type": "integer"}} + } + mock_json_schema_cls.return_value = mock_json_schema + + mock_view = Mock() + mock_view.id = "syn87654321" + mock_view.store.return_value = mock_view + mock_entity_view_cls.return_value = mock_view + + # WHEN I create the JSON schema entity view with both file and folder types + result = create_json_schema_entity_view( + syn=self.mock_syn, + synapse_entity_id=entity_id, + entity_view_name=entity_view_name, + view_type_mask=combined_mask, + ) + + # THEN the entity view should be created with the combined mask + assert result == "syn87654321" + _, kwargs = mock_entity_view_cls.call_args + assert kwargs["view_type_mask"] == combined_mask + assert kwargs["view_type_mask"] & ViewTypeMask.FILE + assert kwargs["view_type_mask"] & ViewTypeMask.FOLDER + @patch( "synapseclient.extensions.curator.file_based_metadata_task.update_wiki_with_entity_view" ) diff --git a/tests/unit/synapseclient/mixins/unit_test_table_components.py b/tests/unit/synapseclient/mixins/unit_test_table_components.py index 5f8b91566..6365a9665 100644 --- a/tests/unit/synapseclient/mixins/unit_test_table_components.py +++ b/tests/unit/synapseclient/mixins/unit_test_table_components.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd import pytest +from pandas.api.types import is_integer_dtype, is_object_dtype from synapseclient import Synapse from synapseclient.api import ViewEntityType, ViewTypeMask @@ -37,6 +38,7 @@ _query_table_csv, _query_table_next_page, _query_table_row_set, + convert_dtypes_to_json_serializable, csv_to_pandas_df, ) from synapseclient.models.table_components import ( @@ -75,7 +77,7 @@ ) DEFAULT_QUOTE_CHARACTER = '"' DEFAULT_SEPARATOR = "," -DEFAULT_ESCAPSE_CHAR = "\\" +DEFAULT_ESCAPE_CHAR = "\\" class TestTableStoreMixin: @@ -3968,3 +3970,294 @@ def test_csv_pandas_df_with_row_id_and_version_etag_in_index( ).convert_dtypes() # resolve datatype issue such as StringDtype vs object # THEN assert the dataframe is equal to the expected dataframe pd.testing.assert_frame_equal(df, expected_df) + + @pytest.mark.parametrize( + "list_column_types", + [ + {"empty_list": "INTEGER_LIST"}, + {"empty_list": "BOOLEAN_LIST"}, + {"empty_list": "STRING_LIST"}, + {"empty_list": "USERID_LIST"}, + {"empty_list": "ENTITYID_LIST"}, + None, + ], + ids=[ + "INTEGER_LIST", + "BOOLEAN_LIST", + "STRING_LIST", + "USERID_LIST", + "ENTITYID_LIST", + "no_types", + ], + ) + def test_csv_to_pandas_df_all_na_list_column(self, list_column_types): + """Reproducer for the bug where querying a table with a list column whose + values are all NA in the result set raised + TypeError: Invalid value '[]' for dtype 'Int64'. + + pandas' read_csv().convert_dtypes() infers an all-empty column as the + nullable Int64 dtype; the previous fillna({col: '[]'}) implementation + could not store a string into that column.""" + # GIVEN a CSV where every row has an empty value for the list column + csv_content = "name,empty_list\n" "Alice,\n" "Bob,\n" "Charlie," + csv_file = BytesIO(csv_content.encode("utf-8")) + + # WHEN csv_to_pandas_df is called for that list column + df = csv_to_pandas_df( + filepath=csv_file, + list_columns=["empty_list"], + list_column_types=list_column_types, + ) + + # THEN the all-NA column should become a column of empty lists, and the + # other columns should still parse normally + assert list(df["name"]) == ["Alice", "Bob", "Charlie"] + assert list(df["empty_list"]) == [[], [], []] + + def test_csv_to_pandas_df_mixed_all_na_and_populated_list_columns(self): + """When two list columns are present and only one is all-NA, the + populated one must still parse correctly.""" + # GIVEN a CSV with one populated list column and one all-NA list column + csv_content = ( + "name,populated_list,empty_list\n" + 'Alice,"[1, 2, 3]",\n' + 'Bob,"[4, 5]",\n' + 'Charlie,"[6]",' + ) + csv_file = BytesIO(csv_content.encode("utf-8")) + + # WHEN csv_to_pandas_df is called + df = csv_to_pandas_df( + filepath=csv_file, + list_columns=["populated_list", "empty_list"], + list_column_types={ + "populated_list": "INTEGER_LIST", + "empty_list": "INTEGER_LIST", + }, + ) + + # THEN both columns should have the correct contents + assert list(df["populated_list"]) == [[1, 2, 3], [4, 5], [6]] + assert list(df["empty_list"]) == [[], [], []] + + +class TestConvertDtypesToJsonSerializable: + """Tests for convert_dtypes_to_json_serializable function""" + + def test_int_and_float_columns_converted_to_object(self): + """Test that int64 and float64 columns are always cast to object dtype, + even when no NA is present (values are preserved).""" + df = pd.DataFrame({"int_col": [1, 2, 3, 4], "float_col": [1.1, 2.2, 3.3, 4.4]}) + assert df["int_col"].dtype == "int64" + assert df["float_col"].dtype == "float64" + + result = convert_dtypes_to_json_serializable(df) + assert is_object_dtype(result.int_col) + assert is_object_dtype(result.float_col) + assert list(result["int_col"]) == [1, 2, 3, 4] + assert list(result["float_col"]) == [1.1, 2.2, 3.3, 4.4] + + def test_convert_na_and_columns_to_object(self): + """Test that pd.NA values are converted to None for int64 and float64 columns by _serialize_json_value""" + df = pd.DataFrame( + { + "int_col": pd.array([1, 2, pd.NA, 4], dtype="Int64"), + "float_col": pd.array([1.1, 2.2, pd.NA, 4.4], dtype="Float64"), + } + ) + result = convert_dtypes_to_json_serializable(df) + assert is_object_dtype(result.int_col) + assert is_object_dtype(result.float_col) + assert list(result["int_col"]) == [1, 2, None, 4] + assert list(result["float_col"]) == [1.1, 2.2, None, 4.4] + + def test_row_columns_remain_int(self): + """Test that ROW_ID, ROW_VERSION, and ROW_ID.1 columns remain as int while other columns become object""" + # GIVEN a dataframe with special columns (ROW_ID, ROW_VERSION, ROW_ID.1) and a regular column + df = pd.DataFrame( + { + "ROW_ID": pd.array([1, 2, 3, 4], dtype="Int64"), + "ROW_VERSION": pd.array([5, 6, 7, 8], dtype="Int64"), + "ROW_ID.1": pd.array([9, 10, 11, 12], dtype="Int64"), + "other_col": [10, 20, 30, 40], # Use regular list without pd.NA + } + ) + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN all special columns should remain as int while other_col should become object + assert is_integer_dtype(result.ROW_ID), "ROW_ID should remain integer dtype" + assert is_integer_dtype( + result.ROW_VERSION + ), "ROW_VERSION should remain int64 dtype" + assert is_integer_dtype( + result["ROW_ID.1"] + ), "ROW_ID.1 should remain int64 dtype" + assert is_object_dtype(result.other_col), "other_col should become object dtype" + + def test_ellipsis_handling_in_list(self): + """Test that Ellipsis (...) objects in lists are converted to '...' strings""" + # GIVEN a dataframe with Ellipsis in a list + df = pd.DataFrame({"list_with_ellipsis": [[1, 2, ...], [4, ..., 6]]}) + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN Ellipsis should be converted to "..." in JSON string + assert result["list_with_ellipsis"].iloc[0] == [1, 2, "..."] + assert result["list_with_ellipsis"].iloc[1] == [4, "...", 6] + assert is_object_dtype(result.list_with_ellipsis) + + def test_ellipsis_handling_in_dict(self): + """Test that Ellipsis (...) objects in dicts are converted to '...' strings""" + # GIVEN a dataframe with Ellipsis in a dict + df = pd.DataFrame( + { + "dict_with_ellipsis": [ + {"id": 1, "data": ...}, + {"id": 2, "items": [1, ...]}, + ] + } + ) + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN Ellipsis should be converted to "..." in JSON string + assert result.dict_with_ellipsis.iloc[0] == {"id": 1, "data": "..."} + assert result.dict_with_ellipsis.iloc[1] == {"id": 2, "items": [1, "..."]} + assert is_object_dtype(result.dict_with_ellipsis) + + def test_standalone_ellipsis(self): + """Test that standalone Ellipsis objects are converted to '...' strings""" + # GIVEN a dataframe with standalone Ellipsis + df = pd.DataFrame({"ellipsis_col": [1, ..., 3]}) + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN Ellipsis should be converted to "..." + assert result["ellipsis_col"].iloc[0] == 1 + assert result["ellipsis_col"].iloc[1] == "..." + assert result["ellipsis_col"].iloc[2] == 3 + + def test_none_in_list_column_remains_none(self): + """Test that pd.NA values in a column of lists are normalized to None + (not to an empty list).""" + # GIVEN a dataframe with None in list column + df = pd.DataFrame({"list_col": [[1, 2, 3], pd.NA, [7, 8, 9]]}) + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN the pd.NA entry should become None (and surrounding lists preserved) + assert result["list_col"].iloc[0] == [1, 2, 3] + assert result["list_col"].iloc[1] is None + assert result["list_col"].iloc[2] == [7, 8, 9] + + def test_dict_with_quotes_in_values(self): + """Test that dicts with quotes in string values are properly handled""" + # GIVEN a dataframe with dict containing quotes + df = pd.DataFrame( + { + "dict_col": [ + {"description": 'Text with "quotes" here'}, + {"description": 'Another "quoted" text'}, + ] + } + ) + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN the JSON string should be properly formatted + assert result["dict_col"].iloc[0] == {"description": 'Text with "quotes" here'} + assert result["dict_col"].iloc[1] == {"description": 'Another "quoted" text'} + assert is_object_dtype(result.dict_col) + + def test_empty_dataframe(self): + """Test that empty dataframe is handled correctly""" + # GIVEN an empty dataframe + df = pd.DataFrame() + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN it should return an empty dataframe + assert len(result) == 0 + assert len(result.columns) == 0 + + def test_mixed_column_types_no_conversion_needed(self): + """Test that multiple column types without NA are handled correctly + together: values are preserved, ROW_* stays int, other columns become + object dtype.""" + # GIVEN a dataframe with mixed column types + df = pd.DataFrame( + { + "ROW_ID": pd.array([1, 2, 3], dtype="Int64"), + "ROW_VERSION": pd.array([1, 1, 1], dtype="Int64"), + "int_col": [10, 20, 30], # Use regular list without pd.NA + "float_col": [1.1, 2.2, 3.3], + "string_col": ["a", "b", "c"], + "list_col": [[1, 2], [3, 4], None], + "dict_col": [{"id": 1}, {"id": 2}, {"id": 3}], + "bool_col": [True, False, True], + } + ) + # Snapshot before the function mutates df in place + original = df.copy(deep=True) + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN values are preserved against the pre-call snapshot, ROW_* stay + # int, and other columns are object dtype + assert is_integer_dtype(result.ROW_ID) + assert is_integer_dtype(result.ROW_VERSION) + assert is_object_dtype(result.int_col) + assert is_object_dtype(result.float_col) + assert is_object_dtype(result.string_col) + assert is_object_dtype(result.list_col) + assert is_object_dtype(result.dict_col) + assert is_object_dtype(result.bool_col) + + for col in original.columns: + assert list(result[col]) == list(original[col]) + + def test_nested_dict_with_ellipsis(self): + """Test that nested dicts with Ellipsis are properly handled""" + # GIVEN a dataframe with nested dict containing Ellipsis + df = pd.DataFrame( + { + "nested_dict": [ + {"outer": {"inner": ...}}, + {"data": {"list": [1, 2, ...]}}, + ] + } + ) + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN Ellipsis should be converted in nested structures + assert result["nested_dict"].iloc[0] == {"outer": {"inner": "..."}} + assert result["nested_dict"].iloc[1] == {"data": {"list": [1, 2, "..."]}} + + def test_nullable_int64_with_pd_na(self): + """Test that Int64 columns with pd.NA get pd.NA converted to None by _serialize_json_value""" + # GIVEN a dataframe with nullable Int64 column containing pd.NA + df = pd.DataFrame( + {"nullable_int_col": pd.array([1, 2, pd.NA, 4, pd.NA], dtype="Int64")} + ) + + # WHEN convert_dtypes_to_json_serializable is called + result = convert_dtypes_to_json_serializable(df) + + # THEN the column should be object type and pd.NA should be converted to None + assert is_object_dtype(result.nullable_int_col) + expected_result = pd.DataFrame( + {"nullable_int_col": [1, 2, None, 4, None]} + ).convert_dtypes() + pd.testing.assert_frame_equal(result, expected_result, check_dtype=False) + assert is_object_dtype(result.nullable_int_col) diff --git a/tests/unit/synapseclient/models/async/unit_test_curation_async.py b/tests/unit/synapseclient/models/async/unit_test_curation_async.py index 53649445b..99f3d49b4 100644 --- a/tests/unit/synapseclient/models/async/unit_test_curation_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_curation_async.py @@ -1,24 +1,37 @@ """Unit tests for the CurationTask and Grid models.""" -from unittest.mock import AsyncMock, patch +import os +from unittest.mock import AsyncMock, MagicMock, patch import pytest from synapseclient import Synapse from synapseclient.core.constants.concrete_types import ( FILE_BASED_METADATA_TASK_PROPERTIES, + GRID_CSV_IMPORT_REQUEST, + GRID_EXECUTION_DETAILS, RECORD_BASED_METADATA_TASK_PROPERTIES, + UPLOAD_TO_TABLE_PREVIEW_REQUEST, ) +from synapseclient.models import EntityView, RecordSet from synapseclient.models.curation import ( CreateGridRequest, CurationTask, + CurationTaskStatus, + DownloadFromGridRequest, FileBasedMetadataTaskProperties, Grid, + GridCsvImportRequest, + GridExecutionDetails, GridRecordSetExportRequest, RecordBasedMetadataTaskProperties, + SynchronizeGridRequest, + TaskState, + UploadToTablePreviewRequest, _create_task_properties_from_dict, ) from synapseclient.models.recordset import ValidationSummary +from synapseclient.models.table_components import Column, CsvTableDescriptor TASK_ID = 42 TASK_ID_2 = 99 @@ -39,6 +52,8 @@ GRID_ETAG = "grid-etag-456" STARTED_BY = "user-1" STARTED_ON = "2024-03-01T00:00:00.000Z" +FILE_HANDLE_ID = "1234567" +OWNER_PRINCIPAL_ID = 987654 def _get_file_based_task_api_response(): @@ -94,9 +109,35 @@ def _get_grid_session_response(): "lastReplicaIdService": -5, "gridJsonSchema$Id": "my-schema-id", "sourceEntityId": SOURCE_ENTITY_ID, + "ownerPrincipalId": OWNER_PRINCIPAL_ID, } +STATUS_ETAG = "status-etag-789" +STATUS_LAST_UPDATED_ON = "2024-04-01T00:00:00.000Z" +STATUS_LAST_UPDATED_BY = "444444" + + +def _get_curation_task_status_response( + state: str = "NOT_STARTED", + active_session_id: str | None = None, +): + """Return a mock CurationTaskStatus API response.""" + response = { + "taskId": TASK_ID, + "state": state, + "lastUpdatedBy": STATUS_LAST_UPDATED_BY, + "lastUpdatedOn": STATUS_LAST_UPDATED_ON, + "etag": STATUS_ETAG, + } + if active_session_id is not None: + response["executionDetails"] = { + "concreteType": GRID_EXECUTION_DETAILS, + "activeSessionId": active_session_id, + } + return response + + class TestFileBasedMetadataTaskProperties: """Tests for the FileBasedMetadataTaskProperties dataclass.""" @@ -209,6 +250,87 @@ def test_unknown_concrete_type_raises_error(self) -> None: _create_task_properties_from_dict(data) +class TestGridExecutionDetails: + """Tests for the GridExecutionDetails dataclass.""" + + def test_fill_from_dict(self) -> None: + # GIVEN a response dict with an active session id + response = { + "concreteType": GRID_EXECUTION_DETAILS, + "activeSessionId": SESSION_ID, + } + + # WHEN I fill a GridExecutionDetails from the dict + details = GridExecutionDetails() + details.fill_from_dict(response) + + # THEN the active_session_id should be populated + assert details.active_session_id == SESSION_ID + + def test_to_synapse_request(self) -> None: + # GIVEN a GridExecutionDetails with an active session id + details = GridExecutionDetails(active_session_id=SESSION_ID) + + # WHEN I convert it to a request dict + request = details.to_synapse_request() + + # THEN the request should contain the concreteType and activeSessionId + assert request["concreteType"] == GRID_EXECUTION_DETAILS + assert request["activeSessionId"] == SESSION_ID + + +class TestCurationTaskStatus: + """Tests for the CurationTaskStatus dataclass.""" + + def test_fill_from_dict_full(self) -> None: + """All scalar fields and nested GridExecutionDetails are populated; state string is coerced to enum.""" + # GIVEN a full status response with execution details + response = _get_curation_task_status_response( + state="IN_PROGRESS", active_session_id=SESSION_ID + ) + + # WHEN I fill a CurationTaskStatus from it + status = CurationTaskStatus().fill_from_dict(response) + + # THEN all fields should be populated and state coerced to the enum + assert status.task_id == TASK_ID + assert status.state == TaskState.IN_PROGRESS + assert status.last_updated_by == STATUS_LAST_UPDATED_BY + assert status.last_updated_on == STATUS_LAST_UPDATED_ON + assert status.etag == STATUS_ETAG + assert isinstance(status.execution_details, GridExecutionDetails) + assert status.execution_details.active_session_id == SESSION_ID + + def test_fill_from_dict_without_execution_details(self) -> None: + """execution_details is None when the response omits the executionDetails key.""" + # GIVEN a status response with no executionDetails + response = _get_curation_task_status_response(state="NOT_STARTED") + + # WHEN I fill a CurationTaskStatus from it + status = CurationTaskStatus().fill_from_dict(response) + + # THEN execution_details should be None + assert status.execution_details is None + assert status.state == TaskState.NOT_STARTED + + def test_fill_from_dict_unknown_execution_details_concrete_type(self) -> None: + """An unrecognized concreteType in executionDetails raises ValueError.""" + # GIVEN a status response with an unknown executionDetails concreteType + response = _get_curation_task_status_response(state="NOT_STARTED") + response["executionDetails"] = { + "concreteType": "org.sagebionetworks.repo.model.curation.execution.Unknown", + "activeSessionId": SESSION_ID, + } + + # WHEN I fill a CurationTaskStatus from it + # THEN it should raise ValueError, consistent with how unknown state + # values and unknown task-properties concrete types are handled + with pytest.raises( + ValueError, match="Unknown concreteType for TaskExecutionDetails" + ): + CurationTaskStatus().fill_from_dict(response) + + class TestCurationTask: """Unit tests for the CurationTask model.""" @@ -251,6 +373,16 @@ def test_fill_from_dict_record_based(self) -> None: assert isinstance(task.task_properties, RecordBasedMetadataTaskProperties) assert task.task_properties.record_set_id == RECORD_SET_ID + def test_fill_from_dict_missing_task_properties_raises(self) -> None: + # GIVEN a CurationTask API response with taskProperties omitted + response = _get_file_based_task_api_response() + del response["taskProperties"] + + # WHEN I fill a CurationTask from the response + # THEN a ValueError should be raised + with pytest.raises(ValueError, match="taskProperties was not found"): + CurationTask().fill_from_dict(response) + def test_to_synapse_request(self) -> None: # GIVEN a CurationTask with all fields set task = CurationTask( @@ -307,6 +439,23 @@ def test_has_changed_true_after_modification(self) -> None: # THEN has_changed should be True assert task.has_changed is True + def test_has_changed_true_after_nested_task_properties_mutation(self) -> None: + # GIVEN a CurationTask with task_properties and a persistent instance set + task = CurationTask( + task_id=TASK_ID, + task_properties=FileBasedMetadataTaskProperties( + file_view_id=FILE_VIEW_ID, + upload_folder_id=UPLOAD_FOLDER_ID, + ), + ) + task._set_last_persistent_instance() + + # WHEN I mutate a field on the nested task_properties object in place + task.task_properties.file_view_id = "syn_updated" + + # THEN has_changed should be True because task_properties was deep-copied + assert task.has_changed is True + async def test_get_async(self) -> None: # GIVEN a CurationTask with a task_id task = CurationTask(task_id=TASK_ID) @@ -381,14 +530,17 @@ async def empty_list_gen(*args, **kwargs): return yield # pragma: no cover - with patch( - "synapseclient.models.curation.list_curation_tasks", - return_value=empty_list_gen(), - ), patch( - "synapseclient.models.curation.create_curation_task", - new_callable=AsyncMock, - return_value=_get_file_based_task_api_response(), - ) as mock_create: + with ( + patch( + "synapseclient.models.curation.list_curation_tasks", + return_value=empty_list_gen(), + ), + patch( + "synapseclient.models.curation.create_curation_task", + new_callable=AsyncMock, + return_value=_get_file_based_task_api_response(), + ) as mock_create, + ): result = await task.store_async(synapse_client=self.syn) # THEN the create API should be called @@ -446,18 +598,22 @@ async def mock_list(*args, **kwargs): yield existing_response # WHEN I call store_async - with patch( - "synapseclient.models.curation.list_curation_tasks", - return_value=mock_list(), - ), patch( - "synapseclient.models.curation.get_curation_task", - new_callable=AsyncMock, - return_value=existing_response, - ), patch( - "synapseclient.models.curation.update_curation_task", - new_callable=AsyncMock, - return_value=existing_response, - ) as mock_update: + with ( + patch( + "synapseclient.models.curation.list_curation_tasks", + return_value=mock_list(), + ), + patch( + "synapseclient.models.curation.get_curation_task", + new_callable=AsyncMock, + return_value=existing_response, + ), + patch( + "synapseclient.models.curation.update_curation_task", + new_callable=AsyncMock, + return_value=existing_response, + ) as mock_update, + ): result = await task.store_async(synapse_client=self.syn) # THEN it should have merged the existing task and done an update @@ -561,6 +717,490 @@ async def mock_list(*args, **kwargs): results[1].task_properties, RecordBasedMetadataTaskProperties ) + async def test_get_status_async(self) -> None: + """Verify that get_status_async calls the API with the task_id and returns a parsed CurationTaskStatus.""" + # GIVEN a CurationTask with a task_id + task = CurationTask(task_id=TASK_ID) + + # WHEN I call get_status_async + with patch( + "synapseclient.models.curation.get_curation_task_status", + new_callable=AsyncMock, + return_value=_get_curation_task_status_response( + state="IN_PROGRESS", active_session_id=SESSION_ID + ), + ) as mock_get_status: + result = await task.get_status_async(synapse_client=self.syn) + + # THEN the API should be called with the task_id + mock_get_status.assert_called_once_with( + task_id=TASK_ID, synapse_client=self.syn + ) + + # AND the response should be parsed into a CurationTaskStatus + assert isinstance(result, CurationTaskStatus) + assert result.task_id == TASK_ID + assert result.state == TaskState.IN_PROGRESS + assert result.etag == STATUS_ETAG + assert result.last_updated_by == STATUS_LAST_UPDATED_BY + assert result.last_updated_on == STATUS_LAST_UPDATED_ON + assert isinstance(result.execution_details, GridExecutionDetails) + assert result.execution_details.active_session_id == SESSION_ID + + async def test_get_status_async_without_task_id(self) -> None: + """Verify that get_status_async raises ValueError when task_id is not set.""" + # GIVEN a CurationTask without a task_id + task = CurationTask() + + # WHEN I call get_status_async + # THEN it should raise ValueError + with pytest.raises(ValueError, match="task_id is required to get"): + await task.get_status_async(synapse_client=self.syn) + + async def test_update_status_async(self) -> None: + """Verify that update_status_async serializes the status correctly and returns the updated CurationTaskStatus.""" + # GIVEN a CurationTask with a task_id and a status to write + task = CurationTask(task_id=TASK_ID) + status_to_update = CurationTaskStatus( + task_id=TASK_ID, + state=TaskState.IN_PROGRESS, + execution_details=GridExecutionDetails(active_session_id=SESSION_ID), + etag=STATUS_ETAG, + ) + expected_payload = status_to_update.to_synapse_request() + + # WHEN I call update_status_async + with patch( + "synapseclient.models.curation.update_curation_task_status", + new_callable=AsyncMock, + return_value=_get_curation_task_status_response( + state="IN_PROGRESS", active_session_id=SESSION_ID + ), + ) as mock_update_status: + result = await task.update_status_async( + curation_task_status=status_to_update, synapse_client=self.syn + ) + + # THEN the API should be called with the serialized status + mock_update_status.assert_called_once_with( + task_id=TASK_ID, + curation_task_status=expected_payload, + synapse_client=self.syn, + ) + + # AND the state enum should be serialized as its string value + assert expected_payload["state"] == "IN_PROGRESS" + assert expected_payload["executionDetails"]["activeSessionId"] == SESSION_ID + + # AND the response should be parsed back into a CurationTaskStatus + assert isinstance(result, CurationTaskStatus) + assert result.state == TaskState.IN_PROGRESS + assert isinstance(result.execution_details, GridExecutionDetails) + assert result.execution_details.active_session_id == SESSION_ID + + async def test_update_status_async_without_task_id(self) -> None: + """Verify that update_status_async raises ValueError when task_id is not set.""" + # GIVEN a CurationTask without a task_id + task = CurationTask() + + # WHEN I call update_status_async + # THEN it should raise ValueError + with pytest.raises(ValueError, match="task_id is required to update"): + await task.update_status_async( + curation_task_status=CurationTaskStatus(), + synapse_client=self.syn, + ) + + async def test_set_active_grid_session_async(self) -> None: + """Verify that set_active_grid_session_async fetches the current status and PUTs a payload with the new session id.""" + # GIVEN a CurationTask with a task_id + task = CurationTask(task_id=TASK_ID) + + # AND a current status fetched from the server with no execution details + # AND an update response that reflects the new active grid session + get_response = _get_curation_task_status_response(state="NOT_STARTED") + put_response = _get_curation_task_status_response( + state="NOT_STARTED", active_session_id=SESSION_ID + ) + + # WHEN I call set_active_grid_session_async + with ( + patch( + "synapseclient.models.curation.get_curation_task_status", + new_callable=AsyncMock, + return_value=get_response, + ) as mock_get_status, + patch( + "synapseclient.models.curation.update_curation_task_status", + new_callable=AsyncMock, + return_value=put_response, + ) as mock_update_status, + ): + result = await task.set_active_grid_session_async( + active_session_id=SESSION_ID, synapse_client=self.syn + ) + + # THEN it should fetch the current status first + mock_get_status.assert_called_once_with( + task_id=TASK_ID, synapse_client=self.syn + ) + + # AND PUT a payload that carries the fresh etag and the new + # GridExecutionDetails linked to the given session id + mock_update_status.assert_called_once() + put_kwargs = mock_update_status.call_args.kwargs + assert put_kwargs["task_id"] == TASK_ID + payload = put_kwargs["curation_task_status"] + assert payload["etag"] == STATUS_ETAG + assert payload["state"] == "NOT_STARTED" + assert payload["executionDetails"]["concreteType"] == GRID_EXECUTION_DETAILS + assert payload["executionDetails"]["activeSessionId"] == SESSION_ID + + # AND it should return the parsed update response + assert isinstance(result, CurationTaskStatus) + assert isinstance(result.execution_details, GridExecutionDetails) + assert result.execution_details.active_session_id == SESSION_ID + + async def test_set_active_grid_session_async_without_task_id(self) -> None: + """Verify that set_active_grid_session_async raises ValueError when task_id is not set.""" + # GIVEN a CurationTask without a task_id + task = CurationTask() + + # WHEN I call set_active_grid_session_async + # THEN it should raise ValueError (propagated from get_status_async) + with pytest.raises(ValueError, match="task_id is required to get"): + await task.set_active_grid_session_async( + active_session_id=SESSION_ID, synapse_client=self.syn + ) + + @pytest.mark.parametrize( + "input_state,expected_state_value", + [ + (TaskState.IN_PROGRESS, "IN_PROGRESS"), + (TaskState.COMPLETED, "COMPLETED"), + (TaskState.CANCELED, "CANCELED"), + ("IN_PROGRESS", "IN_PROGRESS"), + ("COMPLETED", "COMPLETED"), + ("CANCELED", "CANCELED"), + ], + ) + async def test_set_task_state_async( + self, input_state, expected_state_value: str + ) -> None: + """Verify that set_task_state_async accepts a TaskState enum or string and PUTs the correct serialized state.""" + # GIVEN a CurationTask with a task_id + task = CurationTask(task_id=TASK_ID) + + # AND a current status from the server with execution_details set + get_response = _get_curation_task_status_response( + state="NOT_STARTED", active_session_id=SESSION_ID + ) + # AND an update response reflecting the new state but preserving + # execution_details + put_response = _get_curation_task_status_response( + state=expected_state_value, active_session_id=SESSION_ID + ) + + # WHEN I call set_task_state_async with an enum or string + with ( + patch( + "synapseclient.models.curation.get_curation_task_status", + new_callable=AsyncMock, + return_value=get_response, + ) as mock_get_status, + patch( + "synapseclient.models.curation.update_curation_task_status", + new_callable=AsyncMock, + return_value=put_response, + ) as mock_update_status, + ): + result = await task.set_task_state_async( + state=input_state, synapse_client=self.syn + ) + + # THEN it fetches the current status first + mock_get_status.assert_called_once_with( + task_id=TASK_ID, synapse_client=self.syn + ) + + # AND PUTs a payload that carries the fresh etag, the coerced + # state, and preserves the existing execution_details + mock_update_status.assert_called_once() + put_kwargs = mock_update_status.call_args.kwargs + assert put_kwargs["task_id"] == TASK_ID + payload = put_kwargs["curation_task_status"] + assert payload["etag"] == STATUS_ETAG + assert payload["state"] == expected_state_value + assert payload["executionDetails"]["concreteType"] == GRID_EXECUTION_DETAILS + assert payload["executionDetails"]["activeSessionId"] == SESSION_ID + + # AND it returns the parsed update response + assert isinstance(result, CurationTaskStatus) + assert result.state == TaskState(expected_state_value) + assert isinstance(result.execution_details, GridExecutionDetails) + assert result.execution_details.active_session_id == SESSION_ID + + async def test_set_task_state_async_invalid_string(self) -> None: + """Verify that set_task_state_async raises ValueError before any API call when given an unrecognized state string.""" + # GIVEN a CurationTask with a task_id + task = CurationTask(task_id=TASK_ID) + + # WHEN I call set_task_state_async with a string that does not match + # any TaskState member + # THEN it raises ValueError before any API call is made + with ( + patch( + "synapseclient.models.curation.get_curation_task_status", + new_callable=AsyncMock, + ) as mock_get_status, + patch( + "synapseclient.models.curation.update_curation_task_status", + new_callable=AsyncMock, + ) as mock_update_status, + ): + with pytest.raises(ValueError, match="is not a valid TaskState"): + await task.set_task_state_async( + state="NOT_A_REAL_STATE", synapse_client=self.syn + ) + + mock_get_status.assert_not_called() + mock_update_status.assert_not_called() + + async def test_set_task_state_async_without_task_id(self) -> None: + """Verify that set_task_state_async raises ValueError when task_id is not set.""" + # GIVEN a CurationTask without a task_id + task = CurationTask() + + # WHEN I call set_task_state_async + # THEN it should raise ValueError (propagated from get_status_async) + with pytest.raises(ValueError, match="task_id is required to get"): + await task.set_task_state_async( + state=TaskState.IN_PROGRESS, synapse_client=self.syn + ) + + async def test_create_grid_session_async_without_task_id(self) -> None: + """Verify that create_grid_session_async raises ValueError when task_id is not set.""" + # GIVEN a CurationTask without a task_id + task = CurationTask() + + # WHEN I call create_grid_session_async + # THEN it should raise ValueError + with pytest.raises( + ValueError, + match="task_id is required to create a CurationTask grid session", + ): + await task.create_grid_session_async(synapse_client=self.syn) + + async def test_create_grid_session_async_record_based_missing_record_set_id( + self, + ) -> None: + """Verify that create_grid_session_async raises ValueError when task_properties.record_set_id is None.""" + # GIVEN a record-based CurationTask whose record_set_id is missing + task = CurationTask( + task_id=TASK_ID, + task_properties=RecordBasedMetadataTaskProperties(record_set_id=None), + ) + + # WHEN I call create_grid_session_async + # THEN it should raise ValueError + with pytest.raises( + ValueError, match="task_properties.record_set_id is missing" + ): + await task.create_grid_session_async(synapse_client=self.syn) + + async def test_create_grid_session_async_file_based_missing_file_view_id( + self, + ) -> None: + """Verify that create_grid_session_async raises ValueError when task_properties.file_view_id is None.""" + # GIVEN a file-based CurationTask whose file_view_id is missing + task = CurationTask( + task_id=TASK_ID, + task_properties=FileBasedMetadataTaskProperties( + upload_folder_id=UPLOAD_FOLDER_ID, file_view_id=None + ), + ) + + # WHEN I call create_grid_session_async + # THEN it should raise ValueError + with pytest.raises(ValueError, match="task_properties.file_view_id is missing"): + await task.create_grid_session_async(synapse_client=self.syn) + + async def test_create_grid_session_async_record_based_record_set_not_found( + self, + ) -> None: + """Verify that a SynapseHTTPError from RecordSet.get_async propagates out of create_grid_session_async.""" + from synapseclient.core.exceptions import SynapseHTTPError + + # GIVEN a record-based CurationTask with a record_set_id that does not exist + task = CurationTask( + task_id=TASK_ID, + task_properties=RecordBasedMetadataTaskProperties( + record_set_id=RECORD_SET_ID + ), + ) + + # AND RecordSet.get_async raises SynapseHTTPError (e.g. 404) + with patch.object( + RecordSet, + "get_async", + new_callable=AsyncMock, + side_effect=SynapseHTTPError("404 Not Found"), + ): + # WHEN I call create_grid_session_async + # THEN the SynapseHTTPError propagates and no Grid is created + with pytest.raises(SynapseHTTPError): + await task.create_grid_session_async(synapse_client=self.syn) + + async def test_create_grid_session_async_file_based_entity_view_not_found( + self, + ) -> None: + """Verify that a SynapseHTTPError from EntityView.get_async propagates out of create_grid_session_async.""" + from synapseclient.core.exceptions import SynapseHTTPError + + # GIVEN a file-based CurationTask with a file_view_id that does not exist + task = CurationTask( + task_id=TASK_ID, + task_properties=FileBasedMetadataTaskProperties( + upload_folder_id=UPLOAD_FOLDER_ID, file_view_id=FILE_VIEW_ID + ), + ) + + # AND EntityView.get_async raises SynapseHTTPError (e.g. 404) + with patch.object( + EntityView, + "get_async", + new_callable=AsyncMock, + side_effect=SynapseHTTPError("404 Not Found"), + ): + # WHEN I call create_grid_session_async + # THEN the SynapseHTTPError propagates and no Grid is created + with pytest.raises(SynapseHTTPError): + await task.create_grid_session_async(synapse_client=self.syn) + + async def test_create_grid_session_async_unsupported_task_properties( + self, + ) -> None: + """Verify that create_grid_session_async raises ValueError when task_properties is not a recognized type.""" + # GIVEN a CurationTask whose task_properties is of an unsupported type + task = CurationTask(task_id=TASK_ID) + task.task_properties = object() # bypass dataclass typing + + # WHEN I call create_grid_session_async + # THEN it should raise ValueError + with pytest.raises( + ValueError, + match="task_properties must be a FileBasedMetadataTaskProperties", + ): + await task.create_grid_session_async(synapse_client=self.syn) + + async def test_list_async_assigned_to_me_and_assignee_ids_raises(self) -> None: + # GIVEN both assigned_to_me and assignee_ids are provided + # WHEN I call list_async + # THEN it should raise ValueError on first iteration (generators are lazy) + with pytest.raises( + ValueError, match="mutually exclusive.*Got assignee_ids=\\['123'\\]" + ): + async for _ in CurationTask.list_async( + project_id=PROJECT_ID, + assigned_to_me=True, + assignee_ids=["123"], + synapse_client=self.syn, + ): + pass # pragma: no cover + + async def test_list_async_passes_assigned_to_me_to_api(self) -> None: + # GIVEN assigned_to_me=True + async def mock_list(*args, **kwargs): + return + yield # pragma: no cover + + # WHEN I call list_async + with patch( + "synapseclient.models.curation.list_curation_tasks", + side_effect=mock_list, + ) as mock_api: + async for _ in CurationTask.list_async( + project_id=PROJECT_ID, + assigned_to_me=True, + synapse_client=self.syn, + ): + pass + + # THEN the API should be called with assigned_to_me=True + mock_api.assert_called_once_with( + project_id=PROJECT_ID, + assigned_to_me=True, + assignee_ids=None, + state_filter=None, + synapse_client=self.syn, + ) + + async def test_list_async_passes_assignee_ids_to_api(self) -> None: + # GIVEN a list of assignee_ids + async def mock_list(*args, **kwargs): + return + yield # pragma: no cover + + # WHEN I call list_async + with patch( + "synapseclient.models.curation.list_curation_tasks", + side_effect=mock_list, + ) as mock_api: + async for _ in CurationTask.list_async( + project_id=PROJECT_ID, + assignee_ids=["111", "222"], + synapse_client=self.syn, + ): + pass + + # THEN the API should be called with the serialized assignee_ids + mock_api.assert_called_once_with( + project_id=PROJECT_ID, + assigned_to_me=None, + assignee_ids=["111", "222"], + state_filter=None, + synapse_client=self.syn, + ) + + async def test_list_async_passes_state_filter_to_api(self) -> None: + # GIVEN a state_filter with TaskState enums + async def mock_list(*args, **kwargs): + return + yield # pragma: no cover + + # WHEN I call list_async + with patch( + "synapseclient.models.curation.list_curation_tasks", + side_effect=mock_list, + ) as mock_api: + async for _ in CurationTask.list_async( + project_id=PROJECT_ID, + state_filter=[TaskState.NOT_STARTED, TaskState.IN_PROGRESS], + synapse_client=self.syn, + ): + pass + + # THEN the API should be called with serialized string values + mock_api.assert_called_once_with( + project_id=PROJECT_ID, + assigned_to_me=None, + assignee_ids=None, + state_filter=["NOT_STARTED", "IN_PROGRESS"], + synapse_client=self.syn, + ) + + async def test_list_async_state_filter_invalid_string_raises(self) -> None: + # GIVEN a state_filter with an invalid string value + # WHEN I call list_async + # THEN it should raise ValueError before any API call + with pytest.raises(ValueError, match="Invalid value"): + async for _ in CurationTask.list_async( + project_id=PROJECT_ID, + state_filter=["invalid"], + synapse_client=self.syn, + ): + pass # pragma: no cover + class TestGrid: """Unit tests for the Grid model.""" @@ -587,6 +1227,7 @@ def test_fill_from_dict(self) -> None: assert grid.last_replica_id_service == -5 assert grid.grid_json_schema_id == "my-schema-id" assert grid.source_entity_id == SOURCE_ENTITY_ID + assert grid.owner_principal_id == OWNER_PRINCIPAL_ID async def test_create_async_with_record_set_id(self) -> None: # GIVEN a Grid with a record_set_id @@ -664,15 +1305,18 @@ async def mock_list_async(*args, **kwargs): # WHEN I call create_async with attach_to_previous_session=True and no # existing sessions - with patch.object( - Grid, - "list_async", - return_value=mock_list_async(), - ), patch.object( - CreateGridRequest, - "send_job_and_wait_async", - new_callable=AsyncMock, - return_value=mock_create_request, + with ( + patch.object( + Grid, + "list_async", + return_value=mock_list_async(), + ), + patch.object( + CreateGridRequest, + "send_job_and_wait_async", + new_callable=AsyncMock, + return_value=mock_create_request, + ), ): result = await grid.create_async( attach_to_previous_session=True, synapse_client=self.syn @@ -802,6 +1446,163 @@ async def mock_list(*args, **kwargs): assert len(results) == 1 assert results[0].session_id == SESSION_ID + async def test_import_csv_async_without_session_id(self) -> None: + """Test that calling import_csv_async without a session_id raises a ValueError.""" + # GIVEN a Grid without a session_id + grid = Grid() + + # WHEN I call import_csv_async + # THEN it should raise ValueError + with ( + patch("synapseclient.models.curation.os.path.isfile", return_value=True), + pytest.raises( + ValueError, + match="session_id is required to import a CSV into a GridSession", + ), + ): + await grid.import_csv_async(synapse_client=self.syn, path="test.csv") + + async def test_import_csv_async(self) -> None: + """Test the import_csv_async method of the Grid class, ensuring it correctly calls the preview and import requests and logs the results.""" + # GIVEN a Grid with a session_id + grid = Grid(session_id=SESSION_ID) + + csv_table_descriptor = CsvTableDescriptor( + separator=";", + quote_character='"', + escape_character="\\", + line_end=os.linesep, + is_first_line_header=True, + ) + expected_columns = [Column(name="col1", column_type="STRING", maximum_size=50)] + + # Mock preview response with suggested columns + mock_preview_response = UploadToTablePreviewRequest( + upload_file_handle_id=FILE_HANDLE_ID, + csv_table_descriptor=csv_table_descriptor, + suggested_columns=expected_columns, + sample_rows=[["value1"]], + rows_scanned=1, + ) + # Mock import response with row counts + mock_import_response = GridCsvImportRequest( + session_id=SESSION_ID, + file_handle_id=FILE_HANDLE_ID, + schema=expected_columns, + total_count=1, + created_count=1, + updated_count=1, + ) + + mock_preview_instance = MagicMock() + mock_preview_instance.send_job_and_wait_async = AsyncMock( + return_value=mock_preview_response + ) + + mock_import_instance = MagicMock() + mock_import_instance.send_job_and_wait_async = AsyncMock( + return_value=mock_import_response + ) + + # WHEN I call import_csv_async + with ( + patch("synapseclient.models.curation.os.path.isfile", return_value=True), + patch( + "synapseclient.models.curation.upload_synapse_s3", + new_callable=AsyncMock, + return_value={"id": FILE_HANDLE_ID}, + ), + patch( + "synapseclient.models.curation.UploadToTablePreviewRequest", + return_value=mock_preview_instance, + ) as MockPreview, + patch( + "synapseclient.models.curation.GridCsvImportRequest", + return_value=mock_import_instance, + ) as MockImport, + patch.object(self.syn, "logger") as mock_logger, + ): + result = await grid.import_csv_async( + synapse_client=self.syn, + path="test.csv", + csv_table_descriptor=csv_table_descriptor, + ) + + # THEN the grid is returned with the same session + assert result.session_id == SESSION_ID + + # AND UploadToTablePreviewRequest was constructed with the right arguments + MockPreview.assert_called_once_with( + csv_table_descriptor=csv_table_descriptor, + upload_file_handle_id=FILE_HANDLE_ID, + ) + + # AND GridCsvImportRequest was constructed with the schema from the preview + MockImport.assert_called_once_with( + session_id=SESSION_ID, + file_handle_id=FILE_HANDLE_ID, + schema=expected_columns, + csv_descriptor=csv_table_descriptor, + ) + + # AND the log message contains the import counts + mock_logger.info.assert_called_once() + log_message = mock_logger.info.call_args[0][0] + assert "total count: 1" in log_message + assert "total created: 1" in log_message + assert "total updated: 1" in log_message + + async def test_import_csv_no_suggested_columns_async(self) -> None: + """Raises ValueError when the CSV preview returns no suggested columns.""" + # GIVEN a Grid with a session_id + grid = Grid(session_id=SESSION_ID) + + csv_table_descriptor = CsvTableDescriptor( + separator=";", + quote_character='"', + escape_character="\\", + line_end=os.linesep, + is_first_line_header=True, + ) + + # AND a preview response with no columns (e.g. empty CSV file) + mock_preview_response = UploadToTablePreviewRequest( + upload_file_handle_id=FILE_HANDLE_ID, + csv_table_descriptor=csv_table_descriptor, + suggested_columns=[], + sample_rows=[], + rows_scanned=0, + ) + + mock_preview_instance = MagicMock() + mock_preview_instance.send_job_and_wait_async = AsyncMock( + return_value=mock_preview_response + ) + + # WHEN I call import_csv_async + # THEN a ValueError is raised before the import is attempted + with ( + patch("synapseclient.models.curation.os.path.isfile", return_value=True), + patch( + "synapseclient.models.curation.upload_synapse_s3", + new_callable=AsyncMock, + return_value={"id": FILE_HANDLE_ID}, + ), + patch( + "synapseclient.models.curation.UploadToTablePreviewRequest", + return_value=mock_preview_instance, + ), + ): + with pytest.raises( + ValueError, + match=rf"CSV preview for file handle {FILE_HANDLE_ID} returned no suggested columns \(rows scanned: 0\).*separator=';'", + ): + await grid.import_csv_async( + synapse_client=self.syn, + path="test.csv", + csv_table_descriptor=csv_table_descriptor, + ) + class TestCreateGridRequest: """Tests for the CreateGridRequest helper dataclass.""" @@ -846,6 +1647,197 @@ def test_to_synapse_request_with_record_set_id(self) -> None: assert "initialQuery" not in result +class TestUploadToTablePreviewRequest: + """Tests for the UploadToTablePreviewRequest helper dataclass.""" + + def test_fill_from_dict(self) -> None: + # GIVEN a response with upload to table preview data + raw_synapse_response = { + "jobId": "1234", + "concreteType": "org.sagebionetworks.repo.model.table.UploadToTablePreviewResult", + "suggestedColumns": [ + {"name": "etag", "columnType": "STRING", "maximumSize": 50}, + {"name": "Sex", "columnType": "STRING", "maximumSize": 6}, + {"name": "Component", "columnType": "STRING", "maximumSize": 4}, + {"name": "Diagnosis", "columnType": "STRING", "maximumSize": 7}, + {"name": "PatientID", "columnType": "INTEGER"}, + {"name": "CancerType", "columnType": "STRING", "maximumSize": 50}, + {"name": "YearofBirth", "columnType": "STRING", "maximumSize": 50}, + {"name": "FamilyHistory", "columnType": "STRING", "maximumSize": 50}, + ], + "sampleRows": [ + {"values": [None, "Female", "test", "Healthy", "1", None, None, None]} + ], + "rowsScanned": 1, + } + + # WHEN I fill an UploadToTablePreviewRequest from the response + preview_req = UploadToTablePreviewRequest(upload_file_handle_id=FILE_HANDLE_ID) + preview_response = preview_req.fill_from_dict(raw_synapse_response) + + # THEN the fields should be populated correctly + assert len(preview_response.suggested_columns) == 8 + assert preview_response.suggested_columns[0] == Column( + name="etag", column_type="STRING", maximum_size=50 + ) + assert preview_response.suggested_columns[1] == Column( + name="Sex", column_type="STRING", maximum_size=6 + ) + assert preview_response.suggested_columns[2] == Column( + name="Component", column_type="STRING", maximum_size=4 + ) + assert preview_response.suggested_columns[3] == Column( + name="Diagnosis", column_type="STRING", maximum_size=7 + ) + assert preview_response.suggested_columns[4] == Column( + name="PatientID", column_type="INTEGER", maximum_size=None + ) + assert preview_response.sample_rows == [ + [None, "Female", "test", "Healthy", "1", None, None, None] + ] + assert preview_response.rows_scanned == 1 + + def test_to_synapse_request_with_minimal_fields(self) -> None: + # GIVEN an UploadToTablePreviewRequest with only required fields set + preview_req = UploadToTablePreviewRequest( + upload_file_handle_id=FILE_HANDLE_ID, + ) + + # WHEN I convert it to a synapse request + result = preview_req.to_synapse_request() + + # THEN it should contain the correct fields and defaults + assert result["concreteType"] == UPLOAD_TO_TABLE_PREVIEW_REQUEST + assert result["uploadFileHandleId"] == FILE_HANDLE_ID + assert result["csvTableDescriptor"] == CsvTableDescriptor().to_synapse_request() + + def test_to_synapse_request_with_all_fields(self) -> None: + # GIVEN an UploadToTablePreviewRequest + preview_req = UploadToTablePreviewRequest( + upload_file_handle_id=FILE_HANDLE_ID, + lines_to_skip=1, + do_full_file_scan=True, + csv_table_descriptor=CsvTableDescriptor( + separator=";", + quote_character='"', + escape_character="\\", + line_end="\n", + is_first_line_header=True, + ), + ) + + # WHEN I convert it to a synapse request + result = preview_req.to_synapse_request() + + # THEN it should contain the correct fields + assert result["concreteType"] == UPLOAD_TO_TABLE_PREVIEW_REQUEST + assert result["uploadFileHandleId"] == FILE_HANDLE_ID + assert result["linesToSkip"] == 1 + assert result["doFullFileScan"] is True + assert result["csvTableDescriptor"]["separator"] == ";" + assert result["csvTableDescriptor"]["quoteCharacter"] == '"' + assert result["csvTableDescriptor"]["escapeCharacter"] == "\\" + assert result["csvTableDescriptor"]["lineEnd"] == "\n" + assert result["csvTableDescriptor"]["isFirstLineHeader"] is True + + +class TestGridCsvImportRequest: + """Tests for the GridCsvImportRequest helper dataclass.""" + + def test_fill_from_dict(self) -> None: + # GIVEN a response with grid CSV import data + raw_synapse_response = { + "jobId": "1234", + "concreteType": "org.sagebionetworks.repo.model.grid.GridCsvImportResponse", + "sessionId": SESSION_ID, + "totalCount": 3, + "createdCount": 1, + "updatedCount": 2, + } + + # WHEN I fill a GridCsvImportRequest from the response + import_req = GridCsvImportRequest( + session_id=SESSION_ID, + file_handle_id=FILE_HANDLE_ID, + schema=[Column(name="col1", column_type="STRING")], + ) + result = import_req.fill_from_dict(raw_synapse_response) + + # THEN the response fields should be populated correctly + assert result.session_id == SESSION_ID + assert result.total_count == 3 + assert result.created_count == 1 + assert result.updated_count == 2 + + def test_to_synapse_request_with_minimal_fields(self) -> None: + # GIVEN a GridCsvImportRequest with only required fields set + import_req = GridCsvImportRequest( + session_id=SESSION_ID, + file_handle_id=FILE_HANDLE_ID, + schema=[Column(name="col1", column_type="STRING")], + ) + + # WHEN I convert it to a synapse request + result = import_req.to_synapse_request() + + # THEN it should contain the correct fields and defaults + assert result["concreteType"] == GRID_CSV_IMPORT_REQUEST + assert result["sessionId"] == SESSION_ID + assert result["fileHandleId"] == FILE_HANDLE_ID + assert result["csvDescriptor"] == CsvTableDescriptor().to_synapse_request() + assert len(result["schema"]) == 1 + assert ( + result["schema"][0] + == Column(name="col1", column_type="STRING").to_synapse_request() + ) + + def test_to_synapse_request_with_all_fields(self) -> None: + # GIVEN a GridCsvImportRequest with all fields set + import_req = GridCsvImportRequest( + session_id=SESSION_ID, + file_handle_id=FILE_HANDLE_ID, + csv_descriptor=CsvTableDescriptor( + separator=";", + quote_character='"', + escape_character="\\", + line_end="\t", + is_first_line_header=True, + ), + schema=[ + Column(name="ROW_ID", column_type="STRING"), + Column(name="ROW_VERSION", column_type="STRING"), + Column(name="PatientID", column_type="INTEGER"), + Column(name="Diagnosis", column_type="STRING"), + ], + ) + + # WHEN I convert it to a synapse request + result = import_req.to_synapse_request() + + # THEN it should contain the correct fields + assert result["concreteType"] == GRID_CSV_IMPORT_REQUEST + assert result["sessionId"] == SESSION_ID + assert result["fileHandleId"] == FILE_HANDLE_ID + assert result["csvDescriptor"]["separator"] == ";" + assert result["csvDescriptor"]["quoteCharacter"] == '"' + assert result["csvDescriptor"]["escapeCharacter"] == "\\" + assert result["csvDescriptor"]["lineEnd"] == "\t" + assert result["csvDescriptor"]["isFirstLineHeader"] is True + assert len(result["schema"]) == 4 + assert ( + result["schema"][0] + == Column(name="ROW_ID", column_type="STRING").to_synapse_request() + ) + assert ( + result["schema"][2] + == Column(name="PatientID", column_type="INTEGER").to_synapse_request() + ) + assert ( + result["schema"][3] + == Column(name="Diagnosis", column_type="STRING").to_synapse_request() + ) + + class TestGridRecordSetExportRequest: """Tests for the GridRecordSetExportRequest helper dataclass.""" @@ -905,3 +1897,336 @@ def test_to_synapse_request(self) -> None: # THEN it should contain the correct fields assert "concreteType" in result assert result["sessionId"] == SESSION_ID + + +class TestDownloadFromGridRequest: + """Tests for the DownloadFromGridRequest helper dataclass.""" + + def test_to_synapse_request(self) -> None: + # GIVEN a DownloadFromGridRequest with a session_id + request = DownloadFromGridRequest(session_id=SESSION_ID) + + # WHEN I convert it to a synapse request + result = request.to_synapse_request() + + # THEN it should contain the correct fields + assert "concreteType" in result + assert result["sessionId"] == SESSION_ID + + def test_to_synapse_request_all_fields(self) -> None: + # GIVEN a DownloadFromGridRequest with all fields set + table_descriptor = CsvTableDescriptor( + quote_character='"', + escape_character="\\", + line_end=os.linesep, + separator=";", + is_first_line_header=False, + ) + request = DownloadFromGridRequest( + session_id=SESSION_ID, + write_header=False, + include_row_id_and_row_version=False, + include_etag=False, + csv_table_descriptor=table_descriptor, + file_name="my_grid_data.csv", + ) + + # WHEN I convert it to a synapse request + result = request.to_synapse_request() + + # THEN it should contain all the correct fields + assert "concreteType" in result + assert result["sessionId"] == SESSION_ID + assert result["includeRowIdAndRowVersion"] is False + assert result["includeEtag"] is False + assert result["fileName"] == "my_grid_data.csv" + assert result["csvTableDescriptor"]["quoteCharacter"] == '"' + assert result["csvTableDescriptor"]["escapeCharacter"] == "\\" + assert result["csvTableDescriptor"]["lineEnd"] == os.linesep + assert result["csvTableDescriptor"]["separator"] == ";" + assert result["csvTableDescriptor"]["isFirstLineHeader"] is False + + def test_fill_from_dict(self) -> None: + # GIVEN a response with download data + raw_synapse_response = { + "jobId": "123", + "concreteType": "org.sagebionetworks.repo.model.grid.DownloadFromGridResult", + "sessionId": SESSION_ID, + "resultsFileHandleId": FILE_HANDLE_ID, + } + response = DownloadFromGridRequest(session_id=SESSION_ID).fill_from_dict( + raw_synapse_response + ) + assert response.session_id == SESSION_ID + assert response.results_file_handle_id == FILE_HANDLE_ID + + +class TestGridDownloadCsv: + + @pytest.fixture(autouse=True, scope="function") + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + async def test_download_csv_without_session_id(self) -> None: + # GIVEN a Grid without a session_id + grid = Grid() + + # WHEN I call download_csv + # THEN it should raise ValueError + with pytest.raises(ValueError, match="session_id is required to download"): + await grid.download_csv_async(synapse_client=self.syn) + + async def test_download_csv_async(self): + # GIVEN a Grid with a session_id + grid = Grid(session_id=SESSION_ID) + + # Mock the DownloadFromGridRequest's send_job_and_wait_async + mock_download_request = DownloadFromGridRequest(session_id=SESSION_ID) + mock_download_request.results_file_handle_id = FILE_HANDLE_ID + + mock_file_handle = { + "id": "172705303", + "etag": "mock-etag", + "createdBy": "3443707", + "createdOn": "2026-04-30T21:21:40.000Z", + "modifiedOn": "2026-04-30T21:21:40.000Z", + "concreteType": "org.sagebionetworks.repo.model.file.S3FileHandle", + "contentType": "text/csv", + "contentMd5": "mock-md5", + "fileName": "Job-1234.csv", + "contentSize": 100, + "status": "AVAILABLE", + "bucketName": "proddata.sagebase.org", + "key": "1234/5678/Job-1234.csv", + "isPreview": False, + } + mock_presigned_url = "https://presigned.example.com/file.csv" + expected_download_path = "/tmp/downloaded.csv" + + with ( + patch.object( + DownloadFromGridRequest, + "send_job_and_wait_async", + new_callable=AsyncMock, + return_value=mock_download_request, + ) as mock_send, + patch( + "synapseclient.models.curation.get_file_handle", + new_callable=AsyncMock, + return_value=mock_file_handle, + ) as mock_get_file_handle, + patch( + "synapseclient.models.curation.get_file_handle_presigned_url", + new_callable=AsyncMock, + return_value=mock_presigned_url, + ) as mock_get_presigned_url, + patch( + "synapseclient.models.curation.download_from_url", + return_value=expected_download_path, + ) as mock_download_from_url, + ): + result = await grid.download_csv_async(synapse_client=self.syn) + current_dir = os.getcwd() + + # THEN the async job should be submitted + mock_send.assert_called_once() + + # AND the file handle metadata should be fetched + mock_get_file_handle.assert_called_once_with( + file_handle_id=FILE_HANDLE_ID, + synapse_client=self.syn, + ) + + # AND a presigned URL should be fetched + mock_get_presigned_url.assert_called_once_with( + file_handle_id=FILE_HANDLE_ID, + synapse_client=self.syn, + ) + + # AND download_from_url should be called with the presigned URL and MD5 + call_kwargs = mock_download_from_url.call_args + assert call_kwargs.kwargs["url"] == mock_presigned_url + assert call_kwargs.kwargs["file_handle_id"] == mock_file_handle["id"] + assert call_kwargs.kwargs["expected_md5"] == mock_file_handle["contentMd5"] + assert call_kwargs.kwargs["url_is_presigned"] is True + # AND the destination filename follows the grid_{session_id}-{timestamp}.csv convention + assert call_kwargs.kwargs["destination"].startswith(current_dir) + assert f"grid_{SESSION_ID}-" in call_kwargs.kwargs["destination"] + assert call_kwargs.kwargs["destination"].endswith(".csv") + + # AND the result is the path returned by download_from_url + assert result == expected_download_path + + async def test_download_csv_async_with_custom_file_name(self): + # GIVEN a Grid with a session_id and a caller-supplied file_name + grid = Grid(session_id=SESSION_ID) + custom_file_name = "my_export.csv" + + mock_download_request = DownloadFromGridRequest(session_id=SESSION_ID) + mock_download_request.results_file_handle_id = FILE_HANDLE_ID + + mock_file_handle = { + "id": "172705303", + "contentMd5": "mock-md5", + "fileName": "Job-1234.csv", + } + expected_download_path = "/tmp/my_export.csv" + + with ( + patch.object( + DownloadFromGridRequest, + "send_job_and_wait_async", + new_callable=AsyncMock, + return_value=mock_download_request, + ), + patch( + "synapseclient.models.curation.get_file_handle", + new_callable=AsyncMock, + return_value=mock_file_handle, + ), + patch( + "synapseclient.models.curation.get_file_handle_presigned_url", + new_callable=AsyncMock, + return_value="https://presigned.example.com/file.csv", + ), + patch( + "synapseclient.models.curation.download_from_url", + return_value=expected_download_path, + ) as mock_download_from_url, + ): + result = await grid.download_csv_async( + file_name=custom_file_name, synapse_client=self.syn + ) + current_dir = os.getcwd() + + # THEN the destination uses exactly the caller-supplied file_name + call_kwargs = mock_download_from_url.call_args + assert call_kwargs.kwargs["destination"] == os.path.join( + current_dir, custom_file_name + ) + + # AND the result is the path returned by download_from_url + assert result == expected_download_path + + async def test_download_csv_async_with_invalid_dir(self): + # GIVEN a Grid with a session_id + grid = Grid(session_id=SESSION_ID) + + # WHEN I call download_csv_async with an invalid destination + with pytest.raises( + ValueError, match="Destination ./nonexistent_dir is not a valid directory." + ): + await grid.download_csv_async( + synapse_client=self.syn, destination="./nonexistent_dir" + ) + + async def test_download_csv_async_empty_file_handle_id(self): + # GIVEN a Grid with a session_id + grid = Grid(session_id=SESSION_ID) + + # Mock the DownloadFromGridRequest's send_job_and_wait_async to return an empty file handle ID + mock_download_request = DownloadFromGridRequest(session_id=SESSION_ID) + mock_download_request.results_file_handle_id = "" + + with patch.object( + DownloadFromGridRequest, + "send_job_and_wait_async", + new_callable=AsyncMock, + return_value=mock_download_request, + ): + # WHEN I call download_csv_async + # THEN it should raise ValueError for empty file handle ID + with pytest.raises( + ValueError, + match=f"Download job for grid session '{SESSION_ID}' completed but " + "did not return a file handle ID. The CSV result may be empty or " + "the job may have failed silently.", + ): + await grid.download_csv_async(synapse_client=self.syn) + + +class TestSynchronizeGridRequest: + def test_to_synapse_request(self) -> None: + # GIVEN a SynchronizeGridRequest with all fields set + sync_req = SynchronizeGridRequest( + grid_session_id=SESSION_ID, + ) + + # WHEN I convert it to a synapse request + result = sync_req.to_synapse_request() + + # THEN it should contain the correct fields + assert "concreteType" in result + assert result["gridSessionId"] == SESSION_ID + + def test_fill_from_dict(self) -> None: + # GIVEN a response with synchronize grid session data + raw_response = { + "jobId": "1234", + "concreteType": "org.sagebionetworks.repo.model.grid.SynchronizeGridResponse", + "gridSessionId": SESSION_ID, + "errorMessages": ["test_error"], + } + + # WHEN I fill a SynchronizeGridRequest from the response + sync_req = SynchronizeGridRequest(grid_session_id=SESSION_ID) + response = sync_req.fill_from_dict(raw_response) + assert "test_error" in response.error_messages + assert response.grid_session_id == SESSION_ID + + +class TestSynchronizeGrid: + @pytest.fixture(autouse=True, scope="function") + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + async def test_synchronize_grid_async_without_session_id_raises(self) -> None: + # GIVEN a Grid without a session_id + grid = Grid() + + # WHEN I call synchronize_async + # THEN it should raise ValueError + with pytest.raises(ValueError, match="session_id is required to synchronize"): + await grid.synchronize_async(synapse_client=self.syn) + + async def test_synchronize_grid_async_empty_error(self) -> None: + # GIVEN a Grid with a session_id + grid = Grid(session_id=SESSION_ID) + mock_sync_response = SynchronizeGridRequest( + grid_session_id=SESSION_ID, + error_messages=[], + ) + + # WHEN I call synchronize_async + with patch( + "synapseclient.models.curation.SynchronizeGridRequest.send_job_and_wait_async", + new_callable=AsyncMock, + return_value=mock_sync_response, + ) as mock_sync: + await grid.synchronize_async(synapse_client=self.syn) + + # THEN the API should be called with the session_id + mock_sync.assert_called_once_with(synapse_client=self.syn, timeout=120) + + async def test_synchronize_grid_async_with_errors(self) -> None: + # GIVEN a Grid with a session_id + grid = Grid(session_id=SESSION_ID) + mock_sync_response = SynchronizeGridRequest( + grid_session_id=SESSION_ID, + error_messages=["sync_error_1", "sync_error_2"], + ) + + # WHEN I call synchronize_async + with patch( + "synapseclient.models.curation.SynchronizeGridRequest.send_job_and_wait_async", + new_callable=AsyncMock, + return_value=mock_sync_response, + ): + with patch.object(self.syn, "logger") as mock_logger: + await grid.synchronize_async(synapse_client=self.syn) + + # THEN the error messages should be logged as an error + mock_logger.error.assert_called_once() + error_message = mock_logger.error.call_args[0][0] + assert "sync_error_1" in error_message + assert "sync_error_2" in error_message diff --git a/tests/unit/synapseclient/models/async/unit_test_docker_async.py b/tests/unit/synapseclient/models/async/unit_test_docker_async.py index b1d42dc5f..c02c8d799 100644 --- a/tests/unit/synapseclient/models/async/unit_test_docker_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_docker_async.py @@ -197,15 +197,18 @@ async def mock_get_from_entity_factory( # Separately set annotations to match real implementation entity_to_update.annotations = Annotations.from_dict(test_annotation) - with patch( - "synapseclient.models.docker.get_entity_id_by_repository_name", - new_callable=AsyncMock, - side_effect=mock_get_entity_id_by_repository_name, - ) as mocked_get_id, patch( - "synapseclient.models.docker.get_from_entity_factory", - new_callable=AsyncMock, - side_effect=mock_get_from_entity_factory, - ) as mocked_get_from_factory: + with ( + patch( + "synapseclient.models.docker.get_entity_id_by_repository_name", + new_callable=AsyncMock, + side_effect=mock_get_entity_id_by_repository_name, + ) as mocked_get_id, + patch( + "synapseclient.models.docker.get_from_entity_factory", + new_callable=AsyncMock, + side_effect=mock_get_from_entity_factory, + ) as mocked_get_from_factory, + ): result = await docker.get_async(synapse_client=self.syn) # Verify repository name lookup was called diff --git a/tests/unit/synapseclient/models/async/unit_test_evaluation_async.py b/tests/unit/synapseclient/models/async/unit_test_evaluation_async.py index 9024c522c..71ee2346f 100644 --- a/tests/unit/synapseclient/models/async/unit_test_evaluation_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_evaluation_async.py @@ -517,15 +517,18 @@ async def test_update_acl_async_with_principal_id_and_access_type(self) -> None: ) # WHEN I call update_acl_async with a principal_id and access_type - with patch( - "synapseclient.api.evaluation_services.get_evaluation_acl", - new_callable=AsyncMock, - return_value=current_acl, - ) as mock_get_acl, patch( - "synapseclient.api.evaluation_services.update_evaluation_acl", - new_callable=AsyncMock, - return_value=updated_acl, - ) as mock_update_acl: + with ( + patch( + "synapseclient.api.evaluation_services.get_evaluation_acl", + new_callable=AsyncMock, + return_value=current_acl, + ) as mock_get_acl, + patch( + "synapseclient.api.evaluation_services.update_evaluation_acl", + new_callable=AsyncMock, + return_value=updated_acl, + ) as mock_update_acl, + ): result = await evaluation.update_acl_async( principal_id=PRINCIPAL_ID, access_type=["READ", "SUBMIT"], @@ -599,15 +602,18 @@ async def test_update_acl_async_adds_new_principal(self) -> None: ) # WHEN I call update_acl_async with a NEW principal_id - with patch( - "synapseclient.api.evaluation_services.get_evaluation_acl", - new_callable=AsyncMock, - return_value=current_acl, - ), patch( - "synapseclient.api.evaluation_services.update_evaluation_acl", - new_callable=AsyncMock, - return_value=expected_updated_acl, - ) as mock_update_acl: + with ( + patch( + "synapseclient.api.evaluation_services.get_evaluation_acl", + new_callable=AsyncMock, + return_value=current_acl, + ), + patch( + "synapseclient.api.evaluation_services.update_evaluation_acl", + new_callable=AsyncMock, + return_value=expected_updated_acl, + ) as mock_update_acl, + ): result = await evaluation.update_acl_async( principal_id=TEAM_PRINCIPAL_ID, access_type=["READ", "SUBMIT"], @@ -640,15 +646,18 @@ async def test_update_acl_async_updates_existing_principal(self) -> None: expected_updated_acl["resourceAccess"][0]["accessType"] = ["READ"] # WHEN I call update_acl_async to update the existing principal's permissions - with patch( - "synapseclient.api.evaluation_services.get_evaluation_acl", - new_callable=AsyncMock, - return_value=current_acl, - ), patch( - "synapseclient.api.evaluation_services.update_evaluation_acl", - new_callable=AsyncMock, - return_value=expected_updated_acl, - ) as mock_update_acl: + with ( + patch( + "synapseclient.api.evaluation_services.get_evaluation_acl", + new_callable=AsyncMock, + return_value=current_acl, + ), + patch( + "synapseclient.api.evaluation_services.update_evaluation_acl", + new_callable=AsyncMock, + return_value=expected_updated_acl, + ) as mock_update_acl, + ): result = await evaluation.update_acl_async( principal_id=OWNER_ID, access_type=["READ"], @@ -685,15 +694,18 @@ async def test_update_acl_async_removes_principal_with_empty_access_type( expected_updated_acl["resourceAccess"] = [] # WHEN I call update_acl_async with an empty access_type list - with patch( - "synapseclient.api.evaluation_services.get_evaluation_acl", - new_callable=AsyncMock, - return_value=current_acl, - ), patch( - "synapseclient.api.evaluation_services.update_evaluation_acl", - new_callable=AsyncMock, - return_value=expected_updated_acl, - ) as mock_update_acl: + with ( + patch( + "synapseclient.api.evaluation_services.get_evaluation_acl", + new_callable=AsyncMock, + return_value=current_acl, + ), + patch( + "synapseclient.api.evaluation_services.update_evaluation_acl", + new_callable=AsyncMock, + return_value=expected_updated_acl, + ) as mock_update_acl, + ): result = await evaluation.update_acl_async( principal_id=OWNER_ID, access_type=[], @@ -758,15 +770,18 @@ async def test_update_acl_async_uppercases_access_type(self) -> None: current_acl = self.get_example_acl_response() # WHEN I call update_acl_async with lowercase access_type values - with patch( - "synapseclient.api.evaluation_services.get_evaluation_acl", - new_callable=AsyncMock, - return_value=current_acl, - ), patch( - "synapseclient.api.evaluation_services.update_evaluation_acl", - new_callable=AsyncMock, - return_value=current_acl, - ) as mock_update_acl: + with ( + patch( + "synapseclient.api.evaluation_services.get_evaluation_acl", + new_callable=AsyncMock, + return_value=current_acl, + ), + patch( + "synapseclient.api.evaluation_services.update_evaluation_acl", + new_callable=AsyncMock, + return_value=current_acl, + ) as mock_update_acl, + ): await evaluation.update_acl_async( principal_id=TEAM_PRINCIPAL_ID, access_type=["read", "submit"], diff --git a/tests/unit/synapseclient/models/async/unit_test_file_async.py b/tests/unit/synapseclient/models/async/unit_test_file_async.py index 9514e5aeb..64139ffba 100644 --- a/tests/unit/synapseclient/models/async/unit_test_file_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_file_async.py @@ -1,4 +1,5 @@ """Unit tests for the File model""" + import os from typing import Dict, Union from unittest.mock import AsyncMock, patch @@ -186,19 +187,23 @@ async def test_store_with_id_and_path(self) -> None: file = File(id=SYN_123, path=PATH, description=MODIFIED_DESCRIPTION) # WHEN I store the example file - with patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=(self.get_example_rest_api_file_output()), - ) as mocked_get_entity_bundle, patch( - "synapseclient.models.file.upload_file_handle", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_handle()), - ) as mocked_file_handle_upload, patch( - "synapseclient.models.file.store_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_output()), - ) as mocked_store_entity: + with ( + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=(self.get_example_rest_api_file_output()), + ) as mocked_get_entity_bundle, + patch( + "synapseclient.models.file.upload_file_handle", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_handle()), + ) as mocked_file_handle_upload, + patch( + "synapseclient.models.file.store_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_output()), + ) as mocked_store_entity, + ): result = await file.store_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -265,19 +270,23 @@ async def test_store_with_id_and_file_handle(self) -> None: ) # WHEN I store the example file - with patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=(self.get_example_rest_api_file_output(path=None)), - ) as mocked_get_entity_bundle, patch( - "synapseclient.models.file.upload_file_handle", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_handle()), - ) as mocked_file_handle_upload, patch( - "synapseclient.models.file.store_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_output()), - ) as mocked_store_entity: + with ( + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=(self.get_example_rest_api_file_output(path=None)), + ) as mocked_get_entity_bundle, + patch( + "synapseclient.models.file.upload_file_handle", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_handle()), + ) as mocked_file_handle_upload, + patch( + "synapseclient.models.file.store_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_output()), + ) as mocked_store_entity, + ): result = await file.store_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -339,23 +348,28 @@ async def test_store_with_parent_and_path(self) -> None: ) # WHEN I store the example file - with patch.object( - self.syn, - "get", - return_value=None, - ) as mocked_get_call, patch( - "synapseclient.models.file.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.file.upload_file_handle", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_handle()), - ) as mocked_file_handle_upload, patch( - "synapseclient.models.file.store_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_output(path=bogus_file)), - ) as mocked_store_entity: + with ( + patch.object( + self.syn, + "get", + return_value=None, + ) as mocked_get_call, + patch( + "synapseclient.models.file.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.file.upload_file_handle", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_handle()), + ) as mocked_file_handle_upload, + patch( + "synapseclient.models.file.store_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_output(path=bogus_file)), + ) as mocked_store_entity, + ): result = await file.store_async( parent=Project(id=ACTUAL_PARENT_ID), synapse_client=self.syn ) @@ -427,23 +441,28 @@ async def test_store_with_parent_id_and_path(self) -> None: ) # WHEN I store the example file - with patch.object( - self.syn, - "get", - return_value=None, - ) as mocked_get_call, patch( - "synapseclient.models.file.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.file.upload_file_handle", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_handle()), - ) as mocked_file_handle_upload, patch( - "synapseclient.models.file.store_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_output(path=bogus_file)), - ) as mocked_store_entity: + with ( + patch.object( + self.syn, + "get", + return_value=None, + ) as mocked_get_call, + patch( + "synapseclient.models.file.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.file.upload_file_handle", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_handle()), + ) as mocked_file_handle_upload, + patch( + "synapseclient.models.file.store_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_output(path=bogus_file)), + ) as mocked_store_entity, + ): result = await file.store_async( parent=Project(id=ACTUAL_PARENT_ID), synapse_client=self.syn ) @@ -522,30 +541,37 @@ async def test_store_with_components(self) -> None: ) # WHEN I store the example file - with patch.object( - self.syn, - "get", - return_value=None, - ) as mocked_get_call, patch( - "synapseclient.models.file.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.file.upload_file_handle", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_handle()), - ) as mocked_file_handle_upload, patch( - "synapseclient.models.file.store_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_file_output(path=bogus_file)), - ) as mocked_store_entity, patch( - "synapseclient.models.file.store_entity_components", - return_value=True, - ) as mocked_store_entity_components, patch.object( - file, - "get_async", - return_value=file, - ) as mocked_get: + with ( + patch.object( + self.syn, + "get", + return_value=None, + ) as mocked_get_call, + patch( + "synapseclient.models.file.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.file.upload_file_handle", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_handle()), + ) as mocked_file_handle_upload, + patch( + "synapseclient.models.file.store_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_file_output(path=bogus_file)), + ) as mocked_store_entity, + patch( + "synapseclient.models.file.store_entity_components", + return_value=True, + ) as mocked_store_entity_components, + patch.object( + file, + "get_async", + return_value=file, + ) as mocked_get, + ): result = await file.store_async(synapse_client=self.syn) # THEN we should not call the get method when just the path is supplied. @@ -795,16 +821,18 @@ async def test_get_with_path(self) -> None: file = File(path=PATH, description=MODIFIED_DESCRIPTION) # WHEN I get the example file - with patch( - "synapseclient.api.entity_factory._search_for_file_by_md5", - new_callable=AsyncMock, - return_value=(self.get_example_rest_api_file_output()), - ) as mocked_search_for_file, patch.object( - file, - "_load_local_md5", - return_value=(None), - ), patch( - "os.path.isfile", return_value=True + with ( + patch( + "synapseclient.api.entity_factory._search_for_file_by_md5", + new_callable=AsyncMock, + return_value=(self.get_example_rest_api_file_output()), + ) as mocked_search_for_file, + patch.object( + file, + "_load_local_md5", + return_value=(None), + ), + patch("os.path.isfile", return_value=True), ): result = await file.get_async(synapse_client=self.syn) @@ -857,15 +885,17 @@ async def test_from_path(self) -> None: path = PATH # WHEN I get the example file - with patch( - "synapseclient.api.entity_factory._search_for_file_by_md5", - new_callable=AsyncMock, - return_value=(self.get_example_rest_api_file_output()), - ) as mocked_search_for_file, patch( - "synapseclient.models.file.File._load_local_md5", - return_value=(None), - ), patch( - "os.path.isfile", return_value=True + with ( + patch( + "synapseclient.api.entity_factory._search_for_file_by_md5", + new_callable=AsyncMock, + return_value=(self.get_example_rest_api_file_output()), + ) as mocked_search_for_file, + patch( + "synapseclient.models.file.File._load_local_md5", + return_value=(None), + ), + patch("os.path.isfile", return_value=True), ): result = await File.from_path_async(path=path, synapse_client=self.syn) diff --git a/tests/unit/synapseclient/models/async/unit_test_folder_async.py b/tests/unit/synapseclient/models/async/unit_test_folder_async.py index d096d5a07..c296496f9 100644 --- a/tests/unit/synapseclient/models/async/unit_test_folder_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_folder_async.py @@ -1,16 +1,20 @@ """Tests for the Folder class.""" + +import os import uuid from typing import Dict -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest from synapseclient import Folder as Synapse_Folder from synapseclient import Synapse from synapseclient.core.constants import concrete_types -from synapseclient.core.constants.concrete_types import FILE_ENTITY +from synapseclient.core.constants.concrete_types import FILE_ENTITY, FOLDER_ENTITY from synapseclient.core.exceptions import SynapseNotFoundError from synapseclient.models import FailureStrategy, File, Folder +from synapseclient.models.project_setting import ProjectSetting +from synapseclient.models.services.migration_types import MigrationResult SYN_123 = "syn123" SYN_456 = "syn456" @@ -89,22 +93,25 @@ async def test_store_with_id(self) -> None: folder.description = description # WHEN I call `store` with the Folder object - with patch( - "synapseclient.models.services.storable_entity.put_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_folder_output()), - ) as mocked_client_call, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.FOLDER_ENTITY, - "id": folder.id, + with ( + patch( + "synapseclient.models.services.storable_entity.put_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_folder_output()), + ) as mocked_client_call, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.FOLDER_ENTITY, + "id": folder.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await folder.store_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -142,21 +149,24 @@ async def test_store_with_no_changes(self) -> None: ) # WHEN I call `store` with the Folder object - with patch.object( - self.syn, - "store", - ) as mocked_store, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.FOLDER_ENTITY, - "id": folder.id, + with ( + patch.object( + self.syn, + "store", + ) as mocked_store, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.FOLDER_ENTITY, + "id": folder.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await folder.store_async(synapse_client=self.syn) # THEN we should not call store because there are no changes @@ -195,16 +205,19 @@ async def test_store_after_get(self) -> None: assert folder.id == SYN_123 # WHEN I call `store` with the Folder object - with patch.object( - self.syn, - "store", - ) as mocked_store, patch.object( - self.syn, - "get", - return_value=Synapse_Folder( - id=folder.id, - ), - ) as mocked_get: + with ( + patch.object( + self.syn, + "store", + ) as mocked_store, + patch.object( + self.syn, + "get", + return_value=Synapse_Folder( + id=folder.id, + ), + ) as mocked_get, + ): result = await folder.store_async(synapse_client=self.syn) # THEN we should not call store because there are no changes @@ -247,14 +260,17 @@ async def test_store_after_get_with_changes(self) -> None: folder.description = description # WHEN I call `store` with the Folder object - with patch( - "synapseclient.models.services.storable_entity.put_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_folder_output()), - ) as mocked_store, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - ) as mocked_get: + with ( + patch( + "synapseclient.models.services.storable_entity.put_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_folder_output()), + ) as mocked_store, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + ) as mocked_get, + ): result = await folder.store_async(synapse_client=self.syn) # THEN we should call store because there are changes @@ -303,25 +319,29 @@ async def test_store_with_annotations(self) -> None: folder.description = description # WHEN I call `store` with the Folder object - with patch( - "synapseclient.models.folder.store_entity_components", - return_value=(None), - ) as mocked_store_entity_components, patch( - "synapseclient.models.services.storable_entity.put_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_folder_output()), - ) as mocked_client_call, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.FOLDER_ENTITY, - "id": folder.id, + with ( + patch( + "synapseclient.models.folder.store_entity_components", + return_value=(None), + ) as mocked_store_entity_components, + patch( + "synapseclient.models.services.storable_entity.put_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_folder_output()), + ) as mocked_client_call, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.FOLDER_ENTITY, + "id": folder.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await folder.store_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -371,26 +391,30 @@ async def test_store_with_name_and_parent_id(self) -> None: folder.description = description # WHEN I call `store` with the Folder object - with patch( - "synapseclient.models.services.storable_entity.put_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_folder_output()), - ) as mocked_client_call, patch.object( - self.syn, - "findEntityId", - return_value=SYN_123, - ) as mocked_get, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.FOLDER_ENTITY, - "id": folder.id, + with ( + patch( + "synapseclient.models.services.storable_entity.put_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_folder_output()), + ) as mocked_client_call, + patch.object( + self.syn, + "findEntityId", + return_value=SYN_123, + ) as mocked_get, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.FOLDER_ENTITY, + "id": folder.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await folder.store_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -436,26 +460,30 @@ async def test_store_with_name_and_parent(self) -> None: folder.description = description # WHEN I call `store` with the Folder object - with patch( - "synapseclient.models.services.storable_entity.put_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_folder_output()), - ) as mocked_client_call, patch.object( - self.syn, - "findEntityId", - return_value=SYN_123, - ) as mocked_get, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.FOLDER_ENTITY, - "id": folder.id, + with ( + patch( + "synapseclient.models.services.storable_entity.put_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_folder_output()), + ) as mocked_client_call, + patch.object( + self.syn, + "findEntityId", + return_value=SYN_123, + ) as mocked_get, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.FOLDER_ENTITY, + "id": folder.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await folder.store_async( parent=Folder(id=PARENT_ID), synapse_client=self.syn ) @@ -572,15 +600,18 @@ async def test_get_by_name_and_parent(self) -> None: ) # WHEN I call `get` with the Folder object - with patch.object( - self.syn, - "findEntityId", - return_value=(SYN_123), - ) as mocked_client_search, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=self.get_example_rest_api_folder_output(), - ) as mocked_client_call: + with ( + patch.object( + self.syn, + "findEntityId", + return_value=(SYN_123), + ) as mocked_client_search, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_folder_output(), + ) as mocked_client_call, + ): result = await folder.get_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -675,16 +706,20 @@ async def test_copy(self) -> None: } # WHEN I call `copy` with the Folder object - with patch( - "synapseclient.models.folder.copy", - return_value=(copy_mapping), - ) as mocked_copy, patch( - "synapseclient.models.folder.Folder.get_async", - return_value=(returned_folder), - ) as mocked_get, patch( - "synapseclient.models.folder.Folder.sync_from_synapse_async", - return_value=(returned_folder), - ) as mocked_sync: + with ( + patch( + "synapseclient.models.folder.copy", + return_value=(copy_mapping), + ) as mocked_copy, + patch( + "synapseclient.models.folder.Folder.get_async", + return_value=(returned_folder), + ) as mocked_get, + patch( + "synapseclient.models.folder.Folder.sync_from_synapse_async", + return_value=(returned_folder), + ) as mocked_sync, + ): result = await folder.copy_async( parent_id="destination_id", synapse_client=self.syn ) @@ -754,16 +789,20 @@ async def mock_get_children(*args, **kwargs): for child in children: yield child - with patch( - "synapseclient.models.mixins.storable_container.get_children", - side_effect=mock_get_children, - ) as mocked_children_call, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=self.get_example_rest_api_folder_output(), - ) as mocked_folder_get, patch( - "synapseclient.models.file.File.get_async", - return_value=(File(id=SYN_456, name="example_file_1")), + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ) as mocked_children_call, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_folder_output(), + ) as mocked_folder_get, + patch( + "synapseclient.models.file.File.get_async", + return_value=(File(id=SYN_456, name="example_file_1")), + ), ): result = await folder.sync_from_synapse_async(synapse_client=self.syn) @@ -785,3 +824,784 @@ async def mock_get_children(*args, **kwargs): assert result.modified_by == MODIFIED_BY assert result.files[0].id == SYN_456 assert result.files[0].name == "example_file_1" + + async def test_sync_from_synapse_manifest_all_generates_per_directory( + self, + ) -> None: + SUB_FOLDER_ID = "syn789" + SUB_FOLDER_NAME = "sub_folder" + FILE_2_ID = "syn012" + FILE_2_NAME = "example_file_2" + + # GIVEN a root folder with one file and one subfolder containing one file + folder = Folder(id=SYN_123) + + root_children = [ + {"id": SYN_456, "type": FILE_ENTITY, "name": "example_file_1"}, + {"id": SUB_FOLDER_ID, "type": FOLDER_ENTITY, "name": SUB_FOLDER_NAME}, + ] + sub_children = [ + {"id": FILE_2_ID, "type": FILE_ENTITY, "name": FILE_2_NAME}, + ] + get_children_call_count = 0 + + async def mock_get_children(*args, **kwargs): + nonlocal get_children_call_count + children = root_children if get_children_call_count == 0 else sub_children + get_children_call_count += 1 + for child in children: + yield child + + downloaded_file_1 = File( + id=SYN_456, + name="example_file_1", + parent_id=SYN_123, + ) + downloaded_file_2 = File( + id=FILE_2_ID, + name=FILE_2_NAME, + parent_id=SUB_FOLDER_ID, + ) + file_map = {SYN_456: downloaded_file_1, FILE_2_ID: downloaded_file_2} + + async def mock_file_get(self_file, **kwargs): + return file_map[self_file.id] + + async def mock_get_entity_bundle(entity_id, *args, **kwargs): + if entity_id == SUB_FOLDER_ID: + return { + "entity": { + "concreteType": concrete_types.FOLDER_ENTITY, + "id": SUB_FOLDER_ID, + "name": SUB_FOLDER_NAME, + "parentId": SYN_123, + "etag": ETAG, + "createdOn": CREATED_ON, + "modifiedOn": MODIFIED_ON, + "createdBy": CREATED_BY, + "modifiedBy": MODIFIED_BY, + } + } + return self.get_example_rest_api_folder_output() + + # WHEN I call sync_from_synapse with manifest="all" and a path + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + side_effect=mock_get_entity_bundle, + ), + patch( + "synapseclient.models.file.File.get_async", + side_effect=mock_file_get, + ), + patch( + "synapseclient.models.mixins.storable_container.os.path.exists", + return_value=True, + ), + patch( + "synapseclient.models.mixins.storable_container.generate_manifest_csv", + ) as mock_generate, + ): + await folder.sync_from_synapse_async( + path="/tmp/mydir", manifest="all", synapse_client=self.syn + ) + + # THEN generate_manifest_csv is called once per directory (root + subfolder) + assert mock_generate.call_count == 2 + calls_by_path = { + c.kwargs["path"]: c.kwargs["all_files"] + for c in mock_generate.call_args_list + } + assert any(f.id == SYN_456 for f in calls_by_path["/tmp/mydir"]) + assert any( + f.id == FILE_2_ID + for f in calls_by_path[os.path.join("/tmp/mydir", SUB_FOLDER_NAME)] + ) + + async def test_sync_from_synapse_manifest_root_generates_only_at_root( + self, + ) -> None: + # GIVEN a Folder object with a path + folder = Folder(id=SYN_123) + children = [{"id": SYN_456, "type": FILE_ENTITY, "name": "example_file_1"}] + + async def mock_get_children(*args, **kwargs): + for child in children: + yield child + + downloaded_file = File( + id=SYN_456, + name="example_file_1", + path="/tmp/mydir/example_file_1.txt", + parent_id=SYN_123, + ) + + # WHEN I call sync_from_synapse with manifest="root" and a path + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_folder_output(), + ), + patch( + "synapseclient.models.file.File.get_async", + return_value=downloaded_file, + ), + patch( + "synapseclient.models.mixins.storable_container.generate_manifest_csv", + ) as mock_generate, + ): + await folder.sync_from_synapse_async( + path="/tmp/mydir", manifest="root", synapse_client=self.syn + ) + + # THEN generate_manifest_csv should be called exactly once with the root path + mock_generate.assert_called_once() + assert mock_generate.call_args.kwargs["path"] == "/tmp/mydir" + assert mock_generate.call_args.kwargs["all_files"][0].id == SYN_456 + + async def test_sync_from_synapse_manifest_suppress_skips_generation( + self, + ) -> None: + # GIVEN a Folder object with a path + folder = Folder(id=SYN_123) + children = [{"id": SYN_456, "type": FILE_ENTITY, "name": "example_file_1"}] + + async def mock_get_children(*args, **kwargs): + for child in children: + yield child + + # WHEN I call sync_from_synapse with manifest="suppress" + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_folder_output(), + ), + patch( + "synapseclient.models.file.File.get_async", + return_value=(File(id=SYN_456, name="example_file_1")), + ), + patch( + "synapseclient.models.mixins.storable_container.generate_manifest_csv", + ) as mock_generate, + ): + await folder.sync_from_synapse_async( + path="/tmp/mydir", manifest="suppress", synapse_client=self.syn + ) + + # THEN generate_manifest_csv should never be called + mock_generate.assert_not_called() + + async def test_sync_from_synapse_no_manifest_without_path(self) -> None: + # GIVEN a Folder with no path specified + folder = Folder(id=SYN_123) + children = [{"id": SYN_456, "type": FILE_ENTITY, "name": "example_file_1"}] + + async def mock_get_children(*args, **kwargs): + for child in children: + yield child + + # WHEN I call sync_from_synapse with no path (default manifest="all") + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_folder_output(), + ), + patch( + "synapseclient.models.file.File.get_async", + return_value=(File(id=SYN_456, name="example_file_1")), + ), + patch( + "synapseclient.models.mixins.storable_container.generate_manifest_csv", + ) as mock_generate, + ): + await folder.sync_from_synapse_async(synapse_client=self.syn) + + # THEN generate_manifest_csv should not be called (no path to write to) + mock_generate.assert_not_called() + + +class TestStorageLocationMixin: + """Tests for ProjectSettingsMixin methods on Folder.""" + + STORAGE_LOCATION_ID = 12345 + SETTING_ID = "setting_abc" + + @pytest.fixture(autouse=True, scope="function") + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + @pytest.fixture() + def example_setting(self): + return ProjectSetting( + id=self.SETTING_ID, + project_id=SYN_123, + settings_type="upload", + locations=[self.STORAGE_LOCATION_ID], + ) + + # ------------------------------------------------------------------------- + # set_storage_location_async + # ------------------------------------------------------------------------- + + async def test_set_storage_location_creates_new_custom_storage_location( + self, example_setting + ) -> None: + """Test that when there is no existing project setting and we set a storage location, a new project setting is created.""" + folder = Folder(id=SYN_123) + + with ( + patch.object( + ProjectSetting, "get_async", new_callable=AsyncMock, return_value=None + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=example_setting, + ) as mocked_store, + ): + result = await folder.set_storage_location_async( + storage_location_id=self.STORAGE_LOCATION_ID, + synapse_client=self.syn, + ) + + # THEN store was called and the new setting has the correct locations and project + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.project_id == SYN_123 + assert stored_setting.locations == [self.STORAGE_LOCATION_ID] + assert result.id == self.SETTING_ID + + async def test_set_storage_location_updates_existing_setting( + self, example_setting + ) -> None: + """Test that when there is an existing project setting and we set a storage location, the existing project setting is updated.""" + folder = Folder(id=SYN_123) + + updated_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=SYN_123, + settings_type="upload", + locations=[99999], + ) + + with ( + patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=example_setting, + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=updated_setting, + ) as mocked_store, + ): + result = await folder.set_storage_location_async( + storage_location_id=99999, + synapse_client=self.syn, + ) + + # THEN store was called with the updated locations + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.locations == [99999] + assert result.locations == [99999] + + async def test_set_storage_location_replaces_all_existing_locations(self) -> None: + """Test that set_storage_location_async is destructive — the provided + location(s) fully replace any previously configured locations.""" + folder = Folder(id=SYN_123) + + existing_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=SYN_123, + settings_type="upload", + locations=[111, 222], + ) + updated_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=SYN_123, + settings_type="upload", + locations=[333], + ) + + with ( + patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=existing_setting, + ), + patch.object( + ProjectSetting, + "store_async", + new_callable=AsyncMock, + return_value=updated_setting, + ), + ): + result = await folder.set_storage_location_async( + storage_location_id=333, + synapse_client=self.syn, + ) + + # THEN only the new location is present — the previous [111, 222] are gone + assert result.locations == [333] + + async def test_set_storage_location_use_default_storage_location_instead( + self, example_setting + ) -> None: + """Test that when storage_location_id is not provided, the default Synapse S3 storage location is used.""" + from synapseclient.models.mixins.storage_location_mixin import ( + DEFAULT_STORAGE_LOCATION_ID, + ) + + folder = Folder(id=SYN_123) + + default_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=SYN_123, + settings_type="upload", + locations=[DEFAULT_STORAGE_LOCATION_ID], + ) + + with ( + patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=example_setting, + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=default_setting, + ) as mocked_store, + ): + result = await folder.set_storage_location_async( + synapse_client=self.syn, + ) + + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.locations == [DEFAULT_STORAGE_LOCATION_ID] + assert result.locations == [DEFAULT_STORAGE_LOCATION_ID] + + async def test_set_storage_location_uses_default_storage_location_instead_when_storage_location_id_is_none( + self, example_setting + ) -> None: + """Test that when storage_location_id is not provided, the default Synapse S3 storage location is used.""" + from synapseclient.models.mixins.storage_location_mixin import ( + DEFAULT_STORAGE_LOCATION_ID, + ) + + folder = Folder(id=SYN_123) + + default_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=SYN_123, + settings_type="upload", + locations=[DEFAULT_STORAGE_LOCATION_ID], + ) + + with ( + patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=example_setting, + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=default_setting, + ) as mocked_store, + ): + result = await folder.set_storage_location_async( + storage_location_id=None, + synapse_client=self.syn, + ) + + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.locations == [DEFAULT_STORAGE_LOCATION_ID] + assert result.locations == [DEFAULT_STORAGE_LOCATION_ID] + + async def test_set_storage_location_accepts_list_of_ids( + self, example_setting + ) -> None: + """Test that when storage_location_id is a list of integers, all are stored as-is.""" + folder = Folder(id=SYN_123) + + with ( + patch.object( + ProjectSetting, "get_async", new_callable=AsyncMock, return_value=None + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=example_setting, + ) as mocked_store, + ): + await folder.set_storage_location_async( + storage_location_id=[111, 222, 333], + synapse_client=self.syn, + ) + + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.locations == [111, 222, 333] + + async def test_set_storage_location_converts_single_id_to_list( + self, example_setting + ) -> None: + """Test that when storage_location_id is a single integer, it is wrapped in a list.""" + folder = Folder(id=SYN_123) + + with ( + patch.object( + ProjectSetting, "get_async", new_callable=AsyncMock, return_value=None + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=example_setting, + ) as mocked_store, + ): + await folder.set_storage_location_async( + storage_location_id=111, + synapse_client=self.syn, + ) + + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.locations == [111] + + async def test_partial_update_locations_via_get_and_store(self) -> None: + """Test the partial update pattern: retrieve the existing setting, append a + location, and store — without losing previously configured locations.""" + folder = Folder(id=SYN_123) + + existing_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=SYN_123, + settings_type="upload", + locations=[111, 222], + ) + updated_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=SYN_123, + settings_type="upload", + locations=[111, 222, 333], + ) + + with ( + patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=existing_setting, + ), + patch.object( + ProjectSetting, + "store_async", + new_callable=AsyncMock, + return_value=updated_setting, + ) as mocked_store, + ): + setting = await folder.get_project_setting_async( + setting_type="upload", + synapse_client=self.syn, + ) + setting.locations.append(333) + result = await setting.store_async(synapse_client=self.syn) + + # THEN all three locations are present — the existing ones were preserved + assert result.locations == [111, 222, 333] + mocked_store.assert_awaited_once_with(synapse_client=self.syn) + + async def test_set_storage_location_raises_when_no_id(self) -> None: + """Test that when a folder without an id, an error is raised.""" + folder = Folder() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await folder.set_storage_location_async( + storage_location_id=self.STORAGE_LOCATION_ID, + synapse_client=self.syn, + ) + + # ------------------------------------------------------------------------- + # get_project_setting_async + # ------------------------------------------------------------------------- + + async def test_get_project_setting_returns_setting(self, example_setting) -> None: + """Test that when a project setting exists, it is returned.""" + folder = Folder(id=SYN_123) + + with patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=example_setting, + ): + result = await folder.get_project_setting_async( + setting_type="upload", + synapse_client=self.syn, + ) + + assert result.id == self.SETTING_ID + assert result.locations == [self.STORAGE_LOCATION_ID] + + async def test_get_project_setting_raises_when_no_id(self) -> None: + """Test that when a folder without an id, an error is raised.""" + folder = Folder() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await folder.get_project_setting_async(synapse_client=self.syn) + + # ------------------------------------------------------------------------- + # delete_project_setting_async + # ------------------------------------------------------------------------- + + async def test_delete_project_setting_calls_service(self) -> None: + """Test that when a project setting exists, it is deleted.""" + folder = Folder(id=SYN_123) + + with patch.object( + ProjectSetting, "delete_async", new_callable=AsyncMock, return_value=None + ) as mocked_delete: + await folder.delete_project_setting_async( + setting_id=self.SETTING_ID, + synapse_client=self.syn, + ) + + mocked_delete.assert_awaited_once_with(synapse_client=self.syn) + + async def test_delete_project_setting_raises_when_no_id(self) -> None: + """Test that when a folder without an id, an error is raised.""" + folder = Folder(id=SYN_123) + + with pytest.raises( + ValueError, match="The id is required to delete a project setting." + ): + await folder.delete_project_setting_async( + setting_id=None, + synapse_client=self.syn, + ) + + # ------------------------------------------------------------------------- + # get_sts_storage_token_async + # ------------------------------------------------------------------------- + + async def test_get_sts_storage_token_returns_credentials(self) -> None: + """Test that when a folder with an id, the STS credentials are returned.""" + folder = Folder(id=SYN_123) + + expected_credentials = { + "aws_access_key_id": "AKIA...", + "aws_secret_access_key": "secret", + "aws_session_token": "token", + } + + with patch( + "synapseclient.models.mixins.storage_location_mixin.asyncio.to_thread", + new_callable=AsyncMock, + return_value=expected_credentials, + ) as mocked_to_thread: + result = await folder.get_sts_storage_token_async( + permission="read_only", + synapse_client=self.syn, + ) + + mocked_to_thread.assert_called_once() + call_args = mocked_to_thread.call_args + assert call_args.args[0].__name__ == "get_sts_credentials" + assert call_args.args[2] == SYN_123 + assert call_args.args[3] == "read_only" + assert call_args.kwargs["output_format"] == "json" + assert call_args.kwargs["min_remaining_life"] is None + + assert result == expected_credentials + + async def test_get_sts_storage_token_passes_output_format_and_min_remaining_life( + self, + ) -> None: + """Test that when a folder with an id, the STS credentials are returned with the output format and min remaining life.""" + folder = Folder(id=SYN_123) + + with patch( + "synapseclient.models.mixins.storage_location_mixin.asyncio.to_thread", + new_callable=AsyncMock, + return_value={}, + ) as mocked_to_thread: + await folder.get_sts_storage_token_async( + permission="read_write", + output_format="boto", + min_remaining_life=300, + synapse_client=self.syn, + ) + + call_args = mocked_to_thread.call_args + assert call_args.args[0].__name__ == "get_sts_credentials" + assert call_args.args[2] == SYN_123 + assert call_args.args[3] == "read_write" + assert call_args.kwargs["output_format"] == "boto" + assert call_args.kwargs["min_remaining_life"] == 300 + + async def test_get_sts_storage_token_raises_when_no_id(self) -> None: + """Test that when a folder without an id, an error is raised.""" + folder = Folder() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await folder.get_sts_storage_token_async( + permission="read_only", + synapse_client=self.syn, + ) + + # ------------------------------------------------------------------------- + # index_files_for_migration_async + # ------------------------------------------------------------------------- + + async def test_index_files_for_migration_calls_service(self) -> None: + """Test that when a folder with an id, the files are indexed.""" + folder = Folder(id=SYN_123) + + mock_result = MagicMock(spec=MigrationResult) + + with patch( + "synapseclient.models.mixins.storage_location_mixin._index_files_for_migration_async", + new_callable=AsyncMock, + return_value=mock_result, + ) as mocked_index: + result = await folder.index_files_for_migration_async( + dest_storage_location_id=self.STORAGE_LOCATION_ID, + synapse_client=self.syn, + ) + + mocked_index.assert_called_once_with( + folder, + dest_storage_location_id=str(self.STORAGE_LOCATION_ID), + db_path=None, + source_storage_location_ids=None, + file_version_strategy="new", + include_table_files=False, + continue_on_error=False, + synapse_client=self.syn, + ) + assert result == mock_result + + async def test_index_files_for_migration_converts_source_ids_to_strings( + self, + ) -> None: + """Test that when source_storage_location_ids are integers, they are converted to strings.""" + folder = Folder(id=SYN_123) + + with patch( + "synapseclient.models.mixins.storage_location_mixin._index_files_for_migration_async", + new_callable=AsyncMock, + return_value=MagicMock(spec=MigrationResult), + ) as mocked_index: + await folder.index_files_for_migration_async( + dest_storage_location_id=self.STORAGE_LOCATION_ID, + source_storage_location_ids=[111, 222], + synapse_client=self.syn, + ) + + call_kwargs = mocked_index.call_args.kwargs + assert call_kwargs["source_storage_location_ids"] == ["111", "222"] + + async def test_index_files_for_migration_raises_when_no_id(self) -> None: + """Test that when a folder without an id, an error is raised.""" + folder = Folder() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await folder.index_files_for_migration_async( + dest_storage_location_id=self.STORAGE_LOCATION_ID, + synapse_client=self.syn, + ) + + # ------------------------------------------------------------------------- + # migrate_indexed_files_async + # ------------------------------------------------------------------------- + + async def test_migrate_indexed_files_calls_service(self) -> None: + """Test that when a folder with an id, the files are migrated.""" + folder = Folder(id=SYN_123) + + db_path = "/tmp/migration.db" + mock_result = MagicMock(spec=MigrationResult) + + with patch( + "synapseclient.models.mixins.storage_location_mixin._migrate_indexed_files_async", + new_callable=AsyncMock, + return_value=mock_result, + ) as mocked_migrate: + result = await folder.migrate_indexed_files_async( + db_path=db_path, + synapse_client=self.syn, + ) + + mocked_migrate.assert_called_once_with( + db_path=db_path, + create_table_snapshots=True, + continue_on_error=False, + force=False, + synapse_client=self.syn, + ) + assert result == mock_result + + async def test_migrate_indexed_files_passes_all_options(self) -> None: + """Test that when a folder with an id, the files are migrated with all options.""" + folder = Folder(id=SYN_123) + + mock_result = MagicMock(spec=MigrationResult) + with patch( + "synapseclient.models.mixins.storage_location_mixin._migrate_indexed_files_async", + new_callable=AsyncMock, + return_value=mock_result, + ) as mocked_migrate: + result = await folder.migrate_indexed_files_async( + db_path="/tmp/migration.db", + create_table_snapshots=False, + continue_on_error=True, + force=True, + synapse_client=self.syn, + ) + + mocked_migrate.assert_called_once_with( + db_path="/tmp/migration.db", + create_table_snapshots=False, + continue_on_error=True, + force=True, + synapse_client=self.syn, + ) + assert result == mock_result + + async def test_migrate_indexed_files_raises_when_no_id(self) -> None: + """Test that when a folder without an id, an error is raised.""" + folder = Folder() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await folder.migrate_indexed_files_async( + db_path="/tmp/migration.db", + synapse_client=self.syn, + ) diff --git a/tests/unit/synapseclient/models/async/unit_test_form_async.py b/tests/unit/synapseclient/models/async/unit_test_form_async.py index 24a4f68b3..1e0d9af03 100644 --- a/tests/unit/synapseclient/models/async/unit_test_form_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_form_async.py @@ -287,16 +287,20 @@ async def test_download_async(self, syn): form_data = FormData(form_data_id="67890", data_file_handle_id="54321") # WHEN downloading the form data - with patch( - "synapseclient.core.download.download_functions.download_by_file_handle", - new_callable=AsyncMock, - ) as mock_download_file_handle, patch.object(syn, "cache") as mock_cache, patch( - "synapseclient.core.download.download_functions.ensure_download_location_is_directory", - ) as mock_ensure_dir: + with ( + patch( + "synapseclient.core.download.download_functions.download_by_file_handle", + new_callable=AsyncMock, + ) as mock_download_file_handle, + patch.object(syn, "cache") as mock_cache, + patch( + "synapseclient.core.download.download_functions.ensure_download_location_is_directory", + ) as mock_ensure_dir, + ): mock_cache.get.side_effect = "/tmp/foo" - mock_ensure_dir.return_value = ( - mock_cache.get_cache_dir.return_value - ) = "/tmp/download" + mock_ensure_dir.return_value = mock_cache.get_cache_dir.return_value = ( + "/tmp/download" + ) mock_file_name = f"SYNAPSE_FORM_{form_data.data_file_handle_id}.csv" await form_data.download_async( diff --git a/tests/unit/synapseclient/models/async/unit_test_link_async.py b/tests/unit/synapseclient/models/async/unit_test_link_async.py index 9e40a6450..34debbe54 100644 --- a/tests/unit/synapseclient/models/async/unit_test_link_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_link_async.py @@ -225,18 +225,22 @@ async def test_get_by_id_follow_link_true(self) -> None: link = Link(id=LINK_ID) # WHEN we call get_async with follow_link=True (default) - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ) as mocked_get_id, patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory, patch( - "synapseclient.operations.factory_operations.get_async", - new_callable=AsyncMock, - return_value="followed_entity", - ) as mocked_factory_get_async: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ) as mocked_get_id, + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + patch( + "synapseclient.operations.factory_operations.get_async", + new_callable=AsyncMock, + return_value="followed_entity", + ) as mocked_factory_get_async, + ): # Set up get_from_entity_factory to populate the link async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) @@ -271,14 +275,17 @@ async def test_get_by_id_follow_link_false(self) -> None: link = Link(id=LINK_ID) # WHEN we call get_async with follow_link=False - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ) as mocked_get_id, patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ) as mocked_get_id, + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) @@ -309,14 +316,17 @@ async def test_get_by_name_and_parent_id(self) -> None: link = Link(name=LINK_NAME, parent_id=PARENT_ID) # WHEN we call get_async - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ) as mocked_get_id, patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ) as mocked_get_id, + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) @@ -340,14 +350,17 @@ async def test_get_by_name_and_parent_from_argument(self) -> None: parent = Folder(id=PARENT_ID) # WHEN we call get_async with the parent - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ), patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ), + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) @@ -369,14 +382,17 @@ async def test_get_by_name_and_parent_project(self) -> None: parent = Project(id=PARENT_ID) # WHEN we call get_async with the parent - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ), patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ), + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) @@ -424,18 +440,22 @@ async def test_get_follow_link_with_file_options(self) -> None: mock_file_options = object() # WHEN we call get_async with follow_link=True and file_options - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ), patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory, patch( - "synapseclient.operations.factory_operations.get_async", - new_callable=AsyncMock, - return_value="followed_file_entity", - ) as mocked_factory_get_async: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ), + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + patch( + "synapseclient.operations.factory_operations.get_async", + new_callable=AsyncMock, + return_value="followed_file_entity", + ) as mocked_factory_get_async, + ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) @@ -464,14 +484,17 @@ async def test_get_sets_last_persistent_instance(self) -> None: link = Link(id=LINK_ID) # WHEN we call get_async with follow_link=False - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ), patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ), + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) @@ -498,19 +521,23 @@ async def test_store_new_link(self) -> None: ) # WHEN we call store_async - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.link.store_entity", - new_callable=AsyncMock, - return_value=self.get_example_rest_api_response(), - ) as mocked_store_entity, patch( - "synapseclient.models.link.store_entity_components", - new_callable=AsyncMock, - return_value=False, - ) as mocked_store_components: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.link.store_entity", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_response(), + ) as mocked_store_entity, + patch( + "synapseclient.models.link.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ) as mocked_store_components, + ): result = await link.store_async(synapse_client=self.syn) # THEN store_entity should have been called @@ -550,21 +577,26 @@ async def test_store_existing_link_with_id(self) -> None: ) # WHEN we call store_async - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ), patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory, patch( - "synapseclient.models.link.store_entity", - new_callable=AsyncMock, - return_value=self.get_example_rest_api_response(), - ) as mocked_store_entity, patch( - "synapseclient.models.link.store_entity_components", - new_callable=AsyncMock, - return_value=False, + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ), + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + patch( + "synapseclient.models.link.store_entity", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_response(), + ) as mocked_store_entity, + patch( + "synapseclient.models.link.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), ): # Set up get_from_entity_factory to populate the link copy in # _find_existing_entity @@ -593,18 +625,22 @@ async def test_store_with_parent_argument(self) -> None: parent = Folder(id=PARENT_ID) # WHEN we call store_async with the parent argument - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.link.store_entity", - new_callable=AsyncMock, - return_value=self.get_example_rest_api_response(), - ) as mocked_store_entity, patch( - "synapseclient.models.link.store_entity_components", - new_callable=AsyncMock, - return_value=False, + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.link.store_entity", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_response(), + ) as mocked_store_entity, + patch( + "synapseclient.models.link.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), ): result = await link.store_async(parent=parent, synapse_client=self.syn) @@ -627,18 +663,22 @@ async def test_store_with_parent_project(self) -> None: parent = Project(id=PARENT_ID) # WHEN we call store_async with the parent argument - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.link.store_entity", - new_callable=AsyncMock, - return_value=self.get_example_rest_api_response(), - ), patch( - "synapseclient.models.link.store_entity_components", - new_callable=AsyncMock, - return_value=False, + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.link.store_entity", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_response(), + ), + patch( + "synapseclient.models.link.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), ): result = await link.store_async(parent=parent, synapse_client=self.syn) @@ -677,21 +717,26 @@ async def test_store_skips_validation_when_id_is_set(self) -> None: link = Link(id=LINK_ID) # WHEN we call store_async, it should NOT raise ValueError - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ), patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory, patch( - "synapseclient.models.link.store_entity", - new_callable=AsyncMock, - return_value=self.get_example_rest_api_response(), - ), patch( - "synapseclient.models.link.store_entity_components", - new_callable=AsyncMock, - return_value=False, + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ), + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + patch( + "synapseclient.models.link.store_entity", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_response(), + ), + patch( + "synapseclient.models.link.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): @@ -713,14 +758,17 @@ async def test_store_no_changes_skips_store_entity(self) -> None: ) # AND get_async has been called (which sets _last_persistent_instance) - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ), patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ), + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) @@ -729,13 +777,16 @@ async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): await link.get_async(follow_link=False, synapse_client=self.syn) # WHEN we call store_async without making changes - with patch( - "synapseclient.models.link.store_entity", - new_callable=AsyncMock, - ) as mocked_store_entity, patch( - "synapseclient.models.link.store_entity_components", - new_callable=AsyncMock, - return_value=False, + with ( + patch( + "synapseclient.models.link.store_entity", + new_callable=AsyncMock, + ) as mocked_store_entity, + patch( + "synapseclient.models.link.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), ): result = await link.store_async(synapse_client=self.syn) @@ -749,14 +800,17 @@ async def test_store_with_changes_after_get(self) -> None: # GIVEN a Link that was previously retrieved from Synapse link = Link(id=LINK_ID) - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ), patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ), + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) @@ -771,14 +825,17 @@ async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): updated_response = self.get_example_rest_api_response() updated_response["description"] = "New description" - with patch( - "synapseclient.models.link.store_entity", - new_callable=AsyncMock, - return_value=updated_response, - ) as mocked_store_entity, patch( - "synapseclient.models.link.store_entity_components", - new_callable=AsyncMock, - return_value=False, + with ( + patch( + "synapseclient.models.link.store_entity", + new_callable=AsyncMock, + return_value=updated_response, + ) as mocked_store_entity, + patch( + "synapseclient.models.link.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), ): result = await link.store_async(synapse_client=self.syn) @@ -797,23 +854,28 @@ async def test_store_re_reads_when_components_change(self) -> None: ) # WHEN we call store_async and store_entity_components returns True - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.link.store_entity", - new_callable=AsyncMock, - return_value=self.get_example_rest_api_response(), - ), patch( - "synapseclient.models.link.store_entity_components", - new_callable=AsyncMock, - return_value=True, - ), patch.object( - link, - "get_async", - new_callable=AsyncMock, - ) as mocked_get_async: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.link.store_entity", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_response(), + ), + patch( + "synapseclient.models.link.store_entity_components", + new_callable=AsyncMock, + return_value=True, + ), + patch.object( + link, + "get_async", + new_callable=AsyncMock, + ) as mocked_get_async, + ): result = await link.store_async(synapse_client=self.syn) # THEN get_async should have been called for a re-read @@ -830,23 +892,28 @@ async def test_store_does_not_re_read_when_no_component_changes(self) -> None: ) # WHEN we call store_async and store_entity_components returns False - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.link.store_entity", - new_callable=AsyncMock, - return_value=self.get_example_rest_api_response(), - ), patch( - "synapseclient.models.link.store_entity_components", - new_callable=AsyncMock, - return_value=False, - ), patch.object( - link, - "get_async", - new_callable=AsyncMock, - ) as mocked_get_async: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.link.store_entity", + new_callable=AsyncMock, + return_value=self.get_example_rest_api_response(), + ), + patch( + "synapseclient.models.link.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), + patch.object( + link, + "get_async", + new_callable=AsyncMock, + ) as mocked_get_async, + ): result = await link.store_async(synapse_client=self.syn) # THEN get_async should NOT have been called for a re-read @@ -864,14 +931,17 @@ async def test_find_existing_entity_when_entity_exists(self) -> None: ) # WHEN we call _find_existing_entity and an entity is found - with patch( - "synapseclient.models.link.get_id", - new_callable=AsyncMock, - return_value=LINK_ID, - ), patch( - "synapseclient.models.link.get_from_entity_factory", - new_callable=AsyncMock, - ) as mocked_get_entity_factory: + with ( + patch( + "synapseclient.models.link.get_id", + new_callable=AsyncMock, + return_value=LINK_ID, + ), + patch( + "synapseclient.models.link.get_from_entity_factory", + new_callable=AsyncMock, + ) as mocked_get_entity_factory, + ): async def fill_link(synapse_id_or_path, entity_to_update, synapse_client): entity_to_update.fill_from_dict(self.get_example_rest_api_response()) diff --git a/tests/unit/synapseclient/models/async/unit_test_manifest_async.py b/tests/unit/synapseclient/models/async/unit_test_manifest_async.py new file mode 100644 index 000000000..63577acfd --- /dev/null +++ b/tests/unit/synapseclient/models/async/unit_test_manifest_async.py @@ -0,0 +1,2145 @@ +"""Unit tests for synapseclient.models.services.manifest (upload-side).""" + +import asyncio +import os +from pathlib import Path +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pandas as pd +import pytest + +from synapseclient import Synapse +from synapseclient.core.exceptions import SynapseProvenanceError +from synapseclient.models.services.manifest import ( + NON_ANNOTATION_COLUMNS, + UploadSyncFile, + _apply_synapse_store_defaults, + _build_activity_linkage, + _build_annotations_for_file, + _build_upload_files, + _build_upload_plan, + _check_file_names, + _check_parent_containers_async, + _check_path_and_normalize, + _check_required_columns, + _check_size_each_file, + _check_unique_paths, + _clean_manifest, + _convert_value, + _create_upload_tasks, + _default_name_column, + _expand_path, + _local_path_refs, + _parse_annotation_cell, + _parse_force_version, + _parse_literal, + _read_and_filter_errors, + _resolve_local_file_provenance, + _resolve_provenance_column, + _resolve_provenance_item, + _resolve_row, + _sort_and_fix_provenance, + _split_csv_cell, + _upload_file_async, + _validate_manifest, + read_manifest_for_upload, + upload_sync_files, +) + + +class TestReadManifestForUpload: + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + async def test_missing_path_column_raises(self, tmp_path: Path) -> None: + """A manifest without a 'path' column raises ValueError.""" + csv = "parentId\nsyn1\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + with pytest.raises(ValueError, match="'path'"): + await read_manifest_for_upload(str(manifest), self.syn, True, False) + + async def test_missing_parent_id_column_raises(self, tmp_path: Path) -> None: + """A manifest without a 'parentId' column raises ValueError.""" + f = tmp_path / "file.txt" + f.write_text("content") + csv = f"path\n{f}\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + with pytest.raises(ValueError, match="'parentId'"): + await read_manifest_for_upload(str(manifest), self.syn, True, False) + + async def test_rows_with_error_are_skipped(self, tmp_path: Path) -> None: + """Rows with a non-empty 'error' cell are excluded from the returned items list.""" + f = tmp_path / "file.txt" + f.write_text("hello") + csv = f"path,parentId,error\n{f},syn1,some error\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + items, total = await read_manifest_for_upload( + str(manifest), self.syn, True, False + ) + assert items == [] + assert total == 0 + + async def test_all_rows_have_errors_returns_empty(self, tmp_path: Path) -> None: + """When every row has an error, both the items list and total size are zero.""" + csv = "path,parentId,error\n/x/y.txt,syn1,fail\n/x/z.txt,syn1,fail\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + items, total = await read_manifest_for_upload( + str(manifest), self.syn, True, False + ) + assert items == [] + assert total == 0 + + async def test_duplicate_paths_raise(self, tmp_path: Path) -> None: + """Two rows referencing the same file path raise ValueError about unique file paths.""" + f = tmp_path / "file.txt" + f.write_text("hi") + csv = f"path,parentId\n{f},syn1\n{f},syn2\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + with ( + patch( + "synapseclient.models.services.manifest._check_parent_containers_async", + new=AsyncMock(), + ), + ): + with pytest.raises(ValueError, match="unique file path"): + await read_manifest_for_upload(str(manifest), self.syn, True, False) + + async def test_empty_file_raises(self, tmp_path: Path) -> None: + """A manifest row pointing to a zero-byte file raises ValueError.""" + f = tmp_path / "empty.txt" + f.write_text("") # 0 bytes + csv = f"path,parentId\n{f},syn1\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + with pytest.raises(ValueError, match="empty"): + await read_manifest_for_upload(str(manifest), self.syn, True, False) + + async def test_valid_manifest_returns_items_and_size(self, tmp_path: Path) -> None: + """A valid manifest returns one upload item and the correct total file size.""" + f = tmp_path / "file.txt" + f.write_text("hello world") + size = f.stat().st_size + csv = f"path,parentId,ID\n{f},syn1,syn42\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + + with patch( + "synapseclient.models.services.manifest._check_parent_containers_async", + new=AsyncMock(), + ): + items, total = await read_manifest_for_upload( + str(manifest), self.syn, True, False + ) + + assert total == size + assert len(items) == 1 + assert items[0].entity.parent_id == "syn1" + assert items[0].entity.id == "syn42" + + async def test_name_derived_from_basename_when_absent(self, tmp_path: Path) -> None: + """When the manifest has no 'name' column, the entity name defaults to the file's basename.""" + f = tmp_path / "myfile.csv" + f.write_text("data") + csv = f"path,parentId\n{f},syn1\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + + with patch( + "synapseclient.models.services.manifest._check_parent_containers_async", + new=AsyncMock(), + ): + items, _ = await read_manifest_for_upload( + str(manifest), self.syn, True, False + ) + + assert items[0].entity.name == "myfile.csv" + + async def test_url_path_sets_synapse_store_false_and_excluded_from_size( + self, tmp_path: Path + ) -> None: + """A URL path sets synapse_store=False on the item and contributes 0 bytes to the total size.""" + url = "https://example.com/data.csv" + csv = f"path,parentId\n{url},syn1\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + + with patch( + "synapseclient.models.services.manifest._check_parent_containers_async", + new=AsyncMock(), + ): + items, total = await read_manifest_for_upload( + str(manifest), self.syn, True, False + ) + + assert total == 0 + assert items[0].entity.synapse_store is False + + async def test_empty_error_column_row_is_kept(self, tmp_path: Path) -> None: + """A row with an empty 'error' cell is treated as valid and included in the upload items.""" + f = tmp_path / "file.txt" + f.write_text("content") + csv = f"path,parentId,error\n{f},syn1,\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + + with patch( + "synapseclient.models.services.manifest._check_parent_containers_async", + new=AsyncMock(), + ): + items, _ = await read_manifest_for_upload( + str(manifest), self.syn, True, False + ) + + assert len(items) == 1 + + async def test_synapse_store_defaults_to_true(self, tmp_path: Path) -> None: + """When 'synapseStore' is absent from the manifest, it defaults to True.""" + f = tmp_path / "file.txt" + f.write_text("content") + csv = f"path,parentId\n{f},syn1\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + + with patch( + "synapseclient.models.services.manifest._check_parent_containers_async", + new=AsyncMock(), + ): + items, _ = await read_manifest_for_upload( + str(manifest), self.syn, True, False + ) + + assert items[0].entity.synapse_store is True + + async def test_empty_csv_returns_empty(self, tmp_path: Path) -> None: + """A CSV with headers but no data rows returns ([], 0).""" + manifest = tmp_path / "manifest.csv" + manifest.write_text("path,parentId\n") + items, total = await read_manifest_for_upload( + str(manifest), self.syn, True, False + ) + assert items == [] + assert total == 0 + + async def test_explicit_synapse_store_false_preserved(self, tmp_path: Path) -> None: + """An explicit 'False' in the synapseStore column is preserved.""" + f = tmp_path / "file.txt" + f.write_text("content") + csv = f"path,parentId,synapseStore\n{f},syn1,False\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + + with patch( + "synapseclient.models.services.manifest._check_parent_containers_async", + new=AsyncMock(), + ): + items, _ = await read_manifest_for_upload( + str(manifest), self.syn, True, False + ) + + assert items[0].entity.synapse_store is False + + +class TestCleanManifest: + def test_returns_cleaned_dataframe(self, tmp_path: Path) -> None: + """Returns a DataFrame with required columns and normalized paths.""" + f = tmp_path / "data.txt" + f.write_text("content") + csv = tmp_path / "manifest.csv" + csv.write_text(f"path,parentId\n{f},syn1\n") + df = _clean_manifest(str(csv)) + assert len(df) == 1 + assert df.iloc[0]["path"] == str(f) + assert "name" in df.columns + + def test_empty_after_error_filter_returns_empty(self, tmp_path: Path) -> None: + """Returns empty DataFrame when all rows have errors.""" + csv = tmp_path / "manifest.csv" + csv.write_text("path,parentId,error\n/a.txt,syn1,oops\n") + df = _clean_manifest(str(csv)) + assert df.empty + + def test_missing_path_column_raises(self, tmp_path: Path) -> None: + """ValueError when the path column is missing.""" + csv = tmp_path / "manifest.csv" + csv.write_text("parentId\nsyn1\n") + with pytest.raises(ValueError, match="path"): + _clean_manifest(str(csv)) + + def test_synapse_store_defaults_applied(self, tmp_path: Path) -> None: + """synapseStore column is created and defaulted when absent.""" + f = tmp_path / "data.txt" + f.write_text("content") + csv = tmp_path / "manifest.csv" + csv.write_text(f"path,parentId\n{f},syn1\n") + df = _clean_manifest(str(csv)) + assert "synapseStore" in df.columns + assert df.iloc[0]["synapseStore"] == True # noqa: E712 + + +class TestReadAndFilterErrors: + def test_rows_without_error_column_returned(self, tmp_path: Path) -> None: + """A manifest with no error column returns all rows.""" + csv = tmp_path / "manifest.csv" + csv.write_text("path,parentId\n/a.txt,syn1\n/b.txt,syn2\n") + df = _read_and_filter_errors(str(csv)) + assert len(df) == 2 + + def test_error_rows_filtered(self, tmp_path: Path) -> None: + """Rows with a non-empty error column are dropped.""" + csv = tmp_path / "manifest.csv" + csv.write_text( + "path,parentId,error\n/a.txt,syn1,\n/b.txt,syn2,download failed\n" + ) + df = _read_and_filter_errors(str(csv)) + assert len(df) == 1 + assert df.iloc[0]["path"] == "/a.txt" + + def test_all_errors_returns_empty(self, tmp_path: Path) -> None: + """If every row has an error, the result is empty.""" + csv = tmp_path / "manifest.csv" + csv.write_text("path,parentId,error\n/a.txt,syn1,bad\n") + df = _read_and_filter_errors(str(csv)) + assert df.empty + + +class TestCheckRequiredColumns: + def test_valid_columns_pass(self) -> None: + """No error when both path and parentId columns are present.""" + df = pd.DataFrame({"path": ["/a.txt"], "parentId": ["syn1"]}) + _check_required_columns(df) + + def test_missing_path_raises(self) -> None: + """ValueError when path column is missing.""" + df = pd.DataFrame({"parentId": ["syn1"]}) + with pytest.raises(ValueError, match="path"): + _check_required_columns(df) + + def test_missing_parent_id_raises(self) -> None: + """ValueError when parentId column is missing.""" + df = pd.DataFrame({"path": ["/a.txt"]}) + with pytest.raises(ValueError, match="parentId"): + _check_required_columns(df) + + +class TestCheckUniquePaths: + def test_unique_paths_pass(self) -> None: + """No error when all paths are unique.""" + df = pd.DataFrame({"path": ["/a.txt", "/b.txt"]}) + _check_unique_paths(df) + + def test_duplicate_paths_raise(self) -> None: + """ValueError when a path appears more than once.""" + df = pd.DataFrame({"path": ["/a.txt", "/a.txt"]}) + with pytest.raises(ValueError, match="unique"): + _check_unique_paths(df) + + +class TestDefaultNameColumn: + def test_creates_name_from_path(self) -> None: + """Creates the name column from path basenames when absent.""" + df = pd.DataFrame({"path": ["/dir/file.txt", "/dir/other.csv"]}) + _default_name_column(df) + assert list(df["name"]) == ["file.txt", "other.csv"] + + def test_fills_empty_names(self) -> None: + """Fills blank names from the path column, preserving existing names.""" + df = pd.DataFrame( + {"path": ["/dir/file.txt", "/dir/other.csv"], "name": ["custom", ""]} + ) + _default_name_column(df) + assert list(df["name"]) == ["custom", "other.csv"] + + def test_existing_names_preserved(self) -> None: + """Non-empty names are not overwritten.""" + df = pd.DataFrame({"path": ["/dir/file.txt"], "name": ["keepme"]}) + _default_name_column(df) + assert df.iloc[0]["name"] == "keepme" + + +class TestValidateManifest: + def test_returns_total_size(self, tmp_path: Path) -> None: + """Returns the combined size of all local files.""" + f1 = tmp_path / "a.txt" + f1.write_text("hello") + f2 = tmp_path / "b.txt" + f2.write_text("world!") + df = pd.DataFrame( + { + "path": [str(f1), str(f2)], + "parentId": ["syn1", "syn2"], + "name": ["a.txt", "b.txt"], + "synapseStore": [True, True], + } + ) + total = _validate_manifest(df) + assert total == f1.stat().st_size + f2.stat().st_size + + def test_invalid_name_raises(self, tmp_path: Path) -> None: + """Raises ValueError for invalid file names.""" + f = tmp_path / "good.txt" + f.write_text("data") + df = pd.DataFrame( + { + "path": [str(f)], + "parentId": ["syn1"], + "name": ["bad/name"], + "synapseStore": [True], + } + ) + with pytest.raises(ValueError, match="name"): + _validate_manifest(df) + + def test_empty_file_raises(self, tmp_path: Path) -> None: + """Raises ValueError for empty (0-byte) files.""" + f = tmp_path / "empty.txt" + f.write_text("") + df = pd.DataFrame( + { + "path": [str(f)], + "parentId": ["syn1"], + "name": ["empty.txt"], + "synapseStore": [True], + } + ) + with pytest.raises(ValueError, match="empty"): + _validate_manifest(df) + + +class TestApplySynapseStoreDefaults: + def test_creates_column_when_missing(self) -> None: + """Creates synapseStore column defaulting to True for local paths.""" + df = pd.DataFrame({"path": ["/a.txt"]}) + _apply_synapse_store_defaults(df) + assert df.iloc[0]["synapseStore"] == True # noqa: E712 + + def test_url_rows_set_to_false(self) -> None: + """URL paths are set to synapseStore=False.""" + df = pd.DataFrame({"path": ["https://example.com/data.csv"]}) + _apply_synapse_store_defaults(df) + assert df.iloc[0]["synapseStore"] == False # noqa: E712 + + def test_explicit_false_preserved(self) -> None: + """An explicit False is preserved even for local paths.""" + df = pd.DataFrame({"path": ["/a.txt"], "synapseStore": [False]}) + _apply_synapse_store_defaults(df) + assert df.iloc[0]["synapseStore"] == False # noqa: E712 + + def test_null_defaults_to_true(self) -> None: + """Null/NaN values default to True for local paths.""" + df = pd.DataFrame({"path": ["/a.txt"], "synapseStore": [None]}) + _apply_synapse_store_defaults(df) + assert df.iloc[0]["synapseStore"] == True # noqa: E712 + + def test_column_is_bool_dtype(self) -> None: + """The synapseStore column is cast to bool dtype.""" + df = pd.DataFrame({"path": ["/a.txt", "https://example.com/f.csv"]}) + _apply_synapse_store_defaults(df) + assert df["synapseStore"].dtype == bool + + +class TestCheckPathAndNormalize: + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + def test_url_passes_through(self) -> None: + """URLs are returned unchanged without any filesystem check.""" + url = "https://example.com/file.txt" + assert _check_path_and_normalize(url) == url + + def test_existing_file_returns_absolute_path(self, tmp_path: Path) -> None: + """A relative or home-relative path to an existing file is resolved to an + absolute path.""" + f = tmp_path / "test.txt" + f.write_text("hello") + result = _check_path_and_normalize(str(f)) + assert os.path.isabs(result) + assert result == str(f.resolve()) + + def test_missing_file_raises(self) -> None: + """A path that does not point to an existing file raises IOError.""" + with pytest.raises(IOError): + _check_path_and_normalize("/nonexistent/path/file.txt") + + +class TestExpandPath: + def test_absolute_path_unchanged(self, tmp_path: Path) -> None: + """An absolute path with no special characters is returned as-is.""" + p = str(tmp_path / "file.txt") + assert _expand_path(p) == p + + def test_tilde_expanded(self) -> None: + """A leading ~ is expanded to the user's home directory.""" + result = _expand_path("~/somefile.txt") + assert not result.startswith("~") + assert os.path.isabs(result) + + def test_relative_path_becomes_absolute(self) -> None: + """A relative path is resolved to an absolute path.""" + result = _expand_path("relative/path.txt") + assert os.path.isabs(result) + + def test_env_var_expanded(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Environment variables in the path are expanded.""" + monkeypatch.setenv("MY_TEST_DIR", "/tmp/test_dir") + result = _expand_path("$MY_TEST_DIR/file.txt") + assert "$MY_TEST_DIR" not in result + expected = os.path.abspath(os.path.join("/tmp/test_dir", "file.txt")) + assert result == expected + + def test_combined_tilde_and_env_var(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Both ~ and environment variables are expanded in the same path.""" + monkeypatch.setenv("MY_SUBDIR", "docs") + result = _expand_path("~/$MY_SUBDIR/file.txt") + assert "~" not in result + assert "$MY_SUBDIR" not in result + assert result.endswith(os.sep + "docs" + os.sep + "file.txt") + + +class TestCheckSizeEachFile: + def test_returns_total_size(self, tmp_path: Path) -> None: + """Returns the combined size of all local files.""" + f1 = tmp_path / "a.txt" + f1.write_text("hello") + f2 = tmp_path / "b.txt" + f2.write_text("world!") + df = pd.DataFrame({"path": [str(f1), str(f2)]}) + total = _check_size_each_file(df) + assert total == f1.stat().st_size + f2.stat().st_size + + def test_empty_file_raises(self, tmp_path: Path) -> None: + """A zero-byte file raises ValueError.""" + f = tmp_path / "empty.txt" + f.write_text("") + df = pd.DataFrame({"path": [str(f)]}) + with pytest.raises(ValueError, match="empty"): + _check_size_each_file(df) + + def test_url_rows_skipped(self, tmp_path: Path) -> None: + """Rows whose path is a URL are skipped and not counted.""" + f = tmp_path / "local.txt" + f.write_text("data") + df = pd.DataFrame({"path": [str(f), "https://example.com/file.csv"]}) + total = _check_size_each_file(df) + assert total == f.stat().st_size + + def test_all_urls_returns_zero(self) -> None: + """If every row is a URL, the total size is zero.""" + df = pd.DataFrame({"path": ["https://a.com/f1", "https://b.com/f2"]}) + assert _check_size_each_file(df) == 0 + + +class TestCheckFileNameNewFormat: + def test_valid_names_pass(self) -> None: + """Unique, validly named files in the same parent do not raise.""" + df = pd.DataFrame( + { + "path": ["/a/file1.txt", "/a/file2.txt"], + "name": ["file1.txt", "file2.txt"], + "parentId": ["syn1", "syn1"], + } + ) + _check_file_names(df) + + def test_invalid_name_raises(self) -> None: + """A file name containing characters not permitted by Synapse raises ValueError.""" + df = pd.DataFrame( + { + "path": ["/a/bad!name.txt"], + "name": ["bad!name.txt"], + "parentId": ["syn1"], + } + ) + with pytest.raises(ValueError, match="cannot be stored to Synapse"): + _check_file_names(df) + + def test_duplicate_name_and_parent_raises(self) -> None: + """Two files with the same name uploaded to the same parent raise ValueError.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt", "/b/file.txt"], + "name": ["file.txt", "file.txt"], + "parentId": ["syn1", "syn1"], + } + ) + with pytest.raises(ValueError, match="unique file name"): + _check_file_names(df) + + def test_same_name_different_parent_is_ok(self) -> None: + """The same file name is allowed in different parent containers.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt", "/b/file.txt"], + "name": ["file.txt", "file.txt"], + "parentId": ["syn1", "syn2"], + } + ) + _check_file_names(df) + + @pytest.mark.parametrize( + "name", + [ + "file`name.txt", + "file+name.txt", + "file (1).txt", + "file-name.txt", + "file.name.txt", + ], + ) + def test_special_characters_in_name_accepted(self, name: str) -> None: + """Backticks, plus signs, parentheses, hyphens, and periods are valid in file names.""" + df = pd.DataFrame( + { + "path": ["/a/" + name], + "name": [name], + "parentId": ["syn1"], + } + ) + _check_file_names(df) + + +class TestSortAndFixProvenance: + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + async def test_no_provenance_columns_returns_rows_unchanged( + self, tmp_path: Path + ) -> None: + """A manifest with no 'used' or 'executed' columns is returned as-is + (same rows, same order, 'path' column preserved).""" + f1 = tmp_path / "a.txt" + f2 = tmp_path / "b.txt" + f1.write_text("a") + f2.write_text("b") + df = pd.DataFrame({"path": [str(f1), str(f2)], "parentId": ["syn1", "syn1"]}) + + result = await _sort_and_fix_provenance(self.syn, df) + + assert list(result["path"]) == [str(f1), str(f2)] + + async def test_used_column_split_and_resolved(self, tmp_path: Path) -> None: + """Semicolon-delimited 'used' strings are split, resolved via _resolve_provenance_item, + and stored as lists on the returned DataFrame.""" + f = tmp_path / "file.txt" + f.write_text("content") + df = pd.DataFrame( + { + "path": [str(f)], + "parentId": ["syn1"], + "used": ["syn111;https://example.com"], + } + ) + + result = await _sort_and_fix_provenance(self.syn, df) + + assert result.loc[0, "used"] == ["syn111", "https://example.com"] + + async def test_executed_column_split_and_resolved(self, tmp_path: Path) -> None: + """Semicolon-delimited 'executed' strings are split and stored as lists.""" + f = tmp_path / "file.txt" + f.write_text("content") + df = pd.DataFrame( + { + "path": [str(f)], + "parentId": ["syn1"], + "executed": ["https://github.com/a;https://github.com/b"], + } + ) + + result = await _sort_and_fix_provenance(self.syn, df) + + assert result.loc[0, "executed"] == [ + "https://github.com/a", + "https://github.com/b", + ] + + async def test_empty_used_cell_produces_empty_list(self, tmp_path: Path) -> None: + """A 'used' cell that is empty or whitespace-only is resolved to an empty list.""" + f = tmp_path / "file.txt" + f.write_text("content") + df = pd.DataFrame({"path": [str(f)], "parentId": ["syn1"], "used": [""]}) + + result = await _sort_and_fix_provenance(self.syn, df) + + assert result.loc[0, "used"] == [] + + async def test_topological_sort_orders_dependency_before_dependent( + self, tmp_path: Path + ) -> None: + """When file B lists file A as 'used', the returned DataFrame places A + before B so it is uploaded first.""" + fa = tmp_path / "a.txt" + fb = tmp_path / "b.txt" + fa.write_text("a") + fb.write_text("b") + abs_a = str(fa.resolve()) + abs_b = str(fb.resolve()) + # b depends on a + df = pd.DataFrame( + { + "path": [abs_b, abs_a], + "parentId": ["syn1", "syn1"], + "used": [abs_a, ""], + } + ) + + result = await _sort_and_fix_provenance(self.syn, df) + + paths = list(result["path"]) + assert paths.index(abs_a) < paths.index(abs_b) + + async def test_used_cell_already_a_list_is_not_split(self, tmp_path: Path) -> None: + """A 'used' cell that was already converted to a Python list by + _parse_annotation_cell is handled without calling + .strip()/.split() — which would raise AttributeError on a list.""" + f = tmp_path / "file.txt" + f.write_text("content") + df = pd.DataFrame( + { + "path": [str(f)], + "parentId": ["syn1"], + "used": [["syn111", "https://example.com"]], + } + ) + + result = await _sort_and_fix_provenance(self.syn, df) + + assert result.loc[0, "used"] == ["syn111", "https://example.com"] + + async def test_used_cell_list_with_non_string_item_does_not_crash( + self, tmp_path: Path + ) -> None: + """Non-string items already in a 'used' list (e.g. a File object) are + passed to _resolve_provenance_item without calling .strip(), which would raise + AttributeError on a non-string.""" + from synapseclient.models.file import File + + f = tmp_path / "file.txt" + f.write_text("content") + existing_file = MagicMock(spec=File) + df = pd.DataFrame( + { + "path": [str(f)], + "parentId": ["syn1"], + "used": [[existing_file]], + } + ) + + with patch( + "synapseclient.models.services.manifest._resolve_provenance_item", + new=AsyncMock(return_value=existing_file), + ) as mock_check: + result = await _sort_and_fix_provenance(self.syn, df) + + # _resolve_provenance_item must be called with the File object, not item.strip() + mock_check.assert_awaited_once_with( + existing_file, + owner_path=str(f), + syn=self.syn, + df=mock_check.call_args.kwargs["df"], + ) + assert result.loc[0, "used"] == [existing_file] + + async def test_invalid_provenance_item_propagates_error( + self, tmp_path: Path + ) -> None: + """A provenance reference that is not a file path, URL, or Synapse ID + propagates SynapseProvenanceError from _resolve_provenance_item.""" + from synapseclient.core.exceptions import SynapseProvenanceError + + f = tmp_path / "file.txt" + f.write_text("content") + df = pd.DataFrame( + { + "path": [str(f)], + "parentId": ["syn1"], + "used": ["not_a_valid_reference"], + } + ) + + with pytest.raises(SynapseProvenanceError): + await _sort_and_fix_provenance(self.syn, df) + + +class TestLocalPathRefs: + def test_extracts_string_refs(self) -> None: + """String references (local paths) are extracted from resolved provenance.""" + file_mock = MagicMock() + resolved = { + "used": ["/a.txt", file_mock, "/b.txt"], + "executed": ["/c.txt"], + } + result = _local_path_refs(resolved) + assert result == ["/a.txt", "/b.txt", "/c.txt"] + + @pytest.mark.parametrize( + "resolved", + [ + pytest.param({}, id="empty_dict"), + pytest.param( + {"used": [MagicMock()], "executed": [MagicMock()]}, + id="no_string_refs", + ), + ], + ) + def test_returns_empty_list(self, resolved: dict) -> None: + """An empty dict or one with only non-string refs returns an empty list.""" + assert _local_path_refs(resolved) == [] + + +class TestCheckParentContainersAsync: + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + @pytest.mark.parametrize( + "container_cls", + [ + pytest.param("Project", id="project"), + pytest.param("Folder", id="folder"), + ], + ) + async def test_valid_container_passes(self, container_cls: str) -> None: + """A parent ID that resolves to a Project or Folder does not raise.""" + from synapseclient.models.folder import Folder + from synapseclient.models.project import Project + + cls = {"Project": Project, "Folder": Folder}[container_cls] + mock_container = MagicMock(spec=cls) + with patch( + "synapseclient.models.services.manifest.get_async", + new=AsyncMock(return_value=mock_container), + ): + await _check_parent_containers_async(["syn1"], syn=self.syn) + + async def test_non_container_raises(self) -> None: + """A parent ID that resolves to a non-container Synapse entity raises ValueError.""" + mock_entity = MagicMock() # not a Folder or Project + with patch( + "synapseclient.models.services.manifest.get_async", + new=AsyncMock(return_value=mock_entity), + ): + with pytest.raises(ValueError, match="not a Folder or Project"): + await _check_parent_containers_async(["syn1"], syn=self.syn) + + async def test_empty_parent_id_skipped(self) -> None: + """An empty parent ID string is skipped without calling the Synapse API.""" + with patch( + "synapseclient.models.services.manifest.get_async", + new=AsyncMock(), + ) as mock_get: + await _check_parent_containers_async([""], syn=self.syn) + mock_get.assert_not_called() + + async def test_nonexistent_parent_id_reraises_http_error(self) -> None: + """A parent ID that does not exist in Synapse re-raises the SynapseHTTPError.""" + from synapseclient.core.exceptions import SynapseHTTPError + + with patch( + "synapseclient.models.services.manifest.get_async", + new=AsyncMock(side_effect=SynapseHTTPError("Not found")), + ): + with pytest.raises(SynapseHTTPError): + await _check_parent_containers_async(["syn999"], syn=self.syn) + + +class TestBuildUploadItems: + def test_parent_id_mapped_to_parent_id(self) -> None: + """The parentId column is mapped to the entity's parent_id attribute.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt"], + "parentId": ["syn99"], + "ID": ["syn1"], + "name": ["file.txt"], + "synapseStore": [True], + "contentType": ["text/plain"], + "forceVersion": [True], + "used": [[]], + "executed": [[]], + "activityName": [""], + "activityDescription": [""], + } + ) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + assert len(items) == 1 + assert items[0].entity.parent_id == "syn99" + + def test_id_column_mapped_to_file_id(self) -> None: + """The ID column is mapped to the entity's id attribute.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt"], + "parentId": ["syn99"], + "ID": ["syn42"], + "name": ["file.txt"], + "synapseStore": [True], + "contentType": ["text/plain"], + "forceVersion": [True], + "used": [[]], + "executed": [[]], + "activityName": [""], + "activityDescription": [""], + } + ) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + assert items[0].entity.id == "syn42" + + def test_empty_id_becomes_none(self) -> None: + """An empty ID cell is normalised to None on the resulting entity.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt"], + "parentId": ["syn99"], + "ID": [""], + "name": ["file.txt"], + "synapseStore": [True], + "contentType": [""], + "forceVersion": [True], + "used": [[]], + "executed": [[]], + "activityName": [""], + "activityDescription": [""], + } + ) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + assert items[0].entity.id is None + + def test_used_executed_activity_fields_mapped(self) -> None: + """Provenance columns (used, executed, activityName, activityDescription) are mapped to the upload item.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt"], + "parentId": ["syn99"], + "name": ["file.txt"], + "synapseStore": [True], + "contentType": [""], + "forceVersion": [True], + "used": [["syn1"]], + "executed": [["https://github.com/example"]], + "activityName": ["my activity"], + "activityDescription": ["a description"], + } + ) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + assert items[0].used == ["syn1"] + assert items[0].executed == ["https://github.com/example"] + assert items[0].activity_name == "my activity" + assert items[0].activity_description == "a description" + + def test_empty_force_version_defaults_to_true(self) -> None: + """An empty string in the forceVersion column (from fillna('')) defaults to True.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt"], + "parentId": ["syn99"], + "forceVersion": [""], + } + ) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + assert items[0].entity.force_version is True + + @pytest.mark.parametrize( + "raw, expected", + [ + ("True", True), + ("False", False), + ("true", True), + ("false", False), + ], + ) + def test_force_version_string_converted_to_bool( + self, raw: str, expected: bool + ) -> None: + """String values like 'True'/'False' in forceVersion are converted to bool.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt"], + "parentId": ["syn99"], + "forceVersion": [raw], + } + ) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + assert items[0].entity.force_version is expected + + def test_force_version_bool_false_preserved(self) -> None: + """An explicit bool False for forceVersion is preserved, not overridden to True.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt"], + "parentId": ["syn99"], + "forceVersion": [False], + } + ) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + assert items[0].entity.force_version is False + + @pytest.mark.parametrize( + "column, item_attr", + [ + ("contentType", "entity.content_type"), + ("activityName", "activity_name"), + ("activityDescription", "activity_description"), + ], + ) + def test_empty_optional_field_becomes_none( + self, column: str, item_attr: str + ) -> None: + """An empty string in an optional column is coerced to None on the resulting item.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt"], + "parentId": ["syn99"], + column: [""], + } + ) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + obj = items[0] + for part in item_attr.split("."): + obj = getattr(obj, part) + assert obj is None + + def test_missing_used_and_executed_columns_default_to_empty_list(self) -> None: + """When 'used' and 'executed' columns are absent, both default to empty lists.""" + df = pd.DataFrame({"path": ["/a/file.txt"], "parentId": ["syn99"]}) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + assert items[0].used == [] + assert items[0].executed == [] + + def test_annotation_columns_excluded_from_non_annotation_set(self) -> None: + """Extra columns become file annotations; standard manifest columns are not included as annotations.""" + df = pd.DataFrame( + { + "path": ["/a/file.txt"], + "parentId": ["syn99"], + "name": ["file.txt"], + "synapseStore": [True], + "contentType": [""], + "forceVersion": [True], + "used": [[]], + "executed": [[]], + "activityName": [""], + "activityDescription": [""], + "my_annotation": ["hello"], + } + ) + items = _build_upload_files( + df, + merge_existing_annotations=True, + associate_activity_to_new_version=False, + ) + assert "my_annotation" in items[0].entity.annotations + # Standard columns must not be passed as annotations + for col in NON_ANNOTATION_COLUMNS: + assert col not in items[0].entity.annotations + + +class TestParseForceVersion: + @pytest.mark.parametrize("blank", ["", None]) + def test_blank_defaults_to_true(self, blank: str | None) -> None: + """Missing or empty forceVersion defaults to True.""" + assert _parse_force_version(blank) is True + + @pytest.mark.parametrize("value", [True, False]) + def test_bool_passthrough(self, value: bool) -> None: + """A bool value is returned as-is.""" + assert _parse_force_version(value) is value + + @pytest.mark.parametrize("raw, expected", [("True", True), ("False", False)]) + def test_string_parsed(self, raw: str, expected: bool) -> None: + """String booleans are parsed case-insensitively.""" + assert _parse_force_version(raw) is expected + + @pytest.mark.parametrize("raw", ["yes", "1", "garbage"]) + def test_unparseable_defaults_to_true(self, raw: str) -> None: + """Unrecognizable values fall back to True.""" + assert _parse_force_version(raw) is True + + +class TestBuildAnnotationsForFile: + @pytest.mark.parametrize("empty_value", ["", None]) + def test_empty_and_none_values_omitted(self, empty_value: str | None) -> None: + """Empty strings and None values are silently dropped from the annotations dict.""" + result = _build_annotations_for_file({"key": empty_value}) + assert "key" not in result + + @pytest.mark.parametrize( + "input_dict, key, expected", + [ + ({"score": "42"}, "score", [42]), + ({"count": 7}, "count", [7]), + ], + ids=["string_value_converted", "non_string_wrapped_in_list"], + ) + def test_value_converted_to_list( + self, input_dict: dict, key: str, expected: list + ) -> None: + """String values are parsed via _parse_annotation_cell and non-string values + are wrapped in a single-element list.""" + result = _build_annotations_for_file(input_dict) + assert result[key] == expected + + def test_multiple_keys_mixed(self) -> None: + """Empty and None values are dropped while valid string and non-string values + are retained and converted correctly.""" + result = _build_annotations_for_file( + {"keep": "hello", "drop": "", "also_drop": None, "num": "3.14"} + ) + assert result == {"keep": ["hello"], "num": [3.14]} + + def test_empty_dict_returns_empty(self) -> None: + """An empty annotations dict returns an empty dict.""" + assert _build_annotations_for_file({}) == {} + + +class TestConvertCellInManifestToPythonTypes: + @pytest.mark.parametrize( + "cell, expected", + [ + ("hello", ["hello"]), + ("42", [42]), + ("3.14", [3.14]), + ("true", [True]), + ("false", [False]), + ("[a, b, c]", ["a", "b", "c"]), + ("[1, 2, 3]", [1, 2, 3]), + ('["foo bar", "baz"]', ["foo bar", "baz"]), + ("[hello]", ["hello"]), + (" 42 ", [42]), + ("[]", []), + ("[a, , c]", ["a", "c"]), + ], + ) + def test_scalar_and_array_conversions(self, cell: str, expected: Any) -> None: + """Strings, numbers, bools, and bracket-delimited arrays are always returned + as lists. Single-value cells produce a one-element list.""" + assert _parse_annotation_cell(cell) == expected + + def test_datetime_string(self) -> None: + """An ISO-8601 datetime string is converted to a one-element list containing + a datetime.datetime object.""" + import datetime + + result = _parse_annotation_cell("1970-01-01T00:00:00.000Z") + assert isinstance(result, list) + assert isinstance(result[0], datetime.datetime) + + +class TestConvertValue: + @pytest.mark.parametrize( + "value, expected", + [ + ("42", 42), + ("-7", -7), + ("3.14", 3.14), + ("-0.5", -0.5), + ], + ) + def test_numeric_strings_converted(self, value: str, expected: int | float) -> None: + """Numeric strings are converted to int or float.""" + assert _convert_value(value) == expected + assert type(_convert_value(value)) is type(expected) + + @pytest.mark.parametrize( + "value, expected", + [ + ("true", True), + ("false", False), + ("True", True), + ("False", False), + ("TRUE", True), + ("FALSE", False), + ], + ) + def test_bool_strings_converted_case_insensitively( + self, value: str, expected: bool + ) -> None: + """Bool strings are converted case-insensitively and returned as bool, not int.""" + result = _convert_value(value) + assert result is expected + assert type(result) is bool + + def test_datetime_string_converted(self) -> None: + """ISO date strings are converted to datetime.datetime.""" + import datetime + + result = _convert_value("2024-01-15") + assert isinstance(result, datetime.datetime) + + def test_datetime_wins_over_numeric(self) -> None: + """A string like '2024-01-01' is parsed as datetime, not as a subtraction + expression (which ast.literal_eval cannot parse anyway, but datetime must + run first to ensure correct priority).""" + import datetime + + result = _convert_value("2024-01-01") + assert isinstance(result, datetime.datetime) + + def test_plain_string_returned_unchanged(self) -> None: + """Plain unquoted strings that don't match any type are returned as-is.""" + assert _convert_value("hello") == "hello" + assert type(_convert_value("hello")) is str + + def test_quoted_string_literal_unquoted(self) -> None: + """A token that is a quoted Python string literal is unquoted by ast.literal_eval.""" + assert _convert_value('"foo bar"') == "foo bar" + + def test_bool_not_returned_as_int(self) -> None: + """'True' must not come back as the integer 1 — bool_or_none runs before + ast.literal_eval to prevent bool being treated as a subclass of int.""" + result = _convert_value("True") + assert result is True + assert type(result) is bool + assert result != 1 or type(result) is not int + + +class TestParseLiteral: + @pytest.mark.parametrize( + "value, expected", + [ + ("42", 42), + ("-7", -7), + ("3.14", 3.14), + ("-0.5", -0.5), + ('"hello"', "hello"), + ('"foo bar"', "foo bar"), + ], + ) + def test_valid_scalars_are_returned( + self, value: str, expected: int | float | str + ) -> None: + """Valid int, float, and quoted-string literals are parsed and returned.""" + assert _parse_literal(value) == expected + + @pytest.mark.parametrize( + "value", + [ + "hello", + "foo bar", + "", + " ", + ], + ) + def test_plain_strings_return_none(self, value: str) -> None: + """Plain unquoted strings are not Python literals and return None.""" + assert _parse_literal(value) is None + + @pytest.mark.parametrize( + "value", + [ + "True", + "False", + "true", + "false", + ], + ) + def test_bool_strings_return_none(self, value: str) -> None: + """Bool literals return None so that bool_or_none handles them instead, + ensuring consistent case-insensitive parsing.""" + assert _parse_literal(value) is None + + @pytest.mark.parametrize( + "value", + [ + "(1, 2)", + "[1, 2]", + "{'a': 1}", + ], + ) + def test_complex_literals_return_none(self, value: str) -> None: + """Tuples, lists, and dicts are not valid Synapse annotation types and return None.""" + assert _parse_literal(value) is None + + +def _make_file_mock(path: str, file_id: str = "syn100") -> MagicMock: + """Create a MagicMock that behaves like a File entity for upload tests.""" + mock = MagicMock() + mock.path = path + mock.id = file_id + mock.store_async = AsyncMock(return_value=mock) + return mock + + +def _make_item( + path: str, + file_id: str = "syn100", + used: list | None = None, + executed: list | None = None, + activity_name: str | None = None, + activity_description: str | None = None, +) -> UploadSyncFile: + """Create a UploadSyncFile backed by a mock File entity.""" + return UploadSyncFile( + entity=_make_file_mock(path, file_id), + used=used or [], + executed=executed or [], + activity_name=activity_name, + activity_description=activity_description, + ) + + +class TestSyncToSynapseAsync: + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + async def test_dry_run_skips_upload(self, tmp_path: Path) -> None: + """With dry_run=True, the manifest is read and validated but upload is never called, returning [].""" + f = tmp_path / "file.txt" + f.write_text("hi") + csv = f"path,parentId\n{f},syn1\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + + from synapseclient.models import Project + + project = Project(id="syn123", name="test") + project._last_persistent_instance = project + + mock_items = [MagicMock()] + with ( + patch( + "synapseclient.models.mixins.storable_container.read_manifest_for_upload", + new=AsyncMock(return_value=(mock_items, 100)), + ) as mock_read, + patch( + "synapseclient.models.mixins.storable_container.upload_sync_files" + ) as mock_upload, + ): + result = await project.sync_to_synapse_async( + manifest_path=str(manifest), + dry_run=True, + synapse_client=self.syn, + ) + mock_read.assert_awaited_once() + mock_upload.assert_not_called() + assert result == [] + + async def test_upload_called_with_items(self, tmp_path: Path) -> None: + """With valid items, the uploader is called and the returned File list is passed back to the caller.""" + f = tmp_path / "file.txt" + f.write_text("hi") + csv = f"path,parentId\n{f},syn1\n" + manifest = tmp_path / "manifest.csv" + manifest.write_text(csv) + + from synapseclient.models import Project + + project = Project(id="syn123", name="test") + project._last_persistent_instance = project + + mock_items = [MagicMock()] + mock_uploaded = [MagicMock()] + + with ( + patch( + "synapseclient.models.mixins.storable_container.read_manifest_for_upload", + new=AsyncMock(return_value=(mock_items, 100)), + ), + patch( + "synapseclient.models.mixins.storable_container.upload_sync_files", + new=AsyncMock(return_value=mock_uploaded), + ) as mock_upload, + ): + result = await project.sync_to_synapse_async( + manifest_path=str(manifest), + dry_run=False, + send_messages=False, + synapse_client=self.syn, + ) + mock_upload.assert_awaited_once_with(mock_items, syn=self.syn) + assert result is mock_uploaded + + async def test_empty_items_skips_upload(self, tmp_path: Path) -> None: + """When read_manifest_for_upload returns no items, the uploader is not called and [] is returned.""" + f = tmp_path / "manifest.csv" + f.write_text("path,parentId,error\n/x.txt,syn1,fail\n") + + from synapseclient.models import Project + + project = Project(id="syn123", name="test") + project._last_persistent_instance = project + + with ( + patch( + "synapseclient.models.mixins.storable_container.read_manifest_for_upload", + new=AsyncMock(return_value=([], 0)), + ), + patch( + "synapseclient.models.mixins.storable_container.upload_sync_files" + ) as mock_upload, + ): + result = await project.sync_to_synapse_async( + manifest_path=str(f), + synapse_client=self.syn, + ) + mock_upload.assert_not_called() + assert result == [] + + async def test_merge_existing_annotations_passed_through( + self, tmp_path: Path + ) -> None: + """The merge_existing_annotations flag is forwarded to read_manifest_for_upload.""" + f = tmp_path / "manifest.csv" + f.write_text("") + + from synapseclient.models import Project + + project = Project(id="syn123", name="test") + project._last_persistent_instance = project + + mock_read = AsyncMock(return_value=([], 0)) + with patch( + "synapseclient.models.mixins.storable_container.read_manifest_for_upload", + new=mock_read, + ): + await project.sync_to_synapse_async( + manifest_path=str(f), + merge_existing_annotations=False, + synapse_client=self.syn, + ) + _, kwargs = mock_read.call_args + assert kwargs["merge_existing_annotations"] is False + + async def test_associate_activity_to_new_version_passed_through( + self, tmp_path + ) -> None: + """The associate_activity_to_new_version flag is forwarded to read_manifest_for_upload.""" + f = tmp_path / "manifest.csv" + f.write_text("") + + from synapseclient.models import Project + + project = Project(id="syn123", name="test") + project._last_persistent_instance = project + + mock_read = AsyncMock(return_value=([], 0)) + with patch( + "synapseclient.models.mixins.storable_container.read_manifest_for_upload", + new=mock_read, + ): + await project.sync_to_synapse_async( + manifest_path=str(f), + associate_activity_to_new_version=True, + synapse_client=self.syn, + ) + _, kwargs = mock_read.call_args + assert kwargs["associate_activity_to_new_version"] is True + + +class TestUploadSyncFiles: + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + async def test_single_item_no_provenance(self) -> None: + """A single item with no dependencies is uploaded and returned.""" + item = _make_item("/a.txt", file_id="syn1") + + results = await upload_sync_files([item], syn=self.syn) + + assert len(results) == 1 + item.entity.store_async.assert_awaited_once() + + async def test_multiple_independent_items(self) -> None: + """Multiple items with no inter-dependencies are all uploaded.""" + items = [ + _make_item("/a.txt", file_id="syn1"), + _make_item("/b.txt", file_id="syn2"), + _make_item("/c.txt", file_id="syn3"), + ] + + results = await upload_sync_files(items, syn=self.syn) + + assert len(results) == 3 + for item in items: + item.entity.store_async.assert_awaited_once() + + async def test_empty_items(self) -> None: + """An empty item list produces an empty result.""" + results = await upload_sync_files([], syn=self.syn) + assert results == [] + + async def test_dependent_items_uploaded_in_order(self, tmp_path: Path) -> None: + """When item B depends on item A, A is stored before B.""" + f_dep = tmp_path / "dep.txt" + f_dep.write_text("dep") + f_main = tmp_path / "main.txt" + f_main.write_text("main") + + call_order = [] + + dep_mock = _make_file_mock(str(f_dep), "syn_dep") + + async def dep_store(**kwargs): + call_order.append("dep") + return dep_mock + + dep_mock.store_async = AsyncMock(side_effect=dep_store) + + main_mock = _make_file_mock(str(f_main), "syn_main") + + async def main_store(**kwargs): + call_order.append("main") + return main_mock + + main_mock.store_async = AsyncMock(side_effect=main_store) + + dep_item = UploadSyncFile( + entity=dep_mock, + used=[], + executed=[], + activity_name=None, + activity_description=None, + ) + main_item = UploadSyncFile( + entity=main_mock, + used=[str(f_dep)], + executed=[], + activity_name="uses dep", + activity_description=None, + ) + + results = await upload_sync_files([dep_item, main_item], syn=self.syn) + + assert len(results) == 2 + assert call_order.index("dep") < call_order.index("main") + + +class TestBuildDependencyGraph: + def test_no_provenance(self) -> None: + """Items without provenance produce a graph with no dependencies.""" + item = _make_item("/a.txt") + graph = _build_upload_plan([item]) + + assert graph.path_to_dependencies == {"/a.txt": []} + assert graph.path_to_upload_item["/a.txt"] is item + + def test_file_dependency_between_items(self, tmp_path: Path) -> None: + """A file dependency between two items is captured in the graph.""" + f1 = tmp_path / "dep.txt" + f1.write_text("dep") + f2 = tmp_path / "main.txt" + f2.write_text("main") + + dep_item = _make_item(str(f1), file_id="syn1") + main_item = _make_item(str(f2), file_id="syn2", used=[str(f1)]) + + graph = _build_upload_plan([dep_item, main_item]) + + assert graph.path_to_dependencies[str(f2)] == [str(f1)] + assert graph.path_to_dependencies[str(f1)] == [] + + @pytest.mark.parametrize( + "used, executed, description", + [ + ([_make_file_mock("/resolved.txt", "syn999")], [], "File object in used"), + (["https://example.com/code.py"], [], "URL in used"), + ([], ["syn12345"], "Synapse ID in executed"), + ], + ids=["file_object", "url", "synapse_id"], + ) + def test_non_file_provenance_not_a_dependency( + self, used: list, executed: list, description: str + ) -> None: + """Non-local-file provenance references (File objects, URLs, Synapse IDs) + are not treated as dependencies in the graph.""" + item = _make_item("/a.txt", used=used, executed=executed) + + graph = _build_upload_plan([item]) + + assert graph.path_to_dependencies["/a.txt"] == [] + + def test_missing_file_dependency_raises(self, tmp_path: Path) -> None: + """If an item depends on a local file not in the upload batch, ValueError is raised.""" + dep = tmp_path / "dep.txt" + dep.write_text("dep") + item = _make_item("/main.txt", used=[str(dep)]) + + with pytest.raises(ValueError, match="depends on"): + _build_upload_plan([item]) + + def test_cached_file_check(self, tmp_path: Path) -> None: + """The file-check cache avoids redundant os.path.isfile calls.""" + f1 = tmp_path / "dep.txt" + f1.write_text("dep") + + dep_item = _make_item(str(f1), file_id="syn1") + item_a = _make_item(str(tmp_path / "a.txt"), file_id="syn2", used=[str(f1)]) + # Give a.txt a real file so the item_a entity.path resolves + (tmp_path / "a.txt").write_text("a") + item_b = _make_item(str(tmp_path / "b.txt"), file_id="syn3", used=[str(f1)]) + (tmp_path / "b.txt").write_text("b") + + graph = _build_upload_plan([dep_item, item_a, item_b]) + # dep.txt should appear in the file-check cache (checked once, then reused) + assert str(f1) in graph.path_to_file_check + assert graph.path_to_file_check[str(f1)] is True + + +class TestCreateUploadTasks: + async def test_creates_tasks_for_each_item(self) -> None: + """One asyncio task is created per item in the upload plan.""" + from synapseclient.models.services.manifest import _UploadPlan + + item_a = _make_item("/a.txt", file_id="syn1") + item_b = _make_item("/b.txt", file_id="syn2") + + plan = _UploadPlan( + path_to_dependencies={"/a.txt": [], "/b.txt": []}, + path_to_upload_item={"/a.txt": item_a, "/b.txt": item_b}, + path_to_file_check={}, + ) + mock_syn = MagicMock(spec=Synapse) + + tasks = _create_upload_tasks(plan, mock_syn) + assert len(tasks) == 2 + # Clean up tasks to avoid warnings + for t in tasks: + t.cancel() + await asyncio.gather(*tasks, return_exceptions=True) + + async def test_dependent_task_receives_prerequisite(self, tmp_path: Path) -> None: + """A dependent item's task receives its prerequisite task.""" + from synapseclient.models.services.manifest import _UploadPlan + + f_dep = tmp_path / "dep.txt" + f_dep.write_text("dep") + f_main = tmp_path / "main.txt" + f_main.write_text("main") + + dep_item = _make_item(str(f_dep), file_id="syn1") + main_item = _make_item(str(f_main), file_id="syn2", used=[str(f_dep)]) + + plan = _UploadPlan( + path_to_dependencies={str(f_dep): [], str(f_main): [str(f_dep)]}, + path_to_upload_item={str(f_dep): dep_item, str(f_main): main_item}, + path_to_file_check={str(f_dep): True}, + ) + mock_syn = MagicMock(spec=Synapse) + + tasks = _create_upload_tasks(plan, mock_syn) + assert len(tasks) == 2 + # Clean up tasks to avoid warnings + for t in tasks: + t.cancel() + await asyncio.gather(*tasks, return_exceptions=True) + + async def test_empty_plan_returns_empty(self) -> None: + """An empty upload plan produces no tasks.""" + from synapseclient.models.services.manifest import _UploadPlan + + plan = _UploadPlan( + path_to_dependencies={}, + path_to_upload_item={}, + path_to_file_check={}, + ) + mock_syn = MagicMock(spec=Synapse) + + tasks = _create_upload_tasks(plan, mock_syn) + assert tasks == [] + + +class TestBuildActivityLinkage: + def test_resolved_file_id(self) -> None: + """A local path that was resolved to a Synapse ID uses UsedEntity.""" + from synapseclient.models import UsedEntity + + result = _build_activity_linkage( + used_or_executed=["/dep.txt"], + resolved_file_ids={"/dep.txt": "syn999"}, + ) + assert len(result) == 1 + assert isinstance(result[0], UsedEntity) + assert result[0].target_id == "syn999" + + def test_url_uses_used_url(self) -> None: + """A URL provenance item becomes a UsedURL.""" + from synapseclient.models import UsedURL + + result = _build_activity_linkage( + used_or_executed=["https://github.com/repo"], + resolved_file_ids={}, + ) + assert len(result) == 1 + assert isinstance(result[0], UsedURL) + assert result[0].url == "https://github.com/repo" + + def test_synapse_id(self) -> None: + """A bare Synapse ID becomes a UsedEntity with the correct target_id.""" + from synapseclient.models import UsedEntity + + result = _build_activity_linkage( + used_or_executed=["syn12345"], + resolved_file_ids={}, + ) + assert len(result) == 1 + assert isinstance(result[0], UsedEntity) + assert result[0].target_id == "syn12345" + + def test_synapse_id_with_version(self) -> None: + """A Synapse ID with a version suffix is parsed correctly.""" + from synapseclient.models import UsedEntity + + result = _build_activity_linkage( + used_or_executed=["syn12345.3"], + resolved_file_ids={}, + ) + assert len(result) == 1 + assert isinstance(result[0], UsedEntity) + assert result[0].target_id == "syn12345" + assert result[0].target_version_number == 3 + + def test_file_object_uses_its_id(self) -> None: + """A File object in the list is converted to a UsedEntity using its .id.""" + from synapseclient.models import UsedEntity + + file_obj = _make_file_mock("/resolved.txt", "syn777") + result = _build_activity_linkage( + used_or_executed=[file_obj], + resolved_file_ids={}, + ) + assert len(result) == 1 + assert isinstance(result[0], UsedEntity) + assert result[0].target_id == "syn777" + + def test_invalid_string_raises(self) -> None: + """A string that is not a URL, Synapse ID, or resolved path raises ValueError.""" + with pytest.raises(ValueError, match="not a valid Synapse id"): + _build_activity_linkage( + used_or_executed=["not-a-valid-reference"], + resolved_file_ids={}, + ) + + def test_empty_list(self) -> None: + """An empty list returns an empty list.""" + result = _build_activity_linkage( + used_or_executed=[], + resolved_file_ids={}, + ) + assert result == [] + + def test_mixed_provenance_types(self) -> None: + """Different provenance types in a single list are all handled correctly.""" + from synapseclient.models import UsedEntity, UsedURL + + file_obj = _make_file_mock("/already.txt", "syn555") + result = _build_activity_linkage( + used_or_executed=[ + "/local.txt", + "https://example.com", + "syn123", + file_obj, + ], + resolved_file_ids={"/local.txt": "syn444"}, + ) + assert len(result) == 4 + assert isinstance(result[0], UsedEntity) + assert result[0].target_id == "syn444" + assert isinstance(result[1], UsedURL) + assert isinstance(result[2], UsedEntity) + assert result[2].target_id == "syn123" + assert isinstance(result[3], UsedEntity) + assert result[3].target_id == "syn555" + + +class TestUploadItemAsync: + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + async def test_no_provenance_no_activity(self) -> None: + """When used and executed are empty, no Activity is set on the file.""" + mock_file = _make_file_mock("/a.txt", "syn1") + # Remove the auto-created attribute so we can detect if it gets set + del mock_file.activity + + result = await _upload_file_async( + file_entity=mock_file, + used=[], + executed=[], + activity_name=None, + activity_description=None, + prerequisite_tasks=[], + syn=self.syn, + ) + + assert result is mock_file + mock_file.store_async.assert_awaited_once_with(synapse_client=self.syn) + assert not hasattr(mock_file, "activity") + + @pytest.mark.parametrize( + "used, executed, expected_used_count, expected_executed_count", + [ + (["syn999"], [], 1, 0), + ([], ["https://github.com/code.py"], 0, 1), + (["syn999"], ["https://github.com/code.py"], 1, 1), + ], + ids=["used_only", "executed_only", "both"], + ) + async def test_provenance_sets_activity( + self, + used: list, + executed: list, + expected_used_count: int, + expected_executed_count: int, + ) -> None: + """When used and/or executed are provided, an Activity is attached before store.""" + from synapseclient.models import Activity + + mock_file = _make_file_mock("/a.txt", "syn1") + del mock_file.activity + + await _upload_file_async( + file_entity=mock_file, + used=used, + executed=executed, + activity_name="my activity", + activity_description="my description", + prerequisite_tasks=[], + syn=self.syn, + ) + + assert isinstance(mock_file.activity, Activity) + assert mock_file.activity.name == "my activity" + assert mock_file.activity.description == "my description" + assert len(mock_file.activity.used) == expected_used_count + assert len(mock_file.activity.executed) == expected_executed_count + + async def test_prerequisite_tasks_resolved(self) -> None: + """Prerequisite futures are awaited and their results populate resolved_file_ids.""" + dep_file = _make_file_mock("/dep.txt", "syn_dep") + + # Create a real future that resolves to dep_file + dep_future = asyncio.get_running_loop().create_future() + dep_future.set_result(dep_file) + + mock_file = _make_file_mock("/main.txt", "syn_main") + del mock_file.activity + + await _upload_file_async( + file_entity=mock_file, + used=["/dep.txt"], + executed=[], + activity_name="test", + activity_description=None, + prerequisite_tasks=[dep_future], + syn=self.syn, + ) + + # The dependency's path should have been resolved to its Synapse ID + assert mock_file.activity.used[0].target_id == "syn_dep" + + +class TestSplitCsvCell: + @pytest.mark.parametrize( + "cell, expected", + [ + ("hello", ["hello"]), + ("a, b, c", ["a", "b", "c"]), + ('"foo, bar", baz', ['"foo, bar"', "baz"]), + (" a , b ", ["a", "b"]), + ("", [""]), + ("a, b,", ["a", "b", ""]), + ], + ids=[ + "single_value", + "multiple_values", + "quoted_commas_preserved", + "whitespace_stripped", + "empty_string", + "trailing_comma", + ], + ) + def test_split_csv_cell(self, cell: str, expected: list[str]) -> None: + """CSV cell strings are split correctly, respecting quotes and whitespace.""" + assert _split_csv_cell(cell) == expected + + +class TestResolveRow: + async def test_resolves_used_and_executed(self) -> None: + """Both used and executed columns are resolved for a row.""" + df = pd.DataFrame( + {"path": ["/a.txt"], "used": ["syn123"], "executed": ["syn456"]} + ).set_index("path") + row = df.iloc[0] + mock_syn = MagicMock(spec=Synapse) + + with patch( + "synapseclient.models.services.manifest._resolve_provenance_column", + new_callable=AsyncMock, + side_effect=[["resolved_used"], ["resolved_exec"]], + ): + path, resolved = await _resolve_row("/a.txt", row, df, mock_syn) + + assert path == "/a.txt" + assert "used" in resolved + assert "executed" in resolved + + async def test_missing_columns_skipped(self) -> None: + """Columns not present in the row are not included in the result.""" + df = pd.DataFrame({"path": ["/a.txt"]}).set_index("path") + row = df.iloc[0] + mock_syn = MagicMock(spec=Synapse) + + path, resolved = await _resolve_row("/a.txt", row, df, mock_syn) + + assert path == "/a.txt" + assert resolved == {} + + +class TestResolveProvenanceColumn: + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + def _make_df(self, paths: list[str]) -> pd.DataFrame: + """Return a path-indexed DataFrame with the given paths as the index.""" + return pd.DataFrame(index=paths) + + @pytest.mark.parametrize("cell", ["", " "]) + async def test_empty_or_whitespace_string_returns_empty_list( + self, cell: str + ) -> None: + """An empty or whitespace-only string cell returns [] with no provenance calls.""" + df = self._make_df([]) + result = await _resolve_provenance_column(cell, "/file.txt", self.syn, df) + assert result == [] + + async def test_single_synapse_id_string_resolved(self) -> None: + """A single Synapse ID string is resolved and returned as a one-element list.""" + df = self._make_df([]) + result = await _resolve_provenance_column("syn123", "/file.txt", self.syn, df) + assert result == ["syn123"] + + async def test_semicolon_delimited_string_split_and_resolved(self) -> None: + """A semicolon-delimited string is split into individual items, each resolved.""" + df = self._make_df([]) + result = await _resolve_provenance_column( + "syn111 ; https://example.com", "/file.txt", self.syn, df + ) + assert result == ["syn111", "https://example.com"] + + async def test_already_a_list_passed_through_without_splitting(self) -> None: + """A cell that is already a Python list is not split — items are resolved directly.""" + df = self._make_df([]) + result = await _resolve_provenance_column( + ["syn111", "https://example.com"], "/file.txt", self.syn, df + ) + assert result == ["syn111", "https://example.com"] + + async def test_non_string_list_item_passed_without_strip(self) -> None: + """Non-string items in an already-parsed list are forwarded to _resolve_provenance_item + without calling .strip(), which would raise AttributeError.""" + from synapseclient.models.file import File + + existing_file = MagicMock(spec=File) + df = self._make_df([]) + with patch( + "synapseclient.models.services.manifest._resolve_provenance_item", + new=AsyncMock(return_value=existing_file), + ) as mock_check: + result = await _resolve_provenance_column( + [existing_file], "/file.txt", self.syn, df + ) + mock_check.assert_awaited_once_with( + existing_file, owner_path="/file.txt", syn=self.syn, df=df + ) + assert result == [existing_file] + + +class TestCheckProvenance: + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + def _make_df(self, paths: list[str]) -> pd.DataFrame: + """Return a path-indexed DataFrame with the given paths as the index.""" + return pd.DataFrame(index=paths) + + @pytest.mark.parametrize( + "item", + [ + None, + "https://github.com/example/repo", + "syn123456", + ], + ) + async def test_passthrough_items(self, item: str | None) -> None: + """None, URLs, and Synapse IDs are returned unchanged without any lookup.""" + df = self._make_df([]) + result = await _resolve_provenance_item(item, "/some/file.txt", self.syn, df) + assert result == item + + async def test_local_file_in_upload_batch_returned_as_path( + self, tmp_path: Path + ) -> None: + """A local file that is part of the current upload batch is returned as its + resolved absolute path so the topological sort can order it correctly.""" + f = tmp_path / "dep.txt" + f.write_text("content") + abs_path = str(f.resolve()) + df = self._make_df([abs_path]) + result = await _resolve_provenance_item(str(f), "/some/file.txt", self.syn, df) + assert result == abs_path + + async def test_local_file_not_in_batch_found_in_synapse( + self, tmp_path: Path + ) -> None: + """A local file that is not in the upload batch but exists in Synapse is + resolved to a File model object via MD5 lookup.""" + from synapseclient.models.file import File + + f = tmp_path / "existing.txt" + f.write_text("content") + synapse_file = MagicMock(spec=File) + df = self._make_df([]) # file not in upload batch + with patch( + "synapseclient.models.file.File.from_path_async", + new=AsyncMock(return_value=synapse_file), + ): + result = await _resolve_provenance_item( + str(f), "/some/file.txt", self.syn, df + ) + assert result is synapse_file + + async def test_local_file_not_in_batch_not_in_synapse_raises( + self, tmp_path: Path + ) -> None: + """A local file that is neither in the upload batch nor found in Synapse + raises SynapseProvenanceError — it cannot be used as a provenance reference.""" + from synapseclient.core.exceptions import ( + SynapseFileNotFoundError, + SynapseProvenanceError, + ) + + f = tmp_path / "orphan.txt" + f.write_text("content") + df = self._make_df([]) + with patch( + "synapseclient.models.file.File.from_path_async", + new=AsyncMock(side_effect=SynapseFileNotFoundError("not found")), + ): + with pytest.raises( + SynapseProvenanceError, match="not being uploaded and is not in Synapse" + ): + await _resolve_provenance_item(str(f), "/some/file.txt", self.syn, df) + + async def test_invalid_item_raises(self) -> None: + """A string that is not a local file path, URL, or Synapse ID raises + SynapseProvenanceError.""" + from synapseclient.core.exceptions import SynapseProvenanceError + + df = self._make_df([]) + with pytest.raises(SynapseProvenanceError): + await _resolve_provenance_item( + "not_a_url_or_synapse_id", "/some/file.txt", self.syn, df + ) + + +class TestResolveLocalFileProvenance: + async def test_file_in_batch_returns_absolute_path(self, tmp_path: Path) -> None: + """A file that exists on disk and is in the upload batch returns its path.""" + f = tmp_path / "data.txt" + f.write_text("content") + manifest_df = pd.DataFrame({"col": ["val"]}, index=[str(f)]) + mock_syn = MagicMock(spec=Synapse) + + result = await _resolve_local_file_provenance( + str(f), "/owner.txt", mock_syn, manifest_df + ) + assert result == str(f) + + async def test_file_not_on_disk_raises(self, tmp_path: Path) -> None: + """A file that does not exist on disk raises SynapseProvenanceError.""" + manifest_df = pd.DataFrame({"col": ["val"]}, index=["/other.txt"]) + mock_syn = MagicMock(spec=Synapse) + + with pytest.raises(SynapseProvenanceError, match="not an existing file"): + await _resolve_local_file_provenance( + str(tmp_path / "missing.txt"), "/owner.txt", mock_syn, manifest_df + ) + + async def test_file_not_in_batch_found_in_synapse(self, tmp_path: Path) -> None: + """A file on disk but not in the batch is looked up in Synapse by MD5.""" + f = tmp_path / "external.txt" + f.write_text("content") + manifest_df = pd.DataFrame({"col": ["val"]}, index=["/other.txt"]) + mock_syn = MagicMock(spec=Synapse) + mock_file = MagicMock() + + with patch( + "synapseclient.models.file.File.from_path_async", + new_callable=AsyncMock, + return_value=mock_file, + ): + result = await _resolve_local_file_provenance( + str(f), "/owner.txt", mock_syn, manifest_df + ) + + assert result is mock_file + + async def test_file_not_in_batch_not_in_synapse_raises( + self, tmp_path: Path + ) -> None: + """A file on disk, not in the batch, and not in Synapse raises.""" + from synapseclient.core.exceptions import SynapseFileNotFoundError + + f = tmp_path / "orphan.txt" + f.write_text("content") + manifest_df = pd.DataFrame({"col": ["val"]}, index=["/other.txt"]) + mock_syn = MagicMock(spec=Synapse) + + with patch( + "synapseclient.models.file.File.from_path_async", + new_callable=AsyncMock, + side_effect=SynapseFileNotFoundError("not found"), + ): + with pytest.raises(SynapseProvenanceError, match="not being uploaded"): + await _resolve_local_file_provenance( + str(f), "/owner.txt", mock_syn, manifest_df + ) diff --git a/tests/unit/synapseclient/models/async/unit_test_project_async.py b/tests/unit/synapseclient/models/async/unit_test_project_async.py index c7e41fe80..ec3fa5e43 100644 --- a/tests/unit/synapseclient/models/async/unit_test_project_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_project_async.py @@ -1,7 +1,8 @@ """Tests for the synapseclient.models.Project class.""" + import uuid from typing import Dict -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -11,6 +12,8 @@ from synapseclient.core.constants.concrete_types import FILE_ENTITY from synapseclient.core.exceptions import SynapseNotFoundError from synapseclient.models import FailureStrategy, File, Project +from synapseclient.models.project_setting import ProjectSetting +from synapseclient.models.services.migration_types import MigrationResult PROJECT_ID = "syn123" DERSCRIPTION_PROJECT = "This is an example project." @@ -88,22 +91,25 @@ async def test_store_with_id(self) -> None: project.description = description # WHEN I call `store` with the Project object - with patch( - "synapseclient.models.services.storable_entity.put_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_project_output()), - ) as mocked_client_call, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.PROJECT_ENTITY, - "id": project.id, + with ( + patch( + "synapseclient.models.services.storable_entity.put_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_project_output()), + ) as mocked_client_call, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.PROJECT_ENTITY, + "id": project.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await project.store_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -141,21 +147,24 @@ async def test_store_with_no_changes(self) -> None: ) # WHEN I call `store` with the Project object - with patch.object( - self.syn, - "store", - ) as mocked_store, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.PROJECT_ENTITY, - "id": project.id, + with ( + patch.object( + self.syn, + "store", + ) as mocked_store, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.PROJECT_ENTITY, + "id": project.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await project.store_async(synapse_client=self.syn) # THEN we should not call store because there are no changes @@ -194,21 +203,24 @@ async def test_store_after_get(self) -> None: assert project.id == PROJECT_ID # WHEN I call `store` with the Project object - with patch.object( - self.syn, - "store", - ) as mocked_store, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.PROJECT_ENTITY, - "id": project.id, + with ( + patch.object( + self.syn, + "store", + ) as mocked_store, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.PROJECT_ENTITY, + "id": project.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await project.store_async(synapse_client=self.syn) # THEN we should not call store because there are no changes @@ -251,13 +263,16 @@ async def test_store_after_get_with_changes(self) -> None: project.description = description # WHEN I call `store` with the Project object - with patch( - "synapseclient.models.services.storable_entity.put_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_project_output()), - ) as mocked_store, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - ) as mocked_get: + with ( + patch( + "synapseclient.models.services.storable_entity.put_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_project_output()), + ) as mocked_store, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + ) as mocked_get, + ): result = await project.store_async(synapse_client=self.syn) # THEN we should call store because there are changes @@ -306,25 +321,29 @@ async def test_store_with_annotations(self) -> None: project.description = description # WHEN I call `store` with the Project object - with patch( - "synapseclient.models.project.store_entity_components", - return_value=(None), - ) as mocked_store_entity_components, patch( - "synapseclient.models.services.storable_entity.put_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_project_output()), - ) as mocked_client_call, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.PROJECT_ENTITY, - "id": project.id, + with ( + patch( + "synapseclient.models.project.store_entity_components", + return_value=(None), + ) as mocked_store_entity_components, + patch( + "synapseclient.models.services.storable_entity.put_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_project_output()), + ) as mocked_client_call, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.PROJECT_ENTITY, + "id": project.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await project.store_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -374,26 +393,30 @@ async def test_store_with_name_and_parent_id(self) -> None: project.description = description # WHEN I call `store` with the Project object - with patch( - "synapseclient.models.services.storable_entity.put_entity", - new_callable=AsyncMock, - return_value=(self.get_example_synapse_project_output()), - ) as mocked_client_call, patch.object( - self.syn, - "findEntityId", - return_value=PROJECT_ID, - ) as mocked_get, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=( - { - "entity": { - "concreteType": concrete_types.PROJECT_ENTITY, - "id": project.id, + with ( + patch( + "synapseclient.models.services.storable_entity.put_entity", + new_callable=AsyncMock, + return_value=(self.get_example_synapse_project_output()), + ) as mocked_client_call, + patch.object( + self.syn, + "findEntityId", + return_value=PROJECT_ID, + ) as mocked_get, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=( + { + "entity": { + "concreteType": concrete_types.PROJECT_ENTITY, + "id": project.id, + } } - } - ), - ) as mocked_get: + ), + ) as mocked_get, + ): result = await project.store_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -476,15 +499,18 @@ async def test_get_by_name_and_parent(self) -> None: ) # WHEN I call `get` with the Project object - with patch.object( - self.syn, - "findEntityId", - return_value=(PROJECT_ID), - ) as mocked_client_search, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=(self.get_example_rest_api_project_output()), - ) as mocked_client_call: + with ( + patch.object( + self.syn, + "findEntityId", + return_value=(PROJECT_ID), + ) as mocked_client_search, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=(self.get_example_rest_api_project_output()), + ) as mocked_client_call, + ): result = await project.get_async(synapse_client=self.syn) # THEN we should call the method with this data @@ -579,16 +605,20 @@ async def test_copy(self) -> None: } # WHEN I call `copy` with the Project object - with patch( - "synapseclient.models.project.copy", - return_value=(copy_mapping), - ) as mocked_copy, patch( - "synapseclient.models.project.Project.get_async", - return_value=(returned_project), - ) as mocked_get, patch( - "synapseclient.models.project.Project.sync_from_synapse_async", - return_value=(returned_project), - ) as mocked_sync: + with ( + patch( + "synapseclient.models.project.copy", + return_value=(copy_mapping), + ) as mocked_copy, + patch( + "synapseclient.models.project.Project.get_async", + return_value=(returned_project), + ) as mocked_get, + patch( + "synapseclient.models.project.Project.sync_from_synapse_async", + return_value=(returned_project), + ) as mocked_sync, + ): result = await project.copy_async( destination_id="destination_id", synapse_client=self.syn ) @@ -661,16 +691,20 @@ async def mock_get_children(*args, **kwargs): for child in children: yield child - with patch( - "synapseclient.models.mixins.storable_container.get_children", - side_effect=mock_get_children, - ) as mocked_children_call, patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=(self.get_example_rest_api_project_output()), - ) as mocked_project_get, patch( - "synapseclient.models.file.File.get_async", - return_value=(File(id="syn456", name="example_file_1")), + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ) as mocked_children_call, + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=(self.get_example_rest_api_project_output()), + ) as mocked_project_get, + patch( + "synapseclient.models.file.File.get_async", + return_value=(File(id="syn456", name="example_file_1")), + ), ): result = await project.sync_from_synapse_async(synapse_client=self.syn) @@ -692,3 +726,505 @@ async def mock_get_children(*args, **kwargs): assert result.modified_by == MODIFIED_BY assert result.files[0].id == "syn456" assert result.files[0].name == "example_file_1" + + +class TestStorageLocationMixin: + """Tests for StorageLocationConfigurable mixin methods on Project.""" + + STORAGE_LOCATION_ID = 12345 + SETTING_ID = "setting_abc" + + @pytest.fixture(autouse=True, scope="function") + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + @pytest.fixture() + def example_setting(self): + return ProjectSetting( + id=self.SETTING_ID, + project_id=PROJECT_ID, + settings_type="upload", + locations=[self.STORAGE_LOCATION_ID], + ) + + # ------------------------------------------------------------------------- + # set_storage_location_async + # ------------------------------------------------------------------------- + + async def test_set_storage_location_creates_new_when_no_existing_setting( + self, example_setting + ) -> None: + """Test that when there is no existing project setting and we set a storage location, a new project setting is created.""" + project = Project(id=PROJECT_ID) + + with ( + patch.object( + ProjectSetting, "get_async", new_callable=AsyncMock, return_value=None + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=example_setting, + ) as mocked_store, + ): + result = await project.set_storage_location_async( + storage_location_id=self.STORAGE_LOCATION_ID, + synapse_client=self.syn, + ) + + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.project_id == PROJECT_ID + assert stored_setting.locations == [self.STORAGE_LOCATION_ID] + assert result.id == self.SETTING_ID + + async def test_set_storage_location_updates_existing_setting( + self, example_setting + ) -> None: + """Test that when there is an existing project setting and we set a storage location, the existing project setting is updated.""" + project = Project(id=PROJECT_ID) + + updated_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=PROJECT_ID, + settings_type="upload", + locations=[99999], + ) + + with ( + patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=example_setting, + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=updated_setting, + ) as mocked_store, + ): + result = await project.set_storage_location_async( + storage_location_id=99999, + synapse_client=self.syn, + ) + + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.locations == [99999] + assert result.locations == [99999] + + async def test_set_storage_location_use_default_storage_location_instead( + self, example_setting + ) -> None: + """Test that when storage_location_id is not provided, the default Synapse S3 storage location is used.""" + from synapseclient.models.mixins.storage_location_mixin import ( + DEFAULT_STORAGE_LOCATION_ID, + ) + + project = Project(id=PROJECT_ID) + + default_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=PROJECT_ID, + settings_type="upload", + locations=[DEFAULT_STORAGE_LOCATION_ID], + ) + + with ( + patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=example_setting, + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=default_setting, + ) as mocked_store, + ): + result = await project.set_storage_location_async( + synapse_client=self.syn, + ) + + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.locations == [DEFAULT_STORAGE_LOCATION_ID] + assert result.locations == [DEFAULT_STORAGE_LOCATION_ID] + + async def test_set_storage_location_uses_default_storage_location_instead_when_storage_location_id_is_none( + self, example_setting + ) -> None: + """Test that when storage_location_id is not provided, the default Synapse S3 storage location is used.""" + from synapseclient.models.mixins.storage_location_mixin import ( + DEFAULT_STORAGE_LOCATION_ID, + ) + + folder = Project(id=PROJECT_ID) + + default_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=PROJECT_ID, + settings_type="upload", + locations=[DEFAULT_STORAGE_LOCATION_ID], + ) + + with ( + patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=example_setting, + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=default_setting, + ) as mocked_store, + ): + result = await folder.set_storage_location_async( + storage_location_id=None, + synapse_client=self.syn, + ) + + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.locations == [DEFAULT_STORAGE_LOCATION_ID] + assert result.locations == [DEFAULT_STORAGE_LOCATION_ID] + + async def test_set_storage_location_accepts_list_of_ids( + self, example_setting + ) -> None: + """Test that when storage_location_id is a list of integers, all are stored as-is.""" + project = Project(id=PROJECT_ID) + + with ( + patch.object( + ProjectSetting, "get_async", new_callable=AsyncMock, return_value=None + ), + patch.object( + ProjectSetting, + "store_async", + autospec=True, + return_value=example_setting, + ) as mocked_store, + ): + await project.set_storage_location_async( + storage_location_id=[111, 222, 333], + synapse_client=self.syn, + ) + + stored_setting = mocked_store.call_args.args[0] + assert stored_setting.locations == [111, 222, 333] + + async def test_partial_update_locations_via_get_and_store(self) -> None: + """Test the partial update pattern: retrieve the existing setting, append a + location, and store — without losing previously configured locations.""" + from synapseclient.models.project_setting import ProjectSetting + + project = Project(id=PROJECT_ID) + + existing_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=PROJECT_ID, + settings_type="upload", + locations=[111, 222], + ) + updated_setting = ProjectSetting( + id=self.SETTING_ID, + project_id=PROJECT_ID, + settings_type="upload", + locations=[111, 222, 333], + ) + + with ( + patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=existing_setting, + ), + patch.object( + ProjectSetting, + "store_async", + new_callable=AsyncMock, + return_value=updated_setting, + ) as mocked_store, + ): + setting = await project.get_project_setting_async( + setting_type="upload", + synapse_client=self.syn, + ) + setting.locations.append(333) + result = await setting.store_async(synapse_client=self.syn) + + # THEN all three locations are present — the existing ones were preserved + assert result.locations == [111, 222, 333] + mocked_store.assert_awaited_once_with(synapse_client=self.syn) + + async def test_set_storage_location_raises_when_no_id(self) -> None: + """Test that when a project without an id, an error is raised.""" + project = Project() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await project.set_storage_location_async( + storage_location_id=self.STORAGE_LOCATION_ID, + synapse_client=self.syn, + ) + + # ------------------------------------------------------------------------- + # get_project_setting_async + # ------------------------------------------------------------------------- + + async def test_get_project_setting_returns_setting(self, example_setting) -> None: + """Test that when a project setting exists, it is returned.""" + project = Project(id=PROJECT_ID) + + with patch.object( + ProjectSetting, + "get_async", + new_callable=AsyncMock, + return_value=example_setting, + ): + result = await project.get_project_setting_async( + setting_type="upload", + synapse_client=self.syn, + ) + + assert result.id == self.SETTING_ID + assert result.locations == [self.STORAGE_LOCATION_ID] + + async def test_get_project_setting_raises_when_no_id(self) -> None: + """Test that when a project without an id, an error is raised.""" + project = Project() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await project.get_project_setting_async(synapse_client=self.syn) + + # ------------------------------------------------------------------------- + # delete_project_setting_async + # ------------------------------------------------------------------------- + + async def test_delete_project_setting_calls_service(self) -> None: + """Test that when a project setting exists, it is deleted.""" + project = Project(id=PROJECT_ID) + + with patch.object( + ProjectSetting, "delete_async", new_callable=AsyncMock, return_value=None + ) as mocked_delete: + await project.delete_project_setting_async( + setting_id=self.SETTING_ID, + synapse_client=self.syn, + ) + + mocked_delete.assert_awaited_once_with(synapse_client=self.syn) + + async def test_delete_project_setting_raises_when_no_id(self) -> None: + """Test that when a project without an id, an error is raised.""" + project = Project(id=PROJECT_ID) + + with pytest.raises( + ValueError, match="The id is required to delete a project setting." + ): + await project.delete_project_setting_async( + setting_id=None, + synapse_client=self.syn, + ) + + # ------------------------------------------------------------------------- + # get_sts_storage_token_async + # ------------------------------------------------------------------------- + + async def test_get_sts_storage_token_returns_credentials(self) -> None: + """Test that when a project with an id, the STS credentials are returned.""" + project = Project(id=PROJECT_ID) + + expected_credentials = { + "aws_access_key_id": "AKIA...", + "aws_secret_access_key": "secret", + "aws_session_token": "token", + } + + with patch( + "synapseclient.models.mixins.storage_location_mixin.asyncio.to_thread", + new_callable=AsyncMock, + return_value=expected_credentials, + ) as mocked_to_thread: + result = await project.get_sts_storage_token_async( + permission="read_only", + synapse_client=self.syn, + ) + + mocked_to_thread.assert_called_once() + call_args = mocked_to_thread.call_args + assert call_args.args[0].__name__ == "get_sts_credentials" + assert call_args.args[2] == PROJECT_ID + assert call_args.args[3] == "read_only" + assert call_args.kwargs["output_format"] == "json" + assert call_args.kwargs["min_remaining_life"] is None + + assert result == expected_credentials + + async def test_get_sts_storage_token_passes_output_format_and_min_remaining_life( + self, + ) -> None: + """Test that when a project with an id, the STS credentials are returned with the output format and min remaining life.""" + project = Project(id=PROJECT_ID) + + with patch( + "synapseclient.models.mixins.storage_location_mixin.asyncio.to_thread", + new_callable=AsyncMock, + return_value={}, + ) as mocked_to_thread: + await project.get_sts_storage_token_async( + permission="read_write", + output_format="boto", + min_remaining_life=300, + synapse_client=self.syn, + ) + + call_args = mocked_to_thread.call_args + assert call_args.args[0].__name__ == "get_sts_credentials" + assert call_args.args[2] == PROJECT_ID + assert call_args.args[3] == "read_write" + assert call_args.kwargs["output_format"] == "boto" + assert call_args.kwargs["min_remaining_life"] == 300 + + async def test_get_sts_storage_token_raises_when_no_id(self) -> None: + """Test that when a project without an id, an error is raised.""" + project = Project() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await project.get_sts_storage_token_async( + permission="read_only", + synapse_client=self.syn, + ) + + # ------------------------------------------------------------------------- + # index_files_for_migration_async + # ------------------------------------------------------------------------- + + async def test_index_files_for_migration_calls_service(self) -> None: + """Test that when a project with an id, the files are indexed.""" + project = Project(id=PROJECT_ID) + + mock_result = MagicMock(spec=MigrationResult) + + with patch( + "synapseclient.models.mixins.storage_location_mixin._index_files_for_migration_async", + new_callable=AsyncMock, + return_value=mock_result, + ) as mocked_index: + result = await project.index_files_for_migration_async( + dest_storage_location_id=self.STORAGE_LOCATION_ID, + synapse_client=self.syn, + ) + + mocked_index.assert_called_once_with( + project, + dest_storage_location_id=str(self.STORAGE_LOCATION_ID), + db_path=None, + source_storage_location_ids=None, + file_version_strategy="new", + include_table_files=False, + continue_on_error=False, + synapse_client=self.syn, + ) + assert result == mock_result + + async def test_index_files_for_migration_converts_source_ids_to_strings( + self, + ) -> None: + """Test that when source_storage_location_ids are integers, they are converted to strings.""" + project = Project(id=PROJECT_ID) + + with patch( + "synapseclient.models.mixins.storage_location_mixin._index_files_for_migration_async", + new_callable=AsyncMock, + return_value=MagicMock(spec=MigrationResult), + ) as mocked_index: + await project.index_files_for_migration_async( + dest_storage_location_id=self.STORAGE_LOCATION_ID, + source_storage_location_ids=[111, 222], + synapse_client=self.syn, + ) + + call_kwargs = mocked_index.call_args.kwargs + assert call_kwargs["source_storage_location_ids"] == ["111", "222"] + + async def test_index_files_for_migration_raises_when_no_id(self) -> None: + """Test that when a project without an id, an error is raised.""" + project = Project() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await project.index_files_for_migration_async( + dest_storage_location_id=self.STORAGE_LOCATION_ID, + synapse_client=self.syn, + ) + + # ------------------------------------------------------------------------- + # migrate_indexed_files_async + # ------------------------------------------------------------------------- + + async def test_migrate_indexed_files_calls_service(self) -> None: + """Test that when a project with an id, the files are migrated.""" + project = Project(id=PROJECT_ID) + + db_path = "/tmp/migration.db" + mock_result = MagicMock(spec=MigrationResult) + + with patch( + "synapseclient.models.mixins.storage_location_mixin._migrate_indexed_files_async", + new_callable=AsyncMock, + return_value=mock_result, + ) as mocked_migrate: + result = await project.migrate_indexed_files_async( + db_path=db_path, + synapse_client=self.syn, + ) + + mocked_migrate.assert_called_once_with( + db_path=db_path, + create_table_snapshots=True, + continue_on_error=False, + force=False, + synapse_client=self.syn, + ) + assert result == mock_result + + async def test_migrate_indexed_files_passes_all_options(self) -> None: + """Test that when a project with an id, the files are migrated with all options.""" + project = Project(id=PROJECT_ID) + mock_result = MagicMock(spec=MigrationResult) + with patch( + "synapseclient.models.mixins.storage_location_mixin._migrate_indexed_files_async", + new_callable=AsyncMock, + return_value=mock_result, + ) as mocked_migrate: + result = await project.migrate_indexed_files_async( + db_path="/tmp/migration.db", + create_table_snapshots=False, + continue_on_error=True, + force=True, + synapse_client=self.syn, + ) + + mocked_migrate.assert_called_once_with( + db_path="/tmp/migration.db", + create_table_snapshots=False, + continue_on_error=True, + force=True, + synapse_client=self.syn, + ) + assert result == mock_result + + async def test_migrate_indexed_files_raises_when_no_id(self) -> None: + """Test that when a project without an id, an error is raised.""" + project = Project() + + with pytest.raises(ValueError, match="The entity must have an id set."): + await project.migrate_indexed_files_async( + db_path="/tmp/migration.db", + synapse_client=self.syn, + ) diff --git a/tests/unit/synapseclient/models/async/unit_test_recordset_async.py b/tests/unit/synapseclient/models/async/unit_test_recordset_async.py index a01823024..2b4e2dcf5 100644 --- a/tests/unit/synapseclient/models/async/unit_test_recordset_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_recordset_async.py @@ -297,24 +297,30 @@ async def mock_semaphore_ctx(*args, **kwargs): self.syn._get_parallel_file_transfer_semaphore = mock_semaphore_ctx # WHEN I call store_async - with patch( - "synapseclient.models.recordset.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.file._upload_file", - new_callable=AsyncMock, - ) as mock_upload, patch( - "synapseclient.models.recordset.store_entity", - new_callable=AsyncMock, - return_value=entity_response, - ) as mock_store_entity, patch( - "synapseclient.models.recordset.store_entity_components", - new_callable=AsyncMock, - return_value=False, - ), patch( - "os.path.expanduser", - return_value=PATH, + with ( + patch( + "synapseclient.models.recordset.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.file._upload_file", + new_callable=AsyncMock, + ) as mock_upload, + patch( + "synapseclient.models.recordset.store_entity", + new_callable=AsyncMock, + return_value=entity_response, + ) as mock_store_entity, + patch( + "synapseclient.models.recordset.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), + patch( + "os.path.expanduser", + return_value=PATH, + ), ): result = await record_set.store_async(synapse_client=self.syn) @@ -343,18 +349,22 @@ async def test_store_async_with_data_file_handle_id(self) -> None: self.syn.cache.get.return_value = None # WHEN I call store_async - with patch( - "synapseclient.models.recordset.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.recordset.store_entity", - new_callable=AsyncMock, - return_value=entity_response, - ) as mock_store_entity, patch( - "synapseclient.models.recordset.store_entity_components", - new_callable=AsyncMock, - return_value=False, + with ( + patch( + "synapseclient.models.recordset.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.recordset.store_entity", + new_callable=AsyncMock, + return_value=entity_response, + ) as mock_store_entity, + patch( + "synapseclient.models.recordset.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), ): result = await record_set.store_async(synapse_client=self.syn) @@ -388,24 +398,30 @@ async def mock_semaphore_ctx(*args, **kwargs): self.syn._get_parallel_file_transfer_semaphore = mock_semaphore_ctx # WHEN I call store_async with a parent object - with patch( - "synapseclient.models.recordset.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.file._upload_file", - new_callable=AsyncMock, - ), patch( - "synapseclient.models.recordset.store_entity", - new_callable=AsyncMock, - return_value=entity_response, - ), patch( - "synapseclient.models.recordset.store_entity_components", - new_callable=AsyncMock, - return_value=False, - ), patch( - "os.path.expanduser", - return_value=PATH, + with ( + patch( + "synapseclient.models.recordset.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.file._upload_file", + new_callable=AsyncMock, + ), + patch( + "synapseclient.models.recordset.store_entity", + new_callable=AsyncMock, + return_value=entity_response, + ), + patch( + "synapseclient.models.recordset.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), + patch( + "os.path.expanduser", + return_value=PATH, + ), ): result = await record_set.store_async( parent=parent, synapse_client=self.syn @@ -449,28 +465,35 @@ async def mock_semaphore_ctx(*args, **kwargs): self.syn._get_parallel_file_transfer_semaphore = mock_semaphore_ctx # WHEN I call store_async and an existing entity is found - with patch( - "synapseclient.models.recordset.get_id", - new_callable=AsyncMock, - return_value=SYN_123, - ), patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value={"entity": existing_entity_response, "fileHandles": []}, - ), patch( - "synapseclient.models.file._upload_file", - new_callable=AsyncMock, - ), patch( - "synapseclient.models.recordset.store_entity", - new_callable=AsyncMock, - return_value=updated_entity_response, - ), patch( - "synapseclient.models.recordset.store_entity_components", - new_callable=AsyncMock, - return_value=False, - ), patch( - "os.path.expanduser", - return_value=PATH, + with ( + patch( + "synapseclient.models.recordset.get_id", + new_callable=AsyncMock, + return_value=SYN_123, + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value={"entity": existing_entity_response, "fileHandles": []}, + ), + patch( + "synapseclient.models.file._upload_file", + new_callable=AsyncMock, + ), + patch( + "synapseclient.models.recordset.store_entity", + new_callable=AsyncMock, + return_value=updated_entity_response, + ), + patch( + "synapseclient.models.recordset.store_entity_components", + new_callable=AsyncMock, + return_value=False, + ), + patch( + "os.path.expanduser", + return_value=PATH, + ), ): result = await record_set.store_async(synapse_client=self.syn) @@ -491,24 +514,29 @@ async def test_store_async_re_read_required(self) -> None: self.syn.cache.get.return_value = None # WHEN I call store_async and store_entity_components returns True - with patch( - "synapseclient.models.recordset.get_id", - new_callable=AsyncMock, - return_value=None, - ), patch( - "synapseclient.models.recordset.store_entity", - new_callable=AsyncMock, - return_value=entity_response, - ), patch( - "synapseclient.models.recordset.store_entity_components", - new_callable=AsyncMock, - return_value=True, - ), patch.object( - RecordSet, - "get_async", - new_callable=AsyncMock, - return_value=record_set, - ) as mock_get: + with ( + patch( + "synapseclient.models.recordset.get_id", + new_callable=AsyncMock, + return_value=None, + ), + patch( + "synapseclient.models.recordset.store_entity", + new_callable=AsyncMock, + return_value=entity_response, + ), + patch( + "synapseclient.models.recordset.store_entity_components", + new_callable=AsyncMock, + return_value=True, + ), + patch.object( + RecordSet, + "get_async", + new_callable=AsyncMock, + return_value=record_set, + ) as mock_get, + ): result = await record_set.store_async(synapse_client=self.syn) # THEN get_async should be called again to re-read the entity @@ -551,12 +579,15 @@ async def test_get_async_with_path(self) -> None: entity_response = _get_record_set_entity_response() # WHEN I call get_async - with patch( - "synapseclient.models.recordset.get_from_entity_factory", - new_callable=AsyncMock, - ) as mock_factory, patch( - "os.path.isfile", - return_value=False, + with ( + patch( + "synapseclient.models.recordset.get_from_entity_factory", + new_callable=AsyncMock, + ) as mock_factory, + patch( + "os.path.isfile", + return_value=False, + ), ): async def side_effect(**kwargs): @@ -596,14 +627,17 @@ async def test_get_async_include_activity(self) -> None: ) # WHEN I call get_async with include_activity=True - with patch( - "synapseclient.models.recordset.get_from_entity_factory", - new_callable=AsyncMock, - ) as mock_factory, patch( - "synapseclient.models.Activity.from_parent_async", - new_callable=AsyncMock, - return_value=activity_response, - ) as mock_from_parent: + with ( + patch( + "synapseclient.models.recordset.get_from_entity_factory", + new_callable=AsyncMock, + ) as mock_factory, + patch( + "synapseclient.models.Activity.from_parent_async", + new_callable=AsyncMock, + return_value=activity_response, + ) as mock_from_parent, + ): async def side_effect(**kwargs): entity_to_update = kwargs["entity_to_update"] @@ -726,16 +760,20 @@ async def test_get_detailed_validation_results_async_with_handle_id( self.syn.cache.get_cache_dir.return_value = "/syn_cache_dir" # WHEN I call get_detailed_validation_results_async - with patch( - "synapseclient.models.recordset.test_import_pandas", - ), patch( - "synapseclient.models.recordset.download_by_file_handle", - new_callable=AsyncMock, - return_value="/cached/validation_results.csv", - ) as mock_download, patch( - "pandas.read_csv", - return_value=mock_df, - ) as mock_read_csv: + with ( + patch( + "synapseclient.models.recordset.test_import_pandas", + ), + patch( + "synapseclient.models.recordset.download_by_file_handle", + new_callable=AsyncMock, + return_value="/cached/validation_results.csv", + ) as mock_download, + patch( + "pandas.read_csv", + return_value=mock_df, + ) as mock_read_csv, + ): result = await record_set.get_detailed_validation_results_async( synapse_client=self.syn ) @@ -754,10 +792,13 @@ async def test_get_detailed_validation_results_async_no_handle_id(self) -> None: record_set = RecordSet(id=SYN_123) # WHEN I call get_detailed_validation_results_async - with patch( - "synapseclient.models.recordset.test_import_pandas", - ), patch( - "pandas.read_csv", + with ( + patch( + "synapseclient.models.recordset.test_import_pandas", + ), + patch( + "pandas.read_csv", + ), ): result = await record_set.get_detailed_validation_results_async( synapse_client=self.syn @@ -782,18 +823,23 @@ async def test_get_detailed_validation_results_async_download_location( self.syn.cache.get_cache_dir.return_value = "/syn_cache_dir" # WHEN I call get_detailed_validation_results_async with a download_location - with patch( - "synapseclient.models.recordset.test_import_pandas", - ), patch( - "synapseclient.models.recordset.ensure_download_location_is_directory", - return_value=download_location, - ), patch( - "synapseclient.models.recordset.download_by_file_handle", - new_callable=AsyncMock, - return_value=f"{download_location}/SYNAPSE_RECORDSET_VALIDATION_{VALIDATION_FILE_HANDLE_ID}.csv", - ) as mock_download, patch( - "pandas.read_csv", - return_value=mock_df, + with ( + patch( + "synapseclient.models.recordset.test_import_pandas", + ), + patch( + "synapseclient.models.recordset.ensure_download_location_is_directory", + return_value=download_location, + ), + patch( + "synapseclient.models.recordset.download_by_file_handle", + new_callable=AsyncMock, + return_value=f"{download_location}/SYNAPSE_RECORDSET_VALIDATION_{VALIDATION_FILE_HANDLE_ID}.csv", + ) as mock_download, + patch( + "pandas.read_csv", + return_value=mock_df, + ), ): result = await record_set.get_detailed_validation_results_async( download_location=download_location, synapse_client=self.syn diff --git a/tests/unit/synapseclient/models/async/unit_test_schema_organization_async.py b/tests/unit/synapseclient/models/async/unit_test_schema_organization_async.py index 24f9cd7fe..525c21b99 100644 --- a/tests/unit/synapseclient/models/async/unit_test_schema_organization_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_schema_organization_async.py @@ -253,15 +253,18 @@ async def test_delete_async_without_id_triggers_get(self) -> None: org = SchemaOrganization(name=ORG_NAME) # WHEN I call delete_async - with patch( - "synapseclient.models.schema_organization.get_organization", - new_callable=AsyncMock, - return_value=_get_organization_response(), - ) as mock_get, patch( - "synapseclient.models.schema_organization.delete_organization", - new_callable=AsyncMock, - return_value=None, - ) as mock_delete: + with ( + patch( + "synapseclient.models.schema_organization.get_organization", + new_callable=AsyncMock, + return_value=_get_organization_response(), + ) as mock_get, + patch( + "synapseclient.models.schema_organization.delete_organization", + new_callable=AsyncMock, + return_value=None, + ) as mock_delete, + ): await org.delete_async(synapse_client=self.syn) # THEN get should be called first to obtain the id @@ -341,15 +344,18 @@ async def test_get_acl_async_without_id_triggers_get(self) -> None: acl_response = _get_acl_response() # WHEN I call get_acl_async (id will be fetched first) - with patch( - "synapseclient.models.schema_organization.get_organization", - new_callable=AsyncMock, - return_value=_get_organization_response(), - ) as mock_get, patch( - "synapseclient.models.schema_organization.get_organization_acl", - new_callable=AsyncMock, - return_value=acl_response, - ) as mock_get_acl: + with ( + patch( + "synapseclient.models.schema_organization.get_organization", + new_callable=AsyncMock, + return_value=_get_organization_response(), + ) as mock_get, + patch( + "synapseclient.models.schema_organization.get_organization_acl", + new_callable=AsyncMock, + return_value=acl_response, + ) as mock_get_acl, + ): result = await org.get_acl_async(synapse_client=self.syn) # THEN get should be called first to obtain the id @@ -365,15 +371,18 @@ async def test_update_acl_async_add_new_principal(self) -> None: acl_response = _get_acl_response() # WHEN I call update_acl_async with a new principal - with patch( - "synapseclient.models.schema_organization.get_organization_acl", - new_callable=AsyncMock, - return_value=acl_response, - ), patch( - "synapseclient.models.schema_organization.update_organization_acl", - new_callable=AsyncMock, - return_value=None, - ) as mock_update: + with ( + patch( + "synapseclient.models.schema_organization.get_organization_acl", + new_callable=AsyncMock, + return_value=acl_response, + ), + patch( + "synapseclient.models.schema_organization.update_organization_acl", + new_callable=AsyncMock, + return_value=None, + ) as mock_update, + ): await org.update_acl_async( principal_id=PRINCIPAL_ID_2, access_type=["READ"], @@ -404,15 +413,18 @@ async def test_update_acl_async_update_existing_principal(self) -> None: acl_response = _get_acl_response() # WHEN I call update_acl_async for an existing principal with new permissions - with patch( - "synapseclient.models.schema_organization.get_organization_acl", - new_callable=AsyncMock, - return_value=acl_response, - ), patch( - "synapseclient.models.schema_organization.update_organization_acl", - new_callable=AsyncMock, - return_value=None, - ) as mock_update: + with ( + patch( + "synapseclient.models.schema_organization.get_organization_acl", + new_callable=AsyncMock, + return_value=acl_response, + ), + patch( + "synapseclient.models.schema_organization.update_organization_acl", + new_callable=AsyncMock, + return_value=None, + ) as mock_update, + ): await org.update_acl_async( principal_id=PRINCIPAL_ID_1, access_type=["READ", "CREATE", "DELETE"], @@ -681,13 +693,16 @@ async def mock_list(*args, **kwargs): yield schema_response # WHEN I call get_async (org exists and schema is found) - with patch( - "synapseclient.models.schema_organization.get_organization", - new_callable=AsyncMock, - return_value=_get_organization_response(), - ), patch( - "synapseclient.models.schema_organization.list_json_schemas", - return_value=mock_list(), + with ( + patch( + "synapseclient.models.schema_organization.get_organization", + new_callable=AsyncMock, + return_value=_get_organization_response(), + ), + patch( + "synapseclient.models.schema_organization.list_json_schemas", + return_value=mock_list(), + ), ): result = await schema.get_async(synapse_client=self.syn) @@ -709,13 +724,16 @@ async def mock_list(*args, **kwargs): yield other_schema_response # WHEN I call get_async - with patch( - "synapseclient.models.schema_organization.get_organization", - new_callable=AsyncMock, - return_value=_get_organization_response(), - ), patch( - "synapseclient.models.schema_organization.list_json_schemas", - return_value=mock_list(), + with ( + patch( + "synapseclient.models.schema_organization.get_organization", + new_callable=AsyncMock, + return_value=_get_organization_response(), + ), + patch( + "synapseclient.models.schema_organization.list_json_schemas", + return_value=mock_list(), + ), ): # THEN it should raise ValueError with pytest.raises(ValueError, match="does not contain a schema with name"): diff --git a/tests/unit/synapseclient/models/async/unit_test_storable_container_async.py b/tests/unit/synapseclient/models/async/unit_test_storable_container_async.py new file mode 100644 index 000000000..00a6151bd --- /dev/null +++ b/tests/unit/synapseclient/models/async/unit_test_storable_container_async.py @@ -0,0 +1,761 @@ +"""Unit tests for StorableContainer""" + +import csv +import os +import platform +import uuid +from pathlib import Path +from typing import Any +from unittest.mock import AsyncMock, patch + +import pytest + +from synapseclient import Synapse +from synapseclient.models import File, Folder, Project +from synapseclient.models.services import manifest as manifest_module + + +def _write_manifest(rows: list[dict], tmp_path: Path) -> Path: + """Write a minimal CSV manifest to a unique path under *tmp_path*.""" + path = tmp_path / f"{uuid.uuid4()}_manifest.csv" + if not rows: + return path + fieldnames = list(rows[0].keys()) + with open(path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter( + f, fieldnames=fieldnames, restval="", extrasaction="ignore" + ) + writer.writeheader() + writer.writerows(rows) + return path + + +class TestSyncToSynapse: + """Tests for StorableContainer.sync_to_synapse_async that do not require a Synapse connection.""" + + @pytest.fixture(autouse=True) + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + async def test_missing_path_column_raises(self, tmp_path: Path) -> None: + """A manifest without a 'path' column raises ValueError immediately.""" + manifest_path = _write_manifest([{"parentId": "syn123", "name": "x"}], tmp_path) + project = Project(id="test", name="test") + + with pytest.raises(ValueError, match="'path'"): + await project.sync_to_synapse_async( + manifest_path=str(manifest_path), synapse_client=self.syn + ) + + async def test_missing_parent_id_column_raises(self, tmp_path: Path) -> None: + """A manifest without a 'parentId' column raises ValueError immediately.""" + local_file = tmp_path / "f.txt" + local_file.write_text("x") + + manifest_path = _write_manifest( + [{"path": str(local_file), "name": "f.txt"}], tmp_path + ) + project = Project(id="test", name="test") + + with pytest.raises(ValueError, match="'parentId'"): + await project.sync_to_synapse_async( + manifest_path=str(manifest_path), synapse_client=self.syn + ) + + async def test_all_rows_have_errors_no_crash(self, tmp_path: Path) -> None: + """When every row has an error the call completes without uploading anything.""" + manifest_path = _write_manifest( + [ + { + "path": "/nonexistent/file.txt", + "parentId": "syn123", + "name": "ignored.txt", + "error": "Some failure", + } + ], + tmp_path, + ) + project = Project(id="test", name="test") + + with patch( + "synapseclient.models.mixins.storable_container.upload_sync_files" + ) as mock_upload: + result = await project.sync_to_synapse_async( + manifest_path=str(manifest_path), synapse_client=self.syn + ) + mock_upload.assert_not_called() + assert result == [] + + +class TestGenerateSyncManifest: + """Tests for StorableContainer.generate_sync_manifest_async.""" + + async def test_unstored_container_raises( + self, tmp_path: Path, syn: Synapse + ) -> None: + """Calling the method on a container whose id is None is a caller bug + and must surface as a ValueError rather than a confusing downstream + error. The integration suite cannot exercise this case because every + container fixture has already been stored.""" + # GIVEN a local directory with one file + src = tmp_path / "src" + src.mkdir() + (src / "a.txt").write_text("hello") + manifest = tmp_path / "manifest.csv" + + # WHEN generate_sync_manifest_async is called on a Project that has + # never been stored (id is None) + # THEN a ValueError is raised explaining the container is not stored + with pytest.raises(ValueError, match="has not been stored in Synapse"): + await Project(name="unstored").generate_sync_manifest_async( + directory_path=str(src), + manifest_path=str(manifest), + synapse_client=syn, + ) + + async def test_empty_directory_logs_warning( + self, tmp_path: Path, syn: Synapse + ) -> None: + """An empty directory logs a 'No uploadable files found' warning so + the caller knows why their manifest is empty. The integration suite + covers the resulting header-only file content.""" + # GIVEN an empty local directory + empty_dir = tmp_path / "empty" + empty_dir.mkdir() + manifest = tmp_path / "manifest.csv" + + # WHEN we generate a manifest for it (parent-id validation is stubbed + # so this stays offline) + with ( + patch( + "synapseclient.models.services.manifest._validate_target_container_async", + new=AsyncMock(return_value=None), + ), + patch.object(syn.logger, "warning") as mock_warning, + ): + await Folder(id="syn1").generate_sync_manifest_async( + directory_path=str(empty_dir), + manifest_path=str(manifest), + synapse_client=syn, + ) + + # THEN a "no uploadable files" warning is logged so the caller knows + # why their manifest is empty + assert any( + "No uploadable files found" in call.args[0] + for call in mock_warning.call_args_list + ) + + @pytest.mark.parametrize( + "kind", + ["missing-path", "file-path"], + ) + async def test_invalid_directory_path_raises( + self, tmp_path: Path, syn: Synapse, kind: str + ) -> None: + """The public surface raises ValueError for both a non-existent path + and a regular file path, and writes no manifest. The integration suite + only exercises the missing-path case, so the file-path case is covered + here.""" + # GIVEN either a non-existent path or a regular file path + if kind == "missing-path": + path = tmp_path / "does_not_exist" + else: + path = tmp_path / "a.txt" + path.write_text("hello") + manifest = tmp_path / "manifest.csv" + + # WHEN we try to generate a manifest for it + # THEN a ValueError is raised + with pytest.raises(ValueError, match="is not a directory or does not exist"): + await Folder(id="syn1").generate_sync_manifest_async( + directory_path=str(path), + manifest_path=str(manifest), + synapse_client=syn, + ) + + # AND no manifest file is written (failure happens before any output) + assert not manifest.exists() + + async def test_parent_id_validated_upfront( + self, tmp_path: Path, syn: Synapse + ) -> None: + """The container's id is validated via _validate_target_container_async + before the directory is walked. This ordering invariant cannot be + verified through the live API; integration tests can only observe the + end state.""" + # GIVEN a directory with one file + src = tmp_path / "src" + src.mkdir() + (src / "a.txt").write_text("hello") + manifest = tmp_path / "manifest.csv" + + # WHEN we generate a manifest under a Folder with id "synROOT" with + # the parent-id validation stubbed so it stays offline + with patch( + "synapseclient.models.services.manifest._validate_target_container_async", + new_callable=AsyncMock, + ) as mock_validate: + await Folder(id="synROOT").generate_sync_manifest_async( + directory_path=str(src), + manifest_path=str(manifest), + synapse_client=syn, + ) + + # THEN _validate_target_container_async was awaited exactly once + # with that id, ensuring the parent check happens before traversal + mock_validate.assert_awaited_once_with("synROOT", client=syn) + + +class TestResolveAndValidateDirectoryPath: + """Tests for manifest._resolve_and_validate_directory_path.""" + + async def test_valid_directory_returns_realpath(self, tmp_path: Path) -> None: + """A valid directory returns the realpath-resolved absolute path.""" + # GIVEN a real directory on disk + src = tmp_path / "src" + src.mkdir() + + # WHEN we validate the directory path + result = manifest_module._resolve_and_validate_directory_path( + directory_path=str(src) + ) + + # THEN the realpath-resolved absolute path is returned + assert result == os.path.realpath(str(src)) + + @pytest.mark.parametrize( + "kind", + ["missing-path", "file-path"], + ) + async def test_non_directory_path_raises(self, tmp_path: Path, kind: str) -> None: + """Both a non-existent path and a regular file path raise ValueError.""" + # GIVEN either a non-existent path or a regular file path + if kind == "missing-path": + path = tmp_path / "missing" + else: + path = tmp_path / "f.txt" + path.write_text("hello") + + # WHEN we validate the directory path + # THEN a ValueError is raised explaining the path is not a directory + with pytest.raises(ValueError, match="is not a directory or does not exist"): + manifest_module._resolve_and_validate_directory_path( + directory_path=str(path) + ) + + @pytest.mark.skipif( + platform.system() == "Windows", + reason="Symlink creation requires elevated privileges on Windows.", + ) + async def test_symlink_resolved_to_target(self, tmp_path: Path) -> None: + """If directory_path is a symlink, the resolved target path is returned + so the manifest survives the original symlink being removed/redirected.""" + # GIVEN a target directory and a symlink pointing to it + target = tmp_path / "target" + target.mkdir() + link = tmp_path / "link" + os.symlink(str(target), str(link)) + + # WHEN we validate the symlink path + result = manifest_module._resolve_and_validate_directory_path( + directory_path=str(link) + ) + + # THEN the realpath of the symlink target is returned (not the link) + assert result == os.path.realpath(str(target)) + + +class TestValidateTargetContainer: + """Tests for manifest._validate_target_container_async.""" + + @pytest.mark.parametrize( + "container", + [ + pytest.param(Folder(id="syn1", name="folder"), id="folder"), + pytest.param(Project(id="syn1", name="project"), id="project"), + ], + ) + async def test_container_passes( + self, syn: Synapse, container: Folder | Project + ) -> None: + """Both Folder and Project pass validation""" + # GIVEN get_async is stubbed to return either a Folder or Project + with patch.object( + manifest_module, + "get_async", + new_callable=AsyncMock, + return_value=container, + ): + # WHEN we validate the target container by id + result = await manifest_module._validate_target_container_async( + "syn1", client=syn + ) + # THEN validation returns None (no error raised) + assert result is None + + async def test_non_container_raises_value_error(self, syn: Synapse) -> None: + """A File (or any non-container) raises ValueError naming the id.""" + # GIVEN get_async is stubbed to return a File (non-container) entity + not_container = File(id="syn1", name="file") + with patch.object( + manifest_module, + "get_async", + new_callable=AsyncMock, + return_value=not_container, + ): + # WHEN we validate the target container by id + # THEN a ValueError is raised naming the id and rejecting the type + with pytest.raises( + ValueError, match=r"Container syn1 is not a Folder or Project" + ): + await manifest_module._validate_target_container_async( + "syn1", client=syn + ) + + +class TestCollectManifestRows: + """Tests for manifest._collect_manifest_rows_async.""" + + async def test_empty_directory_returns_empty_list( + self, tmp_path: Path, syn: Synapse + ) -> None: + """An empty directory produces no rows.""" + # GIVEN an empty local directory + empty = tmp_path / "empty" + empty.mkdir() + + # WHEN we collect manifest rows for it + rows = await manifest_module._collect_manifest_rows_async( + directory_path=str(empty), parent_id="syn1", client=syn + ) + + # THEN the result is an empty list + assert rows == [] + + async def test_flat_directory_uses_root_parent_id( + self, tmp_path: Path, syn: Synapse + ) -> None: + """Files at the top level all share the supplied parent_id and are + emitted in sorted order. No Synapse folders are created when the + local tree has no subdirectories.""" + # GIVEN a flat directory containing two files in non-sorted order + src = tmp_path / "src" + src.mkdir() + (src / "b.txt").write_text("two") + (src / "a.txt").write_text("one") + + # WHEN we collect manifest rows under a root parent_id + with patch( + "synapseclient.models.Folder.store_async", + new_callable=AsyncMock, + ) as mock_store_async: + rows = await manifest_module._collect_manifest_rows_async( + directory_path=str(src), parent_id="synROOT", client=syn + ) + + # THEN the rows are emitted in sorted filename order + assert [os.path.basename(row["path"]) for row in rows] == ["a.txt", "b.txt"] + # AND every row uses the root parent_id + assert all(row["parentId"] == "synROOT" for row in rows) + # AND no Synapse folders are created (no subdirectories to mirror) + mock_store_async.assert_not_called() + + async def test_nested_directories_use_created_folder_ids( + self, tmp_path: Path, syn: Synapse + ) -> None: + """Files in nested subdirectories receive the id of the Synapse folder + created for their innermost local directory.""" + # GIVEN a nested directory tree with one file at the deepest level + src = tmp_path / "src" + (src / "level1" / "level2").mkdir(parents=True) + (src / "level1" / "level2" / "deep.txt").write_text("content") + + # AND a fake _create_child_folders_async that returns Folders with + # deterministic ids (syn_) so the test can verify how the + # production code threads parent ids from one walk iteration to + # the next. Records (parent_id, dirnames) per call to assert call + # order AND that each call's parent_id is the fake id assigned to + # the previously-created folder. + observed: list[tuple[str, list[str]]] = [] + + async def fake_create_child_folders_async( + parent_id: str, dirnames: list[str], client: Synapse + ) -> dict[str, Folder]: + observed.append((parent_id, list(dirnames))) + return {d: Folder(name=d, id=f"syn_{d}") for d in dirnames} + + # WHEN we collect manifest rows under the root parent_id + with patch.object( + manifest_module, + "_create_child_folders_async", + new=fake_create_child_folders_async, + ): + rows = await manifest_module._collect_manifest_rows_async( + directory_path=str(src), parent_id="synROOT", client=syn + ) + + # AND folders are created top-down: level1 under root, then level2 + # under the freshly-created level1 (id syn_level1), and a final + # no-op call for the leaf directory. + assert observed == [ + ("synROOT", ["level1"]), + ("syn_level1", ["level2"]), + ("syn_level2", []), + ] + # AND the deep file's parentId is the innermost folder's id + assert rows == [ + { + "path": os.path.join(str(src), "level1", "level2", "deep.txt"), + "parentId": "syn_level2", + } + ] + + async def test_mixed_flat_and_nested_uses_correct_parent_ids( + self, tmp_path: Path, syn: Synapse + ) -> None: + """Files at the root use the supplied parent_id while files inside + subdirectories use the id of the folder created for their containing + directory. The flat-only and nested-only tests each cover one branch + in isolation; this combined case asserts both work together in a + single walk.""" + # GIVEN a tree with one file at the root AND a subdir with a file + src = tmp_path / "src" + src.mkdir() + (src / "root.txt").write_text("root-data") + (src / "child").mkdir() + (src / "child" / "nested.txt").write_text("nested-data") + + # AND a fake _create_child_folders_async that assigns deterministic + # ids so the test can distinguish root vs. nested rows by parentId + async def fake_create_child_folders_async( + parent_id: str, dirnames: list[str], client: Synapse + ) -> dict[str, Folder]: + return {d: Folder(name=d, id=f"syn_{d}") for d in dirnames} + + # WHEN we collect manifest rows under the root parent_id + with patch.object( + manifest_module, + "_create_child_folders_async", + new=fake_create_child_folders_async, + ): + rows = await manifest_module._collect_manifest_rows_async( + directory_path=str(src), parent_id="synROOT", client=syn + ) + + # THEN the root-level file uses the root parent_id and the nested + # file uses the id assigned to its containing folder + by_basename = {os.path.basename(r["path"]): r["parentId"] for r in rows} + assert by_basename == { + "root.txt": "synROOT", + "nested.txt": "syn_child", + } + + async def test_multiple_siblings_share_parent_id_in_one_batch( + self, tmp_path: Path, syn: Synapse + ) -> None: + """Sibling directories at the same depth are passed to + _create_child_folders_async in a single batch (one call), all sharing + the same parent_id.""" + # GIVEN three sibling directories at the same depth, each with a + # uniquely-named file so rows can be matched back to their folder + src = tmp_path / "src" + src.mkdir() + for name in ("alpha", "beta", "gamma"): + (src / name).mkdir() + (src / name / f"{name}.txt").write_text("data") + + # AND a fake _create_child_folders_async that records each call's + # (parent_id, dirnames) so we can assert there's exactly one batch + # for the three siblings + observed: list[tuple[str, list[str]]] = [] + + async def fake_create_child_folders_async( + parent_id: str, dirnames: list[str], client: Synapse + ) -> dict[str, Folder]: + observed.append((parent_id, list(dirnames))) + return {d: Folder(name=d, id=f"syn_{d}") for d in dirnames} + + # WHEN we collect manifest rows under the root parent_id + with patch.object( + manifest_module, + "_create_child_folders_async", + new=fake_create_child_folders_async, + ): + rows = await manifest_module._collect_manifest_rows_async( + directory_path=str(src), parent_id="synROOT", client=syn + ) + + # THEN the first call passes all three siblings in sorted order + # under the root parent_id (one batch, not three) + assert observed[0] == ("synROOT", ["alpha", "beta", "gamma"]) + # AND each subsequent call (one per sibling, for its own children) + # passes an empty dirnames list because each sibling is a leaf dir + assert all(call[1] == [] for call in observed[1:]) + # AND every file's parentId matches the id of its containing folder + assert {os.path.basename(r["path"]): r["parentId"] for r in rows} == { + "alpha.txt": "syn_alpha", + "beta.txt": "syn_beta", + "gamma.txt": "syn_gamma", + } + + +class TestLogWalkError: + """Tests for manifest._log_walk_error.""" + + def test_logs_warning_with_filename_and_message(self, syn: Synapse) -> None: + """The warning message references the offending filename and the + underlying OSError representation.""" + # GIVEN an OSError carrying an errno, message, and filename + err = OSError(13, "permission denied", "/tmp/unreadable") + + # WHEN _log_walk_error is invoked with that error + with patch.object(syn.logger, "warning") as mock_warning: + manifest_module._log_walk_error(syn, err) + + # THEN logger.warning is called exactly once + mock_warning.assert_called_once() + # AND the message references the offending filename and OSError text + message = mock_warning.call_args.args[0] + assert "/tmp/unreadable" in message + assert "permission denied" in message + + def test_logs_warning_with_no_filename(self, syn: Synapse) -> None: + """An OSError without a filename still produces a warning rather than + raising (best-effort during traversal).""" + # GIVEN an OSError without a filename attribute + err = OSError("io failure") + + # WHEN _log_walk_error is invoked with that error + with patch.object(syn.logger, "warning") as mock_warning: + manifest_module._log_walk_error(syn, err) + + # THEN logger.warning is called once (no exception is raised) + mock_warning.assert_called_once() + + +class TestCreateChildFolders: + """Tests for manifest._create_child_folders_async.""" + + async def test_empty_dirnames_returns_empty_dict(self, syn: Synapse) -> None: + """No dirnames produces no folders and never invokes Folder.store_async.""" + # GIVEN an empty list of dirnames + dirnames = [] + # WHEN we ask for child folders + with patch( + "synapseclient.models.Folder.store_async", + new_callable=AsyncMock, + ) as mock_store_async: + result = await manifest_module._create_child_folders_async( + parent_id="synROOT", dirnames=dirnames, client=syn + ) + + # THEN the result is an empty dict and no Synapse calls were made + assert result == {} + mock_store_async.assert_not_called() + + async def test_returns_dirname_to_folder_mapping(self, syn: Synapse) -> None: + """Each input dirname maps to the Folder returned by store_async, all + sharing the supplied parent_id, regardless of completion order.""" + # GIVEN three sibling dirnames to create under a single parent + dirnames = ["alpha", "beta", "gamma"] + + # AND a fake Folder.store_async that assigns a deterministic id per + # call so the returned Folders can be asserted against by id. + # Patched onto Folder.store_async as an unbound method, so the bound + # Folder instance arrives as `self`. Mutating self.id and returning + # self matches the real signature without hitting the network. + observed_parent_ids: list[str] = [] + + async def fake_store_async(self: Any, *args: Any, **kwargs: Any) -> Any: + observed_parent_ids.append(self.parent_id) + self.id = f"syn_{self.name}" + return self + + # WHEN we create the child folders concurrently + with patch( + "synapseclient.models.Folder.store_async", + new=fake_store_async, + ): + result = await manifest_module._create_child_folders_async( + parent_id="synROOT", dirnames=dirnames, client=syn + ) + + # THEN every dirname maps to its own Folder with the expected id + assert set(result.keys()) == set(dirnames) + for dirname in dirnames: + assert result[dirname].name == dirname + assert result[dirname].id == f"syn_{dirname}" + + # AND every store call used the same parent_id + assert observed_parent_ids == ["synROOT"] * len(dirnames) + + +class TestPruneSymlinksAndSortDirnames: + """Tests for manifest._prune_symlinks_and_sort_dirnames.""" + + def test_sorts_dirnames_in_place(self, tmp_path: Path) -> None: + """Plain directories are sorted in place; the original list object is + mutated (os.walk relies on identity, not return value).""" + # GIVEN three real directories and an unsorted dirnames list referring to them + for name in ("c", "a", "b"): + (tmp_path / name).mkdir() + dirnames = ["c", "a", "b"] + + # WHEN we prune and sort + manifest_module._prune_symlinks_and_sort_dirnames(dirnames, str(tmp_path)) + + # AND the caller's list is sorted alphabetically + assert dirnames == ["a", "b", "c"] + + @pytest.mark.skipif( + platform.system() == "Windows", + reason="Symlink creation requires elevated privileges on Windows.", + ) + def test_drops_symlinked_subdirectories(self, tmp_path: Path) -> None: + """Symlinked subdirectories are pruned so we don't mirror folders whose + contents os.walk(followlinks=False) won't visit.""" + # GIVEN one real subdirectory and one symlink pointing at another + # directory, both listed in dirnames + real = tmp_path / "real" + real.mkdir() + target = tmp_path / "target" + target.mkdir() + link = tmp_path / "link" + os.symlink(str(target), str(link)) + + dirnames = ["link", "real"] + + # WHEN we prune and sort + manifest_module._prune_symlinks_and_sort_dirnames(dirnames, str(tmp_path)) + + # THEN only the real directory remains; the symlink was dropped + assert dirnames == ["real"] + + def test_empty_list_unchanged(self, tmp_path: Path) -> None: + """An empty list remains empty.""" + # GIVEN an empty dirnames list + dirnames: list[str] = [] + # WHEN we prune and sort + manifest_module._prune_symlinks_and_sort_dirnames(dirnames, str(tmp_path)) + # THEN it is still empty + assert dirnames == [] + + @pytest.mark.skipif( + platform.system() == "Windows", + reason="Symlink creation requires elevated privileges on Windows.", + ) + def test_prunes_symlinks_and_sorts_in_one_call(self, tmp_path: Path) -> None: + """Symlinks are pruned AND the surviving entries are sorted in a + single call. The prune-only and sort-only tests cover each behavior + in isolation, so this guards against a future change where one step + accidentally undoes the other (e.g., sort happening before prune + and reintroducing the link, or prune leaving the list in walk + order).""" + # GIVEN two real subdirs and one symlinked subdir, in non-sorted + # order with the symlink between them + for name in ("c", "a"): + (tmp_path / name).mkdir() + target = tmp_path / "target" + target.mkdir() + link = tmp_path / "link" + os.symlink(str(target), str(link)) + + dirnames = ["c", "link", "a"] + + # WHEN we prune and sort + manifest_module._prune_symlinks_and_sort_dirnames(dirnames, str(tmp_path)) + + # THEN the symlink is gone and the remaining real dirs are sorted + assert dirnames == ["a", "c"] + + +class TestBuildManifestRows: + """Tests for manifest._build_manifest_rows.""" + + def test_returns_rows_in_sorted_order(self, tmp_path: Path, syn: Synapse) -> None: + """Filenames are visited in sorted order so manifest output is + deterministic regardless of filesystem yield order.""" + # GIVEN three real files and an unsorted filenames list + for name in ("c.txt", "a.txt", "b.txt"): + (tmp_path / name).write_text("data") + + # WHEN we build manifest rows under "syn1" + rows = manifest_module._build_manifest_rows( + dirpath=str(tmp_path), + filenames=["c.txt", "a.txt", "b.txt"], + parent_id="syn1", + client=syn, + ) + + # THEN the rows are emitted in sorted filename order + assert [os.path.basename(row["path"]) for row in rows] == [ + "a.txt", + "b.txt", + "c.txt", + ] + # AND every row uses the supplied parent_id + assert all(row["parentId"] == "syn1" for row in rows) + + def test_skips_zero_byte_and_missing(self, tmp_path: Path, syn: Synapse) -> None: + """Zero-byte files and unreadable/missing files are dropped via + _is_uploadable_file.""" + # GIVEN one real non-empty file and one zero-byte file (and a + # filename for a file that doesn't exist on disk) + (tmp_path / "ok.txt").write_text("hello") + (tmp_path / "empty.txt").write_text("") + + # WHEN we build manifest rows for all three filenames + with patch.object(syn.logger, "warning"): + rows = manifest_module._build_manifest_rows( + dirpath=str(tmp_path), + filenames=["ok.txt", "empty.txt", "missing.txt"], + parent_id="syn1", + client=syn, + ) + + # THEN only the non-empty real file produces a row; the zero-byte + # and missing files are filtered out + assert [os.path.basename(row["path"]) for row in rows] == ["ok.txt"] + + def test_empty_filenames_returns_empty(self, tmp_path: Path, syn: Synapse) -> None: + """No filenames produce no rows.""" + # GIVEN an empty filenames list + # WHEN we build manifest rows + rows = manifest_module._build_manifest_rows( + dirpath=str(tmp_path), + filenames=[], + parent_id="syn1", + client=syn, + ) + # THEN the result is empty + assert rows == [] + + +class TestIsUploadableFile: + """Tests for manifest._is_uploadable_file.""" + + def test_regular_file_is_uploadable(self, tmp_path: Path, syn: Synapse) -> None: + """A non-empty readable file returns True.""" + # GIVEN a regular non-empty readable file + f = tmp_path / "ok.txt" + f.write_text("hello") + # THEN the result is True + assert manifest_module._is_uploadable_file(str(f), syn) is True + + def test_zero_byte_file_skipped(self, tmp_path: Path, syn: Synapse) -> None: + """A zero-byte file is skipped because Synapse rejects empty uploads.""" + # GIVEN a zero-byte file + f = tmp_path / "empty.txt" + f.write_text("") + # THEN the result is False (Synapse rejects empty uploads) + assert manifest_module._is_uploadable_file(str(f), syn) is False + + def test_missing_file_skipped(self, tmp_path: Path, syn: Synapse) -> None: + """A path that doesn't exist (e.g. broken symlink, race) is skipped + rather than raising OSError up to the caller.""" + # GIVEN a path that doesn't exist on disk (e.g. broken symlink, + # race against rmdir) + missing = tmp_path / "nope.txt" + # THEN the result is False rather than raising OSError + assert manifest_module._is_uploadable_file(str(missing), syn) is False diff --git a/tests/unit/synapseclient/models/async/unit_test_submission_async.py b/tests/unit/synapseclient/models/async/unit_test_submission_async.py index 8d37ecf65..dbb0465d3 100644 --- a/tests/unit/synapseclient/models/async/unit_test_submission_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_submission_async.py @@ -1,4 +1,5 @@ """Async unit tests for the synapseclient.models.Submission class.""" + import uuid from typing import Dict, List, Union from unittest.mock import AsyncMock, MagicMock, call, patch @@ -200,15 +201,18 @@ async def test_fetch_latest_entity_docker_repository_async(self) -> None: submission = Submission(entity_id=ENTITY_ID, evaluation_id=EVALUATION_ID) # WHEN I call _fetch_latest_entity with mocked Docker repository responses - with patch( - "synapseclient.api.entity_services.get_entity", - new_callable=AsyncMock, - return_value=self.get_example_docker_entity_response(), - ) as mock_get_entity, patch( - "synapseclient.api.docker_commit_services.get_docker_tag", - new_callable=AsyncMock, - return_value=self.get_example_docker_tag_response(), - ) as mock_get_docker_tag: + with ( + patch( + "synapseclient.api.entity_services.get_entity", + new_callable=AsyncMock, + return_value=self.get_example_docker_entity_response(), + ) as mock_get_entity, + patch( + "synapseclient.api.docker_commit_services.get_docker_tag", + new_callable=AsyncMock, + return_value=self.get_example_docker_tag_response(), + ) as mock_get_docker_tag, + ): entity_info = await submission._fetch_latest_entity(synapse_client=self.syn) # THEN it should return the entity information with latest docker tag info @@ -234,15 +238,18 @@ async def test_fetch_latest_entity_docker_empty_results_async(self) -> None: submission = Submission(entity_id=ENTITY_ID, evaluation_id=EVALUATION_ID) # WHEN I call _fetch_latest_entity with empty docker tag results - with patch( - "synapseclient.api.entity_services.get_entity", - new_callable=AsyncMock, - return_value=self.get_example_docker_entity_response(), - ) as mock_get_entity, patch( - "synapseclient.api.docker_commit_services.get_docker_tag", - new_callable=AsyncMock, - return_value={"totalNumberOfResults": 0, "results": []}, - ) as mock_get_docker_tag: + with ( + patch( + "synapseclient.api.entity_services.get_entity", + new_callable=AsyncMock, + return_value=self.get_example_docker_entity_response(), + ) as mock_get_entity, + patch( + "synapseclient.api.docker_commit_services.get_docker_tag", + new_callable=AsyncMock, + return_value={"totalNumberOfResults": 0, "results": []}, + ) as mock_get_docker_tag, + ): entity_info = await submission._fetch_latest_entity(synapse_client=self.syn) # THEN it should return the entity information without docker tag info @@ -268,15 +275,18 @@ async def test_fetch_latest_entity_docker_complex_tag_selection_async(self) -> N submission = Submission(entity_id=ENTITY_ID, evaluation_id=EVALUATION_ID) # WHEN I call _fetch_latest_entity with multiple docker tags with different dates - with patch( - "synapseclient.api.entity_services.get_entity", - new_callable=AsyncMock, - return_value=self.get_example_docker_entity_response(), - ) as mock_get_entity, patch( - "synapseclient.api.docker_commit_services.get_docker_tag", - new_callable=AsyncMock, - return_value=self.get_complex_docker_tag_response(), - ) as mock_get_docker_tag: + with ( + patch( + "synapseclient.api.entity_services.get_entity", + new_callable=AsyncMock, + return_value=self.get_example_docker_entity_response(), + ) as mock_get_entity, + patch( + "synapseclient.api.docker_commit_services.get_docker_tag", + new_callable=AsyncMock, + return_value=self.get_complex_docker_tag_response(), + ) as mock_get_docker_tag, + ): entity_info = await submission._fetch_latest_entity(synapse_client=self.syn) # THEN it should select the tag with the latest createdOn timestamp (v3.0) @@ -302,16 +312,19 @@ async def test_store_async_success(self) -> None: ) # WHEN I call store_async with mocked dependencies - with patch.object( - submission, - "_fetch_latest_entity", - new_callable=AsyncMock, - return_value=self.get_example_entity_response(), - ) as mock_fetch_entity, patch( - "synapseclient.api.evaluation_services.create_submission", - new_callable=AsyncMock, - return_value=self.get_example_submission_response(), - ) as mock_create_submission: + with ( + patch.object( + submission, + "_fetch_latest_entity", + new_callable=AsyncMock, + return_value=self.get_example_entity_response(), + ) as mock_fetch_entity, + patch( + "synapseclient.api.evaluation_services.create_submission", + new_callable=AsyncMock, + return_value=self.get_example_submission_response(), + ) as mock_create_submission, + ): stored_submission = await submission.store_async(synapse_client=self.syn) # THEN it should fetch entity information, create the submission, and fill the object @@ -342,16 +355,19 @@ async def test_store_async_docker_repository_success(self) -> None: } ) - with patch.object( - submission, - "_fetch_latest_entity", - new_callable=AsyncMock, - return_value=docker_entity_with_tag, - ) as mock_fetch_entity, patch( - "synapseclient.api.evaluation_services.create_submission", - new_callable=AsyncMock, - return_value=self.get_example_submission_response(), - ) as mock_create_submission: + with ( + patch.object( + submission, + "_fetch_latest_entity", + new_callable=AsyncMock, + return_value=docker_entity_with_tag, + ) as mock_fetch_entity, + patch( + "synapseclient.api.evaluation_services.create_submission", + new_callable=AsyncMock, + return_value=self.get_example_submission_response(), + ) as mock_create_submission, + ): stored_submission = await submission.store_async(synapse_client=self.syn) # THEN it should handle Docker repository specific logic @@ -375,16 +391,19 @@ async def test_store_async_with_team_data_success(self) -> None: ) # WHEN I call store_async with mocked dependencies - with patch.object( - submission, - "_fetch_latest_entity", - new_callable=AsyncMock, - return_value=self.get_example_entity_response(), - ) as mock_fetch_entity, patch( - "synapseclient.api.evaluation_services.create_submission", - new_callable=AsyncMock, - return_value=self.get_example_submission_response(), - ) as mock_create_submission: + with ( + patch.object( + submission, + "_fetch_latest_entity", + new_callable=AsyncMock, + return_value=self.get_example_entity_response(), + ) as mock_fetch_entity, + patch( + "synapseclient.api.evaluation_services.create_submission", + new_callable=AsyncMock, + return_value=self.get_example_submission_response(), + ) as mock_create_submission, + ): stored_submission = await submission.store_async(synapse_client=self.syn) # THEN it should preserve team information in the stored submission @@ -426,12 +445,15 @@ async def test_delete_async_success(self) -> None: submission = Submission(id=SUBMISSION_ID) # WHEN I call delete_async with mocked dependencies - with patch( - "synapseclient.api.evaluation_services.delete_submission", - new_callable=AsyncMock, - ) as mock_delete_submission, patch( - "synapseclient.Synapse.get_client", - return_value=self.syn, + with ( + patch( + "synapseclient.api.evaluation_services.delete_submission", + new_callable=AsyncMock, + ) as mock_delete_submission, + patch( + "synapseclient.Synapse.get_client", + return_value=self.syn, + ), ): # Mock the logger self.syn.logger = MagicMock() @@ -453,13 +475,16 @@ async def test_cancel_async_success(self) -> None: submission = Submission(id=SUBMISSION_ID) # WHEN I call cancel_async with mocked dependencies - with patch( - "synapseclient.api.evaluation_services.cancel_submission", - new_callable=AsyncMock, - return_value=self.get_example_submission_response(), - ) as mock_cancel_submission, patch( - "synapseclient.Synapse.get_client", - return_value=self.syn, + with ( + patch( + "synapseclient.api.evaluation_services.cancel_submission", + new_callable=AsyncMock, + return_value=self.get_example_submission_response(), + ) as mock_cancel_submission, + patch( + "synapseclient.Synapse.get_client", + return_value=self.syn, + ), ): # Mock the logger self.syn.logger = MagicMock() diff --git a/tests/unit/synapseclient/models/async/unit_test_submission_bundle_async.py b/tests/unit/synapseclient/models/async/unit_test_submission_bundle_async.py index 03dc00fda..959079042 100644 --- a/tests/unit/synapseclient/models/async/unit_test_submission_bundle_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_submission_bundle_async.py @@ -246,7 +246,9 @@ async def mock_async_gen(*args, **kwargs): mock_get_bundles.side_effect = mock_async_gen result = [] - async for bundle in SubmissionBundle.get_evaluation_submission_bundles_async( + async for ( + bundle + ) in SubmissionBundle.get_evaluation_submission_bundles_async( evaluation_id=EVALUATION_ID, status="RECEIVED", synapse_client=self.syn, @@ -296,7 +298,9 @@ async def mock_async_gen(*args, **kwargs): mock_get_bundles.side_effect = mock_async_gen result = [] - async for bundle in SubmissionBundle.get_evaluation_submission_bundles_async( + async for ( + bundle + ) in SubmissionBundle.get_evaluation_submission_bundles_async( evaluation_id=EVALUATION_ID, synapse_client=self.syn, ): diff --git a/tests/unit/synapseclient/models/async/unit_test_wiki_async.py b/tests/unit/synapseclient/models/async/unit_test_wiki_async.py index b6b8c943c..fabaa1600 100644 --- a/tests/unit/synapseclient/models/async/unit_test_wiki_async.py +++ b/tests/unit/synapseclient/models/async/unit_test_wiki_async.py @@ -1,4 +1,5 @@ """Tests for the synapseclient.models.wiki classes.""" + import copy import os from typing import Any, AsyncGenerator, Dict, List @@ -96,12 +97,15 @@ async def test_store_async_missing_owner_id(self) -> None: # WHEN I call `store_async` # THEN it should raise ValueError - with patch( - "synapseclient.models.wiki.put_wiki_order_hint", - new_callable=AsyncMock, - return_value=self.api_response, - ) as mocked_put, pytest.raises( - ValueError, match="Must provide owner_id to store wiki order hint." + with ( + patch( + "synapseclient.models.wiki.put_wiki_order_hint", + new_callable=AsyncMock, + return_value=self.api_response, + ) as mocked_put, + pytest.raises( + ValueError, match="Must provide owner_id to store wiki order hint." + ), ): await order_hint.store_async(synapse_client=self.syn) # THEN the API should not be called @@ -130,10 +134,11 @@ async def test_get_async_missing_owner_id(self) -> None: self.order_hint.owner_id = None # WHEN I call `get_async` # THEN it should raise ValueError - with patch( - "synapseclient.models.wiki.get_wiki_order_hint" - ) as mocked_get, pytest.raises( - ValueError, match="Must provide owner_id to get wiki order hint." + with ( + patch("synapseclient.models.wiki.get_wiki_order_hint") as mocked_get, + pytest.raises( + ValueError, match="Must provide owner_id to get wiki order hint." + ), ): await self.order_hint.get_async(synapse_client=self.syn) # THEN the API should not be called @@ -236,10 +241,11 @@ async def mock_async_generator() -> AsyncGenerator[WikiHistorySnapshot, None]: async def test_get_async_missing_owner_id(self) -> None: # WHEN I call `get_async` - with patch( - "synapseclient.models.wiki.get_wiki_history" - ) as mocked_get, pytest.raises( - ValueError, match="Must provide owner_id to get wiki history." + with ( + patch("synapseclient.models.wiki.get_wiki_history") as mocked_get, + pytest.raises( + ValueError, match="Must provide owner_id to get wiki history." + ), ): async for _ in WikiHistorySnapshot.get_async( owner_id=None, @@ -252,10 +258,9 @@ async def test_get_async_missing_owner_id(self) -> None: async def test_get_async_missing_id(self) -> None: # WHEN I call `get_async` - with patch( - "synapseclient.models.wiki.get_wiki_history" - ) as mocked_get, pytest.raises( - ValueError, match="Must provide id to get wiki history." + with ( + patch("synapseclient.models.wiki.get_wiki_history") as mocked_get, + pytest.raises(ValueError, match="Must provide id to get wiki history."), ): async for _ in WikiHistorySnapshot.get_async( owner_id="syn123", @@ -343,10 +348,11 @@ async def mock_async_generator() -> AsyncGenerator[WikiHeader, None]: async def test_get_async_missing_owner_id(self) -> None: # WHEN I call `get_async` # THEN it should raise ValueError - with patch( - "synapseclient.models.wiki.get_wiki_header_tree" - ) as mocked_get, pytest.raises( - ValueError, match="Must provide owner_id to get wiki header tree." + with ( + patch("synapseclient.models.wiki.get_wiki_header_tree") as mocked_get, + pytest.raises( + ValueError, match="Must provide owner_id to get wiki header tree." + ), ): async for _ in WikiHeader.get_async(owner_id=None, synapse_client=self.syn): pass @@ -419,9 +425,11 @@ def test_to_gzip_file_with_string_content(self) -> None: self.syn.cache.cache_root_dir = "temp_cache_dir" # WHEN I call `_to_gzip_file` with a markdown string - with patch("builtins.open") as mock_open_file, patch( - "gzip.open" - ) as mock_gzip_open, patch("os.path.exists", return_value=True): + with ( + patch("builtins.open") as mock_open_file, + patch("gzip.open") as mock_gzip_open, + patch("os.path.exists", return_value=True), + ): file_path = self.wiki_page._to_gzip_file(self.wiki_page.markdown, self.syn) # THEN the content should be written to a gzipped file @@ -446,9 +454,11 @@ def test_to_gzip_file_with_string_content(self) -> None: ) def test_to_gzip_file_with_gzipped_file(self) -> None: - with patch("os.path.isfile"), patch("gzip.open") as mock_gzip_open, patch( - "builtins.open" - ) as mock_open_file: + with ( + patch("os.path.isfile"), + patch("gzip.open") as mock_gzip_open, + patch("builtins.open") as mock_open_file, + ): self.syn.cache.cache_root_dir = "temp_cache_dir" markdown_file_path = "wiki_markdown_Test Wiki Page.md.gz" @@ -462,10 +472,13 @@ def test_to_gzip_file_with_non_gzipped_file(self) -> None: self.syn.cache.cache_root_dir = "temp_cache_dir" # WHEN I call `_to_gzip_file` with a file path - with patch("os.path.isfile", return_value=True), patch( - "builtins.open", new=mock_open(read_data=b"test content") - ) as mock_open_file, patch("gzip.open") as mock_gzip_open, patch( - "os.path.exists", return_value=True + with ( + patch("os.path.isfile", return_value=True), + patch( + "builtins.open", new=mock_open(read_data=b"test content") + ) as mock_open_file, + patch("gzip.open") as mock_gzip_open, + patch("os.path.exists", return_value=True), ): test_file_path = os.path.join("file_path", "test.txt") file_path = self.wiki_page._to_gzip_file(test_file_path, self.syn) @@ -501,9 +514,11 @@ def test_unzip_gzipped_file_with_markdown(self) -> None: markdown_content_bytes = markdown_content.encode("utf-8") # WHEN I call `_unzip_gzipped_file` with a binary file - with patch("gzip.open") as mock_gzip_open, patch( - "builtins.open" - ) as mock_open_file, patch("pprint.pp") as mock_pprint: + with ( + patch("gzip.open") as mock_gzip_open, + patch("builtins.open") as mock_open_file, + patch("pprint.pp") as mock_pprint, + ): mock_gzip_open.return_value.__enter__.return_value.read.return_value = ( markdown_content_bytes ) @@ -529,9 +544,11 @@ def test_unzip_gzipped_file_with_binary_file(self) -> None: binary_content = b"\x00\x01\x02\x03\xff\xfe\xfd" # WHEN I call `_unzip_gzipped_file` with a binary file - with patch("gzip.open") as mock_gzip_open, patch( - "builtins.open" - ) as mock_open_file, patch("pprint.pp") as mock_pprint: + with ( + patch("gzip.open") as mock_gzip_open, + patch("builtins.open") as mock_open_file, + patch("pprint.pp") as mock_pprint, + ): mock_gzip_open.return_value.__enter__.return_value.read.return_value = ( binary_content ) @@ -556,11 +573,11 @@ def test_unzip_gzipped_file_with_text_file(self) -> None: text_content_bytes = text_content.encode("utf-8") # WHEN I call `_unzip_gzipped_file` with a text file - with patch("gzip.open") as mock_gzip_open, patch( - "builtins.open" - ) as mock_open_file, patch( - "synapseclient.models.wiki.pprint.pp" - ) as mock_pprint: + with ( + patch("gzip.open") as mock_gzip_open, + patch("builtins.open") as mock_open_file, + patch("synapseclient.models.wiki.pprint.pp") as mock_pprint, + ): mock_gzip_open.return_value.__enter__.return_value.read.return_value = ( text_content_bytes ) @@ -650,21 +667,20 @@ def test_should_gzip_file_with_invalid_content(self) -> None: WikiPage._should_gzip_file(123) async def test_get_markdown_file_handle_success_with_markdown(self) -> WikiPage: - with patch( - "synapseclient.models.wiki.WikiPage._to_gzip_file", - return_value=("test.txt.gz"), - ) as mock_to_gzip_file, patch( - "synapseclient.models.wiki.upload_file_handle", - return_value={"id": "handle1"}, - ) as mock_upload, patch.object( - self.syn.logger, "info" - ) as mock_logger_info, patch.object( - self.syn.logger, "debug" - ) as mock_logger_debug, patch( - "os.path.exists", return_value=True - ), patch( - "os.remove" - ) as mock_remove: + with ( + patch( + "synapseclient.models.wiki.WikiPage._to_gzip_file", + return_value=("test.txt.gz"), + ) as mock_to_gzip_file, + patch( + "synapseclient.models.wiki.upload_file_handle", + return_value={"id": "handle1"}, + ) as mock_upload, + patch.object(self.syn.logger, "info") as mock_logger_info, + patch.object(self.syn.logger, "debug") as mock_logger_debug, + patch("os.path.exists", return_value=True), + patch("os.remove") as mock_remove, + ): # WHEN I call `_get_markdown_file_handle` results = await self.wiki_page._get_markdown_file_handle( synapse_client=self.syn @@ -721,21 +737,20 @@ async def test_get_attachment_file_handles_success_multiple_attachments( {"id": "handle2"}, ] - with patch( - "synapseclient.models.wiki.WikiPage._to_gzip_file", - side_effect=mock_to_gzip_file_responses, - ) as mock_to_gzip_file, patch( - "synapseclient.models.wiki.upload_file_handle", - side_effect=mock_upload_responses, - ) as mock_upload, patch.object( - self.syn.logger, "info" - ) as mock_logger_info, patch.object( - self.syn.logger, "debug" - ) as mock_logger_debug, patch( - "os.path.exists", return_value=True - ), patch( - "os.remove" - ) as mock_remove: + with ( + patch( + "synapseclient.models.wiki.WikiPage._to_gzip_file", + side_effect=mock_to_gzip_file_responses, + ) as mock_to_gzip_file, + patch( + "synapseclient.models.wiki.upload_file_handle", + side_effect=mock_upload_responses, + ) as mock_upload, + patch.object(self.syn.logger, "info") as mock_logger_info, + patch.object(self.syn.logger, "debug") as mock_logger_debug, + patch("os.path.exists", return_value=True), + patch("os.remove") as mock_remove, + ): # WHEN I call `_get_attachment_file_handles` results = await self.wiki_page._get_attachment_file_handles( synapse_client=self.syn @@ -817,21 +832,20 @@ async def test_get_attachment_file_handles_single_attachment(self) -> WikiPage: owner_id="syn123", ) - with patch( - "synapseclient.models.wiki.WikiPage._to_gzip_file", - return_value=("/tmp/cache/test_1.txt.gz"), - ) as mock_to_gzip_file, patch( - "synapseclient.models.wiki.upload_file_handle", - return_value={"id": "handle1"}, - ) as mock_upload, patch.object( - self.syn.logger, "info" - ) as mock_logger_info, patch.object( - self.syn.logger, "debug" - ) as mock_logger_debug, patch( - "os.path.exists", return_value=True - ), patch( - "os.remove" - ) as mock_remove: + with ( + patch( + "synapseclient.models.wiki.WikiPage._to_gzip_file", + return_value=("/tmp/cache/test_1.txt.gz"), + ) as mock_to_gzip_file, + patch( + "synapseclient.models.wiki.upload_file_handle", + return_value={"id": "handle1"}, + ) as mock_upload, + patch.object(self.syn.logger, "info") as mock_logger_info, + patch.object(self.syn.logger, "debug") as mock_logger_debug, + patch("os.path.exists", return_value=True), + patch("os.remove") as mock_remove, + ): # WHEN I call `_get_attachment_file_handles` results = await wiki_page._get_attachment_file_handles( synapse_client=self.syn @@ -874,21 +888,20 @@ async def test_get_attachment_file_handles_cache_dir_not_exists(self) -> WikiPag owner_id="syn123", ) - with patch( - "synapseclient.models.wiki.WikiPage._to_gzip_file", - return_value=("/tmp/cache/test_1.txt.gz"), - ), patch( - "synapseclient.models.wiki.upload_file_handle", - return_value={"id": "handle1"}, - ), patch( - "os.path.exists", return_value=False - ), patch.object( - self.syn.logger, "info" - ) as mock_logger_info, patch.object( - self.syn.logger, "debug" - ) as mock_logger_debug, patch( - "os.remove" - ) as mock_remove: + with ( + patch( + "synapseclient.models.wiki.WikiPage._to_gzip_file", + return_value=("/tmp/cache/test_1.txt.gz"), + ), + patch( + "synapseclient.models.wiki.upload_file_handle", + return_value={"id": "handle1"}, + ), + patch("os.path.exists", return_value=False), + patch.object(self.syn.logger, "info") as mock_logger_info, + patch.object(self.syn.logger, "debug") as mock_logger_debug, + patch("os.remove") as mock_remove, + ): # WHEN I call `_get_attachment_file_handles` results = await wiki_page._get_attachment_file_handles( synapse_client=self.syn @@ -913,19 +926,19 @@ async def test_get_attachment_file_handles_upload_failure(self) -> WikiPage: owner_id="syn123", ) - with patch( - "synapseclient.models.wiki.WikiPage._to_gzip_file", - return_value=("/tmp/cache/test_1.txt.gz"), - ), patch( - "synapseclient.models.wiki.upload_file_handle", - side_effect=Exception("Upload failed"), - ), patch( - "os.path.exists", return_value=True - ), patch.object( - self.syn.logger, "debug" - ) as mock_logger_debug, patch( - "os.remove" - ) as mock_remove: + with ( + patch( + "synapseclient.models.wiki.WikiPage._to_gzip_file", + return_value=("/tmp/cache/test_1.txt.gz"), + ), + patch( + "synapseclient.models.wiki.upload_file_handle", + side_effect=Exception("Upload failed"), + ), + patch("os.path.exists", return_value=True), + patch.object(self.syn.logger, "debug") as mock_logger_debug, + patch("os.remove") as mock_remove, + ): # WHEN I call `_get_attachment_file_handles` # THEN it should raise the exception with pytest.raises(Exception, match="Upload failed"): @@ -1074,20 +1087,25 @@ async def test_store_async_new_root_wiki_success(self) -> None: ] # AND mock responses - with patch( - "synapseclient.models.wiki.WikiPage._determine_wiki_action", - return_value="create_root_wiki_page", - ), patch( - "synapseclient.models.wiki.WikiPage._get_markdown_file_handle", - return_value=mock_wiki_with_markdown, - ), patch( - "synapseclient.models.wiki.WikiPage._get_attachment_file_handles", - return_value=mock_wiki_with_attachments, - ), patch( - "synapseclient.models.wiki.post_wiki_page", return_value=post_api_response - ) as mock_post_wiki, patch.object( - self.syn.logger, "info" - ) as mock_logger: + with ( + patch( + "synapseclient.models.wiki.WikiPage._determine_wiki_action", + return_value="create_root_wiki_page", + ), + patch( + "synapseclient.models.wiki.WikiPage._get_markdown_file_handle", + return_value=mock_wiki_with_markdown, + ), + patch( + "synapseclient.models.wiki.WikiPage._get_attachment_file_handles", + return_value=mock_wiki_with_attachments, + ), + patch( + "synapseclient.models.wiki.post_wiki_page", + return_value=post_api_response, + ) as mock_post_wiki, + patch.object(self.syn.logger, "info") as mock_logger, + ): # WHEN I call `store_async` results = await new_wiki_page.store_async(synapse_client=self.syn) @@ -1161,24 +1179,29 @@ async def test_store_async_update_existing_wiki_success(self) -> None: ] # AND mock responses - with patch( - "synapseclient.models.wiki.WikiPage._determine_wiki_action", - return_value="update_existing_wiki_page", - ), patch( - "synapseclient.models.wiki.WikiPage._get_markdown_file_handle", - return_value=mock_wiki_with_markdown, - ), patch( - "synapseclient.models.wiki.WikiPage._get_attachment_file_handles", - return_value=mock_wiki_with_attachments, - ), patch( - "synapseclient.models.wiki.get_wiki_page", - return_value=mock_get_wiki_response, - ) as mock_get_wiki, patch( - "synapseclient.models.wiki.put_wiki_page", - return_value=mock_put_wiki_response, - ) as mock_put_wiki, patch.object( - self.syn.logger, "info" - ) as mock_logger: + with ( + patch( + "synapseclient.models.wiki.WikiPage._determine_wiki_action", + return_value="update_existing_wiki_page", + ), + patch( + "synapseclient.models.wiki.WikiPage._get_markdown_file_handle", + return_value=mock_wiki_with_markdown, + ), + patch( + "synapseclient.models.wiki.WikiPage._get_attachment_file_handles", + return_value=mock_wiki_with_attachments, + ), + patch( + "synapseclient.models.wiki.get_wiki_page", + return_value=mock_get_wiki_response, + ) as mock_get_wiki, + patch( + "synapseclient.models.wiki.put_wiki_page", + return_value=mock_put_wiki_response, + ) as mock_put_wiki, + patch.object(self.syn.logger, "info") as mock_logger, + ): # WHEN I call `store_async` results = await new_wiki_page.store_async(synapse_client=self.syn) # THEN the existing wiki should be retrieved @@ -1244,20 +1267,25 @@ async def test_store_async_create_sub_wiki_success(self) -> None: ] # AND mock responses - with patch( - "synapseclient.models.wiki.WikiPage._determine_wiki_action", - return_value="create_sub_wiki_page", - ), patch( - "synapseclient.models.wiki.WikiPage._get_markdown_file_handle", - return_value=mock_wiki_with_markdown, - ), patch( - "synapseclient.models.wiki.WikiPage._get_attachment_file_handles", - return_value=mock_wiki_with_attachments, - ), patch( - "synapseclient.models.wiki.post_wiki_page", return_value=post_api_response - ) as mock_post_wiki, patch.object( - self.syn.logger, "info" - ) as mock_logger: + with ( + patch( + "synapseclient.models.wiki.WikiPage._determine_wiki_action", + return_value="create_sub_wiki_page", + ), + patch( + "synapseclient.models.wiki.WikiPage._get_markdown_file_handle", + return_value=mock_wiki_with_markdown, + ), + patch( + "synapseclient.models.wiki.WikiPage._get_attachment_file_handles", + return_value=mock_wiki_with_attachments, + ), + patch( + "synapseclient.models.wiki.post_wiki_page", + return_value=post_api_response, + ) as mock_post_wiki, + patch.object(self.syn.logger, "info") as mock_logger, + ): # WHEN I call `store_async` results = await self.wiki_page.store_async(synapse_client=self.syn) @@ -1313,9 +1341,10 @@ async def test_restore_async_missing_required_parameters( ) -> None: # WHEN I call `restore_async` # THEN it should raise ValueError - with patch( - "synapseclient.models.wiki.put_wiki_version" - ) as mocked_put, pytest.raises(ValueError, match=expected_error): + with ( + patch("synapseclient.models.wiki.put_wiki_version") as mocked_put, + pytest.raises(ValueError, match=expected_error), + ): await wiki_page.restore_async(synapse_client=self.syn) # THEN the API should not be called mocked_put.assert_not_called() @@ -1389,12 +1418,16 @@ async def mock_async_generator(values): for item in values: yield item - with patch( - "synapseclient.models.wiki.get_wiki_header_tree", - return_value=mock_async_generator(mock_responses), - ) as mock_get_header_tree, patch( - "synapseclient.models.wiki.get_wiki_page", return_value=self.api_response - ) as mock_get_wiki: + with ( + patch( + "synapseclient.models.wiki.get_wiki_header_tree", + return_value=mock_async_generator(mock_responses), + ) as mock_get_header_tree, + patch( + "synapseclient.models.wiki.get_wiki_page", + return_value=self.api_response, + ) as mock_get_wiki, + ): # WHEN I call `get_async` results = await wiki.get_async(synapse_client=self.syn) @@ -1468,9 +1501,10 @@ async def test_delete_async_missing_required_parameters( ) -> None: # WHEN I call `delete_async` # THEN it should raise ValueError - with patch( - "synapseclient.models.wiki.delete_wiki_page" - ) as mocked_delete, pytest.raises(ValueError, match=expected_error): + with ( + patch("synapseclient.models.wiki.delete_wiki_page") as mocked_delete, + pytest.raises(ValueError, match=expected_error), + ): await wiki_page.delete_async(synapse_client=self.syn) # THEN the API should not be called mocked_delete.assert_not_called() @@ -1504,9 +1538,10 @@ async def test_get_attachment_handles_async_missing_required_parameters( ) -> None: # WHEN I call `get_attachment_handles_async` # THEN it should raise ValueError - with patch( - "synapseclient.models.wiki.get_attachment_handles" - ) as mocked_get, pytest.raises(ValueError, match=expected_error): + with ( + patch("synapseclient.models.wiki.get_attachment_handles") as mocked_get, + pytest.raises(ValueError, match=expected_error), + ): await wiki_page.get_attachment_handles_async(synapse_client=self.syn) # THEN the API should not be called mocked_get.assert_not_called() @@ -1558,9 +1593,10 @@ async def test_get_attachment_async_missing_required_parameters( ) -> None: # WHEN I call `get_attachment_async` # THEN it should raise ValueError - with patch( - "synapseclient.models.wiki.get_attachment_url" - ) as mocked_get, pytest.raises(ValueError, match=expected_error): + with ( + patch("synapseclient.models.wiki.get_attachment_url") as mocked_get, + pytest.raises(ValueError, match=expected_error), + ): await wiki_page.get_attachment_async( file_name=file_name, synapse_client=self.syn, @@ -1581,30 +1617,34 @@ async def test_get_attachment_async_download_file_success(self, file_size) -> No ] } - with patch( - "synapseclient.models.wiki.get_attachment_url", - return_value=mock_attachment_url, - ) as mock_get_url, patch( - "synapseclient.models.wiki.get_attachment_handles", - return_value=mock_filehandle_dict, - ) as mock_get_handles, patch( - "synapseclient.models.wiki.download_from_url", - return_value="/tmp/download/test.txt.gz", - ) as mock_download_from_url, patch( - "synapseclient.models.wiki.download_from_url_multi_threaded", - return_value="/tmp/download/test.txt.gz", - ) as mock_download_from_url_multi_threaded, patch( - "synapseclient.models.wiki._pre_signed_url_expiration_time", - return_value="2030-01-01T00:00:00.000Z", - ) as mock_expiration_time, patch.object( - self.syn.logger, "info" - ) as mock_logger_info, patch( - "os.remove" - ) as mock_remove, patch( - "synapseclient.models.wiki.WikiPage.unzip_gzipped_file" - ) as mock_unzip_gzipped_file, patch.object( - self.syn.logger, "debug" - ) as mock_logger_debug: + with ( + patch( + "synapseclient.models.wiki.get_attachment_url", + return_value=mock_attachment_url, + ) as mock_get_url, + patch( + "synapseclient.models.wiki.get_attachment_handles", + return_value=mock_filehandle_dict, + ) as mock_get_handles, + patch( + "synapseclient.models.wiki.download_from_url", + return_value="/tmp/download/test.txt.gz", + ) as mock_download_from_url, + patch( + "synapseclient.models.wiki.download_from_url_multi_threaded", + return_value="/tmp/download/test.txt.gz", + ) as mock_download_from_url_multi_threaded, + patch( + "synapseclient.models.wiki._pre_signed_url_expiration_time", + return_value="2030-01-01T00:00:00.000Z", + ) as mock_expiration_time, + patch.object(self.syn.logger, "info") as mock_logger_info, + patch("os.remove") as mock_remove, + patch( + "synapseclient.models.wiki.WikiPage.unzip_gzipped_file" + ) as mock_unzip_gzipped_file, + patch.object(self.syn.logger, "debug") as mock_logger_debug, + ): # WHEN I call `get_attachment_async` with download_file=True result = await self.wiki_page.get_attachment_async( file_name="test.txt", @@ -1698,12 +1738,15 @@ async def test_get_attachment_async_download_file_missing_location(self) -> None # AND a mock attachment URL mock_attachment_url = "https://example.com/attachment.txt" - with patch( - "synapseclient.models.wiki.get_attachment_url", - return_value=mock_attachment_url, - ) as mock_get_url, patch( - "synapseclient.models.wiki.get_attachment_handles" - ) as mock_get_handles: + with ( + patch( + "synapseclient.models.wiki.get_attachment_url", + return_value=mock_attachment_url, + ) as mock_get_url, + patch( + "synapseclient.models.wiki.get_attachment_handles" + ) as mock_get_handles, + ): # WHEN I call `get_attachment_async` with download_file=True but no download_location # THEN it should raise ValueError with pytest.raises( @@ -1753,9 +1796,10 @@ async def test_get_attachment_preview_async_missing_required_parameters( ) -> None: # WHEN I call `get_attachment_preview_url_async` # THEN it should raise ValueError - with patch( - "synapseclient.models.wiki.get_attachment_preview_url" - ) as mocked_get, pytest.raises(ValueError, match=expected_error): + with ( + patch("synapseclient.models.wiki.get_attachment_preview_url") as mocked_get, + pytest.raises(ValueError, match=expected_error), + ): await wiki_page.get_attachment_preview_async( file_name=file_name, synapse_client=self.syn, @@ -1778,24 +1822,29 @@ async def test_get_attachment_preview_async_download_file_success( ] } - with patch( - "synapseclient.models.wiki.get_attachment_preview_url", - return_value=mock_attachment_url, - ) as mock_get_url, patch( - "synapseclient.models.wiki.get_attachment_handles", - return_value=mock_filehandle_dict, - ) as mock_get_handles, patch( - "synapseclient.models.wiki.download_from_url", - return_value="/tmp/download/test.txt.gz", - ) as mock_download_from_url, patch( - "synapseclient.models.wiki.download_from_url_multi_threaded", - return_value="/tmp/download/test.txt.gz", - ) as mock_download_from_url_multi_threaded, patch( - "synapseclient.models.wiki._pre_signed_url_expiration_time", - return_value="2030-01-01T00:00:00.000Z", - ) as mock_expiration_time, patch.object( - self.syn.logger, "info" - ) as mock_logger_info: + with ( + patch( + "synapseclient.models.wiki.get_attachment_preview_url", + return_value=mock_attachment_url, + ) as mock_get_url, + patch( + "synapseclient.models.wiki.get_attachment_handles", + return_value=mock_filehandle_dict, + ) as mock_get_handles, + patch( + "synapseclient.models.wiki.download_from_url", + return_value="/tmp/download/test.txt.gz", + ) as mock_download_from_url, + patch( + "synapseclient.models.wiki.download_from_url_multi_threaded", + return_value="/tmp/download/test.txt.gz", + ) as mock_download_from_url_multi_threaded, + patch( + "synapseclient.models.wiki._pre_signed_url_expiration_time", + return_value="2030-01-01T00:00:00.000Z", + ) as mock_expiration_time, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): # WHEN I call `get_attachment_async` with download_file=True result = await self.wiki_page.get_attachment_preview_async( file_name="test.txt", @@ -1883,12 +1932,15 @@ async def test_get_attachment_preview_async_download_file_missing_location( # AND a mock attachment URL mock_attachment_url = "https://example.com/attachment.txt" - with patch( - "synapseclient.models.wiki.get_attachment_preview_url", - return_value=mock_attachment_url, - ) as mock_get_url, patch( - "synapseclient.models.wiki.get_attachment_handles" - ) as mock_get_handles: + with ( + patch( + "synapseclient.models.wiki.get_attachment_preview_url", + return_value=mock_attachment_url, + ) as mock_get_url, + patch( + "synapseclient.models.wiki.get_attachment_handles" + ) as mock_get_handles, + ): # WHEN I call `get_attachment_async` with download_file=True but no download_location # THEN it should raise ValueError with pytest.raises( @@ -1930,9 +1982,10 @@ async def test_get_markdown_file_async_missing_required_parameters( ) -> None: # WHEN I call `get_markdown_async` # THEN it should raise ValueError - with patch( - "synapseclient.models.wiki.get_markdown_url" - ) as mocked_get, pytest.raises(ValueError, match=expected_error): + with ( + patch("synapseclient.models.wiki.get_markdown_url") as mocked_get, + pytest.raises(ValueError, match=expected_error), + ): await wiki_page.get_markdown_file_async(synapse_client=self.syn) # THEN the API should not be called mocked_get.assert_not_called() @@ -1941,21 +1994,22 @@ async def test_get_markdown_file_async_download_file_success(self) -> None: # Mock responses mock_markdown_url = "https://example.com/markdown.md.gz" - with patch( - "synapseclient.models.wiki.get_markdown_url", - return_value=mock_markdown_url, - ) as mock_get_url, patch( - "synapseclient.models.wiki.download_from_url", - return_value="/tmp/download/markdown.md.gz", - ) as mock_download_from_url, patch( - "synapseclient.models.wiki.WikiPage.unzip_gzipped_file" - ) as mock_unzip_gzipped_file, patch.object( - self.syn.logger, "info" - ) as mock_logger_info, patch.object( - self.syn.logger, "debug" - ) as mock_logger_debug, patch( - "os.remove" - ) as mock_remove: + with ( + patch( + "synapseclient.models.wiki.get_markdown_url", + return_value=mock_markdown_url, + ) as mock_get_url, + patch( + "synapseclient.models.wiki.download_from_url", + return_value="/tmp/download/markdown.md.gz", + ) as mock_download_from_url, + patch( + "synapseclient.models.wiki.WikiPage.unzip_gzipped_file" + ) as mock_unzip_gzipped_file, + patch.object(self.syn.logger, "info") as mock_logger_info, + patch.object(self.syn.logger, "debug") as mock_logger_debug, + patch("os.remove") as mock_remove, + ): # WHEN I call `get_markdown_async` with download_file=True result = await self.wiki_page.get_markdown_file_async( download_file=True, @@ -2025,12 +2079,15 @@ async def test_get_markdown_file_async_download_file_missing_location(self) -> N # AND a mock markdown URL mock_markdown_url = "https://example.com/markdown.md" - with patch( - "synapseclient.models.wiki.get_markdown_url", - return_value=mock_markdown_url, - ) as mock_get_url, patch( - "synapseclient.models.wiki.get_attachment_handles" - ) as mock_get_handles: + with ( + patch( + "synapseclient.models.wiki.get_markdown_url", + return_value=mock_markdown_url, + ) as mock_get_url, + patch( + "synapseclient.models.wiki.get_attachment_handles" + ) as mock_get_handles, + ): # WHEN I call `get_markdown_async` with download_file=True but no download_location # THEN it should raise ValueError with pytest.raises( diff --git a/tests/unit/synapseclient/models/synchronous/unit_test_docker.py b/tests/unit/synapseclient/models/synchronous/unit_test_docker.py index 48b44996c..c27b8ecfe 100644 --- a/tests/unit/synapseclient/models/synchronous/unit_test_docker.py +++ b/tests/unit/synapseclient/models/synchronous/unit_test_docker.py @@ -197,15 +197,18 @@ async def mock_get_from_entity_factory( # Separately set annotations to match real implementation entity_to_update.annotations = Annotations.from_dict(test_annotation) - with patch( - "synapseclient.models.docker.get_entity_id_by_repository_name", - new_callable=AsyncMock, - side_effect=mock_get_entity_id_by_repository_name, - ) as mocked_get_id, patch( - "synapseclient.models.docker.get_from_entity_factory", - new_callable=AsyncMock, - side_effect=mock_get_from_entity_factory, - ) as mocked_get_from_factory: + with ( + patch( + "synapseclient.models.docker.get_entity_id_by_repository_name", + new_callable=AsyncMock, + side_effect=mock_get_entity_id_by_repository_name, + ) as mocked_get_id, + patch( + "synapseclient.models.docker.get_from_entity_factory", + new_callable=AsyncMock, + side_effect=mock_get_from_entity_factory, + ) as mocked_get_from_factory, + ): result = docker.get(synapse_client=self.syn) # Verify repository name lookup was called diff --git a/tests/unit/synapseclient/models/unit_test_entityview.py b/tests/unit/synapseclient/models/unit_test_entityview.py index e275ef99d..9a2f89447 100644 --- a/tests/unit/synapseclient/models/unit_test_entityview.py +++ b/tests/unit/synapseclient/models/unit_test_entityview.py @@ -1,4 +1,5 @@ """Tests for the EntityView class.""" + import pytest from synapseclient import Synapse diff --git a/tests/unit/synapseclient/models/unit_test_storage_location.py b/tests/unit/synapseclient/models/unit_test_storage_location.py new file mode 100644 index 000000000..3404005aa --- /dev/null +++ b/tests/unit/synapseclient/models/unit_test_storage_location.py @@ -0,0 +1,1067 @@ +"""Unit tests for the synapseclient.models.StorageLocation class.""" + +import re +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from synapseclient import Synapse +from synapseclient.models.storage_location import ( + _CONCRETE_UPLOAD_TO_STORAGE_TYPE, + _STORAGE_TYPE_TO_UPLOAD_TYPE, + StorageLocation, + StorageLocationType, + UploadType, +) + + +def test_storage_location_type_concrete_type_values(): + """Test that StorageLocationType instances have the correct concrete_type values.""" + assert StorageLocationType.SYNAPSE_S3.concrete_type == "S3StorageLocationSetting" + assert ( + StorageLocationType.EXTERNAL_S3.concrete_type + == "ExternalS3StorageLocationSetting" + ) + assert ( + StorageLocationType.EXTERNAL_GOOGLE_CLOUD.concrete_type + == "ExternalGoogleCloudStorageLocationSetting" + ) + assert ( + StorageLocationType.EXTERNAL_SFTP.concrete_type + == "ExternalStorageLocationSetting" + ) + assert ( + StorageLocationType.EXTERNAL_HTTPS.concrete_type + == "ExternalStorageLocationSetting" + ) + assert StorageLocationType.EXTERNAL_SFTP is not StorageLocationType.EXTERNAL_HTTPS + assert ( + StorageLocationType.EXTERNAL_OBJECT_STORE.concrete_type + == "ExternalObjectStorageLocationSetting" + ) + assert StorageLocationType.PROXY.concrete_type == "ProxyStorageLocationSettings" + + +def test_upload_type_enum_values(): + """Test that UploadType enum has correct values.""" + assert UploadType.S3.value == "S3" + assert UploadType.GOOGLE_CLOUD_STORAGE.value == "GOOGLECLOUDSTORAGE" + assert UploadType.SFTP.value == "SFTP" + assert UploadType.HTTPS.value == "HTTPS" + assert UploadType.PROXYLOCAL.value == "PROXYLOCAL" + assert UploadType.NONE.value == "NONE" + + +def test_storage_location_type_to_upload_type_mapping(): + """Test that StorageLocationType to UploadType mapping is correct.""" + assert _STORAGE_TYPE_TO_UPLOAD_TYPE[StorageLocationType.SYNAPSE_S3] == UploadType.S3 + assert ( + _STORAGE_TYPE_TO_UPLOAD_TYPE[StorageLocationType.EXTERNAL_S3] == UploadType.S3 + ) + assert ( + _STORAGE_TYPE_TO_UPLOAD_TYPE[StorageLocationType.EXTERNAL_GOOGLE_CLOUD] + == UploadType.GOOGLE_CLOUD_STORAGE + ) + assert ( + _STORAGE_TYPE_TO_UPLOAD_TYPE[StorageLocationType.EXTERNAL_SFTP] + == UploadType.SFTP + ) + assert ( + _STORAGE_TYPE_TO_UPLOAD_TYPE[StorageLocationType.EXTERNAL_HTTPS] + == UploadType.HTTPS + ) + assert ( + _STORAGE_TYPE_TO_UPLOAD_TYPE[StorageLocationType.EXTERNAL_OBJECT_STORE] + == UploadType.S3 + ) + assert ( + _STORAGE_TYPE_TO_UPLOAD_TYPE[StorageLocationType.PROXY] == UploadType.PROXYLOCAL + ) + + +def test_concrete_upload_to_storage_type_mapping(): + """Test that concrete type to StorageLocationType mapping is correct.""" + assert ( + _CONCRETE_UPLOAD_TO_STORAGE_TYPE[("S3StorageLocationSetting", "S3")] + == StorageLocationType.SYNAPSE_S3 + ) + assert ( + _CONCRETE_UPLOAD_TO_STORAGE_TYPE[("ExternalS3StorageLocationSetting", "S3")] + == StorageLocationType.EXTERNAL_S3 + ) + assert ( + _CONCRETE_UPLOAD_TO_STORAGE_TYPE[ + ("ExternalGoogleCloudStorageLocationSetting", "GOOGLECLOUDSTORAGE") + ] + == StorageLocationType.EXTERNAL_GOOGLE_CLOUD + ) + assert ( + _CONCRETE_UPLOAD_TO_STORAGE_TYPE[("ExternalStorageLocationSetting", "SFTP")] + == StorageLocationType.EXTERNAL_SFTP + ) + assert ( + _CONCRETE_UPLOAD_TO_STORAGE_TYPE[("ExternalStorageLocationSetting", "HTTPS")] + == StorageLocationType.EXTERNAL_HTTPS + ) + assert ( + _CONCRETE_UPLOAD_TO_STORAGE_TYPE[("ExternalObjectStorageLocationSetting", "S3")] + == StorageLocationType.EXTERNAL_OBJECT_STORE + ) + assert ( + _CONCRETE_UPLOAD_TO_STORAGE_TYPE[("ProxyStorageLocationSettings", "PROXYLOCAL")] + == StorageLocationType.PROXY + ) + + +class TestStorageLocation: + """Unit tests for basic StorageLocation model functionality.""" + + @pytest.fixture(autouse=True, scope="function") + def init_syn(self, syn: Synapse) -> None: + self.syn = syn + + @pytest.mark.parametrize( + "kwargs,expected", + [ + pytest.param( + dict(storage_type=StorageLocationType.SYNAPSE_S3, sts_enabled=False), + { + "concreteType": "org.sagebionetworks.repo.model.project.S3StorageLocationSetting", + "uploadType": "S3", + "banner": None, + "description": None, + "stsEnabled": False, + }, + id="synapse_s3", + ), + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket="my-bucket", + base_key="my/prefix", + sts_enabled=True, + banner="Upload banner", + description="Test storage location", + ), + { + "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting", + "uploadType": "S3", + "banner": "Upload banner", + "description": "Test storage location", + "bucket": "my-bucket", + "baseKey": "my/prefix", + "stsEnabled": True, + "endpointUrl": "https://s3.amazonaws.com", + }, + id="external_s3", + ), + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + bucket="my-gcs-bucket", + base_key="gcs/prefix", + ), + { + "concreteType": "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting", + "uploadType": "GOOGLECLOUDSTORAGE", + "banner": None, + "description": None, + "bucket": "my-gcs-bucket", + "baseKey": "gcs/prefix", + }, + id="external_google_cloud", + ), + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_SFTP, + url="sftp://example.com/path", + supports_subfolders=True, + ), + { + "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting", + "uploadType": "SFTP", + "banner": None, + "description": None, + "url": "sftp://example.com/path", + "supportsSubfolders": True, + }, + id="external_sftp", + ), + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_HTTPS, + url="https://example.com/data", + supports_subfolders=False, + ), + { + "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting", + "uploadType": "HTTPS", + "banner": None, + "description": None, + "url": "https://example.com/data", + "supportsSubfolders": False, + }, + id="external_https", + ), + pytest.param( + dict( + storage_type=StorageLocationType.PROXY, + proxy_url="https://proxy.example.com", + secret_key="my-secret-key", + benefactor_id="syn123", + ), + { + "concreteType": "org.sagebionetworks.repo.model.project.ProxyStorageLocationSettings", + "uploadType": "PROXYLOCAL", + "banner": None, + "description": None, + "proxyUrl": "https://proxy.example.com", + "secretKey": "my-secret-key", + "benefactorId": "syn123", + }, + id="proxy", + ), + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_OBJECT_STORE, + bucket="my-s3-like-bucket", + endpoint_url="https://s3.custom.com", + ), + { + "concreteType": "org.sagebionetworks.repo.model.project.ExternalObjectStorageLocationSetting", + "uploadType": "S3", + "banner": None, + "description": None, + "bucket": "my-s3-like-bucket", + "endpointUrl": "https://s3.custom.com", + }, + id="external_object_store", + ), + ], + ) + def test_to_synapse_request(self, kwargs, expected): + """Test generating a request body for each storage location type.""" + # GIVEN a storage location constructed with the given kwargs + storage = StorageLocation(**kwargs) + # WHEN we generate a request body + request_body = storage._to_synapse_request() + + # THEN it should match the expected structure + assert request_body == expected + + def test_to_synapse_request_missing_storage_type(self): + """Test that _to_synapse_request raises ValueError when storage_type is missing.""" + # GIVEN a storage location without a storage_type + storage = StorageLocation( + bucket="my-bucket", + ) + + # THEN it should raise ValueError + with pytest.raises(ValueError, match="storage_type is required"): + storage._to_synapse_request() + + @pytest.mark.parametrize( + "response,expected_attrs", + [ + pytest.param( + { + "storageLocationId": 12345, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting", + "uploadType": "S3", + "bucket": "my-bucket", + "baseKey": "my/prefix", + "stsEnabled": True, + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + { + "storage_location_id": 12345, + "concrete_type": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting", + "storage_type": StorageLocationType.EXTERNAL_S3, + "upload_type": UploadType.S3, + "bucket": "my-bucket", + "base_key": "my/prefix", + "sts_enabled": True, + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "created_on": "2024-01-01T00:00:00.000Z", + "created_by": 123456, + }, + id="external_s3", + ), + pytest.param( + { + "storageLocationId": 67890, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting", + "uploadType": "GOOGLECLOUDSTORAGE", + "bucket": "my-gcs-bucket", + "baseKey": "gcs/prefix", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + { + "storage_location_id": 67890, + "concrete_type": "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting", + "storage_type": StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + "upload_type": UploadType.GOOGLE_CLOUD_STORAGE, + "bucket": "my-gcs-bucket", + "base_key": "gcs/prefix", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "created_on": "2024-01-01T00:00:00.000Z", + "created_by": 123456, + }, + id="external_google_cloud", + ), + pytest.param( + { + "storageLocationId": 11111, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting", + "uploadType": "SFTP", + "url": "sftp://example.com/path", + "supportsSubfolders": True, + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + { + "storage_location_id": 11111, + "storage_type": StorageLocationType.EXTERNAL_SFTP, + "upload_type": UploadType.SFTP, + "url": "sftp://example.com/path", + "supports_subfolders": True, + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "created_on": "2024-01-01T00:00:00.000Z", + "created_by": 123456, + }, + id="external_sftp", + ), + pytest.param( + { + "storageLocationId": 11112, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting", + "uploadType": "HTTPS", + "url": "https://example.com/data", + "supportsSubfolders": False, + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + { + "storage_location_id": 11112, + "storage_type": StorageLocationType.EXTERNAL_HTTPS, + "upload_type": UploadType.HTTPS, + "url": "https://example.com/data", + "supports_subfolders": False, + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "created_on": "2024-01-01T00:00:00.000Z", + "created_by": 123456, + }, + id="external_https", + ), + pytest.param( + { + "storageLocationId": 22222, + "concreteType": "org.sagebionetworks.repo.model.project.ProxyStorageLocationSettings", + "uploadType": "PROXYLOCAL", + "proxyUrl": "https://proxy.example.com", + "secretKey": "my-secret-key", + "benefactorId": "syn123", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + { + "storage_location_id": 22222, + "storage_type": StorageLocationType.PROXY, + "upload_type": UploadType.PROXYLOCAL, + "proxy_url": "https://proxy.example.com", + "secret_key": "my-secret-key", + "benefactor_id": "syn123", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "created_on": "2024-01-01T00:00:00.000Z", + "created_by": 123456, + }, + id="proxy", + ), + pytest.param( + { + "storageLocationId": 33333, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalObjectStorageLocationSetting", + "uploadType": "S3", + "bucket": "my-object-store-bucket", + "endpointUrl": "https://s3.custom.com", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + { + "storage_location_id": 33333, + "storage_type": StorageLocationType.EXTERNAL_OBJECT_STORE, + "upload_type": UploadType.S3, + "bucket": "my-object-store-bucket", + "endpoint_url": "https://s3.custom.com", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "created_on": "2024-01-01T00:00:00.000Z", + "created_by": 123456, + }, + id="external_object_store", + ), + ], + ) + def test_fill_from_dict(self, response, expected_attrs): + """Test filling from a REST API response for each storage location type.""" + # GIVEN a storage location + storage = StorageLocation() + + # WHEN we fill from the response + storage.fill_from_dict(response) + + # THEN the storage location should be populated correctly + for attr, value in expected_attrs.items(): + assert getattr(storage, attr) == value + + def test_fill_from_dict_unknown_concrete_type_logs_warning(self): + """Test that fill_from_dict logs a warning for an unrecognized concreteType/uploadType pair + instead of raising an exception, and still populates common fields.""" + # GIVEN a response with an unrecognized concreteType paired with a valid uploadType. + # "FutureStorageLocationSetting" is not in _CONCRETE_UPLOAD_TO_STORAGE_TYPE, + # so the (suffix, uploadType) key won't match any known storage type. + response = { + "storageLocationId": 99999, + "concreteType": "org.sagebionetworks.repo.model.project.FutureStorageLocationSetting", + "uploadType": "S3", + "banner": "some banner", + "description": "some description", + "etag": "xyz", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 111, + } + storage = StorageLocation() + mock_client = MagicMock() + + # WHEN we fill from the response + with patch( + "synapseclient.models.storage_location.Synapse.get_client", + return_value=mock_client, + ): + storage.fill_from_dict(response) + + # THEN a warning is logged + mock_client.logger.warning.assert_called_once_with( + "Unrecognized concreteType/uploadType pair " + "(org.sagebionetworks.repo.model.project.FutureStorageLocationSetting, S3); " + "storage_type will not be set and type-specific fields will be empty." + ) + # AND common fields are still populated + assert storage.storage_location_id == 99999 + assert storage.banner == "some banner" + assert storage.description == "some description" + assert storage.etag == "xyz" + assert storage.created_on == "2024-01-01T00:00:00.000Z" + assert storage.created_by == 111 + + # AND storage_type is not set + assert storage.storage_type is None + + def test_fill_from_dict_type_isolation(self): + """Test that fill_from_dict only populates fields relevant to the storage type.""" + # GIVEN an EXTERNAL_SFTP response (no S3 or proxy fields) + sftp_response = { + "storageLocationId": 44444, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting", + "uploadType": "SFTP", + "url": "sftp://example.com/path", + } + storage = StorageLocation() + storage.fill_from_dict(sftp_response) + + # THEN S3/proxy fields are not populated + assert storage.bucket is None + assert storage.base_key is None + assert storage.sts_enabled is False + assert storage.endpoint_url == "https://s3.amazonaws.com" + assert storage.proxy_url is None + assert storage.secret_key is None + + def test_upload_type_enum_coercion_on_init(self): + """Test that upload_type string values are coerced to UploadType via EnumCoercionMixin.""" + # GIVEN a StorageLocation constructed with a string value for upload_type + # (upload_type is the only field in _ENUM_FIELDS; storage_type is not coerced) + storage = StorageLocation(upload_type="S3") + + # THEN upload_type is coerced to the enum type + assert storage.upload_type is UploadType.S3 + + def test_upload_type_enum_coercion_on_setattr(self): + """Test that assigning a string to upload_type coerces it to the enum type.""" + # GIVEN a StorageLocation + storage = StorageLocation() + + # WHEN we assign a string value to upload_type + storage.upload_type = "HTTPS" + + # THEN it is coerced to the enum type + assert storage.upload_type is UploadType.HTTPS + + @pytest.mark.asyncio + async def test_store_async_missing_storage_type(self): + """Test that store_async raises ValueError when storage_type is missing.""" + # GIVEN a storage location without a storage_type + storage = StorageLocation(bucket="my-bucket") + + # THEN it should raise ValueError + with pytest.raises( + ValueError, + match="storage_type is required when creating a storage location", + ): + await storage.store_async() + + @pytest.mark.asyncio + @pytest.mark.parametrize( + "storage_type,kwargs,missing_field", + [ + (StorageLocationType.EXTERNAL_S3, {}, "bucket"), + (StorageLocationType.EXTERNAL_GOOGLE_CLOUD, {}, "bucket"), + ( + StorageLocationType.EXTERNAL_OBJECT_STORE, + {"bucket": "12345", "endpoint_url": None}, + "endpoint_url", + ), + (StorageLocationType.EXTERNAL_SFTP, {}, "url"), + (StorageLocationType.EXTERNAL_HTTPS, {}, "url"), + ( + StorageLocationType.PROXY, + {"secret_key": "key", "benefactor_id": "syn123"}, + "proxy_url", + ), + ( + StorageLocationType.PROXY, + {"proxy_url": "https://proxy.example.com", "benefactor_id": "syn123"}, + "secret_key", + ), + ( + StorageLocationType.PROXY, + {"proxy_url": "https://proxy.example.com", "secret_key": "key"}, + "benefactor_id", + ), + ], + ) + async def test_store_async_missing_attributes( + self, storage_type, kwargs, missing_field + ): + """Test that store_async raises ValueError when missing required attributes.""" + # GIVEN a storage location with missing required attributes + storage = StorageLocation(storage_type=storage_type, **kwargs) + + # THEN it should raise ValueError + with pytest.raises( + ValueError, + match=re.escape( + f"missing the '{missing_field}' attribute for {storage_type}" + ), + ): + await storage.store_async(synapse_client=self.syn) + + @pytest.mark.parametrize( + "kwargs,mock_response,expected_attrs", + [ + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_S3, + bucket="my-bucket", + base_key="my/prefix", + ), + { + "storageLocationId": 12345, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting", + "uploadType": "S3", + "bucket": "my-bucket", + "baseKey": "my/prefix", + "stsEnabled": False, + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=12345, + storage_type=StorageLocationType.EXTERNAL_S3, + upload_type=UploadType.S3, + bucket="my-bucket", + base_key="my/prefix", + sts_enabled=False, + banner=None, + description=None, + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + id="external_s3", + ), + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + bucket="my-gcs-bucket", + ), + { + "storageLocationId": 67890, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting", + "uploadType": "GOOGLECLOUDSTORAGE", + "bucket": "my-gcs-bucket", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=67890, + storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + upload_type=UploadType.GOOGLE_CLOUD_STORAGE, + bucket="my-gcs-bucket", + ), + id="external_google_cloud", + ), + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_OBJECT_STORE, + bucket="my-object-store-bucket", + endpoint_url="https://s3.custom.com", + ), + { + "storageLocationId": 33333, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalObjectStorageLocationSetting", + "uploadType": "S3", + "bucket": "my-object-store-bucket", + "endpointUrl": "https://s3.custom.com", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=33333, + storage_type=StorageLocationType.EXTERNAL_OBJECT_STORE, + upload_type=UploadType.S3, + bucket="my-object-store-bucket", + endpoint_url="https://s3.custom.com", + banner=None, + description=None, + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + id="external_object_store", + ), + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_SFTP, + url="sftp://example.com/path", + ), + { + "storageLocationId": 11111, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting", + "uploadType": "SFTP", + "url": "sftp://example.com/path", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=11111, + storage_type=StorageLocationType.EXTERNAL_SFTP, + upload_type=UploadType.SFTP, + url="sftp://example.com/path", + ), + id="external_sftp", + ), + pytest.param( + dict( + storage_type=StorageLocationType.EXTERNAL_HTTPS, + url="https://example.com/data", + ), + { + "storageLocationId": 11112, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting", + "uploadType": "HTTPS", + "url": "https://example.com/data", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=11112, + storage_type=StorageLocationType.EXTERNAL_HTTPS, + upload_type=UploadType.HTTPS, + url="https://example.com/data", + banner=None, + description=None, + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + id="external_https", + ), + pytest.param( + dict( + storage_type=StorageLocationType.PROXY, + proxy_url="https://proxy.example.com", + secret_key="my-secret-key", + benefactor_id="syn123", + ), + { + "storageLocationId": 22222, + "concreteType": "org.sagebionetworks.repo.model.project.ProxyStorageLocationSettings", + "uploadType": "PROXYLOCAL", + "proxyUrl": "https://proxy.example.com", + "secretKey": "my-secret-key", + "benefactorId": "syn123", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=22222, + storage_type=StorageLocationType.PROXY, + upload_type=UploadType.PROXYLOCAL, + proxy_url="https://proxy.example.com", + secret_key="my-secret-key", + benefactor_id="syn123", + banner=None, + description=None, + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + id="proxy", + ), + ], + ) + async def test_store_async_successful_creation( + self, kwargs, mock_response, expected_attrs + ): + """Test that store_async creates a storage location successfully for each storage type.""" + # GIVEN a storage location + storage = StorageLocation(**kwargs) + + # WHEN we create the storage location + with patch( + "synapseclient.models.storage_location.create_storage_location_setting", + new_callable=AsyncMock, + return_value=mock_response, + ): + await storage.store_async(synapse_client=self.syn) + + # THEN it should be populated from the mock response + for attr, value in expected_attrs.items(): + assert getattr(storage, attr) == value + + @pytest.mark.asyncio + async def test_get_async_missing_id(self): + """Test that get_async raises ValueError when storage_location_id is missing.""" + # GIVEN a storage location without an ID + storage = StorageLocation() + + # THEN it should raise ValueError + with pytest.raises( + ValueError, + match="storage_location_id is required to retrieve a storage location", + ): + await storage.get_async(synapse_client=self.syn) + + @pytest.mark.parametrize( + "mock_response,expected_attrs", + [ + ( + { + "storageLocationId": 12345, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting", + "uploadType": "S3", + "bucket": "my-bucket", + "baseKey": "my/prefix", + "stsEnabled": False, + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=12345, + storage_type=StorageLocationType.EXTERNAL_S3, + upload_type=UploadType.S3, + bucket="my-bucket", + base_key="my/prefix", + sts_enabled=False, + banner=None, + description=None, + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + ), + ( + { + "storageLocationId": 67890, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting", + "uploadType": "GOOGLECLOUDSTORAGE", + "bucket": "my-gcs-bucket", + "baseKey": "gcs/prefix", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=67890, + storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + upload_type=UploadType.GOOGLE_CLOUD_STORAGE, + bucket="my-gcs-bucket", + base_key="gcs/prefix", + banner="Upload banner", + description="Test storage location", + ), + ), + ( + { + "storageLocationId": 33333, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalObjectStorageLocationSetting", + "uploadType": "S3", + "bucket": "my-object-store-bucket", + "endpointUrl": "https://s3.custom.com", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=33333, + storage_type=StorageLocationType.EXTERNAL_OBJECT_STORE, + upload_type=UploadType.S3, + bucket="my-object-store-bucket", + endpoint_url="https://s3.custom.com", + banner="Upload banner", + description="Test storage location", + ), + ), + ( + { + "storageLocationId": 12345, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalS3StorageLocationSetting", + "uploadType": "S3", + "bucket": "my-bucket", + "baseKey": "my/prefix", + "stsEnabled": False, + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=12345, + storage_type=StorageLocationType.EXTERNAL_S3, + upload_type=UploadType.S3, + bucket="my-bucket", + base_key="my/prefix", + sts_enabled=False, + banner=None, + description=None, + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + ), + ( + { + "storageLocationId": 67890, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalGoogleCloudStorageLocationSetting", + "uploadType": "GOOGLECLOUDSTORAGE", + "bucket": "my-gcs-bucket", + "baseKey": "gcs/prefix", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=67890, + storage_type=StorageLocationType.EXTERNAL_GOOGLE_CLOUD, + upload_type=UploadType.GOOGLE_CLOUD_STORAGE, + bucket="my-gcs-bucket", + base_key="gcs/prefix", + banner="Upload banner", + description="Test storage location", + ), + ), + ( + { + "storageLocationId": 33333, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalObjectStorageLocationSetting", + "uploadType": "S3", + "bucket": "my-object-store-bucket", + "endpointUrl": "https://s3.custom.com", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=33333, + storage_type=StorageLocationType.EXTERNAL_OBJECT_STORE, + upload_type=UploadType.S3, + bucket="my-object-store-bucket", + endpoint_url="https://s3.custom.com", + banner="Upload banner", + description="Test storage location", + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + ), + ( + { + "storageLocationId": 11111, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting", + "uploadType": "SFTP", + "url": "sftp://example.com/path", + "supportsSubfolders": True, + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=11111, + storage_type=StorageLocationType.EXTERNAL_SFTP, + upload_type=UploadType.SFTP, + url="sftp://example.com/path", + supports_subfolders=True, + banner="Upload banner", + description="Test storage location", + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + ), + ( + { + "storageLocationId": 11112, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalStorageLocationSetting", + "uploadType": "HTTPS", + "url": "https://example.com/data", + "supportsSubfolders": False, + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=11112, + storage_type=StorageLocationType.EXTERNAL_HTTPS, + upload_type=UploadType.HTTPS, + url="https://example.com/data", + supports_subfolders=False, + banner="Upload banner", + description="Test storage location", + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + ), + ( + { + "storageLocationId": 22222, + "concreteType": "org.sagebionetworks.repo.model.project.ProxyStorageLocationSettings", + "uploadType": "PROXYLOCAL", + "proxyUrl": "https://proxy.example.com", + "secretKey": "my-secret-key", + "benefactorId": "syn123", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=22222, + storage_type=StorageLocationType.PROXY, + upload_type=UploadType.PROXYLOCAL, + proxy_url="https://proxy.example.com", + secret_key="my-secret-key", + benefactor_id="syn123", + banner="Upload banner", + description="Test storage location", + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + ), + ( + { + "storageLocationId": 33333, + "concreteType": "org.sagebionetworks.repo.model.project.ExternalObjectStorageLocationSetting", + "uploadType": "S3", + "bucket": "my-object-store-bucket", + "endpointUrl": "https://s3.custom.com", + "banner": "Upload banner", + "description": "Test storage location", + "etag": "abc123", + "createdOn": "2024-01-01T00:00:00.000Z", + "createdBy": 123456, + }, + dict( + storage_location_id=33333, + storage_type=StorageLocationType.EXTERNAL_OBJECT_STORE, + upload_type=UploadType.S3, + bucket="my-object-store-bucket", + endpoint_url="https://s3.custom.com", + banner="Upload banner", + description="Test storage location", + etag="abc123", + created_on="2024-01-01T00:00:00.000Z", + created_by=123456, + ), + ), + ], + ) + @pytest.mark.asyncio + async def test_get_async_successful_retrieval(self, mock_response, expected_attrs): + """Test that get_async retrieves a storage location successfully.""" + # GIVEN a storage location with an ID + storage = StorageLocation(storage_location_id=12345) + + # WHEN we retrieve the storage location + with patch( + "synapseclient.models.storage_location.get_storage_location_setting", + new_callable=AsyncMock, + return_value=mock_response, + ): + await storage.get_async(synapse_client=self.syn) + + # THEN it should be populated from the mock response + for attr, value in expected_attrs.items(): + assert getattr(storage, attr) == value diff --git a/tests/unit/synapseclient/operations/unit_test_download_list_operations.py b/tests/unit/synapseclient/operations/unit_test_download_list_operations.py new file mode 100644 index 000000000..38a45a56d --- /dev/null +++ b/tests/unit/synapseclient/operations/unit_test_download_list_operations.py @@ -0,0 +1,464 @@ +"""Unit tests for download_list operation functions.""" + +import csv +import json +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from synapseclient import Synapse +from synapseclient.core.exceptions import SynapseError, SynapseHTTPError +from synapseclient.models.table_components import CsvTableDescriptor +from synapseclient.operations import DownloadListItem +from synapseclient.operations.download_list_operations import ( + _download_manifest_file, + _read_manifest_rows, + _validate_and_extend_columns, + download_list_add_async, + download_list_clear_async, + download_list_files_async, + download_list_manifest_async, + download_list_remove_async, +) + + +class TestReadManifestRows: + """Tests for _read_manifest_rows.""" + + def _write_csv(self, tmp_path: Path, header: list[str], rows: list[dict]) -> str: + path = str(tmp_path / "manifest.csv") + with open(path, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=header) + writer.writeheader() + writer.writerows(rows) + return path + + @pytest.mark.parametrize( + "csv_content, expected_columns, expected_row_count, row_checks", + [ + pytest.param( + "ID,versionNumber,name\nsyn111,1,file_a.txt\nsyn222,3,file_b.txt\n", + ["ID", "versionNumber", "name"], + 2, + {0: {"ID": "syn111", "versionNumber": "1"}, 1: {"ID": "syn222"}}, + id="standard_manifest", + ), + pytest.param( + "ID,versionNumber\n", + ["ID", "versionNumber"], + 0, + {}, + id="headers_only_no_rows", + ), + pytest.param( + "", + None, + 0, + {}, + id="empty_file", + ), + pytest.param( + "ID\nsyn999\n", + ["ID"], + 1, + {0: {"ID": "syn999"}}, + id="single_column", + ), + pytest.param( + "ID,versionNumber\nsyn123,\n", + ["ID", "versionNumber"], + 1, + {0: {"ID": "syn123", "versionNumber": ""}}, + id="empty_string_values_preserved", + ), + pytest.param( + 'ID,name\nsyn123,"file, with comma.txt"\n', + ["ID", "name"], + 1, + {0: {"name": "file, with comma.txt"}}, + id="quoted_field_with_comma", + ), + ], + ) + def test_read_manifest_rows( + self, + tmp_path: Path, + csv_content: str, + expected_columns: list[str], + expected_row_count: int, + row_checks: dict[int, dict[str, str]], + ) -> None: + """_read_manifest_rows returns correct columns and rows for various CSV shapes.""" + # GIVEN a CSV file with the specified content + path = str(tmp_path / "manifest.csv") + with open(path, "w", newline="") as f: + f.write(csv_content) + + # WHEN I read the manifest + columns, rows = _read_manifest_rows(path) + + # THEN columns and row count match expectations + assert columns == expected_columns + assert len(rows) == expected_row_count + + # AND specific cell values match + for row_idx, expected_values in row_checks.items(): + for key, value in expected_values.items(): + assert rows[row_idx][key] == value + + def test_many_rows(self, tmp_path: Path) -> None: + """Reading a manifest with many rows returns all of them.""" + # GIVEN a CSV with 500 rows + header = ["ID", "versionNumber"] + data = [{"ID": f"syn{i}", "versionNumber": str(i)} for i in range(500)] + path = self._write_csv(tmp_path, header, data) + + # WHEN I read the manifest + columns, rows = _read_manifest_rows(path) + + # THEN all 500 rows are returned + assert columns == header + assert len(rows) == 500 + assert rows[0]["ID"] == "syn0" + assert rows[499]["ID"] == "syn499" + + +class TestValidateAndExtendColumns: + """Tests for _validate_and_extend_columns.""" + + @pytest.mark.parametrize( + "columns, expected", + [ + pytest.param( + ["ID", "versionNumber"], + ["ID", "versionNumber", "path", "error"], + id="standard_columns", + ), + pytest.param( + ["ID"], + ["ID", "path", "error"], + id="single_column", + ), + pytest.param( + ["ID", "versionNumber", "name", "createdBy"], + ["ID", "versionNumber", "name", "createdBy", "path", "error"], + id="many_columns", + ), + ], + ) + def test_appends_path_and_error( + self, columns: list[str], expected: list[str] + ) -> None: + """Valid columns are returned with path and error appended.""" + assert _validate_and_extend_columns(columns) == expected + + def test_none_columns_raises(self) -> None: + """None columns (empty manifest) raises SynapseError.""" + with pytest.raises(SynapseError, match="no headers"): + _validate_and_extend_columns(None) + + @pytest.mark.parametrize( + "columns", + [ + pytest.param(["ID", "path"], id="contains_path"), + pytest.param(["ID", "error"], id="contains_error"), + pytest.param(["path", "error"], id="contains_both"), + ], + ) + def test_reserved_column_names_raise(self, columns: list[str]) -> None: + """Columns containing reserved names 'path' or 'error' raise SynapseError.""" + with pytest.raises(SynapseError, match="reserved column names"): + _validate_and_extend_columns(columns) + + +class TestDownloadListClearAsync: + """Tests for download_list_clear_async.""" + + async def test_download_list_clear_async(self, syn: Synapse) -> None: + """download_list_clear_async issues a DELETE to /download/list via the client.""" + # GIVEN a mocked rest_delete_async on the client + with patch.object( + syn, + "rest_delete_async", + new_callable=AsyncMock, + return_value=None, + ) as mocked_delete: + # WHEN I call download_list_clear_async with an explicit client + result = await download_list_clear_async(synapse_client=syn) + + # THEN the client issues a DELETE to /download/list + mocked_delete.assert_awaited_once_with("/download/list") + # AND the method returns None + assert result is None + + +class TestDownloadListAddAsync: + """Tests for download_list_add_async.""" + + async def test_download_list_add_async(self, syn: Synapse) -> None: + """download_list_add_async POSTs the batch to /download/list/add and returns the count.""" + # GIVEN a list of files to add and a mocked rest_post_async on the client + files = [ + DownloadListItem(file_entity_id="syn111", version_number=1), + DownloadListItem(file_entity_id="syn222", version_number=None), + ] + with patch.object( + syn, + "rest_post_async", + new_callable=AsyncMock, + return_value={"numberOfFilesAdded": 2}, + ) as mocked_post: + # WHEN I call download_list_add_async with an explicit client + result = await download_list_add_async(files=files, synapse_client=syn) + + # THEN the client POSTs the batch to /download/list/add + mocked_post.assert_awaited_once() + call = mocked_post.await_args + assert call.args == ("/download/list/add",) + assert json.loads(call.kwargs["body"]) == { + "batchToAdd": [ + {"fileEntityId": "syn111", "versionNumber": 1}, + {"fileEntityId": "syn222", "versionNumber": None}, + ] + } + # AND the method returns the number of files added + assert result == 2 + + +class TestDownloadListRemoveAsync: + """Tests for download_list_remove_async.""" + + async def test_download_list_remove_async(self, syn: Synapse) -> None: + """download_list_remove_async POSTs the batch to /download/list/remove and returns the count.""" + # GIVEN a list of files to remove and a mocked rest_post_async on the client + files = [ + DownloadListItem(file_entity_id="syn111", version_number=1), + DownloadListItem(file_entity_id="syn222", version_number=None), + ] + with patch.object( + syn, + "rest_post_async", + new_callable=AsyncMock, + return_value={"numberOfFilesRemoved": 2}, + ) as mocked_post: + # WHEN I call download_list_remove_async with an explicit client + result = await download_list_remove_async(files=files, synapse_client=syn) + + # THEN the client POSTs the batch to /download/list/remove + mocked_post.assert_awaited_once() + call = mocked_post.await_args + assert call.args == ("/download/list/remove",) + assert json.loads(call.kwargs["body"]) == { + "batchToRemove": [ + {"fileEntityId": "syn111", "versionNumber": 1}, + {"fileEntityId": "syn222", "versionNumber": None}, + ] + } + # AND the method returns the number of files removed + assert result == 2 + + +class TestDownloadListManifestAsync: + """Tests for download_list_manifest_async.""" + + async def test_download_list_manifest_async(self, syn: Synapse) -> None: + """download_list_manifest_async submits the request and returns the downloaded manifest path.""" + # GIVEN a mocked DownloadListManifestRequest whose job populates manifest_path + manifest_path = "/tmp/manifest.csv" + mock_instance = MagicMock() + mock_instance.send_job_and_wait_async = AsyncMock(return_value=None) + mock_instance.manifest_path = manifest_path + descriptor = CsvTableDescriptor() + with patch( + "synapseclient.operations.download_list_operations._DownloadListManifestRequest", + return_value=mock_instance, + ) as mocked_request_cls: + # WHEN I call download_list_manifest_async with an explicit descriptor and destination + result = await download_list_manifest_async( + csv_table_descriptor=descriptor, + destination="/tmp/out", + synapse_client=syn, + ) + + # THEN the request is built with the provided descriptor + mocked_request_cls.assert_called_once_with(csv_table_descriptor=descriptor) + # AND the job is awaited once with the destination and client + mock_instance.send_job_and_wait_async.assert_awaited_once_with( + post_exchange_args={"destination": "/tmp/out"}, + synapse_client=syn, + ) + # AND the method returns the manifest path set by the job + assert result == manifest_path + + async def test_download_list_manifest_async_no_file_produced( + self, syn: Synapse + ) -> None: + """download_list_manifest_async raises SynapseError when the job finishes without a file.""" + # GIVEN a mocked DownloadListManifestRequest whose job leaves manifest_path None + mock_instance = MagicMock() + mock_instance.send_job_and_wait_async = AsyncMock(return_value=None) + mock_instance.manifest_path = None + with patch( + "synapseclient.operations.download_list_operations._DownloadListManifestRequest", + return_value=mock_instance, + ): + # WHEN I call download_list_manifest_async + # THEN a SynapseError is raised + with pytest.raises(SynapseError, match="no local file was produced"): + await download_list_manifest_async(synapse_client=syn) + + +class TestDownloadListFilesAsync: + """Tests for download_list_files_async.""" + + async def test_empty_cart_propagates_synapse_http_error(self, syn: Synapse) -> None: + """download_list_files_async propagates the server's 'No files available for + download' error when the cart is empty. + + Synapse returns this error from the manifest async job rather than + returning an empty manifest, and the method must not swallow it. + """ + # GIVEN download_list_manifest_async raises SynapseHTTPError (simulating an empty cart) + with patch( + "synapseclient.operations.download_list_operations.download_list_manifest_async", + new_callable=AsyncMock, + side_effect=SynapseHTTPError("No files available for download"), + ): + # WHEN I call download_list_files_async + # THEN the error propagates to the caller unchanged + with pytest.raises( + SynapseHTTPError, match="No files available for download" + ): + await download_list_files_async(synapse_client=syn) + + +class TestDownloadManifestFile: + """Tests for _download_manifest_file.""" + + async def test_success_annotates_row_and_returns_item(self, syn: Synapse) -> None: + """On success, the row is annotated with path/error and a DownloadListItem + is returned with the resolved entity id and version.""" + # GIVEN a manifest row with a version and a mocked File whose + # get_async returns a file with a local path + row = {"ID": "syn111", "versionNumber": "2"} + mock_file = MagicMock() + mock_file.path = "/tmp/downloads/file_a.txt" + mock_file_cls = MagicMock( + return_value=MagicMock(get_async=AsyncMock(return_value=mock_file)) + ) + with patch( + "synapseclient.models.file.File", + mock_file_cls, + ): + # WHEN I call _download_manifest_file + result = await _download_manifest_file( + row, + download_location="/tmp/downloads", + synapse_client=syn, + ) + + # THEN the File is constructed with the coerced int version and + # download_location as path + mock_file_cls.assert_called_once_with( + id="syn111", + version_number=2, + path="/tmp/downloads", + ) + # AND the row is annotated with the local path and empty error + assert row["path"] == "/tmp/downloads/file_a.txt" + assert row["error"] == "" + # AND the returned DownloadListItem carries the entity id and version + assert result == DownloadListItem(file_entity_id="syn111", version_number=2) + + @pytest.mark.parametrize( + "row", + [ + pytest.param({"ID": "syn111"}, id="no_version_key"), + pytest.param({"ID": "syn111", "versionNumber": ""}, id="blank_version"), + pytest.param({"ID": "syn111", "versionNumber": None}, id="none_version"), + ], + ) + async def test_missing_version_fetches_latest( + self, syn: Synapse, row: dict + ) -> None: + """A missing or blank versionNumber is passed through as None so + File.get_async fetches the latest version.""" + # GIVEN a manifest row without a usable version and a mocked File + mock_file = MagicMock() + mock_file.path = "/tmp/downloads/latest.txt" + mock_file_cls = MagicMock( + return_value=MagicMock(get_async=AsyncMock(return_value=mock_file)) + ) + with patch( + "synapseclient.models.file.File", + mock_file_cls, + ): + # WHEN I call _download_manifest_file + result = await _download_manifest_file( + row, + download_location="/tmp/downloads", + synapse_client=syn, + ) + + # THEN File is constructed with version_number=None (meaning latest) + mock_file_cls.assert_called_once_with( + id="syn111", + version_number=None, + path="/tmp/downloads", + ) + # AND the row is annotated for success + assert row["path"] == "/tmp/downloads/latest.txt" + assert row["error"] == "" + # AND the returned DownloadListItem also carries version_number=None + assert result == DownloadListItem(file_entity_id="syn111", version_number=None) + + async def test_get_async_failure_annotates_row_and_returns_none( + self, syn: Synapse + ) -> None: + """When File.get_async raises, the exception is swallowed, the row is + annotated with the error message, and None is returned so the batch + continues.""" + # GIVEN a manifest row and a File whose get_async raises + row = {"ID": "syn999", "versionNumber": "1"} + error_message = "boom" + mock_file_cls = MagicMock( + return_value=MagicMock( + get_async=AsyncMock(side_effect=RuntimeError(error_message)) + ) + ) + with patch( + "synapseclient.models.file.File", + mock_file_cls, + ): + # WHEN I call _download_manifest_file + result = await _download_manifest_file(row, synapse_client=syn) + + # THEN the row is annotated with the error message and empty path + assert row["path"] == "" + assert row["error"] == error_message + # AND None is returned (so the caller skips this row) + assert result is None + + async def test_file_with_no_path_sets_row_path_empty(self, syn: Synapse) -> None: + """If get_async returns a file whose path is None, the row's path is + normalized to an empty string rather than the literal None.""" + # GIVEN a mocked File whose returned instance has path=None + row = {"ID": "syn111", "versionNumber": "1"} + mock_file = MagicMock() + mock_file.path = None + mock_file_cls = MagicMock( + return_value=MagicMock(get_async=AsyncMock(return_value=mock_file)) + ) + with patch( + "synapseclient.models.file.File", + mock_file_cls, + ): + # WHEN I call _download_manifest_file + result = await _download_manifest_file(row, synapse_client=syn) + + # THEN the row's path is an empty string (not None) + assert row["path"] == "" + assert row["error"] == "" + # AND a DownloadListItem is still returned for the successful call + assert result == DownloadListItem(file_entity_id="syn111", version_number=1) diff --git a/tests/unit/synapseclient/operations/unit_test_utility_operations.py b/tests/unit/synapseclient/operations/unit_test_utility_operations.py index 0f122ae65..e97aea51f 100644 --- a/tests/unit/synapseclient/operations/unit_test_utility_operations.py +++ b/tests/unit/synapseclient/operations/unit_test_utility_operations.py @@ -1,4 +1,5 @@ """Unit tests for utility_operations wrapper functions.""" + import json from unittest.mock import AsyncMock, MagicMock, patch diff --git a/tests/unit/synapseclient/services/unit_test_json_schema.py b/tests/unit/synapseclient/services/unit_test_json_schema.py index e109118a0..0587b9053 100644 --- a/tests/unit/synapseclient/services/unit_test_json_schema.py +++ b/tests/unit/synapseclient/services/unit_test_json_schema.py @@ -1,4 +1,5 @@ """TODO: Add more tests""" + import pytest from synapseclient.services import json_schema diff --git a/tests/unit/synapseclient/services/unit_test_manifest.py b/tests/unit/synapseclient/services/unit_test_manifest.py new file mode 100644 index 000000000..ef276aeaf --- /dev/null +++ b/tests/unit/synapseclient/services/unit_test_manifest.py @@ -0,0 +1,543 @@ +import csv +import datetime +import os +import tempfile +from unittest.mock import patch + +import pytest + +from synapseclient.models import Activity, File +from synapseclient.models.activity import UsedEntity, UsedURL +from synapseclient.models.services.manifest import ( + MANIFEST_CSV_FILENAME, + _convert_manifest_data_items_to_string_list, + _convert_manifest_data_row_to_dict, + _extract_entity_metadata_for_manifest_csv, + _get_entity_provenance_dict_for_manifest, + _manifest_csv_filename, + _write_manifest_data_csv, + generate_manifest_csv, +) + + +class TestManifestCsvFilename: + """Tests for the _manifest_csv_filename helper.""" + + def test_plain_directory(self) -> None: + # GIVEN a plain absolute path + # WHEN _manifest_csv_filename is called + result = _manifest_csv_filename("/tmp/mydir") + + # THEN it joins the path with the manifest filename + assert result == os.path.join("/tmp/mydir", MANIFEST_CSV_FILENAME) + + def test_tilde_is_expanded(self) -> None: + # GIVEN a path starting with ~ + # WHEN _manifest_csv_filename is called + result = _manifest_csv_filename("~/mydir") + + # THEN ~ is expanded to the user's home directory + assert result == os.path.join( + os.path.expanduser("~/mydir"), MANIFEST_CSV_FILENAME + ) + assert "~" not in result + + def test_filename_is_manifest_csv(self) -> None: + # GIVEN any directory path + # WHEN _manifest_csv_filename is called + result = _manifest_csv_filename("/some/path") + + # THEN the basename of the result is MANIFEST_CSV_FILENAME + assert os.path.basename(result) == MANIFEST_CSV_FILENAME + + +class TestGenerateManifestCsv: + """Tests for the generate_manifest_csv and related helper functions.""" + + @pytest.fixture(scope="function", autouse=True) + def setup_method(self, syn) -> None: + self.syn = syn + + def _make_file( + self, + syn_id: str = "syn123", + name: str = "file.txt", + path: str = "/data/file.txt", + parent_id: str = "syn456", + content_type: str = "text/plain", + synapse_store: bool = True, + annotations: dict = None, + activity: Activity = None, + ) -> File: + f = File( + id=syn_id, + name=name, + path=path, + parent_id=parent_id, + content_type=content_type, + synapse_store=synapse_store, + ) + if annotations: + f.annotations = annotations + if activity: + f.activity = activity + return f + + def test_extract_entity_metadata_includes_annotations_and_activity(self) -> None: + # GIVEN a File entity with provenance + activity = Activity( + name="My Pipeline", + description="Run analysis", + used=[UsedEntity(target_id="syn111", target_version_number=1)], + executed=[UsedURL(url="https://github.com/example/pipeline")], + ) + f = self._make_file( + activity=activity, annotations={"tissue": ["brain"], "count": [42]} + ) + + # WHEN metadata is extracted + keys, data = _extract_entity_metadata_for_manifest_csv([f]) + + # THEN provenance keys are present in the column list + assert { + "used", + "executed", + "activityName", + "activityDescription", + "tissue", + "count", + }.issubset(keys) + + assert data[0]["parentId"] == "syn456" + assert data[0]["ID"] == "syn123" + assert data[0]["path"] == "/data/file.txt" + assert data[0]["name"] == "file.txt" + assert data[0]["activityName"] == "My Pipeline" + assert data[0]["activityDescription"] == "Run analysis" + assert data[0]["used"] == "syn111.1" + assert data[0]["executed"] == "https://github.com/example/pipeline" + assert data[0]["tissue"] == "brain" + assert data[0]["count"] == "42" + + def test_generate_manifest_csv_data_items_are_converted_to_strings(self) -> None: + # GIVEN a File with a name containing a comma and mixed-type annotations + f = self._make_file( + name="a, b, c", + path="/data/file.txt", + annotations={ + "single_str": "hello", + "multi_str": ["a", "b", "c"], + "str_with_comma": ["hello,world", "plain text"], + "booleans": [True, False], + "integers": [1], + "floats": [1.0], + "single_dt": [ + datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc) + ], + "multi_dt": [ + datetime.datetime( + 2020, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2021, 6, 15, 12, 30, 0, tzinfo=datetime.timezone.utc + ), + ], + }, + ) + + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + # WHEN generate_manifest_csv is called + generate_manifest_csv(all_files=[f], path=tmpdir, syn=self.syn) + manifest_path = os.path.join(tmpdir, "manifest.csv") + content = open(manifest_path, encoding="utf8").read() + with open(manifest_path, newline="", encoding="utf8") as fp: + row = next(csv.DictReader(fp)) + + mock_logger_info.assert_called_once_with( + f"Manifest file {manifest_path} has been generated." + ) + assert '"a, b, c"' in content + assert row["single_str"] == "hello" + assert row["multi_str"] == "[a,b,c]" + assert row["str_with_comma"] == '["hello,world",plain text]' + assert row["booleans"] == "[True,False]" + assert row["integers"] == "1" + assert row["floats"] == "1.0" + assert row["single_dt"] == "2020-01-01T00:00:00Z" + assert row["multi_dt"] == "[2020-01-01T00:00:00Z,2021-06-15T12:30:00Z]" + + def test_generate_manifest_csv_with_only_header_row(self) -> None: + # GIVEN an empty file list + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + # WHEN generate_manifest_csv is called with no files + generate_manifest_csv(all_files=[], path=tmpdir, syn=self.syn) + + # THEN the manifest.csv file is created with only the header row and no data rows + manifest_path = os.path.join(tmpdir, "manifest.csv") + with open(manifest_path, newline="", encoding="utf8") as fp: + reader = csv.DictReader(fp) + rows = list(reader) + assert reader.fieldnames == [ + "path", + "parentId", + "name", + "ID", + "synapseStore", + "contentType", + "used", + "executed", + "activityName", + "activityDescription", + ] + assert rows == [] + mock_logger_info.assert_called_once_with( + f"Manifest file {manifest_path} has been generated." + ) + + def test_generate_manifest_csv_with_path_None_raises_ValueError(self) -> None: + # GIVEN an empty file list + with tempfile.TemporaryDirectory() as tmpdir: + # WHEN generate_manifest_csv is called with path=None + with pytest.raises( + ValueError, + match="The path argument is required to generate a manifest.csv file.", + ): + generate_manifest_csv(all_files=[], path=None, syn=self.syn) + + def test_generate_manifest_csv_quotes_values_with_commas(self) -> None: + # GIVEN a File whose name contains a comma + f = self._make_file(name="file, extra.txt", path="/tmp/file, extra.txt") + + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + generate_manifest_csv(all_files=[f], path=tmpdir, syn=self.syn) + manifest_path = os.path.join(tmpdir, "manifest.csv") + content = open(manifest_path, encoding="utf8").read() + # THEN the comma-containing value is quoted in the CSV + assert '"file, extra.txt"' in content + mock_logger_info.assert_called_once_with( + f"Manifest file {manifest_path} has been generated." + ) + + +class TestWriteManifestDataCsv: + """Tests for the _write_manifest_data_csv helper.""" + + @pytest.fixture(scope="function", autouse=True) + def setup_method(self, syn) -> None: + self.syn = syn + + def test_writes_header_and_rows(self) -> None: + # GIVEN keys and one row of data + keys = ["path", "parentId", "name"] + data = [{"path": "/data/f.txt", "parentId": "syn1", "name": "f.txt"}] + + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + filename = os.path.join(tmpdir, "manifest.csv") + # WHEN _write_manifest_data_csv is called + _write_manifest_data_csv(filename, keys, data, syn=self.syn) + + with open(filename, newline="", encoding="utf8") as fp: + rows = list(csv.DictReader(fp)) + + # THEN header and row values are written correctly + assert len(rows) == 1 + assert rows[0]["path"] == "/data/f.txt" + assert rows[0]["parentId"] == "syn1" + assert rows[0]["name"] == "f.txt" + mock_logger_info.assert_called_once_with( + f"Manifest file {filename} has been generated." + ) + + def test_missing_keys_use_empty_string(self) -> None: + # GIVEN a row missing the "name" key + keys = ["path", "parentId", "name"] + data = [{"path": "/data/f.txt", "parentId": "syn1"}] + + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + filename = os.path.join(tmpdir, "manifest.csv") + _write_manifest_data_csv(filename, keys, data, syn=self.syn) + + with open(filename, newline="", encoding="utf8") as fp: + rows = list(csv.DictReader(fp)) + + # THEN the missing field is written as an empty string + assert rows[0]["name"] == "" + mock_logger_info.assert_called_once_with( + f"Manifest file {filename} has been generated." + ) + + def test_extra_keys_in_row_are_ignored(self) -> None: + # GIVEN a row with a key not in the fieldnames list + keys = ["path", "name"] + data = [{"path": "/data/f.txt", "name": "f.txt", "extra": "ignored"}] + + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + filename = os.path.join(tmpdir, "manifest.csv") + # WHEN _write_manifest_data_csv is called + # THEN no exception is raised and only declared keys appear + _write_manifest_data_csv(filename, keys, data, syn=self.syn) + + with open(filename, newline="", encoding="utf8") as fp: + reader = csv.DictReader(fp) + rows = list(reader) + assert "extra" not in reader.fieldnames + + assert rows[0]["path"] == "/data/f.txt" + mock_logger_info.assert_called_once_with( + f"Manifest file {filename} has been generated." + ) + + def test_values_with_commas_are_quoted(self) -> None: + # GIVEN a value that contains a comma + keys = ["name", "parentId"] + data = [{"name": "file, with comma.txt", "parentId": "syn1"}] + + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + filename = os.path.join(tmpdir, "manifest.csv") + _write_manifest_data_csv(filename, keys, data, syn=self.syn) + content = open(filename, encoding="utf8").read() + + with open(filename, newline="", encoding="utf8") as fp: + rows = list(csv.DictReader(fp)) + + # THEN the comma-containing value is quoted in the raw CSV + assert '"file, with comma.txt"' in content + # AND reads back correctly + assert rows[0]["name"] == "file, with comma.txt" + mock_logger_info.assert_called_once_with( + f"Manifest file {filename} has been generated." + ) + + def test_empty_data_writes_header_only(self) -> None: + # GIVEN no data rows + keys = ["path", "parentId", "name"] + + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + filename = os.path.join(tmpdir, "manifest.csv") + _write_manifest_data_csv(filename, keys, [], syn=self.syn) + + with open(filename, newline="", encoding="utf8") as fp: + reader = csv.DictReader(fp) + rows = list(reader) + header = reader.fieldnames + + # THEN the file exists with only the header + assert rows == [] + assert header == keys + mock_logger_info.assert_called_once_with( + f"Manifest file {filename} has been generated." + ) + + def test_unicode_values_are_written_correctly(self) -> None: + # GIVEN a value with non-ASCII characters + keys = ["name", "parentId"] + data = [{"name": "données_été.txt", "parentId": "syn1"}] + + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + filename = os.path.join(tmpdir, "manifest.csv") + _write_manifest_data_csv(filename, keys, data, syn=self.syn) + + with open(filename, newline="", encoding="utf8") as fp: + rows = list(csv.DictReader(fp)) + + # THEN unicode characters round-trip correctly + assert rows[0]["name"] == "données_été.txt" + mock_logger_info.assert_called_once_with( + f"Manifest file {filename} has been generated." + ) + + def test_multiple_rows_written_in_order(self) -> None: + # GIVEN multiple rows + keys = ["name", "parentId"] + data = [ + {"name": "a.txt", "parentId": "syn1"}, + {"name": "b.txt", "parentId": "syn2"}, + {"name": "c.txt", "parentId": "syn3"}, + ] + + with ( + tempfile.TemporaryDirectory() as tmpdir, + patch.object(self.syn.logger, "info") as mock_logger_info, + ): + filename = os.path.join(tmpdir, "manifest.csv") + _write_manifest_data_csv(filename, keys, data, syn=self.syn) + + with open(filename, newline="", encoding="utf8") as fp: + rows = list(csv.DictReader(fp)) + + # THEN all rows are present and in order + assert len(rows) == 3 + assert [r["name"] for r in rows] == ["a.txt", "b.txt", "c.txt"] + mock_logger_info.assert_called_once_with( + f"Manifest file {filename} has been generated." + ) + + +class TestGetEntityProvenanceDictForManifest: + """Tests for _get_entity_provenance_dict_for_manifest.""" + + def _make_file_with_activity(self, activity: Activity = None) -> File: + f = File(id="syn1", name="f.txt", path="/f.txt", parent_id="syn2") + if activity: + f.activity = activity + return f + + def test_returns_empty_dict_when_no_activity(self) -> None: + f = self._make_file_with_activity() + result = _get_entity_provenance_dict_for_manifest(f) + assert result == {} + + def test_returns_all_provenance_keys_with_activity(self) -> None: + activity = Activity( + name="Pipeline", + description="Runs analysis", + used=[UsedEntity(target_id="syn10", target_version_number=2)], + executed=[UsedURL(url="https://github.com/example/run")], + ) + f = self._make_file_with_activity(activity) + + result = _get_entity_provenance_dict_for_manifest(f) + assert result["used"] == "syn10.2" + assert result["executed"] == "https://github.com/example/run" + assert result["activityName"] == "Pipeline" + assert result["activityDescription"] == "Runs analysis" + + def test_activity_name_and_description_default_to_empty_string(self) -> None: + activity = Activity(name=None, description=None) + f = self._make_file_with_activity(activity) + + result = _get_entity_provenance_dict_for_manifest(f) + assert result["activityName"] == "" + assert result["activityDescription"] == "" + + def test_empty_used_and_executed_lists(self) -> None: + activity = Activity(name="minimal", used=[], executed=[]) + f = self._make_file_with_activity(activity) + + result = _get_entity_provenance_dict_for_manifest(f) + + assert result["activityName"] == "minimal" + assert result["used"] == "" + assert result["executed"] == "" + + def test_multiple_used_and_executed_are_semicolon_joined(self) -> None: + # GIVEN an activity with multiple used and executed entries + activity = Activity( + name="multi", + used=[ + UsedEntity(target_id="syn1", target_version_number=1), + UsedEntity(target_id="syn2", target_version_number=3), + ], + executed=[ + UsedURL(url="https://github.com/a"), + UsedURL(url="https://github.com/b"), + ], + ) + f = self._make_file_with_activity(activity) + + result = _get_entity_provenance_dict_for_manifest(f) + + assert result["activityName"] == "multi" + assert result["used"] == "syn1.1;syn2.3" + assert result["executed"] == "https://github.com/a;https://github.com/b" + + +_UTC = datetime.timezone.utc + + +class TestConvertManifestDataItemsToStringList: + """Tests for _convert_manifest_data_items_to_string_list.""" + + @pytest.mark.parametrize( + "items,expected", + [ + ([], ""), + (["hello"], "hello"), + # single item with comma is NOT quoted — quoting only applies in multi-item lists + (["hello,world"], "hello,world"), + (["a", "b", "c"], "[a,b,c]"), + (["hello,world", "plain"], '["hello,world",plain]'), + ([True], "True"), + ([True, False], "[True,False]"), + ([42], "42"), + ([1, 2, 3], "[1,2,3]"), + ([1.5], "1.5"), + ( + [datetime.datetime(2020, 1, 1, tzinfo=_UTC)], + "2020-01-01T00:00:00Z", + ), + ( + [ + datetime.datetime(2020, 1, 1, tzinfo=_UTC), + datetime.datetime(2021, 6, 15, 12, 30, tzinfo=_UTC), + ], + "[2020-01-01T00:00:00Z,2021-06-15T12:30:00Z]", + ), + ], + ) + def test_converts_items(self, items: list, expected: str) -> None: + assert _convert_manifest_data_items_to_string_list(items) == expected + + +class TestConvertManifestDataRowToDict: + """Tests for _convert_manifest_data_row_to_dict.""" + + def test_all_keys_present_passes_through(self) -> None: + row = {"path": "/f.txt", "parentId": "syn1", "name": "f.txt"} + keys = ["path", "parentId", "name"] + + result = _convert_manifest_data_row_to_dict(row, keys) + + assert result == {"path": "/f.txt", "parentId": "syn1", "name": "f.txt"} + + def test_missing_key_defaults_to_empty_string(self) -> None: + row = {"path": "/f.txt", "parentId": "syn1"} + keys = ["path", "parentId", "name"] + + result = _convert_manifest_data_row_to_dict(row, keys) + + assert result["name"] == "" + + def test_list_value_converted_to_string(self) -> None: + row = {"tags": ["a", "b", "c"]} + keys = ["tags"] + + result = _convert_manifest_data_row_to_dict(row, keys) + + assert result["tags"] == "[a,b,c]" + + def test_extra_keys_in_row_are_not_included_in_output(self) -> None: + row = {"path": "/f.txt", "extra": "ignored"} + keys = ["path"] + + result = _convert_manifest_data_row_to_dict(row, keys) + + assert "extra" not in result + assert result == {"path": "/f.txt"} diff --git a/tests/unit/synapseclient/services/unit_test_migration_and_types_async.py b/tests/unit/synapseclient/services/unit_test_migration_and_types_async.py new file mode 100644 index 000000000..d709edc4a --- /dev/null +++ b/tests/unit/synapseclient/services/unit_test_migration_and_types_async.py @@ -0,0 +1,2520 @@ +"""Unit tests for synapseclient.models.services.migration and migration_types (sync and async).""" + +import asyncio +import csv +import json +import os +import sqlite3 +import tempfile +from dataclasses import fields +from typing import Any, Dict +from unittest import mock +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from synapseclient.core.constants import concrete_types +from synapseclient.core.exceptions import SynapseError +from synapseclient.models.services.migration import ( + BATCH_SIZE, + DEFAULT_PART_SIZE, + _check_file_handle_exists, + _check_indexed, + _confirm_migration, + _create_new_file_version_async, + _ensure_schema, + _escape_column_name, + _execute_migration_async, + _get_default_db_path, + _get_file_migration_status, + _get_part_size, + _get_table_file_handle_rows_async, + _get_version_numbers_async, + _index_container_async, + _index_entity_async, + _index_file_entity_async, + _index_table_entity_async, + _insert_file_migration, + _insert_table_file_migration, + _join_column_names, + _mark_container_indexed, + _migrate_file_version_async, + _migrate_item_async, + _migrate_table_attached_file_async, + _prepare_migration_db, + _query_migration_batch, + _record_indexing_error, + _retrieve_index_settings, + _update_migration_database, + _verify_storage_location_ownership_async, + index_files_for_migration_async, + migrate_indexed_files_async, + track_migration_results_async, +) +from synapseclient.models.services.migration_types import ( + IndexingError, + MigrationError, + MigrationKey, + MigrationResult, + MigrationSettings, + MigrationStatus, + MigrationType, +) + +# ============================================================================= +# Fixtures +# ============================================================================= + +MODULE = "synapseclient.models.services.migration" + + +@pytest.fixture +def in_memory_db(): + """Return an in-memory SQLite connection with schema applied.""" + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + yield conn, cursor + conn.close() + + +@pytest.fixture +def db_file(): + """Return a path to a temporary SQLite file with schema applied.""" + fd, path = tempfile.mkstemp(suffix=".db") + os.close(fd) + conn = sqlite3.connect(path) + try: + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + finally: + conn.close() + yield path + os.unlink(path) + + +@pytest.fixture +def db_file_with_settings(): + """A temp db file with MigrationSettings already populated.""" + fd, path = tempfile.mkstemp(suffix=".db") + os.close(fd) + settings = MigrationSettings( + root_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="new", + include_table_files=False, + ) + conn = sqlite3.connect(path) + try: + cursor = conn.cursor() + _ensure_schema(cursor) + cursor.execute( + "INSERT INTO migration_settings (settings) VALUES (?)", + (json.dumps(settings.to_dict()),), + ) + conn.commit() + finally: + conn.close() + yield path, settings + os.unlink(path) + + +def _populate_db(db_path: str) -> None: + """Insert sample rows into a migration database for MigrationResult tests.""" + rows = [ + # (id, type, version, row_id, col_id, parent_id, status, exception, from_sl, from_fh, to_fh, file_size) + ( + "syn1", + MigrationType.PROJECT.value, + None, + None, + None, + None, + MigrationStatus.INDEXED.value, + None, + None, + None, + None, + None, + ), + ( + "syn2", + MigrationType.FOLDER.value, + None, + None, + None, + "syn1", + MigrationStatus.INDEXED.value, + None, + None, + None, + None, + None, + ), + ( + "syn3", + MigrationType.FILE.value, + 1, + None, + None, + "syn1", + MigrationStatus.MIGRATED.value, + None, + "10", + "fh_a", + "fh_b", + 1024, + ), + ( + "syn4", + MigrationType.TABLE_ATTACHED_FILE.value, + 2, + 5, + 7, + "syn1", + MigrationStatus.MIGRATED.value, + None, + "10", + "fh_c", + "fh_d", + 512, + ), + ( + "syn5", + MigrationType.FILE.value, + 3, + None, + None, + "syn1", + MigrationStatus.ERRORED.value, + "boom", + None, + None, + None, + None, + ), + ( + "syn6", + MigrationType.FILE.value, + 4, + None, + None, + "syn1", + MigrationStatus.INDEXED.value, + None, + "10", + "fh_e", + None, + 256, + ), + ( + "syn7", + MigrationType.FILE.value, + 5, + None, + None, + "syn1", + MigrationStatus.ALREADY_MIGRATED.value, + None, + "20", + "fh_f", + None, + 128, + ), + ] + conn = sqlite3.connect(db_path) + try: + cursor = conn.cursor() + _ensure_schema(cursor) + cursor.executemany( + """ + INSERT INTO migrations (id, type, version, row_id, col_id, parent_id, status, exception, + from_storage_location_id, from_file_handle_id, to_file_handle_id, file_size) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + rows, + ) + conn.commit() + finally: + conn.close() + + +@pytest.fixture +def result_db(): + fd, path = tempfile.mkstemp(suffix=".db") + os.close(fd) + _populate_db(path) + yield path + os.unlink(path) + + +def _make_mock_client(): + client = MagicMock() + client.rest_get_async = AsyncMock() + client.rest_put_async = AsyncMock() + client.logger = MagicMock() + client._get_parallel_file_transfer_semaphore.return_value = asyncio.Semaphore(10) + return client + + +def _make_file_handle( + concrete_type=None, storage_location_id="10", content_size=1024, fh_id="fh1" +): + fh = MagicMock() + fh.concrete_type = concrete_type or concrete_types.S3_FILE_HANDLE + fh.storage_location_id = storage_location_id + fh.content_size = content_size + fh.id = fh_id + return fh + + +def _make_entity( + entity_id="syn3", version_number=1, file_handle=None, data_file_handle_id="fh1" +): + entity = MagicMock() + entity.id = entity_id + entity.version_number = version_number + entity.file_handle = file_handle or _make_file_handle() + entity.data_file_handle_id = data_file_handle_id + entity.dataFileHandleId = data_file_handle_id + entity.store_async = AsyncMock() + return entity + + +async def _aiter(*items): + """Helper: yield items from an async generator.""" + for item in items: + yield item + + +# ============================================================================= +# migration_types.py – MigrationStatus +# ============================================================================= + + +class TestMigrationStatus: + def test_values(self): + assert MigrationStatus.INDEXED.value == 1 + assert MigrationStatus.MIGRATED.value == 2 + assert MigrationStatus.ALREADY_MIGRATED.value == 3 + assert MigrationStatus.ERRORED.value == 4 + + def test_names(self): + assert MigrationStatus(1).name == "INDEXED" + assert MigrationStatus(2).name == "MIGRATED" + assert MigrationStatus(3).name == "ALREADY_MIGRATED" + assert MigrationStatus(4).name == "ERRORED" + + +# ============================================================================= +# migration_types.py – MigrationType +# ============================================================================= + + +class TestMigrationType: + def test_values(self): + assert MigrationType.PROJECT.value == 1 + assert MigrationType.FOLDER.value == 2 + assert MigrationType.FILE.value == 3 + assert MigrationType.TABLE_ATTACHED_FILE.value == 4 + + @pytest.mark.parametrize( + "concrete_type,expected", + [ + (concrete_types.PROJECT_ENTITY, MigrationType.PROJECT), + (concrete_types.FOLDER_ENTITY, MigrationType.FOLDER), + (concrete_types.FILE_ENTITY, MigrationType.FILE), + (concrete_types.TABLE_ENTITY, MigrationType.TABLE_ATTACHED_FILE), + ], + ) + def test_from_concrete_type(self, concrete_type, expected): + assert MigrationType.from_concrete_type(concrete_type) == expected + + def test_from_concrete_type_unknown_raises(self): + with pytest.raises(ValueError, match="Unhandled concrete type"): + MigrationType.from_concrete_type("org.sagebionetworks.repo.model.Unknown") + + +# ============================================================================= +# migration_types.py – MigrationKey +# ============================================================================= + + +class TestMigrationKey: + def test_equality_same(self): + k1 = MigrationKey("syn1", MigrationType.FILE, version=2) + k2 = MigrationKey("syn1", MigrationType.FILE, version=2) + assert k1 == k2 + + def test_equality_different_version(self): + k1 = MigrationKey("syn1", MigrationType.FILE, version=1) + k2 = MigrationKey("syn1", MigrationType.FILE, version=2) + assert k1 != k2 + + def test_equality_different_id(self): + k1 = MigrationKey("syn1", MigrationType.FILE) + k2 = MigrationKey("syn2", MigrationType.FILE) + assert k1 != k2 + + def test_equality_different_type(self): + k1 = MigrationKey("syn1", MigrationType.FILE) + k2 = MigrationKey("syn1", MigrationType.TABLE_ATTACHED_FILE) + assert k1 != k2 + + def test_equality_with_row_col(self): + k1 = MigrationKey("syn1", MigrationType.TABLE_ATTACHED_FILE, row_id=1, col_id=2) + k2 = MigrationKey("syn1", MigrationType.TABLE_ATTACHED_FILE, row_id=1, col_id=2) + assert k1 == k2 + + def test_not_equal_to_other_type(self): + k = MigrationKey("syn1", MigrationType.FILE) + assert k != "not a key" + + def test_hashable_usable_in_set(self): + k1 = MigrationKey("syn1", MigrationType.FILE, version=1) + k2 = MigrationKey("syn1", MigrationType.FILE, version=1) + k3 = MigrationKey("syn2", MigrationType.FILE, version=1) + s = {k1, k2, k3} + assert len(s) == 2 + + def test_default_optional_fields_are_none(self): + k = MigrationKey("syn1", MigrationType.FOLDER) + assert k.version is None + assert k.row_id is None + assert k.col_id is None + + +# ============================================================================= +# migration_types.py – MigrationSettings +# ============================================================================= + + +class TestMigrationSettings: + def _make_settings(self, **kwargs): + defaults = dict( + root_id="syn1", + dest_storage_location_id="123", + source_storage_location_ids=["10", "20"], + file_version_strategy="new", + include_table_files=False, + ) + defaults.update(kwargs) + return MigrationSettings(**defaults) + + def test_to_dict_round_trip(self): + s = self._make_settings() + d = s.to_dict() + assert d["root_id"] == "syn1" + assert d["dest_storage_location_id"] == "123" + assert d["source_storage_location_ids"] == ["10", "20"] + assert d["file_version_strategy"] == "new" + assert d["include_table_files"] == 0 + + def test_to_dict_include_table_files_true(self): + s = self._make_settings(include_table_files=True) + assert s.to_dict()["include_table_files"] == 1 + + def test_from_dict(self): + d = { + "root_id": "syn5", + "dest_storage_location_id": "99", + "source_storage_location_ids": ["5"], + "file_version_strategy": "all", + "include_table_files": 1, + } + s = MigrationSettings.from_dict(d) + assert s.root_id == "syn5" + assert s.dest_storage_location_id == "99" + assert s.source_storage_location_ids == ["5"] + assert s.file_version_strategy == "all" + assert s.include_table_files is True + + def test_from_dict_int_false(self): + d = { + "root_id": "syn5", + "dest_storage_location_id": "99", + "include_table_files": 0, + } + s = MigrationSettings.from_dict(d) + assert s.include_table_files is False + + def test_from_dict_missing_optional_fields(self): + d = {"root_id": "syn1", "dest_storage_location_id": "5"} + s = MigrationSettings.from_dict(d) + assert s.source_storage_location_ids == [] + assert s.file_version_strategy == "new" + assert s.include_table_files is False + + def test_verify_migration_settings_matching(self): + s = self._make_settings() + # Should not raise + s.verify_migration_settings(s, "/tmp/test.db") + + @pytest.mark.parametrize( + "field_name,bad_value", + [ + ("root_id", "syn999"), + ("dest_storage_location_id", "9999"), + ("file_version_strategy", "all"), + ("include_table_files", True), + ], + ) + def test_verify_migration_settings_mismatch_raises(self, field_name, bad_value): + existing = self._make_settings() + current_kwargs = {field_name: bad_value} + current = self._make_settings(**current_kwargs) + with pytest.raises(ValueError, match="Index parameter does not match"): + current.verify_migration_settings(existing, "/tmp/test.db") + + +# ============================================================================= +# migration_types.py – IndexingError +# ============================================================================= + + +class TestIndexingError: + def test_attributes(self): + err = IndexingError("syn42", concrete_types.FILE_ENTITY) + assert err.entity_id == "syn42" + assert err.concrete_type == concrete_types.FILE_ENTITY + + def test_is_exception(self): + assert issubclass(IndexingError, Exception) + + +# ============================================================================= +# migration_types.py – MigrationError +# ============================================================================= + + +class TestMigrationError: + def test_basic_message(self): + key = MigrationKey("syn1", MigrationType.FILE) + err = MigrationError(key, from_file_handle_id="fh1") + assert "syn1" in str(err) + assert err.key is key + assert err.from_file_handle_id == "fh1" + assert err.to_file_handle_id is None + + def test_with_cause(self): + key = MigrationKey("syn1", MigrationType.FILE) + cause = RuntimeError("network failure") + err = MigrationError(key, from_file_handle_id="fh1", cause=cause) + assert "network failure" in str(err) + + def test_with_to_handle(self): + key = MigrationKey("syn1", MigrationType.FILE) + err = MigrationError(key, from_file_handle_id="fh1", to_file_handle_id="fh2") + assert err.to_file_handle_id == "fh2" + + def test_is_exception(self): + assert issubclass(MigrationError, Exception) + + +# ============================================================================= +# migration_types.py – MigrationResult +# ============================================================================= + + +class TestMigrationResult: + def test_get_counts_by_status(self, result_db): + result = MigrationResult(db_path=result_db) + counts = result.get_counts_by_status() + # Containers (PROJECT, FOLDER) are excluded from counts + assert counts["MIGRATED"] == 2 + assert counts["ERRORED"] == 1 + assert counts["INDEXED"] == 1 + assert counts["ALREADY_MIGRATED"] == 1 + + def test_counts_by_status_property(self, result_db): + result = MigrationResult(db_path=result_db) + assert result.counts_by_status == result.get_counts_by_status() + + def test_get_migrations_returns_only_file_and_table(self, result_db): + result = MigrationResult(db_path=result_db) + migrations = list(result.get_migrations()) + types = {m["type"] for m in migrations} + assert types <= {"file", "table"} + + def test_get_migrations_file_entry(self, result_db): + result = MigrationResult(db_path=result_db) + migrations = list(result.get_migrations()) + file_migrations = [m for m in migrations if m["id"] == "syn3"] + assert len(file_migrations) == 1 + m = file_migrations[0] + assert m["type"] == "file" + assert m["version"] == 1 + assert m["status"] == "MIGRATED" + assert m["from_file_handle_id"] == "fh_a" + assert m["to_file_handle_id"] == "fh_b" + + def test_get_migrations_table_entry(self, result_db): + result = MigrationResult(db_path=result_db) + migrations = list(result.get_migrations()) + table_migrations = [m for m in migrations if m["id"] == "syn4"] + assert len(table_migrations) == 1 + m = table_migrations[0] + assert m["type"] == "table" + assert m["row_id"] == 5 + + def test_get_migrations_error_entry(self, result_db): + result = MigrationResult(db_path=result_db) + migrations = list(result.get_migrations()) + errored = [m for m in migrations if m["status"] == "ERRORED"] + assert len(errored) == 1 + assert errored[0]["exception"] == "boom" + + def test_get_migrations_col_name_resolved_via_client(self, result_db): + mock_client = mock.MagicMock() + mock_client.restGET.return_value = {"name": "my_col"} + result = MigrationResult(db_path=result_db, synapse_client=mock_client) + migrations = list(result.get_migrations()) + table_m = [m for m in migrations if m["type"] == "table"][0] + assert table_m["col_name"] == "my_col" + + def test_as_csv(self, result_db): + result = MigrationResult(db_path=result_db) + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode="w") as f: + csv_path = f.name + try: + result.as_csv(csv_path) + with open(csv_path, newline="") as f: + reader = csv.DictReader(f) + rows = list(reader) + ids = {r["id"] for r in rows} + # Should include file and table-attached entries, not containers + assert "syn3" in ids + assert "syn4" in ids + assert "syn1" not in ids # PROJECT + assert "syn2" not in ids # FOLDER + assert "id" in reader.fieldnames + assert "status" in reader.fieldnames + finally: + os.unlink(csv_path) + + @pytest.mark.asyncio + async def test_get_counts_by_status_async(self, result_db): + result = MigrationResult(db_path=result_db) + counts = await result.get_counts_by_status_async() + assert counts["MIGRATED"] == 2 + + @pytest.mark.asyncio + async def test_get_migrations_async(self, result_db): + result = MigrationResult(db_path=result_db) + migrations = await result.get_migrations_async() + assert isinstance(migrations, list) + assert len(migrations) > 0 + + @pytest.mark.asyncio + async def test_as_csv_async(self, result_db): + result = MigrationResult(db_path=result_db) + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as f: + csv_path = f.name + try: + await result.as_csv_async(csv_path) + assert os.path.exists(csv_path) + with open(csv_path) as f: + content = f.read() + assert "id" in content + finally: + os.unlink(csv_path) + + +# ============================================================================= +# migration.py – pure helper functions +# ============================================================================= + + +class TestGetDefaultDbPath: + def test_returns_path_with_entity_id(self): + path = _get_default_db_path("syn123") + assert "migration_syn123.db" in path + assert os.path.exists(os.path.dirname(path)) + + +class TestEscapeColumnName: + def test_plain_string(self): + assert _escape_column_name("my_col") == '"my_col"' + + def test_dict_with_name_key(self): + assert _escape_column_name({"name": "col_name"}) == '"col_name"' + + def test_escapes_double_quotes(self): + assert _escape_column_name('col"name') == '"col""name"' + + def test_dict_escapes_double_quotes(self): + assert _escape_column_name({"name": 'a"b'}) == '"a""b"' + + +class TestJoinColumnNames: + def test_single(self): + assert _join_column_names(["col1"]) == '"col1"' + + def test_multiple(self): + result = _join_column_names(["a", "b", "c"]) + assert result == '"a","b","c"' + + def test_dict_columns(self): + cols = [{"name": "x"}, {"name": "y"}] + assert _join_column_names(cols) == '"x","y"' + + +class TestGetPartSize: + def test_small_file_uses_default(self): + size = 1 * 1024 * 1024 # 1 MB + assert _get_part_size(size) == DEFAULT_PART_SIZE + + def test_large_file_exceeds_default(self): + from synapseclient.core.upload.multipart_upload import MAX_NUMBER_OF_PARTS + + # File so large that default part size would require too many parts + size = DEFAULT_PART_SIZE * MAX_NUMBER_OF_PARTS + 1 + part_size = _get_part_size(size) + assert part_size > DEFAULT_PART_SIZE + + +class TestGetFileMigrationStatus: + def _make_handle(self, concrete_type, storage_location_id): + handle = mock.MagicMock() + handle.concrete_type = concrete_type + handle.storage_location_id = storage_location_id + return handle + + def test_non_s3_handle_returns_none(self): + handle = self._make_handle( + "org.sagebionetworks.repo.model.file.ExternalFileHandle", "10" + ) + result = _get_file_migration_status(handle, [], "20") + assert result is None + + def test_already_at_destination_returns_already_migrated(self): + handle = self._make_handle(concrete_types.S3_FILE_HANDLE, "20") + result = _get_file_migration_status(handle, [], "20") + assert result == MigrationStatus.ALREADY_MIGRATED.value + + def test_no_source_filter_returns_indexed(self): + handle = self._make_handle(concrete_types.S3_FILE_HANDLE, "10") + result = _get_file_migration_status(handle, [], "20") + assert result == MigrationStatus.INDEXED.value + + def test_source_filter_match_returns_indexed(self): + handle = self._make_handle(concrete_types.S3_FILE_HANDLE, "10") + result = _get_file_migration_status(handle, ["10", "11"], "20") + assert result == MigrationStatus.INDEXED.value + + def test_source_filter_no_match_returns_none(self): + handle = self._make_handle(concrete_types.S3_FILE_HANDLE, "99") + result = _get_file_migration_status(handle, ["10", "11"], "20") + assert result is None + + +# ============================================================================= +# migration.py – database helper functions +# ============================================================================= + + +class TestEnsureSchema: + def test_creates_migrations_table(self, in_memory_db): + conn, cursor = in_memory_db + tables = cursor.execute( + "SELECT name FROM sqlite_master WHERE type='table'" + ).fetchall() + table_names = {t[0] for t in tables} + assert "migrations" in table_names + assert "migration_settings" in table_names + + def test_idempotent(self, in_memory_db): + conn, cursor = in_memory_db + # Running again should not raise + _ensure_schema(cursor) + + +class TestCheckIndexed: + def test_not_indexed(self, in_memory_db): + conn, cursor = in_memory_db + assert _check_indexed(cursor, "syn999", synapse_client=MagicMock()) is False + + def test_indexed(self, in_memory_db): + conn, cursor = in_memory_db + cursor.execute( + "INSERT INTO migrations (id, type, status) VALUES (?, ?, ?)", + ("syn1", MigrationType.FILE.value, MigrationStatus.INDEXED.value), + ) + conn.commit() + assert _check_indexed(cursor, "syn1", synapse_client=MagicMock()) is True + + +class TestMarkContainerIndexed: + def test_inserts_row(self, in_memory_db): + conn, cursor = in_memory_db + # Callers always pass migration_type as .value (int) + _mark_container_indexed(cursor, "syn10", MigrationType.FOLDER.value, "syn1") + conn.commit() + row = cursor.execute( + "SELECT id, type, parent_id, status FROM migrations WHERE id = 'syn10'" + ).fetchone() + assert row is not None + assert row[1] == MigrationType.FOLDER.value + assert row[2] == "syn1" + assert row[3] == MigrationStatus.INDEXED.value + + def test_check_indexed_prevents_double_insert(self, in_memory_db): + """In practice, _check_indexed guards against re-indexing containers. + After marking a container indexed, _check_indexed should return True.""" + conn, cursor = in_memory_db + _mark_container_indexed(cursor, "syn10", MigrationType.FOLDER.value, "syn1") + conn.commit() + assert _check_indexed(cursor, "syn10", synapse_client=MagicMock()) is True + + +class TestRecordIndexingError: + def test_inserts_error_row(self, in_memory_db): + conn, cursor = in_memory_db + _record_indexing_error( + cursor, "syn11", MigrationType.FILE.value, "syn1", "Traceback..." + ) + conn.commit() + row = cursor.execute( + "SELECT status, exception FROM migrations WHERE id='syn11'" + ).fetchone() + assert row[0] == MigrationStatus.ERRORED.value + assert row[1] == "Traceback..." + + +class TestInsertFileMigration: + def test_inserts_row(self, in_memory_db): + conn, cursor = in_memory_db + insert_values = [ + ( + "syn3", + MigrationType.FILE.value, + 1, + "syn1", + "10", + "fh_a", + 1024, + MigrationStatus.INDEXED.value, + ), + ] + _insert_file_migration(cursor, insert_values) + conn.commit() + row = cursor.execute( + "SELECT id, type, version, from_file_handle_id FROM migrations WHERE id='syn3'" + ).fetchone() + assert row is not None + assert row[2] == 1 + assert row[3] == "fh_a" + + def test_inserts_multiple(self, in_memory_db): + conn, cursor = in_memory_db + insert_values = [ + ( + "syn3", + MigrationType.FILE.value, + 1, + "syn1", + "10", + "fh_a", + 1024, + MigrationStatus.INDEXED.value, + ), + ( + "syn4", + MigrationType.FILE.value, + 2, + "syn1", + "10", + "fh_b", + 2048, + MigrationStatus.INDEXED.value, + ), + ] + _insert_file_migration(cursor, insert_values) + conn.commit() + count = cursor.execute("SELECT count(*) FROM migrations").fetchone()[0] + assert count == 2 + + +class TestInsertTableFileMigration: + def test_inserts_row(self, in_memory_db): + conn, cursor = in_memory_db + insert_values = [ + ( + "syn5", + MigrationType.TABLE_ATTACHED_FILE.value, + 1, + 2, + 3, + "syn1", + "10", + "fh_x", + 512, + MigrationStatus.INDEXED.value, + ), + ] + _insert_table_file_migration(cursor, insert_values) + conn.commit() + row = cursor.execute( + "SELECT id, row_id, col_id FROM migrations WHERE id='syn5'" + ).fetchone() + assert row is not None + assert row[1] == 1 + assert row[2] == 2 + + def test_ignore_on_duplicate(self, in_memory_db): + conn, cursor = in_memory_db + insert_values = [ + ( + "syn5", + MigrationType.TABLE_ATTACHED_FILE.value, + 1, + 2, + 3, + "syn1", + "10", + "fh_x", + 512, + MigrationStatus.INDEXED.value, + ), + ( + "syn5", + MigrationType.TABLE_ATTACHED_FILE.value, + 1, + 2, + 3, + "syn1", + "10", + "fh_x", + 512, + MigrationStatus.INDEXED.value, + ), + ] + _insert_table_file_migration(cursor, insert_values) + conn.commit() + count = cursor.execute( + "SELECT count(*) FROM migrations WHERE id='syn5'" + ).fetchone()[0] + assert count == 1 + + +class TestRetrieveIndexSettings: + def test_returns_none_when_empty(self, in_memory_db): + conn, cursor = in_memory_db + assert _retrieve_index_settings(cursor) is None + + def test_returns_settings_when_present(self, in_memory_db): + conn, cursor = in_memory_db + settings = MigrationSettings( + root_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=["5"], + file_version_strategy="all", + include_table_files=True, + ) + cursor.execute( + "INSERT INTO migration_settings (settings) VALUES (?)", + (json.dumps(settings.to_dict()),), + ) + conn.commit() + retrieved = _retrieve_index_settings(cursor) + assert retrieved.root_id == "syn1" + assert retrieved.dest_storage_location_id == "99" + assert retrieved.include_table_files is True + + +class TestPrepareMigrationDb: + def test_inserts_settings_on_first_run(self, in_memory_db): + conn, cursor = in_memory_db + _prepare_migration_db( + conn=conn, + cursor=cursor, + db_path=":memory:", + root_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=["5"], + file_version_strategy="new", + include_table_files=False, + ) + retrieved = _retrieve_index_settings(cursor) + assert retrieved is not None + assert retrieved.root_id == "syn1" + + def test_no_error_on_matching_settings(self, in_memory_db): + conn, cursor = in_memory_db + kwargs = dict( + conn=conn, + cursor=cursor, + db_path=":memory:", + root_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=["5"], + file_version_strategy="new", + include_table_files=False, + ) + _prepare_migration_db(**kwargs) + # Should not raise on second call with same settings + _prepare_migration_db(**kwargs) + + def test_raises_on_mismatched_settings(self, in_memory_db): + conn, cursor = in_memory_db + _prepare_migration_db( + conn=conn, + cursor=cursor, + db_path=":memory:", + root_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="new", + include_table_files=False, + ) + with pytest.raises(ValueError, match="Index parameter does not match"): + _prepare_migration_db( + conn=conn, + cursor=cursor, + db_path=":memory:", + root_id="syn_different", # changed + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="new", + include_table_files=False, + ) + + +class TestCheckFileHandleExists: + def test_returns_none_when_not_found(self, in_memory_db): + conn, cursor = in_memory_db + assert _check_file_handle_exists(cursor, "fh_missing") is None + + def test_returns_to_handle_when_found(self, in_memory_db): + conn, cursor = in_memory_db + cursor.execute( + """INSERT INTO migrations (id, type, status, from_file_handle_id, to_file_handle_id) + VALUES (?, ?, ?, ?, ?)""", + ( + "syn1", + MigrationType.FILE.value, + MigrationStatus.MIGRATED.value, + "fh_a", + "fh_b", + ), + ) + conn.commit() + assert _check_file_handle_exists(cursor, "fh_a") == "fh_b" + + def test_returns_none_when_to_handle_is_null(self, in_memory_db): + conn, cursor = in_memory_db + cursor.execute( + """INSERT INTO migrations (id, type, status, from_file_handle_id) + VALUES (?, ?, ?, ?)""", + ("syn1", MigrationType.FILE.value, MigrationStatus.INDEXED.value, "fh_a"), + ) + conn.commit() + assert _check_file_handle_exists(cursor, "fh_a") is None + + +class TestUpdateMigrationDatabase: + def _insert_indexed_file(self, cursor, entity_id="syn1", version=1): + cursor.execute( + """INSERT INTO migrations (id, type, version, status, from_file_handle_id) + VALUES (?, ?, ?, ?, ?)""", + ( + entity_id, + MigrationType.FILE.value, + version, + MigrationStatus.INDEXED.value, + "fh_src", + ), + ) + + def test_updates_to_migrated(self, in_memory_db): + conn, cursor = in_memory_db + self._insert_indexed_file(cursor) + conn.commit() + key = MigrationKey("syn1", MigrationType.FILE, version=1) + # Callers always pass status as .value (int) + _update_migration_database( + conn, cursor, key, "fh_dest", MigrationStatus.MIGRATED.value + ) + row = cursor.execute( + "SELECT status, to_file_handle_id FROM migrations WHERE id='syn1'" + ).fetchone() + assert row[0] == MigrationStatus.MIGRATED.value + assert row[1] == "fh_dest" + + def test_stores_exception_traceback(self, in_memory_db): + conn, cursor = in_memory_db + self._insert_indexed_file(cursor) + conn.commit() + key = MigrationKey("syn1", MigrationType.FILE, version=1) + cause = RuntimeError("disk full") + _update_migration_database( + conn, cursor, key, None, MigrationStatus.ERRORED.value, exception=cause + ) + row = cursor.execute( + "SELECT status, exception FROM migrations WHERE id='syn1'" + ).fetchone() + assert row[0] == MigrationStatus.ERRORED.value + assert "disk full" in row[1] + + +class TestConfirmMigration: + def test_force_returns_true(self, in_memory_db): + conn, cursor = in_memory_db + assert _confirm_migration(cursor, "99", force=True) is True + + def test_no_items_returns_false(self, in_memory_db): + conn, cursor = in_memory_db + assert ( + _confirm_migration(cursor, "99", force=False, synapse_client=MagicMock()) + is False + ) + + def test_non_tty_returns_false_without_input(self, in_memory_db): + conn, cursor = in_memory_db + cursor.execute( + "INSERT INTO migrations (id, type, status) VALUES (?, ?, ?)", + ("syn1", MigrationType.FILE.value, MigrationStatus.INDEXED.value), + ) + conn.commit() + with mock.patch("sys.stdout") as mock_stdout: + mock_stdout.isatty.return_value = False + result = _confirm_migration( + cursor, "99", force=False, synapse_client=MagicMock() + ) + assert result is False + + def test_tty_yes_returns_true(self, in_memory_db): + conn, cursor = in_memory_db + cursor.execute( + "INSERT INTO migrations (id, type, status) VALUES (?, ?, ?)", + ("syn1", MigrationType.FILE.value, MigrationStatus.INDEXED.value), + ) + conn.commit() + with ( + mock.patch("sys.stdout") as mock_stdout, + mock.patch("builtins.input", return_value="y"), + ): + mock_stdout.isatty.return_value = True + result = _confirm_migration(cursor, "99", force=False) + assert result is True + + def test_tty_no_returns_false(self, in_memory_db): + conn, cursor = in_memory_db + cursor.execute( + "INSERT INTO migrations (id, type, status) VALUES (?, ?, ?)", + ("syn1", MigrationType.FILE.value, MigrationStatus.INDEXED.value), + ) + conn.commit() + with ( + mock.patch("sys.stdout") as mock_stdout, + mock.patch("builtins.input", return_value="n"), + ): + mock_stdout.isatty.return_value = True + result = _confirm_migration(cursor, "99", force=False) + assert result is False + + +class TestQueryMigrationBatch: + def _insert_indexed( + self, + cursor, + entity_id, + migration_type, + version=None, + row_id=None, + col_id=None, + from_fh="fh_x", + ): + cursor.execute( + """INSERT INTO migrations (id, type, version, row_id, col_id, status, from_file_handle_id) + VALUES (?, ?, ?, ?, ?, ?, ?)""", + ( + entity_id, + migration_type.value, + version, + row_id, + col_id, + MigrationStatus.INDEXED.value, + from_fh, + ), + ) + + def test_returns_forward_progress(self, in_memory_db): + conn, cursor = in_memory_db + self._insert_indexed( + cursor, "syn2", MigrationType.FILE, version=1, from_fh="fh_1" + ) + self._insert_indexed( + cursor, "syn3", MigrationType.FILE, version=1, from_fh="fh_2" + ) + conn.commit() + + start_key = MigrationKey("", MigrationType.FILE) + results = _query_migration_batch(cursor, start_key, set(), set(), limit=10) + ids = [r["id"] for r in results] + assert "syn2" in ids + assert "syn3" in ids + + def test_excludes_pending_file_handles(self, in_memory_db): + conn, cursor = in_memory_db + self._insert_indexed( + cursor, "syn2", MigrationType.FILE, version=1, from_fh="fh_pending" + ) + self._insert_indexed( + cursor, "syn3", MigrationType.FILE, version=1, from_fh="fh_ok" + ) + conn.commit() + + start_key = MigrationKey("", MigrationType.FILE) + results = _query_migration_batch( + cursor, start_key, {"fh_pending"}, set(), limit=10 + ) + ids = [r["id"] for r in results] + assert "syn2" not in ids + assert "syn3" in ids + + def test_respects_limit(self, in_memory_db): + conn, cursor = in_memory_db + for i in range(5): + self._insert_indexed( + cursor, f"syn{i+10}", MigrationType.FILE, version=1, from_fh=f"fh_{i}" + ) + conn.commit() + + start_key = MigrationKey("", MigrationType.FILE) + results = _query_migration_batch(cursor, start_key, set(), set(), limit=2) + assert len(results) <= 2 + + +# ============================================================================= +# _verify_storage_location_ownership_async +# ============================================================================= + + +class TestVerifyStorageLocationOwnershipAsync: + @pytest.mark.asyncio + async def test_success(self): + client = _make_mock_client() + mock_get = AsyncMock(return_value={"storageLocationId": "99"}) + with patch(f"{MODULE}.get_storage_location_setting", mock_get): + # Should not raise + await _verify_storage_location_ownership_async("99", synapse_client=client) + mock_get.assert_awaited_once_with( + storage_location_id="99", synapse_client=client + ) + + @pytest.mark.asyncio + async def test_synapse_error_raises_value_error(self): + client = _make_mock_client() + mock_get = AsyncMock(side_effect=SynapseError("forbidden")) + with patch(f"{MODULE}.get_storage_location_setting", mock_get): + with pytest.raises(ValueError, match="Unable to verify ownership"): + await _verify_storage_location_ownership_async( + "99", synapse_client=client + ) + + +# ============================================================================= +# _get_version_numbers_async +# ============================================================================= + + +class TestGetVersionNumbersAsync: + @pytest.mark.asyncio + async def test_yields_version_numbers(self): + client = _make_mock_client() + pages = [{"versionNumber": 3}, {"versionNumber": 2}, {"versionNumber": 1}] + + async def _mock_paginated(path, *, synapse_client): + for p in pages: + yield p + + with patch(f"{MODULE}.rest_get_paginated_async", _mock_paginated): + versions = [v async for v in _get_version_numbers_async("syn1", client)] + + assert versions == [3, 2, 1] + + @pytest.mark.asyncio + async def test_empty_yields_nothing(self): + client = _make_mock_client() + + async def _mock_paginated(path, *, synapse_client): + return + yield # make it an async generator + + with patch(f"{MODULE}.rest_get_paginated_async", _mock_paginated): + versions = [v async for v in _get_version_numbers_async("syn1", client)] + + assert versions == [] + + +# ============================================================================= +# index_files_for_migration_async – validation +# ============================================================================= + + +class TestIndexFilesForMigrationAsyncValidation: + @pytest.mark.asyncio + async def test_invalid_file_version_strategy_raises(self): + client = _make_mock_client() + with patch(f"{MODULE}.Synapse.get_client", return_value=client): + with pytest.raises(ValueError, match="Invalid file_version_strategy"): + await index_files_for_migration_async( + entity="syn1", + dest_storage_location_id="99", + file_version_strategy="bogus", + synapse_client=client, + ) + + @pytest.mark.asyncio + async def test_skip_strategy_with_no_table_files_raises(self): + client = _make_mock_client() + with patch(f"{MODULE}.Synapse.get_client", return_value=client): + with pytest.raises(ValueError, match="nothing to migrate"): + await index_files_for_migration_async( + entity="syn1", + dest_storage_location_id="99", + file_version_strategy="skip", + include_table_files=False, + synapse_client=client, + ) + + @pytest.mark.asyncio + async def test_ownership_failure_raises(self): + client = _make_mock_client() + client.rest_get_async.side_effect = SynapseError("forbidden") + + with ( + patch(f"{MODULE}.Synapse.get_client", return_value=client), + patch(f"{MODULE}.utils.id_of", return_value="syn1"), + ): + with pytest.raises(ValueError, match="Unable to verify ownership"): + await index_files_for_migration_async( + entity="syn1", + dest_storage_location_id="99", + synapse_client=client, + ) + + @pytest.mark.asyncio + async def test_successful_indexing_returns_migration_result(self): + client = _make_mock_client() + entity = _make_entity("syn3") + + fd, db_path = tempfile.mkstemp(suffix=".db") + os.close(fd) + try: + with ( + patch(f"{MODULE}.Synapse.get_client", return_value=client), + patch(f"{MODULE}.utils.id_of", return_value="syn3"), + patch( + f"{MODULE}._verify_storage_location_ownership_async", + new=AsyncMock(), + ), + patch(f"{MODULE}._index_entity_async", new=AsyncMock()), + ): + result = await index_files_for_migration_async( + entity=entity, + dest_storage_location_id="99", + db_path=db_path, + synapse_client=client, + ) + finally: + os.unlink(db_path) + + assert isinstance(result, MigrationResult) + assert result.db_path == db_path + + @pytest.mark.asyncio + async def test_indexing_error_is_reraised(self): + client = _make_mock_client() + entity = _make_entity("syn3") + underlying = RuntimeError("network down") + indexing_err = IndexingError("syn3", concrete_types.FILE_ENTITY) + indexing_err.__cause__ = underlying + + fd, db_path = tempfile.mkstemp(suffix=".db") + os.close(fd) + try: + with ( + patch(f"{MODULE}.Synapse.get_client", return_value=client), + patch(f"{MODULE}.utils.id_of", return_value="syn3"), + patch( + f"{MODULE}._verify_storage_location_ownership_async", + new=AsyncMock(), + ), + patch(f"{MODULE}._index_entity_async", side_effect=indexing_err), + ): + with pytest.raises(RuntimeError, match="network down"): + await index_files_for_migration_async( + entity=entity, + dest_storage_location_id="99", + db_path=db_path, + synapse_client=client, + ) + finally: + os.unlink(db_path) + + +# ============================================================================= +# _index_entity_async +# ============================================================================= + + +class TestIndexEntityAsync: + def _common_kwargs(self, conn, cursor, client, entity_id="syn3"): + return dict( + conn=conn, + cursor=cursor, + entity=entity_id, + parent_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="new", + include_table_files=False, + continue_on_error=False, + synapse_client=client, + ) + + def _mock_entity_type(self, concrete_type): + et = MagicMock() + et.type = concrete_type + return et + + @pytest.mark.asyncio + async def test_routes_file_entity(self, in_memory_db): + conn, cursor = in_memory_db + client = _make_mock_client() + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn3"), + patch( + f"{MODULE}.get_entity_type", + new=AsyncMock( + return_value=self._mock_entity_type(concrete_types.FILE_ENTITY) + ), + ), + patch( + f"{MODULE}._index_file_entity_async", new=AsyncMock() + ) as mock_index_file, + ): + await _index_entity_async(**self._common_kwargs(conn, cursor, client)) + + mock_index_file.assert_awaited_once() + + @pytest.mark.asyncio + async def test_skips_file_entity_when_strategy_is_skip(self, in_memory_db): + conn, cursor = in_memory_db + client = _make_mock_client() + kwargs = self._common_kwargs(conn, cursor, client) + kwargs["file_version_strategy"] = "skip" + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn3"), + patch( + f"{MODULE}.get_entity_type", + new=AsyncMock( + return_value=self._mock_entity_type(concrete_types.FILE_ENTITY) + ), + ), + patch( + f"{MODULE}._index_file_entity_async", new=AsyncMock() + ) as mock_index_file, + ): + await _index_entity_async(**kwargs) + + mock_index_file.assert_not_awaited() + + @pytest.mark.asyncio + async def test_routes_table_entity_when_include_tables(self, in_memory_db): + conn, cursor = in_memory_db + client = _make_mock_client() + kwargs = self._common_kwargs(conn, cursor, client) + kwargs["include_table_files"] = True + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn5"), + patch( + f"{MODULE}.get_entity_type", + new=AsyncMock( + return_value=self._mock_entity_type(concrete_types.TABLE_ENTITY) + ), + ), + patch( + f"{MODULE}._index_table_entity_async", new=AsyncMock() + ) as mock_index_table, + ): + await _index_entity_async(**kwargs) + + mock_index_table.assert_awaited_once() + + @pytest.mark.asyncio + async def test_skips_table_entity_when_include_tables_false(self, in_memory_db): + conn, cursor = in_memory_db + client = _make_mock_client() + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn5"), + patch( + f"{MODULE}.get_entity_type", + new=AsyncMock( + return_value=self._mock_entity_type(concrete_types.TABLE_ENTITY) + ), + ), + patch( + f"{MODULE}._index_table_entity_async", new=AsyncMock() + ) as mock_index_table, + ): + await _index_entity_async(**self._common_kwargs(conn, cursor, client)) + + mock_index_table.assert_not_awaited() + + @pytest.mark.asyncio + async def test_routes_folder_entity(self, in_memory_db): + conn, cursor = in_memory_db + client = _make_mock_client() + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn2"), + patch( + f"{MODULE}.get_entity_type", + new=AsyncMock( + return_value=self._mock_entity_type(concrete_types.FOLDER_ENTITY) + ), + ), + patch( + f"{MODULE}._index_container_async", new=AsyncMock() + ) as mock_container, + ): + await _index_entity_async(**self._common_kwargs(conn, cursor, client)) + + mock_container.assert_awaited_once() + + @pytest.mark.asyncio + async def test_routes_project_entity(self, in_memory_db): + conn, cursor = in_memory_db + client = _make_mock_client() + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn1"), + patch( + f"{MODULE}.get_entity_type", + new=AsyncMock( + return_value=self._mock_entity_type(concrete_types.PROJECT_ENTITY) + ), + ), + patch( + f"{MODULE}._index_container_async", new=AsyncMock() + ) as mock_container, + ): + await _index_entity_async(**self._common_kwargs(conn, cursor, client)) + + mock_container.assert_awaited_once() + + @pytest.mark.asyncio + async def test_skips_already_indexed_entity(self, in_memory_db): + conn, cursor = in_memory_db + client = _make_mock_client() + # Pre-insert the entity as indexed + cursor.execute( + "INSERT INTO migrations (id, type, status) VALUES (?, ?, ?)", + ("syn3", MigrationType.FILE.value, MigrationStatus.INDEXED.value), + ) + conn.commit() + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn3"), + patch( + f"{MODULE}.get_entity_type", + new=AsyncMock( + return_value=self._mock_entity_type(concrete_types.FILE_ENTITY) + ), + ), + patch( + f"{MODULE}._index_file_entity_async", new=AsyncMock() + ) as mock_index_file, + ): + await _index_entity_async(**self._common_kwargs(conn, cursor, client)) + + mock_index_file.assert_not_awaited() + + @pytest.mark.asyncio + async def test_error_without_continue_raises_indexing_error(self, in_memory_db): + conn, cursor = in_memory_db + client = _make_mock_client() + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn3"), + patch( + f"{MODULE}.get_entity_type", + new=AsyncMock( + return_value=self._mock_entity_type(concrete_types.FILE_ENTITY) + ), + ), + patch( + f"{MODULE}._index_file_entity_async", side_effect=RuntimeError("boom") + ), + ): + with pytest.raises(IndexingError): + await _index_entity_async(**self._common_kwargs(conn, cursor, client)) + + @pytest.mark.asyncio + async def test_error_with_continue_records_error(self, in_memory_db): + conn, cursor = in_memory_db + client = _make_mock_client() + kwargs = self._common_kwargs(conn, cursor, client) + kwargs["continue_on_error"] = True + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn3"), + patch( + f"{MODULE}.get_entity_type", + new=AsyncMock( + return_value=self._mock_entity_type(concrete_types.FILE_ENTITY) + ), + ), + patch( + f"{MODULE}._index_file_entity_async", + side_effect=RuntimeError("transient"), + ), + ): + # Should not raise + await _index_entity_async(**kwargs) + + row = cursor.execute("SELECT status FROM migrations WHERE id='syn3'").fetchone() + assert row[0] == MigrationStatus.ERRORED.value + + +# ============================================================================= +# _index_file_entity_async +# ============================================================================= + + +class TestIndexFileEntityAsync: + def _make_cursor(self): + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + return conn, cursor + + @pytest.mark.asyncio + async def test_new_strategy_inserts_with_none_version(self): + conn, cursor = self._make_cursor() + client = _make_mock_client() + fh = _make_file_handle(storage_location_id="10") + entity = _make_entity("syn3", file_handle=fh) + + with patch(f"{MODULE}.utils.id_of", return_value="syn3"): + await _index_file_entity_async( + cursor=cursor, + entity=entity, + parent_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="new", + synapse_client=client, + ) + + conn.commit() + row = cursor.execute( + "SELECT id, version, status FROM migrations WHERE id='syn3'" + ).fetchone() + assert row is not None + assert row[1] is None # new strategy → version=None + assert row[2] == MigrationStatus.INDEXED.value + + @pytest.mark.asyncio + async def test_latest_strategy_inserts_with_version_number(self): + conn, cursor = self._make_cursor() + client = _make_mock_client() + fh = _make_file_handle(storage_location_id="10") + entity = _make_entity("syn3", version_number=5, file_handle=fh) + + with patch(f"{MODULE}.utils.id_of", return_value="syn3"): + await _index_file_entity_async( + cursor=cursor, + entity=entity, + parent_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="latest", + synapse_client=client, + ) + + conn.commit() + row = cursor.execute( + "SELECT version FROM migrations WHERE id='syn3'" + ).fetchone() + assert row[0] == 5 + + @pytest.mark.asyncio + async def test_all_strategy_inserts_each_version(self): + conn, cursor = self._make_cursor() + client = _make_mock_client() + fh = _make_file_handle(storage_location_id="10") + entity = _make_entity("syn3", file_handle=fh) + + async def _mock_versions(entity_id, syn_client): + for v in [1, 2, 3]: + yield v + + with ( + patch(f"{MODULE}.utils.id_of", return_value="syn3"), + patch(f"{MODULE}._get_version_numbers_async", _mock_versions), + patch( + "synapseclient.operations.get_async", new=AsyncMock(return_value=entity) + ), + ): + await _index_file_entity_async( + cursor=cursor, + entity=entity, + parent_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="all", + synapse_client=client, + ) + + conn.commit() + count = cursor.execute( + "SELECT count(*) FROM migrations WHERE id='syn3'" + ).fetchone()[0] + assert count == 3 + + @pytest.mark.asyncio + async def test_already_migrated_file_skipped(self): + conn, cursor = self._make_cursor() + client = _make_mock_client() + # storage_location_id matches dest → ALREADY_MIGRATED → should still insert + fh = _make_file_handle(storage_location_id="99") + entity = _make_entity("syn3", file_handle=fh) + + with patch(f"{MODULE}.utils.id_of", return_value="syn3"): + await _index_file_entity_async( + cursor=cursor, + entity=entity, + parent_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="new", + synapse_client=client, + ) + + conn.commit() + row = cursor.execute("SELECT status FROM migrations WHERE id='syn3'").fetchone() + assert row[0] == MigrationStatus.ALREADY_MIGRATED.value + + @pytest.mark.asyncio + async def test_source_filter_excludes_non_matching(self): + conn, cursor = self._make_cursor() + client = _make_mock_client() + fh = _make_file_handle(storage_location_id="99") # not in source list + entity = _make_entity("syn3", file_handle=fh) + + with patch(f"{MODULE}.utils.id_of", return_value="syn3"): + await _index_file_entity_async( + cursor=cursor, + entity=entity, + parent_id="syn1", + dest_storage_location_id="20", + source_storage_location_ids=["10"], # "99" not in list + file_version_strategy="new", + synapse_client=client, + ) + + conn.commit() + count = cursor.execute( + "SELECT count(*) FROM migrations WHERE id='syn3'" + ).fetchone()[0] + assert count == 0 + + +# ============================================================================= +# _get_table_file_handle_rows_async +# ============================================================================= + + +class TestGetTableFileHandleRowsAsync: + @pytest.mark.asyncio + async def test_no_file_handle_columns_yields_nothing(self): + client = _make_mock_client() + col = MagicMock() + col.column_type = "STRING" # not FILEHANDLEID + + with patch(f"{MODULE}.get_columns", new=AsyncMock(return_value=[col])): + rows = [ + r + async for r in _get_table_file_handle_rows_async( + "syn5", synapse_client=client + ) + ] + + assert rows == [] + + @pytest.mark.asyncio + async def test_file_handle_columns_yields_rows(self): + client = _make_mock_client() + col = MagicMock() + col.column_type = "FILEHANDLEID" + col.id = "col_42" + + fh = _make_file_handle() + + # Row: [row_id, row_version, file_handle_id] + mock_results = MagicMock() + mock_results.iterrows.return_value = iter([(0, [1, 2, "fh_abc"])]) + + mock_table_instance = MagicMock() + mock_table_instance.query_async = AsyncMock(return_value=mock_results) + mock_table_class = MagicMock(return_value=mock_table_instance) + + with ( + patch(f"{MODULE}.get_columns", new=AsyncMock(return_value=[col])), + patch("synapseclient.models.Table", mock_table_class), + patch( + f"{MODULE}.get_file_handle_for_download_async", + new=AsyncMock(return_value={"fileHandle": fh}), + ), + ): + rows = [ + r + async for r in _get_table_file_handle_rows_async( + "syn5", synapse_client=client + ) + ] + + assert len(rows) == 1 + row_id, row_version, file_handles = rows[0] + assert row_id == 1 + assert row_version == 2 + assert "col_42" in file_handles + + +# ============================================================================= +# _index_table_entity_async +# ============================================================================= + + +class TestIndexTableEntityAsync: + @pytest.mark.asyncio + async def test_inserts_table_file_entries(self): + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + + client = _make_mock_client() + fh = _make_file_handle(storage_location_id="10", fh_id="fh_t1") + + async def _mock_rows(entity_id, *, synapse_client): + yield 1, 2, {"col_7": fh} + + with patch(f"{MODULE}._get_table_file_handle_rows_async", _mock_rows): + await _index_table_entity_async( + cursor=cursor, + entity_id="syn5", + parent_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + synapse_client=client, + ) + + conn.commit() + row = cursor.execute( + "SELECT id, type, row_id, col_id FROM migrations WHERE id='syn5'" + ).fetchone() + assert row is not None + assert row[1] == MigrationType.TABLE_ATTACHED_FILE.value + assert row[2] == 1 # row_id + assert row[3] == "col_7" # col_id + + @pytest.mark.asyncio + async def test_skips_non_s3_file_handles(self): + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + + client = _make_mock_client() + fh = _make_file_handle( + concrete_type="org.sagebionetworks.repo.model.file.ExternalFileHandle", + storage_location_id="10", + ) + + async def _mock_rows(entity_id, *, synapse_client): + yield 1, 2, {"col_7": fh} + + with patch(f"{MODULE}._get_table_file_handle_rows_async", _mock_rows): + await _index_table_entity_async( + cursor=cursor, + entity_id="syn5", + parent_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + synapse_client=client, + ) + + conn.commit() + count = cursor.execute("SELECT count(*) FROM migrations").fetchone()[0] + assert count == 0 + + +# ============================================================================= +# _index_container_async +# ============================================================================= + + +class TestIndexContainerAsync: + @pytest.mark.asyncio + async def test_indexes_children_and_marks_container(self): + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + + client = _make_mock_client() + child_entity = _make_entity("syn3") + + et = MagicMock() + et.type = concrete_types.PROJECT_ENTITY + + async def _mock_get_children(parent, include_types, synapse_client): + yield {"id": "syn3"} + + with ( + patch(f"{MODULE}.get_entity_type", new=AsyncMock(return_value=et)), + patch(f"{MODULE}.get_children", _mock_get_children), + patch( + "synapseclient.operations.get_async", + new=AsyncMock(return_value=child_entity), + ), + patch(f"{MODULE}._index_entity_async", new=AsyncMock()) as mock_index, + ): + await _index_container_async( + conn=conn, + cursor=cursor, + entity_id="syn1", + parent_id=None, + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="new", + include_table_files=False, + continue_on_error=False, + synapse_client=client, + ) + + mock_index.assert_awaited_once() + # Container should be marked as indexed + row = cursor.execute("SELECT id FROM migrations WHERE id='syn1'").fetchone() + assert row is not None + + @pytest.mark.asyncio + async def test_includes_table_type_when_flag_set(self): + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + + client = _make_mock_client() + et = MagicMock() + et.type = concrete_types.FOLDER_ENTITY + + captured_types = [] + + async def _mock_get_children(parent, include_types, synapse_client): + captured_types.extend(include_types) + return + yield # empty generator + + with ( + patch(f"{MODULE}.get_entity_type", new=AsyncMock(return_value=et)), + patch(f"{MODULE}.get_children", _mock_get_children), + ): + await _index_container_async( + conn=conn, + cursor=cursor, + entity_id="syn2", + parent_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="new", + include_table_files=True, + continue_on_error=False, + synapse_client=client, + ) + + assert "table" in captured_types + + @pytest.mark.asyncio + async def test_excludes_file_types_when_strategy_is_skip(self): + conn = sqlite3.connect(":memory:") + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + + client = _make_mock_client() + et = MagicMock() + et.type = concrete_types.FOLDER_ENTITY + + captured_types = [] + + async def _mock_get_children(parent, include_types, synapse_client): + captured_types.extend(include_types) + return + yield + + with ( + patch(f"{MODULE}.get_entity_type", new=AsyncMock(return_value=et)), + patch(f"{MODULE}.get_children", _mock_get_children), + ): + await _index_container_async( + conn=conn, + cursor=cursor, + entity_id="syn2", + parent_id="syn1", + dest_storage_location_id="99", + source_storage_location_ids=[], + file_version_strategy="skip", + include_table_files=True, + continue_on_error=False, + synapse_client=client, + ) + + assert "file" not in captured_types + assert "folder" not in captured_types + + +# ============================================================================= +# _migrate_item_async +# ============================================================================= + + +class TestMigrateItemAsync: + @pytest.mark.asyncio + async def test_copies_file_handle_and_creates_new_version(self): + client = _make_mock_client() + key = MigrationKey("syn3", MigrationType.FILE, version=None) + semaphore = asyncio.Semaphore(10) + + with ( + patch( + f"{MODULE}.multipart_copy_async", new=AsyncMock(return_value="fh_new") + ), + patch( + f"{MODULE}._create_new_file_version_async", new=AsyncMock() + ) as mock_create, + ): + result = await _migrate_item_async( + key=key, + from_file_handle_id="fh_old", + to_file_handle_id=None, + file_size=1024, + dest_storage_location_id="99", + semaphore=semaphore, + synapse_client=client, + ) + + assert result["to_file_handle_id"] == "fh_new" + assert result["from_file_handle_id"] == "fh_old" + mock_create.assert_awaited_once() + + @pytest.mark.asyncio + async def test_reuses_existing_file_handle(self): + client = _make_mock_client() + key = MigrationKey("syn3", MigrationType.FILE, version=None) + semaphore = asyncio.Semaphore(10) + + with ( + patch(f"{MODULE}.multipart_copy_async", new=AsyncMock()) as mock_copy, + patch(f"{MODULE}._create_new_file_version_async", new=AsyncMock()), + ): + result = await _migrate_item_async( + key=key, + from_file_handle_id="fh_old", + to_file_handle_id="fh_existing", # already copied + file_size=1024, + dest_storage_location_id="99", + semaphore=semaphore, + synapse_client=client, + ) + + mock_copy.assert_not_awaited() + assert result["to_file_handle_id"] == "fh_existing" + + @pytest.mark.asyncio + async def test_migrates_versioned_file(self): + client = _make_mock_client() + key = MigrationKey("syn3", MigrationType.FILE, version=2) + semaphore = asyncio.Semaphore(10) + + with ( + patch( + f"{MODULE}.multipart_copy_async", new=AsyncMock(return_value="fh_new") + ), + patch( + f"{MODULE}._migrate_file_version_async", new=AsyncMock() + ) as mock_migrate_ver, + ): + await _migrate_item_async( + key=key, + from_file_handle_id="fh_old", + to_file_handle_id=None, + file_size=1024, + dest_storage_location_id="99", + semaphore=semaphore, + synapse_client=client, + ) + + mock_migrate_ver.assert_awaited_once() + + @pytest.mark.asyncio + async def test_migrates_table_attached_file(self): + client = _make_mock_client() + key = MigrationKey( + "syn5", MigrationType.TABLE_ATTACHED_FILE, row_id=1, col_id=2 + ) + semaphore = asyncio.Semaphore(10) + + with ( + patch( + f"{MODULE}.multipart_copy_async", new=AsyncMock(return_value="fh_new") + ), + patch( + f"{MODULE}._migrate_table_attached_file_async", new=AsyncMock() + ) as mock_table, + ): + await _migrate_item_async( + key=key, + from_file_handle_id="fh_old", + to_file_handle_id=None, + file_size=512, + dest_storage_location_id="99", + semaphore=semaphore, + synapse_client=client, + ) + + mock_table.assert_awaited_once() + + @pytest.mark.asyncio + async def test_exception_wrapped_as_migration_error(self): + client = _make_mock_client() + key = MigrationKey("syn3", MigrationType.FILE, version=None) + semaphore = asyncio.Semaphore(10) + + with patch( + f"{MODULE}.multipart_copy_async", side_effect=RuntimeError("S3 error") + ): + with pytest.raises(MigrationError) as exc_info: + await _migrate_item_async( + key=key, + from_file_handle_id="fh_old", + to_file_handle_id=None, + file_size=1024, + dest_storage_location_id="99", + semaphore=semaphore, + synapse_client=client, + ) + + assert exc_info.value.key is key + assert "S3 error" in str(exc_info.value) + + +# ============================================================================= +# _create_new_file_version_async +# ============================================================================= + + +class TestCreateNewFileVersionAsync: + @pytest.mark.asyncio + async def test_sets_file_handle_and_stores(self): + client = _make_mock_client() + entity = _make_entity("syn3") + + with ( + patch(f"{MODULE}.Synapse.get_client", return_value=client), + patch( + "synapseclient.operations.get_async", new=AsyncMock(return_value=entity) + ), + ): + await _create_new_file_version_async( + entity_id="syn3", + to_file_handle_id="fh_new", + synapse_client=client, + ) + assert entity.data_file_handle_id == "fh_new" + entity.store_async.assert_awaited_once() + + +# ============================================================================= +# _migrate_file_version_async +# ============================================================================= + + +class TestMigrateFileVersionAsync: + @pytest.mark.asyncio + async def test_calls_rest_put_with_correct_payload(self): + client = _make_mock_client() + + with patch(f"{MODULE}.Synapse.get_client", return_value=client): + await _migrate_file_version_async( + entity_id="syn3", + version=2, + from_file_handle_id="fh_old", + to_file_handle_id="fh_new", + synapse_client=client, + ) + + client.rest_put_async.assert_awaited_once() + call_args = client.rest_put_async.call_args + assert call_args[0][0] == "/entity/syn3/version/2/filehandle" + body = json.loads(call_args[1]["body"]) + assert body["oldFileHandleId"] == "fh_old" + assert body["newFileHandleId"] == "fh_new" + + +# ============================================================================= +# _migrate_table_attached_file_async +# ============================================================================= + + +class TestMigrateTableAttachedFileAsync: + @pytest.mark.asyncio + async def test_sends_transaction(self): + client = _make_mock_client() + key = MigrationKey( + "syn5", MigrationType.TABLE_ATTACHED_FILE, row_id=7, col_id=3 + ) + + mock_transaction = MagicMock() + mock_transaction.send_job_and_wait_async = AsyncMock() + + with patch(f"{MODULE}.TableUpdateTransaction", return_value=mock_transaction): + await _migrate_table_attached_file_async( + key=key, + to_file_handle_id="fh_new", + synapse_client=client, + ) + + mock_transaction.send_job_and_wait_async.assert_awaited_once() + + +# ============================================================================= +# track_migration_results_async +# ============================================================================= + + +class TestTrackMigrationResultsAsync: + def _make_db(self, from_fh="fh_src", entity_id="syn3", version=1): + conn = sqlite3.connect(":memory:", check_same_thread=False) + cursor = conn.cursor() + _ensure_schema(cursor) + cursor.execute( + """INSERT INTO migrations (id, type, version, status, from_file_handle_id) + VALUES (?, ?, ?, ?, ?)""", + ( + entity_id, + MigrationType.FILE.value, + version, + MigrationStatus.INDEXED.value, + from_fh, + ), + ) + conn.commit() + return conn, cursor + + @pytest.mark.asyncio + async def test_successful_task_marks_migrated(self): + conn, cursor = self._make_db() + key = MigrationKey("syn3", MigrationType.FILE, version=1) + from_fh = "fh_src" + + async def _successful_migrate(): + return { + "key": key, + "from_file_handle_id": from_fh, + "to_file_handle_id": "fh_dst", + } + + task = asyncio.create_task(_successful_migrate()) + await asyncio.sleep(0) # let it complete + + pending_fh = {from_fh} + completed_fh = set() + pending_keys = {key} + + await track_migration_results_async( + conn=conn, + cursor=cursor, + active_tasks={task}, + pending_file_handles=pending_fh, + completed_file_handles=completed_fh, + pending_keys=pending_keys, + return_when=asyncio.ALL_COMPLETED, + continue_on_error=False, + ) + + row = cursor.execute( + "SELECT status, to_file_handle_id FROM migrations WHERE id='syn3'" + ).fetchone() + assert row[0] == MigrationStatus.MIGRATED.value + assert row[1] == "fh_dst" + assert from_fh in completed_fh + assert key not in pending_keys + + @pytest.mark.asyncio + async def test_failed_task_marks_errored(self): + conn, cursor = self._make_db() + key = MigrationKey("syn3", MigrationType.FILE, version=1) + from_fh = "fh_src" + inner_error = RuntimeError("network") + + async def _failing_migrate(): + err = MigrationError(key, from_fh) + err.__cause__ = inner_error + raise err + + task = asyncio.create_task(_failing_migrate()) + await asyncio.sleep(0) + + pending_fh = {from_fh} + completed_fh = set() + pending_keys = {key} + + await track_migration_results_async( + conn=conn, + cursor=cursor, + active_tasks={task}, + pending_file_handles=pending_fh, + completed_file_handles=completed_fh, + pending_keys=pending_keys, + return_when=asyncio.ALL_COMPLETED, + continue_on_error=True, # don't re-raise + ) + + row = cursor.execute("SELECT status FROM migrations WHERE id='syn3'").fetchone() + assert row[0] == MigrationStatus.ERRORED.value + assert from_fh in completed_fh + + @pytest.mark.asyncio + async def test_failed_task_reraises_when_not_continue_on_error(self): + conn, cursor = self._make_db() + key = MigrationKey("syn3", MigrationType.FILE, version=1) + from_fh = "fh_src" + inner_error = RuntimeError("critical failure") + + async def _failing_migrate(): + err = MigrationError(key, from_fh) + err.__cause__ = inner_error + raise err + + task = asyncio.create_task(_failing_migrate()) + await asyncio.sleep(0) + + with pytest.raises(RuntimeError, match="critical failure"): + await track_migration_results_async( + conn=conn, + cursor=cursor, + active_tasks={task}, + pending_file_handles={from_fh}, + completed_file_handles=set(), + pending_keys={key}, + return_when=asyncio.ALL_COMPLETED, + continue_on_error=False, + ) + + +# ============================================================================= +# migrate_indexed_files_async +# ============================================================================= + + +class TestMigrateIndexedFilesAsync: + @pytest.mark.asyncio + async def test_raises_if_no_settings_in_db(self): + fd, path = tempfile.mkstemp(suffix=".db") + os.close(fd) + try: + conn = sqlite3.connect(path) + try: + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + finally: + conn.close() + + client = _make_mock_client() + with patch(f"{MODULE}.Synapse.get_client", return_value=client): + with pytest.raises( + ValueError, match="Unable to retrieve existing index settings" + ): + await migrate_indexed_files_async( + db_path=path, synapse_client=client + ) + finally: + os.unlink(path) + + @pytest.mark.asyncio + async def test_returns_none_when_migration_not_confirmed( + self, db_file_with_settings + ): + path, _ = db_file_with_settings + # Add an indexed row so there's something to confirm + conn = sqlite3.connect(path) + try: + cursor = conn.cursor() + cursor.execute( + "INSERT INTO migrations (id, type, status) VALUES (?, ?, ?)", + ("syn3", MigrationType.FILE.value, MigrationStatus.INDEXED.value), + ) + conn.commit() + finally: + conn.close() + + client = _make_mock_client() + with ( + patch(f"{MODULE}.Synapse.get_client", return_value=client), + patch("sys.stdout") as mock_stdout, + ): + mock_stdout.isatty.return_value = False + result = await migrate_indexed_files_async( + db_path=path, + force=False, + synapse_client=client, + ) + + assert result is None + + @pytest.mark.asyncio + async def test_returns_migration_result_on_success(self, db_file_with_settings): + path, _ = db_file_with_settings + client = _make_mock_client() + + with ( + patch(f"{MODULE}.Synapse.get_client", return_value=client), + patch(f"{MODULE}._execute_migration_async", new=AsyncMock()), + ): + result = await migrate_indexed_files_async( + db_path=path, + force=True, + synapse_client=client, + ) + + assert isinstance(result, MigrationResult) + assert result.db_path == path + + +# ============================================================================= +# _execute_migration_async +# ============================================================================= + + +class TestExecuteMigrationAsync: + def _make_db_with_indexed_file(self, from_fh="fh_src", entity_id="syn3", version=1): + conn = sqlite3.connect(":memory:", check_same_thread=False) + cursor = conn.cursor() + _ensure_schema(cursor) + cursor.execute( + """INSERT INTO migrations (id, type, version, status, from_file_handle_id, file_size) + VALUES (?, ?, ?, ?, ?, ?)""", + ( + entity_id, + MigrationType.FILE.value, + version, + MigrationStatus.INDEXED.value, + from_fh, + 1024, + ), + ) + conn.commit() + return conn, cursor + + @pytest.mark.asyncio + async def test_migrates_single_item(self): + conn, cursor = self._make_db_with_indexed_file() + client = _make_mock_client() + + key = MigrationKey("syn3", MigrationType.FILE, version=1) + + async def _mock_migrate_item( + key, + from_file_handle_id, + to_file_handle_id, + file_size, + dest_storage_location_id, + semaphore, + *, + synapse_client, + ): + return { + "key": key, + "from_file_handle_id": from_file_handle_id, + "to_file_handle_id": "fh_dst", + } + + with patch(f"{MODULE}._migrate_item_async", _mock_migrate_item): + await _execute_migration_async( + conn=conn, + cursor=cursor, + dest_storage_location_id="99", + create_table_snapshots=False, + continue_on_error=False, + synapse_client=client, + ) + + row = cursor.execute("SELECT status FROM migrations WHERE id='syn3'").fetchone() + assert row[0] == MigrationStatus.MIGRATED.value + + @pytest.mark.asyncio + async def test_empty_db_completes_without_error(self): + conn = sqlite3.connect(":memory:", check_same_thread=False) + cursor = conn.cursor() + _ensure_schema(cursor) + conn.commit() + + client = _make_mock_client() + + await _execute_migration_async( + conn=conn, + cursor=cursor, + dest_storage_location_id="99", + create_table_snapshots=False, + continue_on_error=False, + synapse_client=client, + ) + + @pytest.mark.asyncio + async def test_continue_on_error_records_failure(self): + conn, cursor = self._make_db_with_indexed_file() + client = _make_mock_client() + + key = MigrationKey("syn3", MigrationType.FILE, version=1) + + async def _failing_migrate( + key, + from_file_handle_id, + to_file_handle_id, + file_size, + dest_storage_location_id, + semaphore, + *, + synapse_client, + ): + err = MigrationError(key, from_file_handle_id) + err.__cause__ = RuntimeError("disk full") + raise err + + with patch(f"{MODULE}._migrate_item_async", _failing_migrate): + await _execute_migration_async( + conn=conn, + cursor=cursor, + dest_storage_location_id="99", + create_table_snapshots=False, + continue_on_error=True, + synapse_client=client, + ) + + row = cursor.execute("SELECT status FROM migrations WHERE id='syn3'").fetchone() + assert row[0] == MigrationStatus.ERRORED.value diff --git a/tests/unit/synapseclient/unit_test_Entity.py b/tests/unit/synapseclient/unit_test_Entity.py index d2b03e5a7..e3a345356 100644 --- a/tests/unit/synapseclient/unit_test_Entity.py +++ b/tests/unit/synapseclient/unit_test_Entity.py @@ -236,7 +236,7 @@ def test_split_entity_namespaces(): "foo": 1234, "parentId": "syn1234", } - (properties, annotations, local_state) = split_entity_namespaces(e) + properties, annotations, local_state = split_entity_namespaces(e) assert set(properties.keys()) == {"concreteType", "name", "parentId"} assert properties["name"] == "Henry" @@ -255,7 +255,7 @@ def test_split_entity_namespaces(): "files": ["foo.xyz"], "path": "/foo/bar/bat/foo.xyz", } - (properties, annotations, local_state) = split_entity_namespaces(e) + properties, annotations, local_state = split_entity_namespaces(e) assert set(properties.keys()) == { "concreteType", diff --git a/tests/unit/synapseclient/unit_test_client.py b/tests/unit/synapseclient/unit_test_client.py index 57142e5bc..f4fc5ea76 100644 --- a/tests/unit/synapseclient/unit_test_client.py +++ b/tests/unit/synapseclient/unit_test_client.py @@ -1,4 +1,5 @@ """Unit tests for the Synapse client""" + import asyncio import configparser import datetime @@ -344,12 +345,15 @@ async def test_download_file_handle__retry_error( (disk_space_error, 1), (ValueError("foo"), retries), ]: - with patch( - GET_FILE_HANDLE_FOR_DOWNLOAD, - new_callable=AsyncMock, - ) as mock_get_file_handle_download, patch( - DOWNLOAD_FROM_URL, - ) as mock_download_from_URL: + with ( + patch( + GET_FILE_HANDLE_FOR_DOWNLOAD, + new_callable=AsyncMock, + ) as mock_get_file_handle_download, + patch( + DOWNLOAD_FROM_URL, + ) as mock_download_from_URL, + ): mock_get_file_handle_download.return_value = { "fileHandle": { "id": file_handle_id, @@ -413,10 +417,13 @@ def mock_with_boto_sts_credentials( expected_download_path = FOO_KEY mock_s3_client_wrapper.download_file.return_value = expected_download_path - with patch( - GET_FILE_HANDLE_FOR_DOWNLOAD, - new_callable=AsyncMock, - ) as mock_get_file_handle_download, patch.object(self.syn, "cache") as cache: + with ( + patch( + GET_FILE_HANDLE_FOR_DOWNLOAD, + new_callable=AsyncMock, + ) as mock_get_file_handle_download, + patch.object(self.syn, "cache") as cache, + ): mock_get_file_handle_download.return_value = { "fileHandle": { "id": file_handle_id, @@ -468,19 +475,20 @@ async def test_download_file_ftp_link(self) -> None: destination = "/tmp" expected_destination = os.path.abspath(destination) - with patch( - GET_FILE_HANDLE_FOR_DOWNLOAD, - new_callable=AsyncMock, - ) as mock_get_file_handle_download, patch.object( - self.syn, "cache" - ), patch.object( - urllib_request, "urlretrieve" - ) as mock_url_retrieve, patch.object( - utils, "md5_for_file" - ) as mock_md5_for_file, patch.object( - os, "makedirs" - ), patch.object( - sts_transfer, "is_storage_location_sts_enabled_async", return_value=False + with ( + patch( + GET_FILE_HANDLE_FOR_DOWNLOAD, + new_callable=AsyncMock, + ) as mock_get_file_handle_download, + patch.object(self.syn, "cache"), + patch.object(urllib_request, "urlretrieve") as mock_url_retrieve, + patch.object(utils, "md5_for_file") as mock_md5_for_file, + patch.object(os, "makedirs"), + patch.object( + sts_transfer, + "is_storage_location_sts_enabled_async", + return_value=False, + ), ): mock_get_file_handle_download.return_value = { "fileHandle": { @@ -837,12 +845,14 @@ def test_submit_docker(self) -> None: "contributors": self.contributors, "submitterAlias": self.team["name"], } - with patch.object( - self.syn, "get_async", return_value=docker_entity - ) as patch_syn_get, patch.object( - self.syn, "_get_docker_digest", return_value=docker_digest - ) as patch_get_digest, patch.object( - self.syn, "_submit", return_value=expected_submission + with ( + patch.object( + self.syn, "get_async", return_value=docker_entity + ) as patch_syn_get, + patch.object( + self.syn, "_get_docker_digest", return_value=docker_digest + ) as patch_get_digest, + patch.object(self.syn, "_submit", return_value=expected_submission), ): submission = self.syn.submit("9090", patch_syn_get, name="George") patch_get_digest.assert_called_once_with(docker_entity, "latest") @@ -973,13 +983,14 @@ def test_send_message(syn: Synapse) -> None: "Through caverns measureless to man\n" "Down to a sunless sea.\n" ) - with patch( - "synapseclient.client.multipart_upload_string_async", - new_callable=AsyncMock, - return_value="7365905", - ) as mock_upload_string, patch( - "synapseclient.client.Synapse.restPOST" - ) as post_mock: + with ( + patch( + "synapseclient.client.multipart_upload_string_async", + new_callable=AsyncMock, + return_value="7365905", + ) as mock_upload_string, + patch("synapseclient.client.Synapse.restPOST") as post_mock, + ): syn.sendMessage( userIds=[1421212], messageSubject="Xanadu", messageBody=messageBody ) @@ -1023,15 +1034,17 @@ def test__uploadExternallyStoringProjects_external_user( max_threads = 8 # method under test - with patch.object( - upload_functions, - "multipart_upload_file", - return_value=expected_file_handle_id, - ) as mocked_multipart_upload, patch.object( - self.syn.cache, "add" - ) as mocked_cache_add, patch.object( - self.syn, "_get_file_handle_as_creator" - ) as mocked_get_file_handle: + with ( + patch.object( + upload_functions, + "multipart_upload_file", + return_value=expected_file_handle_id, + ) as mocked_multipart_upload, + patch.object(self.syn.cache, "add") as mocked_cache_add, + patch.object( + self.syn, "_get_file_handle_as_creator" + ) as mocked_get_file_handle, + ): upload_functions.upload_file_handle( syn=self.syn, parent_entity=test_file["parentId"], @@ -1429,9 +1442,10 @@ def test_move(syn: Synapse) -> None: entity = Folder(name="folder", parent="syn456") moved_entity = entity moved_entity.parentId = "syn789" - with patch.object(syn, "get", return_value=entity) as syn_get_patch, patch.object( - syn, "store", return_value=moved_entity - ) as syn_store_patch: + with ( + patch.object(syn, "get", return_value=entity) as syn_get_patch, + patch.object(syn, "store", return_value=moved_entity) as syn_store_patch, + ): assert moved_entity == syn.move("syn123", "syn789") syn_get_patch.assert_called_once_with("syn123", downloadFile=False) syn_store_patch.assert_called_once_with(moved_entity, forceVersion=False) @@ -1482,9 +1496,11 @@ def test_set_permissions_default_permissions(syn: Synapse) -> None: {"accessType": ["READ", "DOWNLOAD"], "principalId": principalId} ] } - with patch.object(syn, "_getBenefactor", return_value=entity), patch.object( - syn, "_getACL", return_value=acl - ), patch.object(syn, "_storeACL", return_value=update_acl) as patch_store_acl: + with ( + patch.object(syn, "_getBenefactor", return_value=entity), + patch.object(syn, "_getACL", return_value=acl), + patch.object(syn, "_storeACL", return_value=update_acl) as patch_store_acl, + ): assert update_acl == syn.setPermissions(entity, principalId) patch_store_acl.assert_called_once_with(entity, update_acl) @@ -1678,11 +1694,11 @@ def test_folder_or_parent(self) -> None: def _create_storage_location_test( self, expected_post_body, *args, **kwargs ) -> None: - with patch.object(self.syn, "restPOST") as mock_post, patch.object( - self.syn, "setStorageLocation" - ) as mock_set_storage_location, patch.object( - self.syn, "store_async" - ) as syn_store: + with ( + patch.object(self.syn, "restPOST") as mock_post, + patch.object(self.syn, "setStorageLocation") as mock_set_storage_location, + patch.object(self.syn, "store_async") as syn_store, + ): mock_post.return_value = {"storageLocationId": 456} mock_set_storage_location.return_value = {"id": "foo"} @@ -1754,19 +1770,16 @@ def init_syn(self, syn: Synapse) -> None: self.syn = syn def _s3_file_handle_test(self, **kwargs) -> None: - with patch.object( - self.syn, "_getDefaultUploadDestination" - ) as mock_get_upload_dest, patch.object( - os, "path" - ) as mock_os_path, patch.object( - os, "stat" - ) as mock_os_stat, patch.object( - utils, "md5_for_file" - ) as mock_md5, patch( - "mimetypes.guess_type" - ) as mock_guess_mimetype, patch.object( - self.syn, "restPOST" - ) as mock_post: + with ( + patch.object( + self.syn, "_getDefaultUploadDestination" + ) as mock_get_upload_dest, + patch.object(os, "path") as mock_os_path, + patch.object(os, "stat") as mock_os_stat, + patch.object(utils, "md5_for_file") as mock_md5, + patch("mimetypes.guess_type") as mock_guess_mimetype, + patch.object(self.syn, "restPOST") as mock_post, + ): bucket_name = "foo_bucket" s3_file_key = "/foo/bar/baz" file_path = "/tmp/foo" @@ -1925,13 +1938,17 @@ def test_invite_to_team__email(self) -> None: "inviteeEmail": self.email, "inviteeId": None, } - with patch.object( - self.syn, "get_team_open_invitations", return_value=[] - ) as patch_get_invites, patch.object( - self.syn, "getUserProfile", return_value=self.profile - ) as patch_get_profile, patch.object( - self.syn, "send_membership_invitation", return_value=self.response - ) as patch_invitation: + with ( + patch.object( + self.syn, "get_team_open_invitations", return_value=[] + ) as patch_get_invites, + patch.object( + self.syn, "getUserProfile", return_value=self.profile + ) as patch_get_profile, + patch.object( + self.syn, "send_membership_invitation", return_value=self.response + ) as patch_invitation, + ): invite = self.syn.invite_to_team( self.team, inviteeEmail=self.email, message=self.message ) @@ -1944,15 +1961,20 @@ def test_invite_to_team__user(self) -> None: """Invite user to team via their Synapse userid""" self.member_status["isMember"] = False invite_body = {"inviteeId": self.userid, "inviteeEmail": None, "message": None} - with patch.object( - self.syn, "get_membership_status", return_value=self.member_status - ) as patch_getmem, patch.object( - self.syn, "get_team_open_invitations", return_value=[] - ) as patch_get_invites, patch.object( - self.syn, "getUserProfile", return_value=self.profile - ) as patch_get_profile, patch.object( - self.syn, "send_membership_invitation", return_value=self.response - ) as patch_invitation: + with ( + patch.object( + self.syn, "get_membership_status", return_value=self.member_status + ) as patch_getmem, + patch.object( + self.syn, "get_team_open_invitations", return_value=[] + ) as patch_get_invites, + patch.object( + self.syn, "getUserProfile", return_value=self.profile + ) as patch_get_profile, + patch.object( + self.syn, "send_membership_invitation", return_value=self.response + ) as patch_invitation, + ): invite = self.syn.invite_to_team(self.team, user=self.userid) patch_getmem.assert_called_once_with(self.userid, self.team.id) patch_get_profile.assert_called_once_with(self.userid) @@ -1964,15 +1986,20 @@ def test_invite_to_team__username(self) -> None: """Invite user to team via their Synapse username""" self.member_status["isMember"] = False invite_body = {"inviteeId": self.userid, "inviteeEmail": None, "message": None} - with patch.object( - self.syn, "get_membership_status", return_value=self.member_status - ) as patch_getmem, patch.object( - self.syn, "get_team_open_invitations", return_value=[] - ) as patch_get_invites, patch.object( - self.syn, "getUserProfile", return_value=self.profile - ) as patch_get_profile, patch.object( - self.syn, "send_membership_invitation", return_value=self.response - ) as patch_invitation: + with ( + patch.object( + self.syn, "get_membership_status", return_value=self.member_status + ) as patch_getmem, + patch.object( + self.syn, "get_team_open_invitations", return_value=[] + ) as patch_get_invites, + patch.object( + self.syn, "getUserProfile", return_value=self.profile + ) as patch_get_profile, + patch.object( + self.syn, "send_membership_invitation", return_value=self.response + ) as patch_invitation, + ): invite = self.syn.invite_to_team(self.team, user=self.username) patch_getmem.assert_called_once_with(self.userid, self.team.id) patch_get_profile.assert_called_once_with(self.username) @@ -1982,15 +2009,20 @@ def test_invite_to_team__username(self) -> None: def test_invite_to_team__ismember(self) -> None: """None returned when user is already a member""" - with patch.object( - self.syn, "get_membership_status", return_value=self.member_status - ) as patch_getmem, patch.object( - self.syn, "get_team_open_invitations", return_value=[] - ) as patch_get_invites, patch.object( - self.syn, "getUserProfile", return_value=self.profile - ) as patch_get_profile, patch.object( - self.syn, "send_membership_invitation", return_value=self.response - ) as patch_invitation: + with ( + patch.object( + self.syn, "get_membership_status", return_value=self.member_status + ) as patch_getmem, + patch.object( + self.syn, "get_team_open_invitations", return_value=[] + ) as patch_get_invites, + patch.object( + self.syn, "getUserProfile", return_value=self.profile + ) as patch_get_profile, + patch.object( + self.syn, "send_membership_invitation", return_value=self.response + ) as patch_invitation, + ): invite = self.syn.invite_to_team(self.team, user=self.userid) patch_getmem.assert_called_once_with(self.userid, self.team.id) patch_get_profile.assert_called_once_with(self.userid) @@ -2002,17 +2034,21 @@ def test_invite_to_team__user_openinvite(self) -> None: """None returned when user already has an invitation""" self.member_status["isMember"] = False invite_body = {"inviteeId": self.userid} - with patch.object( - self.syn, "get_membership_status", return_value=self.member_status - ) as patch_getmem, patch.object( - self.syn, "get_team_open_invitations", return_value=[invite_body] - ) as patch_get_invites, patch.object( - self.syn, "getUserProfile", return_value=self.profile - ) as patch_get_profile, patch.object( - self.syn, "_delete_membership_invitation" - ) as patch_delete, patch.object( - self.syn, "send_membership_invitation", return_value=self.response - ) as patch_invitation: + with ( + patch.object( + self.syn, "get_membership_status", return_value=self.member_status + ) as patch_getmem, + patch.object( + self.syn, "get_team_open_invitations", return_value=[invite_body] + ) as patch_get_invites, + patch.object( + self.syn, "getUserProfile", return_value=self.profile + ) as patch_get_profile, + patch.object(self.syn, "_delete_membership_invitation") as patch_delete, + patch.object( + self.syn, "send_membership_invitation", return_value=self.response + ) as patch_invitation, + ): invite = self.syn.invite_to_team(self.team, user=self.userid) patch_getmem.assert_called_once_with(self.userid, self.team.id) patch_get_profile.assert_called_once_with(self.userid) @@ -2024,13 +2060,15 @@ def test_invite_to_team__user_openinvite(self) -> None: def test_invite_to_team__email_openinvite(self) -> None: """None returned when email already has an invitation""" invite_body = {"inviteeEmail": self.email} - with patch.object( - self.syn, "get_team_open_invitations", return_value=[invite_body] - ) as patch_get_invites, patch.object( - self.syn, "_delete_membership_invitation" - ) as patch_delete, patch.object( - self.syn, "send_membership_invitation", return_value=self.response - ) as patch_invitation: + with ( + patch.object( + self.syn, "get_team_open_invitations", return_value=[invite_body] + ) as patch_get_invites, + patch.object(self.syn, "_delete_membership_invitation") as patch_delete, + patch.object( + self.syn, "send_membership_invitation", return_value=self.response + ) as patch_invitation, + ): invite = self.syn.invite_to_team(self.team, inviteeEmail=self.email) patch_get_invites.assert_called_once_with(self.team.id) patch_invitation.assert_not_called() @@ -2041,13 +2079,15 @@ def test_invite_to_team__email_openinvite(self) -> None: def test_invite_to_team__none_matching_invitation(self) -> None: """Invitation sent when no matching open invitations""" invite_body = {"inviteeEmail": self.email + "foo"} - with patch.object( - self.syn, "get_team_open_invitations", return_value=[invite_body] - ) as patch_get_invites, patch.object( - self.syn, "_delete_membership_invitation" - ) as patch_delete, patch.object( - self.syn, "send_membership_invitation", return_value=self.response - ) as patch_invitation: + with ( + patch.object( + self.syn, "get_team_open_invitations", return_value=[invite_body] + ) as patch_get_invites, + patch.object(self.syn, "_delete_membership_invitation") as patch_delete, + patch.object( + self.syn, "send_membership_invitation", return_value=self.response + ) as patch_invitation, + ): invite = self.syn.invite_to_team(self.team, inviteeEmail=self.email) patch_get_invites.assert_called_once_with(self.team.id) patch_delete.assert_not_called() @@ -2058,13 +2098,15 @@ def test_invite_to_team__force_invite(self) -> None: """Invitation sent when force the invite, make sure open invitation is deleted""" open_invitations = {"inviteeEmail": self.email, "id": "9938"} - with patch.object( - self.syn, "get_team_open_invitations", return_value=[open_invitations] - ) as patch_get_invites, patch.object( - self.syn, "_delete_membership_invitation" - ) as patch_delete, patch.object( - self.syn, "send_membership_invitation", return_value=self.response - ) as patch_invitation: + with ( + patch.object( + self.syn, "get_team_open_invitations", return_value=[open_invitations] + ) as patch_get_invites, + patch.object(self.syn, "_delete_membership_invitation") as patch_delete, + patch.object( + self.syn, "send_membership_invitation", return_value=self.response + ) as patch_invitation, + ): invite = self.syn.invite_to_team( self.team, inviteeEmail=self.email, force=True ) @@ -2293,15 +2335,16 @@ def _rest_call_test(self, requests_session=None): kwargs = {"stream": True} requests_session = requests_session or self.syn._requests_session - with patch.object( - self.syn, "_build_uri_and_headers" - ) as mock_build_uri_and_headers, patch.object( - self.syn, "_build_retry_policy" - ) as mock_build_retry_policy, patch.object( - self.syn, "_handle_synapse_http_error" - ) as mock_handle_synapse_http_error, patch.object( - requests_session, method - ) as mock_requests_call: + with ( + patch.object( + self.syn, "_build_uri_and_headers" + ) as mock_build_uri_and_headers, + patch.object(self.syn, "_build_retry_policy") as mock_build_retry_policy, + patch.object( + self.syn, "_handle_synapse_http_error" + ) as mock_handle_synapse_http_error, + patch.object(requests_session, method) as mock_requests_call, + ): mock_build_uri_and_headers.return_value = (uri, headers) mock_build_retry_policy.return_value = retryPolicy @@ -2547,22 +2590,18 @@ def test_store_needs_upload_false_file_handle_id_not_in_local_state( ], "annotations": {"id": synapse_id, "etag": etag, "annotations": {}}, } - with patch.object( - syn, "_getEntityBundle", return_value=returned_bundle - ), patch.object( - synapseclient.client, - "upload_file_handle_async", - return_value=returned_file_handle, - ), patch.object( - syn.cache, "contains", return_value=True - ), patch.object( - syn, "_updateEntity" - ), patch.object( - syn, "set_annotations" - ), patch.object( - Entity, "create" - ), patch.object( - syn, "get" + with ( + patch.object(syn, "_getEntityBundle", return_value=returned_bundle), + patch.object( + synapseclient.client, + "upload_file_handle_async", + return_value=returned_file_handle, + ), + patch.object(syn.cache, "contains", return_value=True), + patch.object(syn, "_updateEntity"), + patch.object(syn, "set_annotations"), + patch.object(Entity, "create"), + patch.object(syn, "get"), ): f = File("/fake_file.txt", parent=parent_id) syn.store(f) @@ -2627,22 +2666,20 @@ def test_store_existing_processed_as_update(syn: Synapse) -> None: "baz": [4], } - with patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, patch.object( - synapseclient.client, - "upload_file_handle_async", - return_value=returned_file_handle, - ), patch.object(syn.cache, "contains", return_value=True), patch.object( - syn, "_createEntity" - ) as mock_createEntity, patch.object( - syn, "_updateEntity" - ) as mock_updateEntity, patch.object( - syn, "findEntityId" - ) as mock_findEntityId, patch.object( - syn, "set_annotations" - ) as mock_set_annotations, patch.object( - Entity, "create" - ), patch.object( - syn, "get" + with ( + patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, + patch.object( + synapseclient.client, + "upload_file_handle_async", + return_value=returned_file_handle, + ), + patch.object(syn.cache, "contains", return_value=True), + patch.object(syn, "_createEntity") as mock_createEntity, + patch.object(syn, "_updateEntity") as mock_updateEntity, + patch.object(syn, "findEntityId") as mock_findEntityId, + patch.object(syn, "set_annotations") as mock_set_annotations, + patch.object(Entity, "create"), + patch.object(syn, "get"), ): mock_get_entity_bundle.return_value = returned_bundle @@ -2723,23 +2760,21 @@ def test_store__409_processed_as_update(syn: Synapse) -> None: "baz": [4], } - with patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, patch.object( - synapseclient.client, - "upload_file_handle_async", - new_callable=AsyncMock, - return_value=returned_file_handle, - ), patch.object(syn.cache, "contains", return_value=True), patch.object( - syn, "_createEntity" - ) as mock_createEntity, patch.object( - syn, "_updateEntity" - ) as mock_updateEntity, patch.object( - syn, "findEntityId" - ) as mock_findEntityId, patch.object( - syn, "set_annotations" - ) as mock_set_annotations, patch.object( - Entity, "create" - ), patch.object( - syn, "get_async" + with ( + patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, + patch.object( + synapseclient.client, + "upload_file_handle_async", + new_callable=AsyncMock, + return_value=returned_file_handle, + ), + patch.object(syn.cache, "contains", return_value=True), + patch.object(syn, "_createEntity") as mock_createEntity, + patch.object(syn, "_updateEntity") as mock_updateEntity, + patch.object(syn, "findEntityId") as mock_findEntityId, + patch.object(syn, "set_annotations") as mock_set_annotations, + patch.object(Entity, "create"), + patch.object(syn, "get_async"), ): mock_get_entity_bundle.side_effect = [None, returned_bundle] mock_createEntity.side_effect = SynapseHTTPError( @@ -2804,22 +2839,20 @@ def test_store__no_need_to_update_annotation(syn: Synapse) -> None: }, } - with patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, patch.object( - synapseclient.client, - "upload_file_handle_async", - return_value=returned_file_handle, - ), patch.object(syn.cache, "contains", return_value=True), patch.object( - syn, "_createEntity" - ), patch.object( - syn, "_updateEntity" - ), patch.object( - syn, "findEntityId" - ), patch.object( - syn, "set_annotations" - ) as mock_set_annotations, patch.object( - Entity, "create" - ), patch.object( - syn, "get" + with ( + patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, + patch.object( + synapseclient.client, + "upload_file_handle_async", + return_value=returned_file_handle, + ), + patch.object(syn.cache, "contains", return_value=True), + patch.object(syn, "_createEntity"), + patch.object(syn, "_updateEntity"), + patch.object(syn, "findEntityId"), + patch.object(syn, "set_annotations") as mock_set_annotations, + patch.object(Entity, "create"), + patch.object(syn, "get"), ): mock_get_entity_bundle.return_value = returned_bundle @@ -2875,22 +2908,20 @@ def test_store__update_version_comment(syn: Synapse) -> None: "versionComment": "12345", } - with patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, patch.object( - synapseclient.client, - "upload_file_handle_async", - return_value=returned_file_handle, - ), patch.object(syn.cache, "contains", return_value=True), patch.object( - syn, "_createEntity" - ) as mock_createEntity, patch.object( - syn, "_updateEntity" - ) as mock_updateEntity, patch.object( - syn, "findEntityId" - ) as mock_findEntityId, patch.object( - syn, "set_annotations" - ) as mock_set_annotations, patch.object( - Entity, "create" - ), patch.object( - syn, "get" + with ( + patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, + patch.object( + synapseclient.client, + "upload_file_handle_async", + return_value=returned_file_handle, + ), + patch.object(syn.cache, "contains", return_value=True), + patch.object(syn, "_createEntity") as mock_createEntity, + patch.object(syn, "_updateEntity") as mock_updateEntity, + patch.object(syn, "findEntityId") as mock_findEntityId, + patch.object(syn, "set_annotations") as mock_set_annotations, + patch.object(Entity, "create"), + patch.object(syn, "get"), ): mock_get_entity_bundle.return_value = returned_bundle @@ -2984,16 +3015,17 @@ def test_store__existing_no_update(syn: Synapse) -> None: "annotations": {}, } - with patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, patch.object( - synapseclient.client, - "upload_file_handle_async", - return_value=returned_file_handle, - ), patch.object(syn.cache, "contains", return_value=True), patch.object( - syn, "_createEntity" - ) as mock_createEntity, patch.object( - syn, "_updateEntity" - ) as mock_updatentity, patch.object( - syn, "get" + with ( + patch.object(syn, "_getEntityBundle") as mock_get_entity_bundle, + patch.object( + synapseclient.client, + "upload_file_handle_async", + return_value=returned_file_handle, + ), + patch.object(syn.cache, "contains", return_value=True), + patch.object(syn, "_createEntity") as mock_createEntity, + patch.object(syn, "_updateEntity") as mock_updatentity, + patch.object(syn, "get"), ): mock_get_entity_bundle.return_value = returned_bundle mock_createEntity.side_effect = SynapseHTTPError( @@ -3049,9 +3081,10 @@ def test_get_submission_with_annotations(syn: Synapse) -> None: "entityBundleJSON": json.dumps(entity_bundle_json), } - with patch.object(syn, "restGET") as restGET, patch.object( - syn, "_getWithEntityBundle_async" - ) as get_entity: + with ( + patch.object(syn, "restGET") as restGET, + patch.object(syn, "_getWithEntityBundle_async") as get_entity, + ): restGET.return_value = submission response = syn.getSubmission(submission_id) @@ -3096,9 +3129,10 @@ def run_get_submission_test( "entityBundleJSON": json.dumps({}), } - with patch.object(syn, "restGET") as restGET, patch.object( - syn, "_getWithEntityBundle_async" - ) as get_entity: + with ( + patch.object(syn, "restGET") as restGET, + patch.object(syn, "_getWithEntityBundle_async") as get_entity, + ): restGET.return_value = submission if should_warn: @@ -3170,12 +3204,11 @@ def test_get_submission_and_submission_status_interchangeability( } # Let's mock all the API calls made within these two methods - with patch.object(syn, "restGET") as restGET, patch.object( - Submission, "getURI" - ) as get_submission_uri, patch.object( - SubmissionStatus, "getURI" - ) as get_status_uri, patch.object( - syn, "_getWithEntityBundle_async" + with ( + patch.object(syn, "restGET") as restGET, + patch.object(Submission, "getURI") as get_submission_uri, + patch.object(SubmissionStatus, "getURI") as get_status_uri, + patch.object(syn, "_getWithEntityBundle_async"), ): get_submission_uri.return_value = ( f"/evaluation/submission/{expected_submission_id}" @@ -3248,9 +3281,10 @@ def test__create_table_snapshot__with_activity(self, syn: Synapse) -> None: "description": activity["description"], "id": 123, } - with patch.object( - syn, "restPOST", return_value=snapshot - ) as restpost, patch.object(syn, "_saveActivity") as mock__saveActivity: + with ( + patch.object(syn, "restPOST", return_value=snapshot) as restpost, + patch.object(syn, "_saveActivity") as mock__saveActivity, + ): mock__saveActivity.return_value = mock_dict syn._create_table_snapshot( "syn1234", comment="foo", label="new_label", activity=activity @@ -3311,11 +3345,14 @@ def test_create_snapshot_version_table(self, syn: Synapse) -> None: """Create Table snapshot""" table = Mock(Schema) snapshot_version = 3 - with patch.object(syn, "get", return_value=table) as get, patch.object( - syn, - "_create_table_snapshot", - return_value={"snapshotVersionNumber": snapshot_version}, - ) as create: + with ( + patch.object(syn, "get", return_value=table) as get, + patch.object( + syn, + "_create_table_snapshot", + return_value={"snapshotVersionNumber": snapshot_version}, + ) as create, + ): result = syn.create_snapshot_version( "syn1234", comment="foo", label="new_label", activity=2, wait=True ) @@ -3335,11 +3372,14 @@ def test_create_snapshot_version_entityview(self, syn: Synapse) -> None: views = [Mock(EntityViewSchema), Mock(SubmissionViewSchema)] for view in views: snapshot_version = 3 - with patch.object(syn, "get", return_value=view) as get, patch.object( - syn, - "_async_table_update", - return_value={"snapshotVersionNumber": snapshot_version}, - ) as update: + with ( + patch.object(syn, "get", return_value=view) as get, + patch.object( + syn, + "_async_table_update", + return_value={"snapshotVersionNumber": snapshot_version}, + ) as update, + ): result = syn.create_snapshot_version( "syn1234", comment="foo", @@ -3358,9 +3398,12 @@ def test_create_snapshot_version_entityview(self, syn: Synapse) -> None: ) assert snapshot_version == result - with patch.object(syn, "get", return_value=view) as get, patch.object( - syn, "_async_table_update", return_value={"token": 5} - ) as update: + with ( + patch.object(syn, "get", return_value=view) as get, + patch.object( + syn, "_async_table_update", return_value={"token": 5} + ) as update, + ): result = syn.create_snapshot_version( "syn1234", comment="foo", label="new_label", activity=2, wait=False ) @@ -3378,9 +3421,12 @@ def test_create_snapshot_version_entityview(self, syn: Synapse) -> None: def test_create_snapshot_version_raiseerror(self, syn: Synapse) -> None: """Raise error if entity view or table not passed in""" wrong_type = Mock() - with patch.object(syn, "get", return_value=wrong_type), pytest.raises( - ValueError, - match="This function only accepts Synapse ids of Tables or Views", + with ( + patch.object(syn, "get", return_value=wrong_type), + pytest.raises( + ValueError, + match="This function only accepts Synapse ids of Tables or Views", + ), ): syn.create_snapshot_version("syn1234") @@ -3525,14 +3571,15 @@ def test_query_table_csv(self, download_location: str, syn: Synapse) -> None: expanduser = os.path.expanduser expandvars = os.path.expandvars os_join = os.path.join - with patch.object(syn, "_waitForAsync") as mock_wait_for_async, patch.object( - syn, "cache" - ) as mock_cache, patch( - "synapseclient.client.download_by_file_handle", - new_callable=AsyncMock, - ) as mock_download_file_handle, patch.object( - client, "os" - ) as mock_os: + with ( + patch.object(syn, "_waitForAsync") as mock_wait_for_async, + patch.object(syn, "cache") as mock_cache, + patch( + "synapseclient.client.download_by_file_handle", + new_callable=AsyncMock, + ) as mock_download_file_handle, + patch.object(client, "os") as mock_os, + ): mock_download_result = {"resultsFileHandleId": file_handle_id} mock_wait_for_async.return_value = mock_download_result mock_cache.get.return_value = None @@ -3642,11 +3689,14 @@ def test__get_certified_passing_record(userid, syn: Synapse) -> None: @pytest.mark.parametrize("response", [True, False]) def test_is_certified(response, syn: Synapse) -> None: - with patch.object( - syn, "getUserProfile", return_value={"ownerId": "foobar"} - ) as patch_get_user, patch.object( - syn, "_get_certified_passing_record", return_value={"passed": response} - ) as patch_get_cert: + with ( + patch.object( + syn, "getUserProfile", return_value={"ownerId": "foobar"} + ) as patch_get_user, + patch.object( + syn, "_get_certified_passing_record", return_value={"passed": response} + ) as patch_get_cert, + ): is_certified = syn.is_certified("test") patch_get_user.assert_called_once_with("test") patch_get_cert.assert_called_once_with("foobar") @@ -3658,13 +3708,16 @@ def test_is_certified__no_quiz_results(syn: Synapse) -> None: In this case the back end returns a 404 rather than a result.""" response = MagicMock(requests.Response) response.status_code = 404 - with patch.object( - syn, "getUserProfile", return_value={"ownerId": "foobar"} - ) as patch_get_user, patch.object( - syn, - "_get_certified_passing_record", - side_effect=SynapseHTTPError(response=response), - ) as patch_get_cert: + with ( + patch.object( + syn, "getUserProfile", return_value={"ownerId": "foobar"} + ) as patch_get_user, + patch.object( + syn, + "_get_certified_passing_record", + side_effect=SynapseHTTPError(response=response), + ) as patch_get_cert, + ): is_certified = syn.is_certified("test") patch_get_user.assert_called_once_with("test") patch_get_cert.assert_called_once_with("foobar") @@ -3776,41 +3829,46 @@ def init(self, syn: Synapse) -> None: def test_get_permissions_with_defined_set_for_access(self) -> None: # GIVEN the API calls are mocked - with patch.object( - self.syn, - "_getUserbyPrincipalIdOrName", - # AND a user with id of 456 - return_value=456, - ), patch.object( - self.syn, - "_getACL", - return_value={ - "resourceAccess": [ - { - "principalId": 456, - # AND the permissions are given to the user - "accessType": [ - "READ", - "DELETE", - "CHANGE_SETTINGS", - "UPDATE", - "CHANGE_PERMISSIONS", - "CREATE", - "MODERATE", - "DOWNLOAD", - ], - } - ] - }, - ), patch.object( - self.syn, - "_find_teams_for_principal", - # AND the user is a part of no teams - return_value=[], - ), patch.object( - self.syn, - "_get_user_bundle", - return_value=None, + with ( + patch.object( + self.syn, + "_getUserbyPrincipalIdOrName", + # AND a user with id of 456 + return_value=456, + ), + patch.object( + self.syn, + "_getACL", + return_value={ + "resourceAccess": [ + { + "principalId": 456, + # AND the permissions are given to the user + "accessType": [ + "READ", + "DELETE", + "CHANGE_SETTINGS", + "UPDATE", + "CHANGE_PERMISSIONS", + "CREATE", + "MODERATE", + "DOWNLOAD", + ], + } + ] + }, + ), + patch.object( + self.syn, + "_find_teams_for_principal", + # AND the user is a part of no teams + return_value=[], + ), + patch.object( + self.syn, + "_get_user_bundle", + return_value=None, + ), ): # WHEN I get the permissions for the user on the entity permissions = self.syn.getPermissions("123", "456") @@ -3830,41 +3888,46 @@ def test_get_permissions_with_defined_set_for_access(self) -> None: def test_get_permissions_with_no_permissions_for_user(self) -> None: # GIVEN the API calls are mocked - with patch.object( - self.syn, - "_getUserbyPrincipalIdOrName", - # AND a user with id of 456 - return_value=456, - ), patch.object( - self.syn, - "_getACL", - return_value={ - "resourceAccess": [ - { - # AND the permissions are given to an unknown user - "principalId": 99999, - "accessType": [ - "READ", - "DELETE", - "CHANGE_SETTINGS", - "UPDATE", - "CHANGE_PERMISSIONS", - "CREATE", - "MODERATE", - "DOWNLOAD", - ], - } - ] - }, - ), patch.object( - self.syn, - "_find_teams_for_principal", - # AND the user is a part of no teams - return_value=[], - ), patch.object( - self.syn, - "_get_user_bundle", - return_value=None, + with ( + patch.object( + self.syn, + "_getUserbyPrincipalIdOrName", + # AND a user with id of 456 + return_value=456, + ), + patch.object( + self.syn, + "_getACL", + return_value={ + "resourceAccess": [ + { + # AND the permissions are given to an unknown user + "principalId": 99999, + "accessType": [ + "READ", + "DELETE", + "CHANGE_SETTINGS", + "UPDATE", + "CHANGE_PERMISSIONS", + "CREATE", + "MODERATE", + "DOWNLOAD", + ], + } + ] + }, + ), + patch.object( + self.syn, + "_find_teams_for_principal", + # AND the user is a part of no teams + return_value=[], + ), + patch.object( + self.syn, + "_get_user_bundle", + return_value=None, + ), ): # WHEN I get the permissions for the user on the entity permissions = self.syn.getPermissions("123", "456") @@ -3875,41 +3938,46 @@ def test_get_permissions_with_no_permissions_for_user(self) -> None: def test_get_permissions_with_permissions_given_through_single_team(self) -> None: # GIVEN the API calls are mocked - with patch.object( - self.syn, - "_getUserbyPrincipalIdOrName", - # AND a user with id of 456 - return_value=456, - ), patch.object( - self.syn, - "_getACL", - return_value={ - "resourceAccess": [ - { - # AND the permissions are given to a team - "principalId": 999, - "accessType": [ - "READ", - "DELETE", - "CHANGE_SETTINGS", - "UPDATE", - "CHANGE_PERMISSIONS", - "CREATE", - "MODERATE", - "DOWNLOAD", - ], - } - ] - }, - ), patch.object( - self.syn, - "_find_teams_for_principal", - # AND the user is assigned to a team - return_value=[Team(id=999)], - ), patch.object( - self.syn, - "_get_user_bundle", - return_value=None, + with ( + patch.object( + self.syn, + "_getUserbyPrincipalIdOrName", + # AND a user with id of 456 + return_value=456, + ), + patch.object( + self.syn, + "_getACL", + return_value={ + "resourceAccess": [ + { + # AND the permissions are given to a team + "principalId": 999, + "accessType": [ + "READ", + "DELETE", + "CHANGE_SETTINGS", + "UPDATE", + "CHANGE_PERMISSIONS", + "CREATE", + "MODERATE", + "DOWNLOAD", + ], + } + ] + }, + ), + patch.object( + self.syn, + "_find_teams_for_principal", + # AND the user is assigned to a team + return_value=[Team(id=999)], + ), + patch.object( + self.syn, + "_get_user_bundle", + return_value=None, + ), ): # WHEN I get the permissions for the user on the entity permissions = self.syn.getPermissions("123", "456") @@ -3931,49 +3999,54 @@ def test_get_permissions_with_permissions_given_through_multiple_teams( self, ) -> None: # GIVEN the API calls are mocked - with patch.object( - self.syn, - "_getUserbyPrincipalIdOrName", - # AND a user with id of 456 - return_value=456, - ), patch.object( - self.syn, - "_getACL", - return_value={ - "resourceAccess": [ - # AND the permissions are spread across a set of 2 teams - { - "principalId": 888, - "accessType": [ - "READ", - "DELETE", - "CHANGE_SETTINGS", - "UPDATE", - "CHANGE_PERMISSIONS", - ], - }, - { - "principalId": 999, - "accessType": [ - "READ", - "UPDATE", - "CHANGE_PERMISSIONS", - "CREATE", - "MODERATE", - "DOWNLOAD", - ], - }, - ] - }, - ), patch.object( - self.syn, - "_find_teams_for_principal", - # AND the user is assigned to both of the teams - return_value=[Team(id=888), Team(id=999)], - ), patch.object( - self.syn, - "_get_user_bundle", - return_value=None, + with ( + patch.object( + self.syn, + "_getUserbyPrincipalIdOrName", + # AND a user with id of 456 + return_value=456, + ), + patch.object( + self.syn, + "_getACL", + return_value={ + "resourceAccess": [ + # AND the permissions are spread across a set of 2 teams + { + "principalId": 888, + "accessType": [ + "READ", + "DELETE", + "CHANGE_SETTINGS", + "UPDATE", + "CHANGE_PERMISSIONS", + ], + }, + { + "principalId": 999, + "accessType": [ + "READ", + "UPDATE", + "CHANGE_PERMISSIONS", + "CREATE", + "MODERATE", + "DOWNLOAD", + ], + }, + ] + }, + ), + patch.object( + self.syn, + "_find_teams_for_principal", + # AND the user is assigned to both of the teams + return_value=[Team(id=888), Team(id=999)], + ), + patch.object( + self.syn, + "_get_user_bundle", + return_value=None, + ), ): # WHEN I get the permissions for the user on the entity permissions = self.syn.getPermissions("123", "456") diff --git a/tests/unit/synapseclient/unit_test_commandline.py b/tests/unit/synapseclient/unit_test_commandline.py index 7f22a85e2..412a165ce 100644 --- a/tests/unit/synapseclient/unit_test_commandline.py +++ b/tests/unit/synapseclient/unit_test_commandline.py @@ -1,6 +1,4 @@ -"""Test the Synapse command line client. - -""" +"""Test the Synapse command line client.""" import os import shutil diff --git a/tests/unit/synapseclient/unit_test_get_permissions.py b/tests/unit/synapseclient/unit_test_get_permissions.py index 7140d6feb..8fa406e63 100644 --- a/tests/unit/synapseclient/unit_test_get_permissions.py +++ b/tests/unit/synapseclient/unit_test_get_permissions.py @@ -1,6 +1,7 @@ """ Unit test for synapseclient.client.get_permissions """ + from typing import Dict from unittest.mock import patch diff --git a/tests/unit/synapseclient/unit_test_tables.py b/tests/unit/synapseclient/unit_test_tables.py index f4143d637..6f3250eda 100644 --- a/tests/unit/synapseclient/unit_test_tables.py +++ b/tests/unit/synapseclient/unit_test_tables.py @@ -1,4 +1,5 @@ """Unit test for synapseclient.table""" + import csv import io import json @@ -259,15 +260,16 @@ def test_csv_to_pandas_df_no_kwargs(): {"col1": [1, 2, 3], "col2": ["a", "b", "c"], "col3": [True, False, True]} ) - with patch.object( - pd, "read_csv", return_value=expected_df - ) as mock_read_csv, patch.object(os, "linesep", "\r\n"): + with ( + patch.object(pd, "read_csv", return_value=expected_df) as mock_read_csv, + patch.object(os, "linesep", "\r\n"), + ): # WHEN I call _csv_to_pandas_df with default parameters df = _csv_to_pandas_df( filepath="dummy_path.csv", separator=synapseclient.table.DEFAULT_SEPARATOR, quote_char=synapseclient.table.DEFAULT_QUOTE_CHARACTER, - escape_char=synapseclient.table.DEFAULT_ESCAPSE_CHAR, + escape_char=synapseclient.table.DEFAULT_ESCAPE_CHAR, contain_headers=True, lines_to_skip=0, date_columns=None, @@ -282,7 +284,7 @@ def test_csv_to_pandas_df_no_kwargs(): dtype=None, sep=synapseclient.table.DEFAULT_SEPARATOR, quotechar=synapseclient.table.DEFAULT_QUOTE_CHARACTER, - escapechar=synapseclient.table.DEFAULT_ESCAPSE_CHAR, + escapechar=synapseclient.table.DEFAULT_ESCAPE_CHAR, header=0, skiprows=0, lineterminator=None, @@ -297,16 +299,17 @@ def test_csv_to_pandas_df_with_kwargs() -> None: # GIVEN a pandas DataFrame (CSV file stand-in) expected_df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]}) - with patch.object( - pd, "read_csv", return_value=expected_df - ) as mock_read_csv, patch.object(os, "linesep", "\r\n"): + with ( + patch.object(pd, "read_csv", return_value=expected_df) as mock_read_csv, + patch.object(os, "linesep", "\r\n"), + ): # WHEN I call _csv_to_pandas_df with custom keyword arguments kwargs = {"escapechar": "\\", "keep_default_na": False} df = _csv_to_pandas_df( filepath="dummy_path.csv", separator=synapseclient.table.DEFAULT_SEPARATOR, quote_char=synapseclient.table.DEFAULT_QUOTE_CHARACTER, - escape_char=synapseclient.table.DEFAULT_ESCAPSE_CHAR, + escape_char=synapseclient.table.DEFAULT_ESCAPE_CHAR, contain_headers=True, lines_to_skip=0, date_columns=None, @@ -339,15 +342,18 @@ def test_csv_to_pandas_df_calls_convert_date_cols(): {"col1": [1, 2, 3], "date_col": ["2021-01-01", "2021-01-02", "2021-01-03"]} ) - with patch.object(pd, "read_csv", return_value=expected_df), patch.object( - synapseclient.table, "_convert_df_date_cols_to_datetime" - ) as mock_convert_dates: + with ( + patch.object(pd, "read_csv", return_value=expected_df), + patch.object( + synapseclient.table, "_convert_df_date_cols_to_datetime" + ) as mock_convert_dates, + ): # WHEN I call _csv_to_pandas_df with date_columns specified _csv_to_pandas_df( filepath="dummy_path.csv", separator=synapseclient.table.DEFAULT_SEPARATOR, quote_char=synapseclient.table.DEFAULT_QUOTE_CHARACTER, - escape_char=synapseclient.table.DEFAULT_ESCAPSE_CHAR, + escape_char=synapseclient.table.DEFAULT_ESCAPE_CHAR, contain_headers=True, lines_to_skip=0, date_columns=["date_col"], # Specify date column @@ -372,17 +378,19 @@ def test_csv_to_pandas_df_handles_list_columns(): {"col1": [1, 2, 3], "list_col": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]} ) - with patch.object(pd, "read_csv", return_value=initial_df), patch.object( - synapseclient.table, "_convert_df_date_cols_to_datetime" - ), patch.object( - pd.Series, "apply", return_value=expected_final_df["list_col"] - ) as mock_apply: + with ( + patch.object(pd, "read_csv", return_value=initial_df), + patch.object(synapseclient.table, "_convert_df_date_cols_to_datetime"), + patch.object( + pd.Series, "apply", return_value=expected_final_df["list_col"] + ) as mock_apply, + ): # WHEN I call _csv_to_pandas_df with list_columns specified result_df = synapseclient.table._csv_to_pandas_df( filepath="dummy_path.csv", separator=synapseclient.table.DEFAULT_SEPARATOR, quote_char=synapseclient.table.DEFAULT_QUOTE_CHARACTER, - escape_char=synapseclient.table.DEFAULT_ESCAPSE_CHAR, + escape_char=synapseclient.table.DEFAULT_ESCAPE_CHAR, contain_headers=True, lines_to_skip=0, date_columns=None, @@ -415,17 +423,20 @@ def test_csv_to_pandas_df_handles_row_id_and_version(): {"col1": ["a", "b", "c"], "col2": [10, 20, 30]}, index=["1_1", "2_1", "3_2"] ) # Index format: ROW_ID_ROW_VERSION - with patch.object(pd, "read_csv", return_value=initial_df), patch.object( - synapseclient.table, - "row_labels_from_id_and_version", - return_value=["1_1", "2_1", "3_2"], - ) as mock_row_labels: + with ( + patch.object(pd, "read_csv", return_value=initial_df), + patch.object( + synapseclient.table, + "row_labels_from_id_and_version", + return_value=["1_1", "2_1", "3_2"], + ) as mock_row_labels, + ): # WHEN I call _csv_to_pandas_df with rowIdAndVersionInIndex=True result_df = synapseclient.table._csv_to_pandas_df( filepath="dummy_path.csv", separator=synapseclient.table.DEFAULT_SEPARATOR, quote_char=synapseclient.table.DEFAULT_QUOTE_CHARACTER, - escape_char=synapseclient.table.DEFAULT_ESCAPSE_CHAR, + escape_char=synapseclient.table.DEFAULT_ESCAPE_CHAR, contain_headers=True, lines_to_skip=0, date_columns=None, @@ -1173,22 +1184,25 @@ def test_downloadTableColumns(syn: Synapse, download_location: str) -> None: 3: zip_entry_file_paths[1], } - with patch.object(syn, "cache") as mock_cache, patch.object( - syn, "_waitForAsync" - ) as mock_async, patch( - "synapseclient.client.ensure_download_location_is_directory" - ) as mock_ensure_dir, patch( - "synapseclient.client.download_by_file_handle" - ) as mock_download_file_handle, patch.object( - client, "zipfile" - ), patch.object( - client, "extract_zip_file_to_directory" - ) as mock_extract_zip_file_to_directory: + with ( + patch.object(syn, "cache") as mock_cache, + patch.object(syn, "_waitForAsync") as mock_async, + patch( + "synapseclient.client.ensure_download_location_is_directory" + ) as mock_ensure_dir, + patch( + "synapseclient.client.download_by_file_handle" + ) as mock_download_file_handle, + patch.object(client, "zipfile"), + patch.object( + client, "extract_zip_file_to_directory" + ) as mock_extract_zip_file_to_directory, + ): mock_cache.get.side_effect = cached_paths mock_async.return_value = mock_async_response - mock_ensure_dir.return_value = ( - mock_cache.get_cache_dir.return_value - ) = "/tmp/download" + mock_ensure_dir.return_value = mock_cache.get_cache_dir.return_value = ( + "/tmp/download" + ) mock_download_file_handle.return_value = zip_file_path mock_extract_zip_file_to_directory.side_effect = zip_entry_file_paths @@ -1245,12 +1259,10 @@ def test_SubmissionViewSchema__default_params() -> None: def test_SubmissionViewSchema__before_synapse_store(syn: Synapse) -> None: - with patch.object( - syn, "_get_default_view_columns" - ) as mocked_get_default, patch.object( - syn, "_get_annotation_view_columns" - ) as mocked_get_annotations, patch.object( - SchemaBase, "_before_synapse_store" + with ( + patch.object(syn, "_get_default_view_columns") as mocked_get_default, + patch.object(syn, "_get_annotation_view_columns") as mocked_get_annotations, + patch.object(SchemaBase, "_before_synapse_store"), ): submission_view = SubmissionViewSchema(scopes=["123"], parent="idk") submission_view._before_synapse_store(syn) @@ -1265,12 +1277,10 @@ def test_SubmissionViewSchema__before_synapse_store(syn: Synapse) -> None: def test_EntityViewSchema__before_synapse_store(syn: Synapse) -> None: syn = Synapse(debug=True, skip_checks=True, cache_client=False) - with patch.object( - syn, "_get_default_view_columns" - ) as mocked_get_default, patch.object( - syn, "_get_annotation_view_columns" - ) as mocked_get_annotations, patch.object( - SchemaBase, "_before_synapse_store" + with ( + patch.object(syn, "_get_default_view_columns") as mocked_get_default, + patch.object(syn, "_get_annotation_view_columns") as mocked_get_annotations, + patch.object(SchemaBase, "_before_synapse_store"), ): submission_view = EntityViewSchema(scopes=["syn123"], parent="idk") submission_view._before_synapse_store(syn) @@ -1385,12 +1395,12 @@ def test_EntityViewSchema__ignore_annotation_column_names(syn: Synapse) -> None: Column(name="long2", columnType="INTEGER"), ] - with patch.object( - syn, "_get_annotation_view_columns", return_value=mocked_annotation_result1 - ) as mocked_get_annotations, patch.object( - syn, "getColumns" - ) as mocked_get_columns, patch.object( - SchemaBase, "_before_synapse_store" + with ( + patch.object( + syn, "_get_annotation_view_columns", return_value=mocked_annotation_result1 + ) as mocked_get_annotations, + patch.object(syn, "getColumns") as mocked_get_columns, + patch.object(SchemaBase, "_before_synapse_store"), ): entity_view._before_synapse_store(syn) @@ -1500,8 +1510,9 @@ def test_rowset_asDataFrame__with_ROW_ETAG_column(syn: Synapse) -> None: }, } - with patch.object(syn, "_queryTable", return_value=query_result), patch.object( - syn, "_queryTableNext", return_value=query_result_next_page + with ( + patch.object(syn, "_queryTable", return_value=query_result), + patch.object(syn, "_queryTableNext", return_value=query_result_next_page), ): table = syn.tableQuery("select something from syn123", resultsAs="rowset") dataframe = table.asDataFrame() @@ -1539,14 +1550,17 @@ def test_build_table__with_pandas_DataFrame() -> None: def test_build_table__with_csv() -> None: string_io = StringIOContextManager("a,b\n" "1,c\n" "2,d\n" "3,e") - with patch.object( - synapseclient.table, - "as_table_columns", - return_value=[ - Column(name="a", columnType="INTEGER"), - Column(name="b", columnType="STRING"), - ], - ), patch.object(io, "open", return_value=string_io): + with ( + patch.object( + synapseclient.table, + "as_table_columns", + return_value=[ + Column(name="a", columnType="INTEGER"), + Column(name="b", columnType="STRING"), + ], + ), + patch.object(io, "open", return_value=string_io), + ): table = build_table("test", "syn123", "some_file_name") for col, row in enumerate(table): assert row[0] == (col + 1) diff --git a/tests/unit/synapseutils/unit_test_synapseutils_copy.py b/tests/unit/synapseutils/unit_test_synapseutils_copy.py index c0b8ac83e..d42f417fb 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_copy.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_copy.py @@ -15,8 +15,9 @@ def test_copyWiki_empty_Wiki(syn): entity = {"id": "syn123"} - with patch.object(syn, "getWikiHeaders", return_value=None), patch.object( - syn, "get", return_value=entity + with ( + patch.object(syn, "getWikiHeaders", return_value=None), + patch.object(syn, "get", return_value=entity), ): synapseutils.copyWiki(syn, "syn123", "syn456", updateLinks=False) @@ -35,10 +36,11 @@ def test_copyWiki_input_validation(syn): call({"id": "syn123"}, "8689"), call({"id": "syn123"}, "8690"), ] - with patch.object(syn, "getWikiHeaders", return_value=to_copy), patch.object( - syn, "get", return_value=entity - ), patch.object(syn, "getWiki", return_value=wiki) as mock_getWiki, patch.object( - syn, "store", return_value=wiki + with ( + patch.object(syn, "getWikiHeaders", return_value=to_copy), + patch.object(syn, "get", return_value=entity), + patch.object(syn, "getWiki", return_value=wiki) as mock_getWiki, + patch.object(syn, "store", return_value=wiki), ): synapseutils.copyWiki( syn, @@ -362,13 +364,12 @@ def test__batch_iterator_generator__two_iterables(self): @pytest.mark.parametrize("forceVersionToggle", [True, False]) def test_change_file_metadata(syn, forceVersionToggle): - with patch.object(syn, "get") as get_mock, patch.object( - syn, "_getFileHandleDownload" - ) as fh_mock, patch( - "synapseutils.copy_functions.copyFileHandles" - ) as copy_mock, patch.object( - syn, "store" - ) as store_mock: + with ( + patch.object(syn, "get") as get_mock, + patch.object(syn, "_getFileHandleDownload") as fh_mock, + patch("synapseutils.copy_functions.copyFileHandles") as copy_mock, + patch.object(syn, "store") as store_mock, + ): copy_result = {"failureCode": None, "newFileHandle": {"id": 123}} copy_mock.return_value = [copy_result] synapseutils.changeFileMetaData(syn, "syn123", forceVersion=forceVersionToggle) @@ -402,11 +403,12 @@ def setup_method(self): def test_dont_copy_read_permissions(self): """Entities with READ permissions not copied""" permissions = {"canDownload": False} - with patch.object( - self.syn, "get", return_value=self.file_ent - ) as patch_syn_get, patch.object( - self.syn, "restGET", return_value=permissions - ) as patch_rest_get: + with ( + patch.object(self.syn, "get", return_value=self.file_ent) as patch_syn_get, + patch.object( + self.syn, "restGET", return_value=permissions + ) as patch_rest_get, + ): copied_file = synapseutils.copy( self.syn, self.file_ent, @@ -441,11 +443,12 @@ def test_copy_entity_access_requirements(self): # TEST: Entity with access requirement not copied access_requirements = {"results": ["fee", "fi"]} permissions = {"canDownload": True} - with patch.object( - self.syn, "get", return_value=self.file_ent - ) as patch_syn_get, patch.object( - self.syn, "restGET", side_effects=[permissions, access_requirements] - ) as patch_rest_get: + with ( + patch.object(self.syn, "get", return_value=self.file_ent) as patch_syn_get, + patch.object( + self.syn, "restGET", side_effects=[permissions, access_requirements] + ) as patch_rest_get, + ): copied_file = synapseutils.copy( self.syn, self.file_ent, @@ -483,15 +486,16 @@ def test_no_copy_types(self): """Docker repositories and EntityViews aren't copied""" access_requirements = {"results": []} permissions = {"canDownload": True} - with patch.object( - self.syn, "get", return_value=self.project_entity - ) as patch_syn_get, patch.object( - self.syn, "restGET", side_effect=[permissions, access_requirements] - ) as patch_rest_get, patch.object( - self.syn, "getChildren" - ) as patch_get_children, patch.object( - self.syn, "store" - ) as patch_store: + with ( + patch.object( + self.syn, "get", return_value=self.project_entity + ) as patch_syn_get, + patch.object( + self.syn, "restGET", side_effect=[permissions, access_requirements] + ) as patch_rest_get, + patch.object(self.syn, "getChildren") as patch_get_children, + patch.object(self.syn, "store") as patch_store, + ): copied_project = synapseutils.copy( self.syn, self.project_entity, diff --git a/tests/unit/synapseutils/unit_test_synapseutils_describe.py b/tests/unit/synapseutils/unit_test_synapseutils_describe.py index 99399a417..b5af12200 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_describe.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_describe.py @@ -154,11 +154,16 @@ def test_describe_with_mixed_series(self, syn: synapseclient.Synapse): def test_describe(self): syn = Mock() - with patch.object( - describe_functions, "_open_entity_as_df", return_value=self.df_mixed - ) as mock_open_entity, patch.object( - describe_functions, "_describe_wrapper", return_value=self.expected_results - ) as mock_describe: + with ( + patch.object( + describe_functions, "_open_entity_as_df", return_value=self.df_mixed + ) as mock_open_entity, + patch.object( + describe_functions, + "_describe_wrapper", + return_value=self.expected_results, + ) as mock_describe, + ): result = describe_functions.describe(syn=syn, entity="syn1234") mock_open_entity.assert_called_once_with(syn=syn, entity="syn1234") mock_describe.assert_called_once_with(self.df_mixed, syn=syn) @@ -167,9 +172,10 @@ def test_describe(self): def test_describe_none(self): """Test if data type is not supported""" syn = Mock() - with patch.object( - describe_functions, "_open_entity_as_df", return_value=None - ), patch.object(describe_functions, "_describe_wrapper") as mock_describe: + with ( + patch.object(describe_functions, "_open_entity_as_df", return_value=None), + patch.object(describe_functions, "_describe_wrapper") as mock_describe, + ): result = describe_functions.describe(syn=syn, entity="syn1234") mock_describe.assert_not_called() assert result is None diff --git a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py index 9dae8e071..cd2507b61 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_migrate.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_migrate.py @@ -275,9 +275,10 @@ class TestIndex: @pytest.fixture(scope="function") def conn(self): # temp file context manager doesn't work on windows so we manually remove in fixture - with tempfile.NamedTemporaryFile(delete=False) as tmpfile, sqlite3.connect( - tmpfile.name - ) as conn: + with ( + tempfile.NamedTemporaryFile(delete=False) as tmpfile, + sqlite3.connect(tmpfile.name) as conn, + ): yield conn def test_check_indexed(self, conn): @@ -330,8 +331,7 @@ def _index_file_entity_version_test(self, conn, file_version_strategy): file_version_strategy, ) - row = cursor.execute( - """ + row = cursor.execute(""" select id, parent_id, @@ -341,8 +341,7 @@ def _index_file_entity_version_test(self, conn, file_version_strategy): file_size, status from migrations - """ - ).fetchone() + """).fetchone() row_dict = _get_row_dict(cursor, row, True) @@ -436,8 +435,7 @@ def test_index_file_entity__all(self, conn): "all", ) - result = cursor.execute( - """ + result = cursor.execute(""" select id, parent_id, @@ -447,8 +445,7 @@ def test_index_file_entity__all(self, conn): file_size, status from migrations - """ - ).fetchall() + """).fetchall() result_iter = iter(result) row_0 = next(result_iter) @@ -578,8 +575,7 @@ def test_index_table_entity(self, mocker, conn): [from_storage_location_id, "543"], ) - result = cursor.execute( - """ + result = cursor.execute(""" select id, parent_id, @@ -589,8 +585,7 @@ def test_index_table_entity(self, mocker, conn): from_file_handle_id, status from migrations - """ - ).fetchall() + """).fetchall() row_iter = iter(result) row_0_dict = _get_row_dict(cursor, next(row_iter), True) @@ -724,16 +719,14 @@ def test_index_container__files(self, mock_index_entity, conn): assert mock_index_entity.call_args_list == expected_calls - row = cursor.execute( - """ + row = cursor.execute(""" select id, type, parent_id, status from migrations - """ - ).fetchone() + """).fetchone() row_dict = _get_row_dict(cursor, row, True) assert row_dict["id"] == project_id @@ -800,16 +793,14 @@ def test_index_container__tables(self, mock_index_entity, conn): assert mock_index_entity.call_args_list == expected_calls - row = cursor.execute( - """ + row = cursor.execute(""" select id, type, parent_id, status from migrations - """ - ).fetchone() + """).fetchone() row_dict = _get_row_dict(cursor, row, True) assert row_dict["id"] == folder_id @@ -1047,16 +1038,14 @@ def test_index_entity__error__continue( file_version_strategy, ) - row = cursor.execute( - """ + row = cursor.execute(""" select id, type, status, exception from migrations - """ - ).fetchone() + """).fetchone() row_dict = _get_row_dict(cursor, row, True) assert row_dict["id"] == entity_id @@ -1288,9 +1277,10 @@ class TestMigrate: @pytest.fixture(scope="function") def conn(self): # temp file context manager doesn't work on windows so we manually remove in fixture - with tempfile.NamedTemporaryFile(delete=False) as tmpfile, sqlite3.connect( - tmpfile.name - ) as conn: + with ( + tempfile.NamedTemporaryFile(delete=False) as tmpfile, + sqlite3.connect(tmpfile.name) as conn, + ): yield conn @pytest.fixture(scope="function") @@ -1547,19 +1537,21 @@ def mock_rest_get_side_effect(uri): migration_values, ) - with mock.patch.object(syn, "get") as mock_syn_get, mock.patch.object( - syn, "store" - ) as mock_syn_store, mock.patch.object( - syn, "_getFileHandleDownload" - ) as mock_get_file_handle_download, mock.patch.object( - syn, "restGET" - ) as mock_syn_rest_get, mock.patch.object( - syn, "create_snapshot_version" - ) as mock_create_snapshot_version, mock.patch.object( - syn, "tableQuery" - ) as mock_syn_table_query, mock.patch.object( - synapseutils.migrate_functions, "multipart_copy" - ) as mock_multipart_copy: + with ( + mock.patch.object(syn, "get") as mock_syn_get, + mock.patch.object(syn, "store") as mock_syn_store, + mock.patch.object( + syn, "_getFileHandleDownload" + ) as mock_get_file_handle_download, + mock.patch.object(syn, "restGET") as mock_syn_rest_get, + mock.patch.object( + syn, "create_snapshot_version" + ) as mock_create_snapshot_version, + mock.patch.object(syn, "tableQuery") as mock_syn_table_query, + mock.patch.object( + synapseutils.migrate_functions, "multipart_copy" + ) as mock_multipart_copy, + ): mock_syn_get.side_effect = mock_syn_get_side_effect mock_syn_store.side_effect = mock_syn_store_side_effect mock_get_file_handle_download.side_effect = ( @@ -1693,9 +1685,10 @@ def test_migrate__shared_file_handles(mocker, syn): ), ] - with tempfile.NamedTemporaryFile(delete=False) as tmpfile, sqlite3.connect( - tmpfile.name - ) as conn: + with ( + tempfile.NamedTemporaryFile(delete=False) as tmpfile, + sqlite3.connect(tmpfile.name) as conn, + ): cursor = conn.cursor() _ensure_schema(cursor) @@ -2101,8 +2094,7 @@ def test_get_table_file_handle_rows__no_file_columns(mocker, syn): def _verify_schema(cursor): - results = cursor.execute( - """ + results = cursor.execute(""" SELECT m.name as table_name, p.name as column_name @@ -2113,8 +2105,7 @@ def _verify_schema(cursor): ORDER BY m.name, p.cid - """ - ) + """) expected_table_columns = { "migration_settings": { @@ -2148,9 +2139,10 @@ def _verify_schema(cursor): def test_ensure_schema(): """Verify _ensure_schema bootstraps the necessary schema""" - with tempfile.NamedTemporaryFile(delete=False) as db_file, sqlite3.connect( - db_file.name - ) as conn: + with ( + tempfile.NamedTemporaryFile(delete=False) as db_file, + sqlite3.connect(db_file.name) as conn, + ): cursor = conn.cursor() _ensure_schema(cursor) _verify_schema(cursor) @@ -2177,9 +2169,10 @@ def test_verify_storage_location_ownership(): def test__verify_index_settings__retrieve_index_settings(): """Verify the behavior saving index settings and re-retreiving them.""" - with tempfile.NamedTemporaryFile(delete=False) as db_file, sqlite3.connect( - db_file.name - ) as conn: + with ( + tempfile.NamedTemporaryFile(delete=False) as db_file, + sqlite3.connect(db_file.name) as conn, + ): db_path = db_file.name cursor = conn.cursor() _ensure_schema(cursor) @@ -2302,9 +2295,10 @@ def test__verify_index_settings__invalid_table_schema(): version of the function. """ - with tempfile.NamedTemporaryFile(delete=False) as db_file, sqlite3.connect( - db_file.name - ) as conn: + with ( + tempfile.NamedTemporaryFile(delete=False) as db_file, + sqlite3.connect(db_file.name) as conn, + ): db_path = db_file.name cursor = conn.cursor() cursor.execute("create table migration_settings (foo text)") diff --git a/tests/unit/synapseutils/unit_test_synapseutils_monitor.py b/tests/unit/synapseutils/unit_test_synapseutils_monitor.py index a74fca243..b146a811d 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_monitor.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_monitor.py @@ -8,9 +8,12 @@ def test_notifyMe__successful_call(syn): subject = "some message subject" owner_id = "12434" user_profile = {"ownerId": owner_id} - with patch.object(syn, "sendMessage") as mocked_send_message, patch.object( - syn, "getUserProfile", return_value=user_profile - ) as mocked_get_user_profile: + with ( + patch.object(syn, "sendMessage") as mocked_send_message, + patch.object( + syn, "getUserProfile", return_value=user_profile + ) as mocked_get_user_profile, + ): mocked_func = MagicMock() @notifyMe(syn, messageSubject=subject) @@ -30,8 +33,9 @@ def test_notifyMe__exception_thrown_and_retry_fail(syn): subject = "some message subject" owner_id = "12434" user_profile = {"ownerId": owner_id} - with patch.object(syn, "sendMessage") as mocked_send_message, patch.object( - syn, "getUserProfile", return_value=user_profile + with ( + patch.object(syn, "sendMessage") as mocked_send_message, + patch.object(syn, "getUserProfile", return_value=user_profile), ): mocked_func = MagicMock( side_effect=[Exception("first time fails"), "second time is Fine"] diff --git a/tests/unit/synapseutils/unit_test_synapseutils_sync.py b/tests/unit/synapseutils/unit_test_synapseutils_sync.py index 062984be0..8187c6802 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_sync.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_sync.py @@ -1,4 +1,5 @@ """Unit tests for the Sync utility functions""" + import csv import datetime import math @@ -165,9 +166,11 @@ def test_read_manifest_sync_order_with_home_directory(syn: Synapse) -> None: # mock syn.get() to return a project because the final check is making sure parent is a container # mock isfile() to always return true to avoid having to create files in the home directory # side effect mocks values for: manfiest file, file1.txt, file2.txt, isfile(project.id) check in syn.get() - with patch.object(syn, "get_async", return_value=Project()), patch.object( - os.path, "isfile", side_effect=[True, True, True, False] - ), patch.object(sync, "_check_size_each_file", return_value=Mock()): + with ( + patch.object(syn, "get_async", return_value=Project()), + patch.object(os.path, "isfile", side_effect=[True, True, True, False]), + patch.object(sync, "_check_size_each_file", return_value=Mock()), + ): manifest_dataframe = synapseutils.sync.readManifestFile(syn, manifest) expected_order = pd.Series( [ @@ -194,10 +197,10 @@ def test_read_manifest_file_synapse_store_values_not_set(syn: Synapse) -> None: } manifest = StringIO(header + row1 + row2) - with patch.object(syn, "get_async", return_value=Project()), patch.object( - os.path, "isfile", return_value=True - ), patch.object( - sync, "_check_size_each_file", return_value=Mock() + with ( + patch.object(syn, "get_async", return_value=Project()), + patch.object(os.path, "isfile", return_value=True), + patch.object(sync, "_check_size_each_file", return_value=Mock()), ): # side effect mocks values for: file1.txt manifest_dataframe = synapseutils.sync.readManifestFile(syn, manifest) actual_synapseStore = manifest_dataframe.set_index("path")[ @@ -233,10 +236,10 @@ def test_read_manifest_file_synapse_store_values_are_set(syn: Synapse) -> None: } manifest = StringIO(header + row1 + row2 + row3 + row4 + row5 + row6) - with patch.object(syn, "get_async", return_value=Project()), patch.object( - sync, "_check_size_each_file", return_value=Mock() - ), patch.object( - os.path, "isfile", return_value=True + with ( + patch.object(syn, "get_async", return_value=Project()), + patch.object(sync, "_check_size_each_file", return_value=Mock()), + patch.object(os.path, "isfile", return_value=True), ): # mocks values for: file1.txt, file3.txt, file5.txt manifest_dataframe = synapseutils.sync.readManifestFile(syn, manifest) @@ -248,12 +251,14 @@ def test_read_manifest_file_synapse_store_values_are_set(syn: Synapse) -> None: def test_sync_from_synapse_non_file_entity(syn: Synapse) -> None: table_schema = "syn12345" - with patch.object(syn, "getChildren", return_value=[]), patch.object( - syn, "get", return_value=Schema(name="asssdfa", parent=PARENT_ID) - ), patch( - "synapseutils.sync.get_entity", - new_callable=AsyncMock, - return_value={"concreteType": concrete_types.TABLE_ENTITY}, + with ( + patch.object(syn, "getChildren", return_value=[]), + patch.object(syn, "get", return_value=Schema(name="asssdfa", parent=PARENT_ID)), + patch( + "synapseutils.sync.get_entity", + new_callable=AsyncMock, + return_value={"concreteType": concrete_types.TABLE_ENTITY}, + ), ): pytest.raises(ValueError, synapseutils.syncFromSynapse, syn, table_schema) @@ -263,17 +268,21 @@ async def mock_get_children(*args, **kwargs): for child in []: yield child - with patch( - "synapseclient.models.mixins.storable_container.get_children", - side_effect=mock_get_children, - ), patch( - "synapseutils.sync.get_entity", - new_callable=AsyncMock, - return_value=(mock_folder_dict()), - ), patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=(mocked_folder_rest_api_dict()), + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ), + patch( + "synapseutils.sync.get_entity", + new_callable=AsyncMock, + return_value=(mock_folder_dict()), + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=(mocked_folder_rest_api_dict()), + ), ): assert list() == synapseutils.syncFromSynapse(syn=syn, entity=SYN_123) @@ -285,14 +294,18 @@ def test_sync_from_synapse_file_entity(syn: Synapse) -> None: id=SYN_123, properties={"isLatestVersion": True}, ) - with patch.object(syn, "getChildren") as patch_syn_get_children, patch( - "synapseutils.sync.get_entity", - new_callable=AsyncMock, - return_value=(mock_file_dict()), - ), patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value=(mocked_file_rest_api_dict()), + with ( + patch.object(syn, "getChildren") as patch_syn_get_children, + patch( + "synapseutils.sync.get_entity", + new_callable=AsyncMock, + return_value=(mock_file_dict()), + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + return_value=(mocked_file_rest_api_dict()), + ), ): result = synapseutils.syncFromSynapse(syn, file) assert [file] == result @@ -312,17 +325,21 @@ async def mock_get_children(*args, **kwargs): for child in [mocked_file_child()]: yield child - with patch( - "synapseclient.models.mixins.storable_container.get_children", - side_effect=mock_get_children, - ), patch( - "synapseutils.sync.get_entity", - new_callable=AsyncMock, - side_effect=[mock_folder_dict(), mock_file_dict()], - ), patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - side_effect=[mocked_folder_rest_api_dict(), mocked_file_rest_api_dict()], + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ), + patch( + "synapseutils.sync.get_entity", + new_callable=AsyncMock, + side_effect=[mock_folder_dict(), mock_file_dict()], + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + side_effect=[mocked_folder_rest_api_dict(), mocked_file_rest_api_dict()], + ), ): result = synapseutils.syncFromSynapse(syn, folder) assert [file] == result @@ -353,25 +370,32 @@ async def mock_get_children(*args, **kwargs): yield child call_count += 1 - with patch( - "synapseclient.models.mixins.storable_container.get_children", - side_effect=mock_get_children, - ) as patch_syn_get_children, patch( - "synapseutils.sync.get_entity", - new_callable=AsyncMock, - side_effect=[mock_project_dict(), mock_folder_dict(), mock_file_dict()], - ), patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - side_effect=[ - mocked_project_rest_api_dict(), - mocked_folder_rest_api_dict(), - mocked_file_rest_api_dict(), - ], - ), patch( - "synapseclient.models.file.get_from_entity_factory", - wraps=spy_for_async_function(synapseclient.models.file.get_from_entity_factory), - ) as patch_get_file_entity: + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ) as patch_syn_get_children, + patch( + "synapseutils.sync.get_entity", + new_callable=AsyncMock, + side_effect=[mock_project_dict(), mock_folder_dict(), mock_file_dict()], + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + side_effect=[ + mocked_project_rest_api_dict(), + mocked_folder_rest_api_dict(), + mocked_file_rest_api_dict(), + ], + ), + patch( + "synapseclient.models.file.get_from_entity_factory", + wraps=spy_for_async_function( + synapseclient.models.file.get_from_entity_factory + ), + ) as patch_get_file_entity, + ): result = synapseutils.syncFromSynapse(syn=syn, entity=project) assert [file] == result assert patch_syn_get_children.call_count == 2 @@ -420,25 +444,32 @@ async def mock_get_children(*args, **kwargs): yield child call_count += 1 - with patch( - "synapseclient.models.mixins.storable_container.get_children", - side_effect=mock_get_children, - ) as patch_syn_get_children, patch( - "synapseutils.sync.get_entity", - new_callable=AsyncMock, - side_effect=[mock_project_dict(), mock_folder_dict(), mock_file_dict()], - ), patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - side_effect=[ - mocked_project_rest_api_dict(), - mocked_file_rest_api_dict(), - mocked_folder_rest_api_dict(), - ], - ), patch( - "synapseclient.models.file.get_from_entity_factory", - wraps=spy_for_async_function(synapseclient.models.file.get_from_entity_factory), - ) as patch_get_file_entity: + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ) as patch_syn_get_children, + patch( + "synapseutils.sync.get_entity", + new_callable=AsyncMock, + side_effect=[mock_project_dict(), mock_folder_dict(), mock_file_dict()], + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + side_effect=[ + mocked_project_rest_api_dict(), + mocked_file_rest_api_dict(), + mocked_folder_rest_api_dict(), + ], + ), + patch( + "synapseclient.models.file.get_from_entity_factory", + wraps=spy_for_async_function( + synapseclient.models.file.get_from_entity_factory + ), + ) as patch_get_file_entity, + ): result = synapseutils.syncFromSynapse( syn=syn, entity=project, downloadFile=False ) @@ -520,38 +551,47 @@ async def mock_get_children(*args, **kwargs): yield child call_count += 1 - with patch( - "synapseclient.models.mixins.storable_container.get_children", - side_effect=mock_get_children, - ) as patch_syn_get_children, patch( - "synapseutils.sync.get_entity", - new_callable=AsyncMock, - side_effect=[ - mock_project_dict(), - mock_file_dict(syn_id=SYN_123), - mock_folder_dict(), - mock_file_dict(syn_id=SYN_789), - ], - ), patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - side_effect=[ - mocked_project_rest_api_dict(), - mocked_file_rest_api_dict(syn_id=SYN_123), - mocked_folder_rest_api_dict(), - mocked_file_rest_api_dict(syn_id=SYN_789), - ], - ), patch( - "synapseclient.models.file.get_from_entity_factory", - wraps=spy_for_async_function(synapseclient.models.file.get_from_entity_factory), - ) as patch_get_file_entity, patch( - "synapseclient.models.activity.Activity.from_parent_async", - new_callable=AsyncMock, - side_effect=lambda parent, **kwargs: provenance.get(parent.id), - ) as patch_activity_from_parent, patch( - "synapseutils.sync.generate_manifest", - wraps=spy_for_function(synapseutils.sync.generate_manifest), - ) as generate_manifest_spy: + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ) as patch_syn_get_children, + patch( + "synapseutils.sync.get_entity", + new_callable=AsyncMock, + side_effect=[ + mock_project_dict(), + mock_file_dict(syn_id=SYN_123), + mock_folder_dict(), + mock_file_dict(syn_id=SYN_789), + ], + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + side_effect=[ + mocked_project_rest_api_dict(), + mocked_file_rest_api_dict(syn_id=SYN_123), + mocked_folder_rest_api_dict(), + mocked_file_rest_api_dict(syn_id=SYN_789), + ], + ), + patch( + "synapseclient.models.file.get_from_entity_factory", + wraps=spy_for_async_function( + synapseclient.models.file.get_from_entity_factory + ), + ) as patch_get_file_entity, + patch( + "synapseclient.models.activity.Activity.from_parent_async", + new_callable=AsyncMock, + side_effect=lambda parent, **kwargs: provenance.get(parent.id), + ) as patch_activity_from_parent, + patch( + "synapseutils.sync.generate_manifest", + wraps=spy_for_function(synapseutils.sync.generate_manifest), + ) as generate_manifest_spy, + ): result = synapseutils.syncFromSynapse( syn=syn, entity=project, path=temp_directory_path, manifest="all" ) @@ -712,38 +752,47 @@ async def mock_get_children(*args, **kwargs): yield child call_count += 1 - with patch( - "synapseclient.models.mixins.storable_container.get_children", - side_effect=mock_get_children, - ) as patch_syn_get_children, patch( - "synapseutils.sync.get_entity", - new_callable=AsyncMock, - side_effect=[ - mock_project_dict(), - mock_file_dict(syn_id=SYN_123), - mock_folder_dict(), - mock_file_dict(syn_id=SYN_789), - ], - ), patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - side_effect=[ - mocked_project_rest_api_dict(), - mocked_file_rest_api_dict(syn_id=SYN_123), - mocked_folder_rest_api_dict(), - mocked_file_rest_api_dict(syn_id=SYN_789), - ], - ), patch( - "synapseclient.models.file.get_from_entity_factory", - wraps=spy_for_async_function(synapseclient.models.file.get_from_entity_factory), - ) as patch_get_file_entity, patch( - "synapseclient.models.activity.Activity.from_parent_async", - new_callable=AsyncMock, - side_effect=lambda parent, **kwargs: provenance.get(parent.id), - ) as patch_activity_from_parent, patch( - "synapseutils.sync.generate_manifest", - wraps=spy_for_function(synapseutils.sync.generate_manifest), - ) as generate_manifest_spy: + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ) as patch_syn_get_children, + patch( + "synapseutils.sync.get_entity", + new_callable=AsyncMock, + side_effect=[ + mock_project_dict(), + mock_file_dict(syn_id=SYN_123), + mock_folder_dict(), + mock_file_dict(syn_id=SYN_789), + ], + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + side_effect=[ + mocked_project_rest_api_dict(), + mocked_file_rest_api_dict(syn_id=SYN_123), + mocked_folder_rest_api_dict(), + mocked_file_rest_api_dict(syn_id=SYN_789), + ], + ), + patch( + "synapseclient.models.file.get_from_entity_factory", + wraps=spy_for_async_function( + synapseclient.models.file.get_from_entity_factory + ), + ) as patch_get_file_entity, + patch( + "synapseclient.models.activity.Activity.from_parent_async", + new_callable=AsyncMock, + side_effect=lambda parent, **kwargs: provenance.get(parent.id), + ) as patch_activity_from_parent, + patch( + "synapseutils.sync.generate_manifest", + wraps=spy_for_function(synapseutils.sync.generate_manifest), + ) as generate_manifest_spy, + ): result = synapseutils.syncFromSynapse( syn=syn, entity=project, path=temp_directory_path, manifest="root" ) @@ -875,35 +924,43 @@ async def mock_get_children(*args, **kwargs): yield child call_count += 1 - with patch( - "synapseclient.models.mixins.storable_container.get_children", - side_effect=mock_get_children, - ) as patch_syn_get_children, patch( - "synapseutils.sync.get_entity", - new_callable=AsyncMock, - side_effect=[ - mock_project_dict(), - mock_file_dict(syn_id=SYN_123), - mock_folder_dict(), - mock_file_dict(syn_id=SYN_789), - ], - ), patch( - "synapseclient.api.entity_factory.get_entity_id_bundle2", - new_callable=AsyncMock, - side_effect=[ - mocked_project_rest_api_dict(), - mocked_file_rest_api_dict(syn_id=SYN_123), - mocked_folder_rest_api_dict(), - mocked_file_rest_api_dict(syn_id=SYN_789), - ], - ), patch( - "synapseclient.models.file.get_from_entity_factory", - wraps=spy_for_async_function(synapseclient.models.file.get_from_entity_factory), - ) as patch_get_file_entity, patch( - "synapseclient.models.activity.Activity.from_parent_async", - new_callable=AsyncMock, - return_value=None, - ) as patch_activity_from_parent: + with ( + patch( + "synapseclient.models.mixins.storable_container.get_children", + side_effect=mock_get_children, + ) as patch_syn_get_children, + patch( + "synapseutils.sync.get_entity", + new_callable=AsyncMock, + side_effect=[ + mock_project_dict(), + mock_file_dict(syn_id=SYN_123), + mock_folder_dict(), + mock_file_dict(syn_id=SYN_789), + ], + ), + patch( + "synapseclient.api.entity_factory.get_entity_id_bundle2", + new_callable=AsyncMock, + side_effect=[ + mocked_project_rest_api_dict(), + mocked_file_rest_api_dict(syn_id=SYN_123), + mocked_folder_rest_api_dict(), + mocked_file_rest_api_dict(syn_id=SYN_789), + ], + ), + patch( + "synapseclient.models.file.get_from_entity_factory", + wraps=spy_for_async_function( + synapseclient.models.file.get_from_entity_factory + ), + ) as patch_get_file_entity, + patch( + "synapseclient.models.activity.Activity.from_parent_async", + new_callable=AsyncMock, + return_value=None, + ) as patch_activity_from_parent, + ): result = synapseutils.syncFromSynapse( syn=syn, entity=project, path="./", manifest="suppress" ) @@ -1086,10 +1143,9 @@ def mock_uploader_constructor(*args, **kwargs): mocked_uploaders.append(mock_uploader) return mock_uploader - with patch.object( - synapseutils.sync, "_SyncUploadItem" - ) as upload_item_init, patch.object( - synapseutils.sync, "_SyncUploader", new=mock_uploader_constructor + with ( + patch.object(synapseutils.sync, "_SyncUploadItem") as upload_item_init, + patch.object(synapseutils.sync, "_SyncUploader", new=mock_uploader_constructor), ): await synapseutils.sync._manifest_upload(syn, df) @@ -1637,11 +1693,10 @@ def test_generate_sync_manifest(syn: Synapse) -> None: folder_name = "TestName" parent_id = SYN_123 manifest_path = "TestFolder" - with patch.object( - sync, "_walk_directory_tree" - ) as patch_walk_directory_tree, patch.object( - sync, "_write_manifest_data" - ) as patch_write_manifest_data: + with ( + patch.object(sync, "_walk_directory_tree") as patch_walk_directory_tree, + patch.object(sync, "_write_manifest_data") as patch_write_manifest_data, + ): sync.generate_sync_manifest(syn, folder_name, parent_id, manifest_path) patch_walk_directory_tree.assert_called_once_with(syn, folder_name, parent_id) patch_write_manifest_data.assert_called_with( @@ -1861,3 +1916,21 @@ def test_multiple_item(self) -> None: "baz", '"foo, bar, baz"', ] + + +class TestSyncFromSynapseDeprecation: + """Tests for the deprecation of syncFromSynapse.""" + + def test_syncFromSynapse_emits_deprecation_warning(self, syn: Synapse) -> None: + # GIVEN the legacy syncFromSynapse function + # WHEN it is called + # THEN a DeprecationWarning is raised pointing to StorableContainer + with pytest.warns( + DeprecationWarning, match="StorableContainer.sync_from_synapse" + ): + with patch.object( + sync, + "syncFromSynapse_async", + return_value=AsyncMock(return_value=[])(), + ): + sync.syncFromSynapse(syn=syn, entity="syn123") diff --git a/tests/unit/synapseutils/unit_test_synapseutils_walk.py b/tests/unit/synapseutils/unit_test_synapseutils_walk.py index 3b7b365f5..aa0bc4254 100644 --- a/tests/unit/synapseutils/unit_test_synapseutils_walk.py +++ b/tests/unit/synapseutils/unit_test_synapseutils_walk.py @@ -35,9 +35,10 @@ def test_help_walk_one_child_file(syn, include_types): } child = [{"id": "syn2222", "conreteType": "File", "name": "test_file"}] expected = [(("parent_folder", "syn123"), [], [("test_file", "syn2222")])] - with patch.object(syn, "get", return_value=entity) as mock_syn_get, patch.object( - syn, "getChildren", return_value=child - ) as mock_get_child: + with ( + patch.object(syn, "get", return_value=entity) as mock_syn_get, + patch.object(syn, "getChildren", return_value=child) as mock_get_child, + ): result = _help_walk(syn=syn, syn_id="syn123", include_types=include_types) gen_result = list(result) mock_syn_get.assert_called_once_with("syn123", downloadFile=False) @@ -82,11 +83,10 @@ def test_help_walk_recursive(syn): [("test_file_2", "syn22223")], ), ] - with patch.object( - syn, "get", side_effect=entity_list - ) as mock_syn_get, patch.object( - syn, "getChildren", side_effect=child_list - ) as mock_get_child: + with ( + patch.object(syn, "get", side_effect=entity_list) as mock_syn_get, + patch.object(syn, "getChildren", side_effect=child_list) as mock_get_child, + ): result = _help_walk(syn=syn, syn_id="syn123", include_types=["folder", "file"]) gen_result = list(result) mock_syn_get.assert_called_once_with("syn123", downloadFile=False) @@ -103,9 +103,10 @@ def test_help_walk_newpath(syn): } child = [{"id": "syn2222", "conreteType": "File", "name": "test_file"}] expected = [(("testpathnow", "syn123"), [], [("test_file", "syn2222")])] - with patch.object(syn, "get", return_value=entity) as mock_syn_get, patch.object( - syn, "getChildren", return_value=child - ) as mock_get_child: + with ( + patch.object(syn, "get", return_value=entity) as mock_syn_get, + patch.object(syn, "getChildren", return_value=child) as mock_get_child, + ): result = _help_walk( syn=syn, syn_id="syn123",