diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 2fe73ca77..16d5f11bc 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,11 +1,20 @@ version: 2 +enable-beta-ecosystems: true updates: -- package-ecosystem: "github-actions" +- package-ecosystem: github-actions directory: "/" schedule: - interval: "weekly" + interval: weekly -- package-ecosystem: "gitsubmodule" +- package-ecosystem: gitsubmodule directory: "/" schedule: - interval: "weekly" + interval: weekly + +- package-ecosystem: pre-commit + directory: "/" + schedule: + interval: monthly + groups: + pre-commit: + patterns: ["*"] diff --git a/.github/workflows/alpine-test.yml b/.github/workflows/alpine-test.yml index 2c1eed391..5c999e487 100644 --- a/.github/workflows/alpine-test.yml +++ b/.github/workflows/alpine-test.yml @@ -2,8 +2,11 @@ name: test-alpine on: [push, pull_request, workflow_dispatch] +permissions: + contents: read + jobs: - build: + test: runs-on: ubuntu-latest container: @@ -16,14 +19,14 @@ jobs: steps: - name: Prepare Alpine Linux run: | - apk add sudo git git-daemon python3 py3-pip + apk add sudo git git-daemon python3 py3-pip py3-virtualenv echo 'Defaults env_keep += "CI GITHUB_* RUNNER_*"' >/etc/sudoers.d/ci_env addgroup -g 127 docker - adduser -D -u 1001 runner + adduser -D -u 1001 runner # TODO: Check if this still works on GHA as intended. adduser runner docker shell: sh -exo pipefail {0} # Run this as root, not the "runner" user. - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 @@ -44,23 +47,44 @@ jobs: # and cause subsequent tests to fail cat test/fixtures/.gitconfig >> ~/.gitconfig - - name: Set up virtualenv + - name: Set up virtual environment run: | python -m venv .venv - . .venv/bin/activate - printf '%s=%s\n' 'PATH' "$PATH" 'VIRTUAL_ENV' "$VIRTUAL_ENV" >>"$GITHUB_ENV" - name: Update PyPA packages run: | - # Get the latest pip, wheel, and prior to Python 3.12, setuptools. 
- python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + . .venv/bin/activate + python -m pip install -U pip 'setuptools; python_version<"3.12"' wheel - name: Install project and test dependencies run: | - pip install ".[test]" + . .venv/bin/activate + pip install '.[test]' + + - name: Show POSIX file ownership + run: | + ls -ld -- \ + "$(pwd)" \ + "$(pwd)/.git" \ + "$(pwd)/git/ext/gitdb" \ + "$(pwd)/git/ext/gitdb/.git" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap/.git" \ + "${HOME:?HOME is not set}/.gitconfig" \ + 2>&1 || true + + - name: Show safe.directory entries + # `actions/checkout`'s safe.directory add is only durable for the + # checkout itself (it writes under a throwaway HOME override and + # then discards it), so by the time this step runs the runner + # user's `~/.gitconfig` has no entries -- and the Alpine container + # chowns the workspace to runner:docker to match the test user, so + # git accepts the ownership without one. Expected: `(none)`. + run: git config --global --get-all safe.directory || echo "(none)" - name: Show version and platform information run: | + . .venv/bin/activate uname -a command -v git python git version @@ -69,4 +93,5 @@ jobs: - name: Test with pytest run: | + . .venv/bin/activate pytest --color=yes -p no:sugar --instafail -vv diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index ae5241898..e243416a8 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -19,38 +19,48 @@ on: jobs: analyze: - name: Analyze + name: Analyze (${{ matrix.language }}) # Runner size impacts CodeQL analysis time. To learn more, please see: # - https://gh.io/recommended-hardware-resources-for-running-codeql # - https://gh.io/supported-runners-and-hardware-resources - # - https://gh.io/using-larger-runners - # Consider using larger runners for possible analysis time improvements. 
+ # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} - timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} permissions: - actions: read - contents: read security-events: write strategy: fail-fast: false matrix: - language: [ 'python' ] - # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] - # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both - # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support - + include: + - language: actions + build-mode: none + - language: python + build-mode: none + # CodeQL supports the following values keywords for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' + # Use `c-cpp` to analyze code written in C, C++ or both + # Use 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 
+ # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 + + # Add any setup steps before running the `github/codeql-action/init` action. + # This includes steps like installing compilers or runtimes (`actions/setup-node` + # or others). This is typically only required for manual builds. + # - name: Setup runtime (example) + # uses: actions/setup-example@v1 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} - setup-python-dependencies: false + build-mode: ${{ matrix.build-mode }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. @@ -58,23 +68,23 @@ jobs: # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs # queries: security-extended,security-and-quality - - # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v3 - + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. Then modify this step + # to build your code. 
# â„šī¸ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. - - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + echo ' make bootstrap' + echo ' make release' + exit 1 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@v4 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/cygwin-test.yml b/.github/workflows/cygwin-test.yml index bde4ea659..17ba4bc82 100644 --- a/.github/workflows/cygwin-test.yml +++ b/.github/workflows/cygwin-test.yml @@ -2,35 +2,47 @@ name: test-cygwin on: [push, pull_request, workflow_dispatch] -jobs: - build: - runs-on: windows-latest +permissions: + contents: read +jobs: + test: strategy: + matrix: + selection: [fast, perf] + include: + - selection: fast + additional-pytest-args: --ignore=test/performance + - selection: perf + additional-pytest-args: test/performance + fail-fast: false + runs-on: windows-latest + env: CHERE_INVOKING: "1" CYGWIN_NOWINPATH: "1" defaults: run: - shell: C:\tools\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr "{0}" + shell: C:\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr "{0}" steps: - name: Force LF line endings run: | git config --global core.autocrlf false # Affects the non-Cygwin git. - shell: bash # Use Git Bash instead of Cygwin Bash for this step. 
+ shell: pwsh # Do this outside Cygwin, to affect actions/checkout. - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 - - name: Set up Cygwin - uses: egor-tensin/setup-cygwin@v4 + - name: Install Cygwin + uses: cygwin/cygwin-install-action@v6 with: - packages: python39=3.9.16-1 python39-pip python39-virtualenv git + packages: git python39 python-pip-wheel python-setuptools-wheel python-wheel-wheel + add-to-path: false # No need to change $PATH outside the Cygwin environment. - name: Arrange for verbose output run: | @@ -41,6 +53,8 @@ jobs: run: | git config --global --add safe.directory "$(pwd)" git config --global --add safe.directory "$(pwd)/.git" + git config --global --add safe.directory "$(pwd)/git/ext/gitdb" + git config --global --add safe.directory "$(pwd)/git/ext/gitdb/gitdb/ext/smmap" git config --global core.autocrlf false - name: Prepare this repo for tests @@ -55,19 +69,70 @@ jobs: # and cause subsequent tests to fail cat test/fixtures/.gitconfig >> ~/.gitconfig - - name: Ensure the "pip" command is available + - name: Set up virtual environment run: | - # This is used unless, and before, an updated pip is installed. - ln -s pip3 /usr/bin/pip + python3.9 -m venv .venv + echo 'BASH_ENV=.venv/bin/activate' >>"$GITHUB_ENV" - name: Update PyPA packages run: | - # Get the latest pip, wheel, and prior to Python 3.12, setuptools. - python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + python -m pip install -U pip 'setuptools; python_version<"3.12"' wheel - name: Install project and test dependencies run: | - pip install ".[test]" + pip install '.[test]' + + - name: Show POSIX file ownership + # Cygwin's `ls -ld` reports the NTFS Owner SID via Cygwin's SID-to-uid + # mapping (well-known SIDs by their RID, machine-local accounts by + # 0x30000+RID). 
That mapping is what Cygwin git's + # `is_path_owned_by_current_user` reduces to, so this is the view that + # determines whether `safe.directory` is consulted. + run: | + ls -ld -- \ + "$(pwd)" \ + "$(pwd)/.git" \ + "$(pwd)/git/ext/gitdb" \ + "$(pwd)/git/ext/gitdb/.git" \ + "$(pwd)/.git/modules/gitdb" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap/.git" \ + "$(pwd)/.git/modules/gitdb/modules/smmap" \ + "${HOME:?HOME is not set}/.gitconfig" \ + 2>&1 || true + + - name: Show NTFS file ownership + # Authoritative NTFS Owner via Get-Acl, with no Cygwin SID-to-uid layer + # in between -- useful for confirming what the Cygwin view reports as + # "Administrators" is the BUILTIN\Administrators SID (S-1-5-32-544). + shell: pwsh + run: | + $paths = @( + "$pwd", + "$pwd\.git", + "$pwd\git\ext\gitdb", + "$pwd\git\ext\gitdb\.git", + "$pwd\.git\modules\gitdb", + "$pwd\git\ext\gitdb\gitdb\ext\smmap", + "$pwd\git\ext\gitdb\gitdb\ext\smmap\.git", + "$pwd\.git\modules\gitdb\modules\smmap", + "$env:USERPROFILE\.gitconfig" + ) + foreach ($p in $paths) { + if (Test-Path -LiteralPath $p) { + try { + $owner = (Get-Acl -LiteralPath $p).Owner + } catch { + $owner = "ERROR: $($_.Exception.Message)" + } + "{0,-44} {1}" -f $owner, $p + } else { + "(missing: $p)" + } + } + + - name: Show safe.directory entries + run: git config --global --get-all safe.directory - name: Show version and platform information run: | @@ -77,6 +142,6 @@ jobs: python --version python -c 'import os, sys; print(f"sys.platform={sys.platform!r}, os.name={os.name!r}")' - - name: Test with pytest + - name: Test with pytest (${{ matrix.additional-pytest-args }}) run: | - pytest --color=yes -p no:sugar --instafail -vv + pytest --color=yes -p no:sugar --instafail -vv ${{ matrix.additional-pytest-args }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a0e81a993..e32e946c8 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,14 +2,17 @@ 
name: Lint on: [push, pull_request, workflow_dispatch] +permissions: + contents: read + jobs: lint: - runs-on: ubuntu-latest + runs-on: ubuntu-slim steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - - uses: actions/setup-python@v5 + - uses: actions/setup-python@v6 with: python-version: "3.x" diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 747db62f0..6746b92c6 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -9,41 +9,63 @@ permissions: contents: read jobs: - build: + test: strategy: - fail-fast: false matrix: - os: ["ubuntu-22.04", "macos-latest", "windows-latest"] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + os-type: [ubuntu, macos, windows] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"] exclude: - - os: "macos-latest" - python-version: "3.7" + - os-type: macos + python-version: "3.7" # Not available for the ARM-based macOS runners. + - os-type: macos + python-version: "3.13t" + - os-type: macos + python-version: "3.14t" + - os-type: windows + python-version: "3.13" # FIXME: Fix and enable Python 3.13 and 3.14 on Windows (#1955). + - os-type: windows + python-version: "3.13t" + - os-type: windows + python-version: "3.14" + - os-type: windows + python-version: "3.14t" include: + - os-ver: latest + - os-type: ubuntu + python-version: "3.7" + os-ver: "22.04" + - build-docs: true # We ensure documentation builds, except on very old interpreters. 
+ - python-version: "3.7" + build-docs: false + - python-version: "3.8" + build-docs: false - experimental: false - runs-on: ${{ matrix.os }} + fail-fast: false + + runs-on: ${{ matrix.os-type }}-${{ matrix.os-ver }} defaults: run: shell: bash --noprofile --norc -exo pipefail {0} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} allow-prereleases: ${{ matrix.experimental }} - name: Set up WSL (Windows) - if: startsWith(matrix.os, 'windows') - uses: Vampire/setup-wsl@v3.1.1 + if: matrix.os-type == 'windows' + uses: Vampire/setup-wsl@v6.0.0 with: - distribution: Alpine - additional-packages: bash + wsl-version: 1 + distribution: Debian - name: Prepare this repo for tests run: | @@ -59,12 +81,67 @@ jobs: - name: Update PyPA packages run: | - # Get the latest pip, wheel, and prior to Python 3.12, setuptools. - python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + python -m pip install -U pip 'setuptools; python_version<"3.12"' wheel - name: Install project and test dependencies run: | - pip install ".[test]" + pip install '.[test]' + + - name: Show POSIX file ownership + # Linux and macOS only. On Windows, Git Bash's `ls -ld` reports a + # uniform uid+gid for every path regardless of NTFS Owner (MSYS2's + # SID-to-uid mapping doesn't have Cygwin's fidelity), so it would + # not be informative here. The NTFS Owner check below covers Windows. + if: matrix.os-type != 'windows' + run: | + ls -ld -- \ + "$(pwd)" \ + "$(pwd)/.git" \ + "$(pwd)/git/ext/gitdb" \ + "$(pwd)/git/ext/gitdb/.git" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap" \ + "$(pwd)/git/ext/gitdb/gitdb/ext/smmap/.git" \ + "${HOME:?HOME is not set}/.gitconfig" \ + 2>&1 || true + + - name: Show NTFS file ownership + # Windows only. 
Reads NTFS Owner directly via Get-Acl, which is the + # authoritative view for Windows-side ownership questions; the POSIX + # view via Git Bash's MSYS2 layer is not a reliable proxy here. + if: matrix.os-type == 'windows' + shell: pwsh + run: | + $paths = @( + "$pwd", + "$pwd\.git", + "$pwd\git\ext\gitdb", + "$pwd\git\ext\gitdb\.git", + "$pwd\git\ext\gitdb\gitdb\ext\smmap", + "$pwd\git\ext\gitdb\gitdb\ext\smmap\.git", + "$env:USERPROFILE\.gitconfig" + ) + foreach ($p in $paths) { + if (Test-Path -LiteralPath $p) { + try { + $owner = (Get-Acl -LiteralPath $p).Owner + } catch { + $owner = "ERROR: $($_.Exception.Message)" + } + "{0,-44} {1}" -f $owner, $p + } else { + "(missing: $p)" + } + } + + - name: Show safe.directory entries + # `actions/checkout`'s safe.directory add is only durable for the + # checkout itself (it writes under a throwaway HOME override and + # then discards it), so by the time this step runs the runner + # user's `~/.gitconfig` has no entries -- and git accepts the + # workspace's ownership anyway: Git for Windows via its + # Admins-group exemption on the windows matrix; on Linux/macOS + # the workspace is owned by the test user. Expected: `(none)`. + run: git config --global --get-all safe.directory || echo "(none)" - name: Show version and platform information run: | @@ -76,7 +153,7 @@ jobs: # For debugging hook tests on native Windows systems that may have WSL. - name: Show bash.exe candidates (Windows) - if: startsWith(matrix.os, 'windows') + if: matrix.os-type == 'windows' run: | set +e bash.exe -c 'printenv WSL_DISTRO_NAME; uname -a' @@ -84,14 +161,13 @@ jobs: continue-on-error: true - name: Check types with mypy + if: matrix.python-version != '3.7' && matrix.python-version != '3.8' run: | - mypy --python-version=${{ matrix.python-version }} + mypy --python-version="${PYTHON_VERSION%t}" # Version only, with no "t" for free-threaded. 
env: MYPY_FORCE_COLOR: "1" TERM: "xterm-256color" # For color: https://github.com/python/mypy/issues/13817 - # With new versions of mypy new issues might arise. This is a problem if there is - # nobody able to fix them, so we have to ignore errors until that changes. - continue-on-error: true + PYTHON_VERSION: ${{ matrix.python-version }} - name: Test with pytest run: | @@ -99,7 +175,7 @@ jobs: continue-on-error: false - name: Documentation - if: matrix.python-version != '3.7' + if: matrix.build-docs run: | - pip install ".[doc]" + pip install '.[doc]' make -C doc html diff --git a/.gitignore b/.gitignore index d85569405..eab294a65 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,8 @@ __pycache__/ # Transient editor files *.swp *~ +\#*# +.#*# # Editor configuration nbproject diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 424cc5f37..9cc97962d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,29 +1,29 @@ repos: - repo: https://github.com/codespell-project/codespell - rev: v2.3.0 + rev: v2.4.2 hooks: - id: codespell additional_dependencies: [tomli] exclude: ^test/fixtures/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.0 + rev: v0.15.15 hooks: - - id: ruff + - id: ruff-check args: ["--fix"] exclude: ^git/ext/ - id: ruff-format exclude: ^git/ext/ - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.10.0.1 + rev: v0.11.0.1 hooks: - id: shellcheck args: [--color] exclude: ^test/fixtures/polyglot$|^git/ext/ - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v6.0.0 hooks: - id: end-of-file-fixer exclude: ^test/fixtures/|COPYING|LICENSE @@ -33,6 +33,6 @@ repos: - id: check-merge-conflict - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.19 + rev: v0.25 hooks: - id: validate-pyproject diff --git a/AUTHORS b/AUTHORS index 45b14c961..15333e1e5 100644 --- a/AUTHORS +++ b/AUTHORS @@ -55,5 +55,7 @@ Contributors are: -Eliah Kagan -Ethan Lin -Jonas Scharpf 
+-Gordon Marx +-Enji Cooper Portions derived from other open source works and are clearly marked. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8536d7f73..60e34a651 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -9,6 +9,37 @@ The following is a short step-by-step rundown of what one typically would do to - Feel free to add yourself to AUTHORS file. - Create a pull request. +## Quality expectations + +Contributions must be made with care and meet the quality bar of the surrounding code. +That means a change should not leave GitPython worse than it was before: it should be +readable, maintainable, tested where practical, documented and consistent with the +existing style and behavior. + +A contribution that works only narrowly but lowers the quality of the +codebase may be declined. The maintainers may not always be able to provide +detailed feedback. + +## AI-assisted contributions + +If AI edits files for you, disclose it in the pull request description and commit +metadata. Prefer making the agent identity part of the commit, for example by using +an AI author such as `$agent $version ` or a co-author via +a `Co-authored-by: ` trailer. + +Agents operating through a person's GitHub account must identify themselves. For +example, comments posted by an agent should say so directly with phrases like +`AI agent on behalf of : ...`. + +Fully AI-generated comments on pull requests or issues must also be disclosed. +Undisclosed AI-generated comments may lead to the pull request or issue being closed. + +AI-assisted proofreading or wording polish does not need disclosure, but it is still +courteous to mention it when the AI materially influenced the final text. + +Automated or "full-auto" AI contributions without a human responsible for reviewing +and standing behind the work may be closed. 
+ ## Fuzzing Test Specific Documentation For details related to contributing to the fuzzing test suite and OSS-Fuzz integration, please diff --git a/README.md b/README.md index 59c6f995b..412d38205 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ by setting the `GIT_PYTHON_GIT_EXECUTABLE=` environment variable. - Git (1.7.x or newer) - Python >= 3.7 -The list of dependencies are listed in `./requirements.txt` and `./test-requirements.txt`. +The list of dependencies are listed in [`./requirements.txt`](https://github.com/gitpython-developers/GitPython/blob/main/requirements.txt) and [`./test-requirements.txt`](https://github.com/gitpython-developers/GitPython/blob/main/test-requirements.txt). The installer takes care of installing them for you. ### INSTALL @@ -180,7 +180,7 @@ Style and formatting checks, and running tests on all the different supported Py #### Configuration files -Specific tools are all configured in the `./pyproject.toml` file: +Specific tools are all configured in the [`./pyproject.toml`](https://github.com/gitpython-developers/GitPython/blob/main/pyproject.toml) file: - `pytest` (test runner) - `coverage.py` (code coverage) @@ -189,9 +189,9 @@ Specific tools are all configured in the `./pyproject.toml` file: Orchestration tools: -- Configuration for `pre-commit` is in the `./.pre-commit-config.yaml` file. -- Configuration for `tox` is in `./tox.ini`. -- Configuration for GitHub Actions (CI) is in files inside `./.github/workflows/`. +- Configuration for `pre-commit` is in the [`./.pre-commit-config.yaml`](https://github.com/gitpython-developers/GitPython/blob/main/.pre-commit-config.yaml) file. +- Configuration for `tox` is in [`./tox.ini`](https://github.com/gitpython-developers/GitPython/blob/main/tox.ini). +- Configuration for GitHub Actions (CI) is in files inside [`./.github/workflows/`](https://github.com/gitpython-developers/GitPython/tree/main/.github/workflows). 
### Contributions @@ -212,8 +212,8 @@ Please have a look at the [contributions file][contributing]. ### How to make a new release -1. Update/verify the **version** in the `VERSION` file. -2. Update/verify that the `doc/source/changes.rst` changelog file was updated. It should include a link to the forthcoming release page: `https://github.com/gitpython-developers/GitPython/releases/tag/` +1. Update/verify the **version** in the [`VERSION`](https://github.com/gitpython-developers/GitPython/blob/main/VERSION) file. +2. Update/verify that the [`doc/source/changes.rst`](https://github.com/gitpython-developers/GitPython/blob/main/doc/source/changes.rst) changelog file was updated. It should include a link to the forthcoming release page: `https://github.com/gitpython-developers/GitPython/releases/tag/` 3. Commit everything. 4. Run `git tag -s ` to tag the version in Git. 5. _Optionally_ create and activate a [virtual environment](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#creating-a-virtual-environment). (Then the next step can install `build` and `twine`.) @@ -240,7 +240,7 @@ Please have a look at the [contributions file][contributing]. [3-Clause BSD License](https://opensource.org/license/bsd-3-clause/), also known as the New BSD License. See the [LICENSE file][license]. -One file exclusively used for fuzz testing is subject to [a separate license, detailed here](./fuzzing/README.md#license). +One file exclusively used for fuzz testing is subject to [a separate license, detailed here](https://github.com/gitpython-developers/GitPython/blob/main/fuzzing/README.md#license). This file is not included in the wheel or sdist packages published by the maintainers of GitPython. 
[contributing]: https://github.com/gitpython-developers/GitPython/blob/main/CONTRIBUTING.md diff --git a/SECURITY.md b/SECURITY.md index d39425b70..0aea34845 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -11,4 +11,6 @@ Only the latest version of GitPython can receive security updates. If a vulnerab ## Reporting a Vulnerability -Please report private portions of a vulnerability to . Doing so helps to receive updates and collaborate on the matter, without disclosing it publicliy right away. +Please report private portions of a vulnerability to . Doing so helps to receive updates and collaborate on the matter, without disclosing it publicly right away. + +Vulnerabilities in GitPython's dependencies [gitdb](https://github.com/gitpython-developers/gitdb/blob/master/SECURITY.md) or [smmap](https://github.com/gitpython-developers/smmap/blob/master/SECURITY.md), which primarily exist to support GitPython, can be reported here as well, at that same link. The affected package (`GitPython`, `gitdb`, or `smmap`) can be included in the report, if known. diff --git a/VERSION b/VERSION index d1bf6638d..0bc461141 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.43 +3.1.50 diff --git a/doc/Makefile b/doc/Makefile index ddeadbd7e..7e0d325fe 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line. BUILDDIR = build -SPHINXOPTS = -W +SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = diff --git a/doc/requirements.txt b/doc/requirements.txt index 81140d898..24472ba39 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,3 +1,3 @@ -sphinx >= 7.1.2, < 7.2 +sphinx >= 7.4.7, < 8 sphinx_rtd_theme sphinx-autodoc-typehints diff --git a/doc/source/changes.rst b/doc/source/changes.rst index 3c903423c..b5152b3c5 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,6 +2,61 @@ Changelog ========= +3.1.50 +====== + +Save setting of configuration values, this time sections as well, as follow-up to 3.1.49. 
+ +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.50 + +3.1.49 +====== + +Save setting of configuration values, +which could be used to inject other more configuration. + +Also more conforming `rev-parse` implementation. + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.49 + +3.1.48 +====== + +Safe reference creation in the face of untrusted input. + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.48 + + +3.1.47 +====== + +Address various security issues related to bypassing injection-protection +of unsafe Git flags. + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.47 + +3.1.46 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.46 + +3.1.45 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.45 + +3.1.44 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.44 + 3.1.43 ====== @@ -99,7 +154,7 @@ https://github.com/gitpython-developers/gitpython/milestone/61?closed=1 but a necessary fix for https://github.com/gitpython-developers/GitPython/issues/1515. Please take a look at the PR for more information and how to bypass these protections in case they cause breakage: https://github.com/gitpython-developers/GitPython/pull/1521. - + See the following for all changes. https://github.com/gitpython-developers/gitpython/milestone/60?closed=1 @@ -164,38 +219,38 @@ https://github.com/gitpython-developers/gitpython/milestone/53?closed=1 * General: - Remove python 3.6 support - + - Remove distutils ahead of deprecation in standard library. - + - Update sphinx to 4.1.12 and use autodoc-typehints. 
- + - Include README as long_description on PyPI - + - Test against earliest and latest minor version available on Github Actions (e.g. 3.9.0 and 3.9.7) - + * Typing: - Add types to ALL functions. - + - Ensure py.typed is collected. - + - Increase mypy strictness with disallow_untyped_defs, warn_redundant_casts, warn_unreachable. - + - Use typing.NamedTuple and typing.OrderedDict now 3.6 dropped. - + - Make Protocol classes ABCs at runtime due to new behaviour/bug in 3.9.7 & 3.10.0-rc1 - + - Remove use of typing.TypeGuard until later release, to allow dependent libs time to update. - + - Tracking issue: https://github.com/gitpython-developers/GitPython/issues/1095 * Runtime improvements: - Add clone_multi_options support to submodule.add() - + - Delay calling get_user_id() unless essential, to support sand-boxed environments. - + - Add timeout to handle_process_output(), in case thread.join() hangs. See the following for details: diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst index fd3b14c57..d095d3be3 100644 --- a/doc/source/tutorial.rst +++ b/doc/source/tutorial.rst @@ -513,6 +513,12 @@ The GitDB is a pure-python implementation of the git object database. It is the repo = Repo("path/to/repo", odbt=GitDB) +.. warning:: + ``GitDB`` may fail or become extremely slow when traversing trees in + repositories with very large commits (thousands of changed files in a + single commit). If you encounter ``RecursionError`` or excessive + slowness during tree traversal, switch to ``GitCmdObjectDB`` instead. 
+ GitCmdObjectDB ============== diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py index d22b0aa5b..afa653d0d 100644 --- a/fuzzing/fuzz-targets/fuzz_submodule.py +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -9,11 +9,17 @@ get_max_filename_length, ) -# Setup the git environment +# Setup the Git environment setup_git_environment() from git import Repo, GitCommandError, InvalidGitRepositoryError +def sanitize_input(input_str, max_length=255): + """Sanitize and truncate inputs to avoid invalid Git operations.""" + sanitized = "".join(ch for ch in input_str if ch.isalnum() or ch in ("-", "_", ".")) + return sanitized[:max_length] + + def TestOneInput(data): fdp = atheris.FuzzedDataProvider(data) @@ -24,12 +30,23 @@ def TestOneInput(data): try: with tempfile.TemporaryDirectory() as submodule_temp_dir: sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool()) - sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) + commit_message = sanitize_input(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) + sub_repo.index.commit(commit_message) - submodule_name = fdp.ConsumeUnicodeNoSurrogates( - fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(repo.working_tree_dir))) + submodule_name = sanitize_input( + fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, get_max_filename_length(repo.working_tree_dir)) + ) ) - submodule_path = os.path.join(repo.working_tree_dir, submodule_name) + + submodule_path = os.path.relpath( + os.path.join(repo.working_tree_dir, submodule_name), + start=repo.working_tree_dir, + ) + + # Ensure submodule_path is valid + if not submodule_name or submodule_name.startswith("/") or ".." 
in submodule_name: + return -1 # Reject invalid input so they are not added to the corpus submodule = repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir) repo.index.commit("Added submodule") @@ -39,25 +56,38 @@ def TestOneInput(data): value_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) writer.set_value( - fdp.ConsumeUnicodeNoSurrogates(key_length), fdp.ConsumeUnicodeNoSurrogates(value_length) + sanitize_input(fdp.ConsumeUnicodeNoSurrogates(key_length)), + sanitize_input(fdp.ConsumeUnicodeNoSurrogates(value_length)), ) writer.release() - submodule.update(init=fdp.ConsumeBool(), dry_run=fdp.ConsumeBool(), force=fdp.ConsumeBool()) + submodule.update( + init=fdp.ConsumeBool(), + dry_run=fdp.ConsumeBool(), + force=fdp.ConsumeBool(), + ) + submodule_repo = submodule.module() - new_file_name = fdp.ConsumeUnicodeNoSurrogates( - fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(submodule_repo.working_tree_dir))) + new_file_name = sanitize_input( + fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, get_max_filename_length(submodule_repo.working_tree_dir)) + ) ) new_file_path = os.path.join(submodule_repo.working_tree_dir, new_file_name) with open(new_file_path, "wb") as new_file: new_file.write(fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512))) + submodule_repo.index.add([new_file_path]) submodule_repo.index.commit("Added new file to submodule") repo.submodule_update(recursive=fdp.ConsumeBool()) - submodule_repo.head.reset(commit="HEAD~1", working_tree=fdp.ConsumeBool(), head=fdp.ConsumeBool()) - # Use fdp.PickValueInList to ensure at least one of 'module' or 'configuration' is True + submodule_repo.head.reset( + commit="HEAD~1", + working_tree=fdp.ConsumeBool(), + head=fdp.ConsumeBool(), + ) + module_option_value, configuration_option_value = fdp.PickValueInList( [(True, False), (False, True), (True, True)] ) @@ -82,12 +112,7 @@ def TestOneInput(data): ): return -1 except Exception as e: - if isinstance(e, ValueError) 
and "embedded null byte" in str(e): - return -1 - elif isinstance(e, OSError) and "File name too long" in str(e): - return -1 - else: - return handle_exception(e) + return handle_exception(e) def main(): diff --git a/git/cmd.py b/git/cmd.py index 2048a43fa..92ca09c2a 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -60,6 +60,11 @@ overload, ) +if sys.version_info >= (3, 10): + from typing import TypeAlias +else: + from typing_extensions import TypeAlias + from git.types import Literal, PathLike, TBD if TYPE_CHECKING: @@ -207,7 +212,7 @@ def pump_stream( ) if stderr_handler: error_str: Union[str, bytes] = ( - "error: process killed because it timed out." f" kill_after_timeout={kill_after_timeout} seconds" + f"error: process killed because it timed out. kill_after_timeout={kill_after_timeout} seconds" ) if not decode_streams and isinstance(p_stderr, BinaryIO): # Assume stderr_handler needs binary input. @@ -268,12 +273,12 @@ def _safer_popen_windows( if shell: # The original may be immutable, or the caller may reuse it. Mutate a copy. env = {} if env is None else dict(env) - env["NoDefaultCurrentDirectoryInExePath"] = "1" # The "1" can be an value. + env["NoDefaultCurrentDirectoryInExePath"] = "1" # The "1" can be any value. # When not using a shell, the current process does the search in a # CreateProcessW API call, so the variable must be set in our environment. With # a shell, that's unnecessary if https://github.com/python/cpython/issues/101283 - # is patched. In Python versions where it is unpatched, and in the rare case the + # is patched. In Python versions where it is unpatched, in the rare case the # ComSpec environment variable is unset, the search for the shell itself is # unsafe. Setting NoDefaultCurrentDirectoryInExePath in all cases, as done here, # is simpler and protects against that. (As above, the "1" can be any value.) 
@@ -308,6 +313,234 @@ def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], exc ## -- End Utilities -- @} + +class _AutoInterrupt: + """Process wrapper that terminates the wrapped process on finalization. + + This kills/interrupts the stored process instance once this instance goes out of + scope. It is used to prevent processes piling up in case iterators stop reading. + + All attributes are wired through to the contained process object. + + The wait method is overridden to perform automatic status code checking and possibly + raise. + """ + + __slots__ = ("proc", "args", "status") + + # If this is non-zero it will override any status code during _terminate, used + # to prevent race conditions in testing. + _status_code_if_terminate: int = 0 + + def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None: + self.proc = proc + self.args = args + self.status: Union[int, None] = None + + def _terminate(self) -> None: + """Terminate the underlying process.""" + if self.proc is None: + return + + proc = self.proc + self.proc = None + if proc.stdin: + proc.stdin.close() + if proc.stdout: + proc.stdout.close() + if proc.stderr: + proc.stderr.close() + # Did the process finish already so we have a return code? + try: + if proc.poll() is not None: + self.status = self._status_code_if_terminate or proc.poll() + return + except OSError as ex: + _logger.info("Ignored error after process had died: %r", ex) + + # It can be that nothing really exists anymore... + if os is None or getattr(os, "kill", None) is None: + return + + # Try to kill it. + try: + proc.terminate() + status = proc.wait() # Ensure the process goes away. + + self.status = self._status_code_if_terminate or status + except (OSError, AttributeError) as ex: + # On interpreter shutdown (notably on Windows), parts of the stdlib used by + # subprocess can already be torn down (e.g. `subprocess._winapi` becomes None), + # which can cause AttributeError during terminate(). 
In that case, we prefer + # to silently ignore to avoid noisy "Exception ignored in: __del__" messages. + _logger.info("Ignored error while terminating process: %r", ex) + # END exception handling + + def __del__(self) -> None: + self._terminate() + + def __getattr__(self, attr: str) -> Any: + return getattr(self.proc, attr) + + # TODO: Bad choice to mimic `proc.wait()` but with different args. + def wait(self, stderr: Union[None, str, bytes] = b"") -> int: + """Wait for the process and return its status code. + + :param stderr: + Previously read value of stderr, in case stderr is already closed. + + :warn: + May deadlock if output or error pipes are used and not handled separately. + + :raise git.exc.GitCommandError: + If the return status is not 0. + """ + if stderr is None: + stderr_b = b"" + stderr_b = force_bytes(data=stderr, encoding="utf-8") + status: Union[int, None] + if self.proc is not None: + status = self.proc.wait() + p_stderr = self.proc.stderr + else: # Assume the underlying proc was killed earlier or never existed. + status = self.status + p_stderr = None + + def read_all_from_possibly_closed_stream(stream: Union[IO[bytes], None]) -> bytes: + if stream: + try: + return stderr_b + force_bytes(stream.read()) + except (OSError, ValueError): + return stderr_b or b"" + else: + return stderr_b or b"" + + # END status handling + + if status != 0: + errstr = read_all_from_possibly_closed_stream(p_stderr) + _logger.debug("AutoInterrupt wait stderr: %r" % (errstr,)) + raise GitCommandError(remove_password_if_present(self.args), status, errstr) + return status + + +_AutoInterrupt.__name__ = "AutoInterrupt" +_AutoInterrupt.__qualname__ = "Git.AutoInterrupt" + + +class _CatFileContentStream: + """Object representing a sized read-only stream returning the contents of + an object. + + This behaves like a stream, but counts the data read and simulates an empty stream + once our sized content region is empty. 
+ + If not all data are read to the end of the object's lifetime, we read the rest to + ensure the underlying stream continues to work. + """ + + __slots__ = ("_stream", "_nbr", "_size") + + def __init__(self, size: int, stream: IO[bytes]) -> None: + self._stream = stream + self._size = size + self._nbr = 0 # Number of bytes read. + + # Special case: If the object is empty, has null bytes, get the final + # newline right away. + if size == 0: + stream.read(1) + # END handle empty streams + + def read(self, size: int = -1) -> bytes: + bytes_left = self._size - self._nbr + if bytes_left == 0: + return b"" + if size > -1: + # Ensure we don't try to read past our limit. + size = min(bytes_left, size) + else: + # They try to read all, make sure it's not more than what remains. + size = bytes_left + # END check early depletion + data = self._stream.read(size) + self._nbr += len(data) + + # Check for depletion, read our final byte to make the stream usable by + # others. + if self._size - self._nbr == 0: + self._stream.read(1) # final newline + # END finish reading + return data + + def readline(self, size: int = -1) -> bytes: + if self._nbr == self._size: + return b"" + + # Clamp size to lowest allowed value. + bytes_left = self._size - self._nbr + if size > -1: + size = min(bytes_left, size) + else: + size = bytes_left + # END handle size + + data = self._stream.readline(size) + self._nbr += len(data) + + # Handle final byte. + if self._size - self._nbr == 0: + self._stream.read(1) + # END finish reading + + return data + + def readlines(self, size: int = -1) -> List[bytes]: + if self._nbr == self._size: + return [] + + # Leave all additional logic to our readline method, we just check the size. 
+ out = [] + nbr = 0 + while True: + line = self.readline() + if not line: + break + out.append(line) + if size > -1: + nbr += len(line) + if nbr > size: + break + # END handle size constraint + # END readline loop + return out + + # skipcq: PYL-E0301 + def __iter__(self) -> "Git.CatFileContentStream": + return self + + def __next__(self) -> bytes: + line = self.readline() + if not line: + raise StopIteration + + return line + + next = __next__ + + def __del__(self) -> None: + bytes_left = self._size - self._nbr + if bytes_left: + # Read and discard - seeking is impossible within a stream. + # This includes any terminating newline. + self._stream.read(bytes_left + 1) + # END handle incomplete read + + +_CatFileContentStream.__name__ = "CatFileContentStream" +_CatFileContentStream.__qualname__ = "Git.CatFileContentStream" + + _USE_SHELL_DEFAULT_MESSAGE = ( "Git.USE_SHELL is deprecated, because only its default value of False is safe. " "It will be removed in a future release." @@ -321,7 +554,7 @@ def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], exc ) -def _warn_use_shell(extra_danger: bool) -> None: +def _warn_use_shell(*, extra_danger: bool) -> None: warnings.warn( _USE_SHELL_DANGER_MESSAGE if extra_danger else _USE_SHELL_DEFAULT_MESSAGE, DeprecationWarning, @@ -337,12 +570,12 @@ class _GitMeta(type): def __getattribute(cls, name: str) -> Any: if name == "USE_SHELL": - _warn_use_shell(False) + _warn_use_shell(extra_danger=False) return super().__getattribute__(name) def __setattr(cls, name: str, value: Any) -> Any: if name == "USE_SHELL": - _warn_use_shell(value) + _warn_use_shell(extra_danger=value) super().__setattr__(name, value) if not TYPE_CHECKING: @@ -711,6 +944,21 @@ def check_unsafe_protocols(cls, url: str) -> None: f"The `{protocol}::` protocol looks suspicious, use `allow_unsafe_protocols=True` to allow it." 
) + @classmethod + def _canonicalize_option_name(cls, option: str) -> str: + """Return the option name used for unsafe-option checks. + + Examples: + ``"--upload-pack=/tmp/helper"`` -> ``"upload-pack"`` + ``"upload_pack"`` -> ``"upload-pack"`` + ``"--config core.filemode=false"`` -> ``"config"`` + """ + option_name = option.lstrip("-").split("=", 1)[0] + option_tokens = option_name.split(None, 1) + if not option_tokens: + return "" + return dashify(option_tokens[0]) + @classmethod def check_unsafe_options(cls, options: List[str], unsafe_options: List[str]) -> None: """Check for unsafe options. @@ -718,231 +966,16 @@ def check_unsafe_options(cls, options: List[str], unsafe_options: List[str]) -> Some options that are passed to ``git `` can be used to execute arbitrary commands. These are blocked by default. """ - # Options can be of the form `foo`, `--foo bar`, or `--foo=bar`, so we need to - # check if they start with "--foo" or if they are equal to "foo". - bare_unsafe_options = [option.lstrip("-") for option in unsafe_options] + # Options can be of the form `foo`, `--foo`, `--foo bar`, or `--foo=bar`. + canonical_unsafe_options = {cls._canonicalize_option_name(option): option for option in unsafe_options} for option in options: - for unsafe_option, bare_option in zip(unsafe_options, bare_unsafe_options): - if option.startswith(unsafe_option) or option == bare_option: - raise UnsafeOptionError( - f"{unsafe_option} is not allowed, use `allow_unsafe_options=True` to allow it." - ) - - class AutoInterrupt: - """Process wrapper that terminates the wrapped process on finalization. - - This kills/interrupts the stored process instance once this instance goes out of - scope. It is used to prevent processes piling up in case iterators stop reading. - - All attributes are wired through to the contained process object. - - The wait method is overridden to perform automatic status code checking and - possibly raise. 
- """ - - __slots__ = ("proc", "args", "status") - - # If this is non-zero it will override any status code during _terminate, used - # to prevent race conditions in testing. - _status_code_if_terminate: int = 0 - - def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None: - self.proc = proc - self.args = args - self.status: Union[int, None] = None - - def _terminate(self) -> None: - """Terminate the underlying process.""" - if self.proc is None: - return - - proc = self.proc - self.proc = None - if proc.stdin: - proc.stdin.close() - if proc.stdout: - proc.stdout.close() - if proc.stderr: - proc.stderr.close() - # Did the process finish already so we have a return code? - try: - if proc.poll() is not None: - self.status = self._status_code_if_terminate or proc.poll() - return - except OSError as ex: - _logger.info("Ignored error after process had died: %r", ex) - - # It can be that nothing really exists anymore... - if os is None or getattr(os, "kill", None) is None: - return - - # Try to kill it. - try: - proc.terminate() - status = proc.wait() # Ensure the process goes away. - - self.status = self._status_code_if_terminate or status - except OSError as ex: - _logger.info("Ignored error after process had died: %r", ex) - # END exception handling - - def __del__(self) -> None: - self._terminate() - - def __getattr__(self, attr: str) -> Any: - return getattr(self.proc, attr) - - # TODO: Bad choice to mimic `proc.wait()` but with different args. - def wait(self, stderr: Union[None, str, bytes] = b"") -> int: - """Wait for the process and return its status code. - - :param stderr: - Previously read value of stderr, in case stderr is already closed. - - :warn: - May deadlock if output or error pipes are used and not handled - separately. - - :raise git.exc.GitCommandError: - If the return status is not 0. 
- """ - if stderr is None: - stderr_b = b"" - stderr_b = force_bytes(data=stderr, encoding="utf-8") - status: Union[int, None] - if self.proc is not None: - status = self.proc.wait() - p_stderr = self.proc.stderr - else: # Assume the underlying proc was killed earlier or never existed. - status = self.status - p_stderr = None - - def read_all_from_possibly_closed_stream(stream: Union[IO[bytes], None]) -> bytes: - if stream: - try: - return stderr_b + force_bytes(stream.read()) - except (OSError, ValueError): - return stderr_b or b"" - else: - return stderr_b or b"" - - # END status handling + unsafe_option = canonical_unsafe_options.get(cls._canonicalize_option_name(option)) + if unsafe_option is not None: + raise UnsafeOptionError(f"{unsafe_option} is not allowed, use `allow_unsafe_options=True` to allow it.") - if status != 0: - errstr = read_all_from_possibly_closed_stream(p_stderr) - _logger.debug("AutoInterrupt wait stderr: %r" % (errstr,)) - raise GitCommandError(remove_password_if_present(self.args), status, errstr) - return status + AutoInterrupt: TypeAlias = _AutoInterrupt - # END auto interrupt - - class CatFileContentStream: - """Object representing a sized read-only stream returning the contents of - an object. - - This behaves like a stream, but counts the data read and simulates an empty - stream once our sized content region is empty. - - If not all data are read to the end of the object's lifetime, we read the - rest to ensure the underlying stream continues to work. - """ - - __slots__ = ("_stream", "_nbr", "_size") - - def __init__(self, size: int, stream: IO[bytes]) -> None: - self._stream = stream - self._size = size - self._nbr = 0 # Number of bytes read. - - # Special case: If the object is empty, has null bytes, get the final - # newline right away. 
- if size == 0: - stream.read(1) - # END handle empty streams - - def read(self, size: int = -1) -> bytes: - bytes_left = self._size - self._nbr - if bytes_left == 0: - return b"" - if size > -1: - # Ensure we don't try to read past our limit. - size = min(bytes_left, size) - else: - # They try to read all, make sure it's not more than what remains. - size = bytes_left - # END check early depletion - data = self._stream.read(size) - self._nbr += len(data) - - # Check for depletion, read our final byte to make the stream usable by - # others. - if self._size - self._nbr == 0: - self._stream.read(1) # final newline - # END finish reading - return data - - def readline(self, size: int = -1) -> bytes: - if self._nbr == self._size: - return b"" - - # Clamp size to lowest allowed value. - bytes_left = self._size - self._nbr - if size > -1: - size = min(bytes_left, size) - else: - size = bytes_left - # END handle size - - data = self._stream.readline(size) - self._nbr += len(data) - - # Handle final byte. - if self._size - self._nbr == 0: - self._stream.read(1) - # END finish reading - - return data - - def readlines(self, size: int = -1) -> List[bytes]: - if self._nbr == self._size: - return [] - - # Leave all additional logic to our readline method, we just check the size. - out = [] - nbr = 0 - while True: - line = self.readline() - if not line: - break - out.append(line) - if size > -1: - nbr += len(line) - if nbr > size: - break - # END handle size constraint - # END readline loop - return out - - # skipcq: PYL-E0301 - def __iter__(self) -> "Git.CatFileContentStream": - return self - - def __next__(self) -> bytes: - line = self.readline() - if not line: - raise StopIteration - - return line - - next = __next__ - - def __del__(self) -> None: - bytes_left = self._size - self._nbr - if bytes_left: - # Read and discard - seeking is impossible within a stream. - # This includes any terminating newline. 
- self._stream.read(bytes_left + 1) - # END handle incomplete read + CatFileContentStream: TypeAlias = _CatFileContentStream def __init__(self, working_dir: Union[None, PathLike] = None) -> None: """Initialize this instance with: @@ -971,7 +1004,7 @@ def __init__(self, working_dir: Union[None, PathLike] = None) -> None: def __getattribute__(self, name: str) -> Any: if name == "USE_SHELL": - _warn_use_shell(False) + _warn_use_shell(extra_danger=False) return super().__getattribute__(name) def __getattr__(self, name: str) -> Any: @@ -1098,9 +1131,28 @@ def execute( information (stdout). :param command: - The command argument list to execute. - It should be a sequence of program arguments, or a string. The - program to execute is the first item in the args sequence or string. + The command to execute. A sequence of program arguments is recommended. + A string is also accepted, but its meaning is strongly platform-dependent. + + By default, a shell is not used. On Unix-like systems, a string is the whole + program name (so ``"git log -n 1"`` raises :class:`GitCommandNotFound`). On + Windows, the program parses the arguments itself, so multi-word strings can + work but are not portable. + + Avoid ``shell=True`` (and :attr:`Git.USE_SHELL`): this runs the command in + a shell, which is generally unsafe. The shell interprets metacharacters + such as ``;``, ``|``, ``&``, ``$(...)``, ``$VAR``, ``%VAR%``, and ``^`` + (depending on the platform) as syntax. Any untrusted text in the command + can then execute arbitrary OS commands. See :attr:`Git.USE_SHELL`. + + Producing a sequence automatically by :func:`shlex.split` and passing it + as the command is far safer than ``shell=True``. But :func:`shlex.split` + parses POSIX shell syntax on all systems, and the result is still unsafe + for anything but *fixed, fully trusted* strings. Do not use it on strings + built by interpolating values: whitespace or quoting in an untrusted value + can still inject arguments. 
For input derived in any way from untrusted + data, build the argument sequence yourself, while ensuring each argument + is fully sanitized. :param istream: Standard input filehandle passed to :class:`subprocess.Popen`. @@ -1168,6 +1220,11 @@ def execute( needed (nor useful) to work around any known operating system specific issues. + On Unix-like systems, when migrating away from passing string commands with + ``shell=True``, :func:`shlex.split` may serve as a transitional step in rare + cases, with extreme care. (Drop ``shell=True`` and pass the resulting + sequence as the command.) See the `command` parameter above on the risks. + :param env: A dictionary of environment variables to be passed to :class:`subprocess.Popen`. @@ -1319,7 +1376,7 @@ def communicate() -> Tuple[AnyStr, AnyStr]: out, err = proc.communicate() watchdog.cancel() if kill_check.is_set(): - err = 'Timeout: the command "%s" did not complete in %d ' "secs." % ( + err = 'Timeout: the command "%s" did not complete in %d secs.' % ( " ".join(redacted_command), timeout, ) @@ -1343,25 +1400,29 @@ def communicate() -> Tuple[AnyStr, AnyStr]: if output_stream is None: stdout_value, stderr_value = communicate() # Strip trailing "\n". 
- if stdout_value.endswith(newline) and strip_newline_in_stdout: # type: ignore[arg-type] + if stdout_value is not None and stdout_value.endswith(newline) and strip_newline_in_stdout: # type: ignore[arg-type] stdout_value = stdout_value[:-1] - if stderr_value.endswith(newline): # type: ignore[arg-type] + if stderr_value is not None and stderr_value.endswith(newline): # type: ignore[arg-type] stderr_value = stderr_value[:-1] status = proc.returncode else: max_chunk_size = max_chunk_size if max_chunk_size and max_chunk_size > 0 else io.DEFAULT_BUFFER_SIZE - stream_copy(proc.stdout, output_stream, max_chunk_size) - stdout_value = proc.stdout.read() - stderr_value = proc.stderr.read() + if proc.stdout is not None: + stream_copy(proc.stdout, output_stream, max_chunk_size) + stdout_value = proc.stdout.read() + if proc.stderr is not None: + stderr_value = proc.stderr.read() # Strip trailing "\n". - if stderr_value.endswith(newline): # type: ignore[arg-type] + if stderr_value is not None and stderr_value.endswith(newline): # type: ignore[arg-type] stderr_value = stderr_value[:-1] status = proc.wait() # END stdout handling finally: - proc.stdout.close() - proc.stderr.close() + if proc.stdout is not None: + proc.stdout.close() + if proc.stderr is not None: + proc.stderr.close() if self.GIT_PYTHON_TRACE == "full": cmdstr = " ".join(redacted_command) @@ -1551,7 +1612,7 @@ def _call_process( turns into:: - git rev-list max-count 10 --header master + git rev-list --max-count=10 --header master :return: Same as :meth:`execute`. 
If no args are given, used :meth:`execute`'s diff --git a/git/config.py b/git/config.py index de3508360..82747eadd 100644 --- a/git/config.py +++ b/git/config.py @@ -66,12 +66,15 @@ CONFIG_LEVELS: ConfigLevels_Tup = ("system", "user", "global", "repository") """The configuration level of a configuration file.""" -CONDITIONAL_INCLUDE_REGEXP = re.compile(r"(?<=includeIf )\"(gitdir|gitdir/i|onbranch):(.+)\"") +CONDITIONAL_INCLUDE_REGEXP = re.compile(r"(?<=includeIf )\"(gitdir|gitdir/i|onbranch|hasconfig:remote\.\*\.url):(.+)\"") """Section pattern to detect conditional includes. See: https://git-scm.com/docs/git-config#_conditional_includes """ +UNSAFE_CONFIG_CHARS_RE = re.compile(r"[\r\n\x00]") +"""Characters that cannot be safely written in config names or values.""" + class MetaParserBuilder(abc.ABCMeta): # noqa: B024 """Utility class wrapping base-class methods into decorators that assure read-only @@ -87,15 +90,15 @@ def __new__(cls, name: str, bases: Tuple, clsdict: Dict[str, Any]) -> "MetaParse mutating_methods = clsdict[kmm] for base in bases: methods = (t for t in inspect.getmembers(base, inspect.isroutine) if not t[0].startswith("_")) - for name, method in methods: - if name in clsdict: + for method_name, method in methods: + if method_name in clsdict: continue method_with_values = needs_values(method) - if name in mutating_methods: + if method_name in mutating_methods: method_with_values = set_dirty_and_flush_changes(method_with_values) # END mutating methods handling - clsdict[name] = method_with_values + clsdict[method_name] = method_with_values # END for each name/method pair # END for each base # END if mutating methods configuration is set @@ -496,19 +499,26 @@ def string_decode(v: str) -> str: if mo: # We might just have handled the last line, which could contain a quotation we want to remove. 
optname, vi, optval = mo.group("option", "vi", "value") + optname = self.optionxform(optname.rstrip()) + if vi in ("=", ":") and ";" in optval and not optval.strip().startswith('"'): pos = optval.find(";") if pos != -1 and optval[pos - 1].isspace(): optval = optval[:pos] optval = optval.strip() - if optval == '""': - optval = "" - # END handle empty string - optname = self.optionxform(optname.rstrip()) - if len(optval) > 1 and optval[0] == '"' and optval[-1] != '"': + + if len(optval) < 2 or optval[0] != '"': + # Does not open quoting. + pass + elif optval[-1] != '"': + # Opens quoting and does not close: appears to start multi-line quoting. is_multi_line = True optval = string_decode(optval[1:]) - # END handle multi-line + elif optval.find("\\", 1, -1) == -1 and optval.find('"', 1, -1) == -1: + # Opens and closes quoting. Single line, and all we need is quote removal. + optval = optval[1:-1] + # TODO: Handle other quoted content, especially well-formed backslash escapes. + # Preserves multiple values for duplicate optnames. cursect.add(optname, optval) else: @@ -542,11 +552,21 @@ def _included_paths(self) -> List[Tuple[str, str]]: :return: The list of paths, where each path is a tuple of (option, value). 
""" + + def _all_items(section: str) -> List[Tuple[str, str]]: + """Return all (key, value) pairs for a section, including duplicate keys.""" + return [ + (key, value) + for key, values in self._sections[section].items_all() + if key != "__name__" + for value in values + ] + paths = [] for section in self.sections(): if section == "include": - paths += self.items(section) + paths += _all_items(section) match = CONDITIONAL_INCLUDE_REGEXP.search(section) if match is None or self._repo is None: @@ -567,12 +587,12 @@ def _included_paths(self) -> List[Tuple[str, str]]: if keyword.endswith("/i"): value = re.sub( r"[a-zA-Z]", - lambda m: "[{}{}]".format(m.group().lower(), m.group().upper()), + lambda m: f"[{m.group().lower()!r}{m.group().upper()!r}]", value, ) if self._repo.git_dir: - if fnmatch.fnmatchcase(str(self._repo.git_dir), value): - paths += self.items(section) + if fnmatch.fnmatchcase(os.fspath(self._repo.git_dir), value): + paths += _all_items(section) elif keyword == "onbranch": try: @@ -582,8 +602,12 @@ def _included_paths(self) -> List[Tuple[str, str]]: continue if fnmatch.fnmatchcase(branch_name, value): - paths += self.items(section) - + paths += _all_items(section) + elif keyword == "hasconfig:remote.*.url": + for remote in self._repo.remotes: + if fnmatch.fnmatchcase(remote.url, value): + paths += _all_items(section) + break return paths def read(self) -> None: # type: ignore[override] @@ -622,8 +646,6 @@ def read(self) -> None: # type: ignore[override] file_path = cast(IO[bytes], file_path) self._read(file_path, file_path.name) else: - # Assume a path if it is not a file-object. 
- file_path = cast(PathLike, file_path) try: with open(file_path, "rb") as fp: file_ok = True @@ -757,8 +779,9 @@ def _assure_writable(self, method_name: str) -> None: if self.read_only: raise IOError("Cannot execute non-constant method %s.%s" % (self, method_name)) - def add_section(self, section: str) -> None: + def add_section(self, section: "cp._SectionName") -> None: """Assures added options will stay in order.""" + self._assure_config_name_safe(section, "section") return super().add_section(section) @property @@ -863,6 +886,30 @@ def _value_to_string(self, value: Union[str, bytes, int, float, bool]) -> str: return str(value) return force_text(value) + def _value_to_string_safe(self, value: Union[str, bytes, int, float, bool]) -> str: + value_str = self._value_to_string(value) + if UNSAFE_CONFIG_CHARS_RE.search(value_str): + raise ValueError("Git config values must not contain CR, LF, or NUL") + return value_str + + def _assure_config_name_safe(self, name: "cp._SectionName", label: str) -> None: + if isinstance(name, str) and UNSAFE_CONFIG_CHARS_RE.search(name): + raise ValueError("Git config %s names must not contain CR, LF, or NUL" % label) + + @needs_values + @set_dirty_and_flush_changes + def set( + self, + section: str, + option: str, + value: Union[str, bytes, int, float, bool, None] = None, + ) -> None: + self._assure_config_name_safe(section, "section") + self._assure_config_name_safe(option, "option") + if value is not None: + value = self._value_to_string_safe(value) + return super().set(section, option, value) + @needs_values @set_dirty_and_flush_changes def set_value(self, section: str, option: str, value: Union[str, bytes, int, float, bool]) -> "GitConfigParser": @@ -883,9 +930,12 @@ def set_value(self, section: str, option: str, value: Union[str, bytes, int, flo :return: This instance """ + self._assure_config_name_safe(section, "section") + self._assure_config_name_safe(option, "option") + value_str = self._value_to_string_safe(value) if not 
self.has_section(section): self.add_section(section) - self.set(section, option, self._value_to_string(value)) + super().set(section, option, value_str) return self @needs_values @@ -910,9 +960,12 @@ def add_value(self, section: str, option: str, value: Union[str, bytes, int, flo :return: This instance """ + self._assure_config_name_safe(section, "section") + self._assure_config_name_safe(option, "option") + value_str = self._value_to_string_safe(value) if not self.has_section(section): self.add_section(section) - self._sections[section].add(option, self._value_to_string(value)) + self._sections[section].add(option, value_str) return self def rename_section(self, section: str, new_name: str) -> "GitConfigParser": @@ -929,6 +982,7 @@ def rename_section(self, section: str, new_name: str) -> "GitConfigParser": """ if not self.has_section(section): raise ValueError("Source section '%s' doesn't exist" % section) + self._assure_config_name_safe(new_name, "section") if self.has_section(new_name): raise ValueError("Destination section '%s' already exists" % new_name) diff --git a/git/diff.py b/git/diff.py index 9c6ae59e0..5af53e556 100644 --- a/git/diff.py +++ b/git/diff.py @@ -3,7 +3,7 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -__all__ = ["DiffConstants", "NULL_TREE", "INDEX", "Diffable", "DiffIndex", "Diff"] +__all__ = ["DiffConstants", "NULL_TREE", "NULL_TREE_SHA", "INDEX", "Diffable", "DiffIndex", "Diff"] import enum import re @@ -23,13 +23,14 @@ List, Match, Optional, + Sequence, Tuple, TYPE_CHECKING, TypeVar, Union, cast, ) -from git.types import Literal, PathLike +from git.types import PathLike, Literal if TYPE_CHECKING: from subprocess import Popen @@ -83,6 +84,9 @@ class DiffConstants(enum.Enum): :const:`git.NULL_TREE` and :const:`Diffable.NULL_TREE`. 
""" +NULL_TREE_SHA = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" +"""SHA of Git's canonical empty tree object.""" + INDEX: Literal[DiffConstants.INDEX] = DiffConstants.INDEX """Stand-in indicating you want to diff against the index. @@ -289,7 +293,7 @@ class DiffIndex(List[T_Diff]): The class improves the diff handling convenience. """ - change_type = ("A", "C", "D", "R", "M", "T") + change_type: Sequence[Literal["A", "C", "D", "R", "M", "T"]] = ("A", "C", "D", "R", "M", "T") # noqa: F821 """Change type invariant identifying possible ways a blob can have changed: * ``A`` = Added @@ -598,7 +602,14 @@ def _index_from_patch_format(cls, repo: "Repo", proc: Union["Popen", "Git.AutoIn # FIXME: Here SLURPING raw, need to re-phrase header-regexes linewise. text_list: List[bytes] = [] - handle_process_output(proc, text_list.append, None, finalize_process, decode_streams=False) + stderr_list: List[bytes] = [] + + def finalize_process_with_stderr(proc: Union["Popen", "Git.AutoInterrupt"]) -> None: + finalize_process(proc, stderr=b"".join(stderr_list)) + + handle_process_output( + proc, text_list.append, stderr_list.append, finalize_process_with_stderr, decode_streams=False + ) # For now, we have to bake the stream. text = b"".join(text_list) @@ -764,11 +775,16 @@ def _index_from_raw_format(cls, repo: "Repo", proc: "Popen") -> "DiffIndex[Diff] # :100644 100644 687099101... 37c5e30c8... 
M .gitignore index: "DiffIndex" = DiffIndex() + stderr_list: List[bytes] = [] + + def finalize_process_with_stderr(proc: Union["Popen", "Git.AutoInterrupt"]) -> None: + finalize_process(proc, stderr=b"".join(stderr_list)) + handle_process_output( proc, lambda byt: cls._handle_diff_line(byt, repo, index), - None, - finalize_process, + stderr_list.append, + finalize_process_with_stderr, decode_streams=False, ) diff --git a/git/ext/gitdb b/git/ext/gitdb index 3d3e9572d..0a019a2e2 160000 --- a/git/ext/gitdb +++ b/git/ext/gitdb @@ -1 +1 @@ -Subproject commit 3d3e9572dc452fea53d328c101b3d1440bbefe40 +Subproject commit 0a019a2e2bd73158cf8b637ad78b5d4b8f15e42e diff --git a/git/index/base.py b/git/index/base.py index 39cc9143c..f03b452dc 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -407,7 +407,7 @@ def raise_exc(e: Exception) -> NoReturn: r = str(self.repo.working_tree_dir) rs = r + os.sep for path in paths: - abs_path = str(path) + abs_path = os.fspath(path) if not osp.isabs(abs_path): abs_path = osp.join(r, path) # END make absolute path @@ -508,7 +508,7 @@ def iter_blobs( :param predicate: Function(t) returning ``True`` if tuple(stage, Blob) should be yielded by - the iterator. A default filter, the `~git.index.typ.BlobFilter`, allows you + the iterator. A default filter, the :class:`~git.index.typ.BlobFilter`, allows you to yield blobs only if they match a given list of paths. """ for entry in self.entries.values(): @@ -530,7 +530,10 @@ def unmerged_blobs(self) -> Dict[PathLike, List[Tuple[StageType, Blob]]]: stage. That is, a file removed on the 'other' branch whose entries are at stage 3 will not have a stage 3 entry. 
""" - is_unmerged_blob = lambda t: t[0] != 0 + + def is_unmerged_blob(t: Tuple[StageType, Blob]) -> bool: + return t[0] != 0 + path_map: Dict[PathLike, List[Tuple[StageType, Blob]]] = {} for stage, blob in self.iter_blobs(is_unmerged_blob): path_map.setdefault(blob.path, []).append((stage, blob)) @@ -653,9 +656,12 @@ def _to_relative_path(self, path: PathLike) -> PathLike: return path if self.repo.bare: raise InvalidGitRepositoryError("require non-bare repository") - if not osp.normpath(str(path)).startswith(str(self.repo.working_tree_dir)): + if not osp.normpath(path).startswith(str(self.repo.working_tree_dir)): raise ValueError("Absolute path %r is not in git repository at %r" % (path, self.repo.working_tree_dir)) - return os.path.relpath(path, self.repo.working_tree_dir) + result = os.path.relpath(path, self.repo.working_tree_dir) + if os.fspath(path).endswith(os.sep) and not result.endswith(os.sep): + result += os.sep + return result def _preprocess_add_items( self, items: Union[PathLike, Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]]] @@ -687,12 +693,17 @@ def _store_path(self, filepath: PathLike, fprogress: Callable) -> BaseIndexEntry This must be ensured in the calling code. """ st = os.lstat(filepath) # Handles non-symlinks as well. + if S_ISLNK(st.st_mode): # In PY3, readlink is a string, but we need bytes. # In PY2, it was just OS encoded bytes, we assumed UTF-8. 
- open_stream: Callable[[], BinaryIO] = lambda: BytesIO(force_bytes(os.readlink(filepath), encoding=defenc)) + def open_stream() -> BinaryIO: + return BytesIO(force_bytes(os.readlink(filepath), encoding=defenc)) else: - open_stream = lambda: open(filepath, "rb") + + def open_stream() -> BinaryIO: + return open(filepath, "rb") + with open_stream() as stream: fprogress(filepath, False, filepath) istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream)) @@ -767,7 +778,7 @@ def add( - path string Strings denote a relative or absolute path into the repository pointing - to an existing file, e.g., ``CHANGES``, `lib/myfile.ext``, + to an existing file, e.g., ``CHANGES``, ``lib/myfile.ext``, ``/home/gitrepo/lib/myfile.ext``. Absolute paths must start with working tree directory of this index's @@ -786,7 +797,7 @@ def add( They are added at stage 0. - - :class:~`git.objects.blob.Blob` or + - :class:`~git.objects.blob.Blob` or :class:`~git.objects.submodule.base.Submodule` object Blobs are added as they are assuming a valid mode is set. @@ -812,7 +823,7 @@ def add( - :class:`~git.index.typ.BaseIndexEntry` or type - Handling equals the one of :class:~`git.objects.blob.Blob` objects, but + Handling equals the one of :class:`~git.objects.blob.Blob` objects, but the stage may be explicitly set. Please note that Index Entries require binary sha's. @@ -995,7 +1006,7 @@ def remove( The path string may include globs, such as ``*.c``. - - :class:~`git.objects.blob.Blob` object + - :class:`~git.objects.blob.Blob` object Only the path portion is used in this case. @@ -1025,7 +1036,7 @@ def remove( args.append("--") # Preprocess paths. - paths = self._items_to_rela_paths(items) + paths = list(map(os.fspath, self._items_to_rela_paths(items))) # type: ignore[arg-type] removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines() # Process output to gain proper paths. 
@@ -1122,6 +1133,7 @@ def commit( author_date: Union[datetime.datetime, str, None] = None, commit_date: Union[datetime.datetime, str, None] = None, skip_hooks: bool = False, + trailers: Union[None, "Dict[str, str]", "List[Tuple[str, str]]"] = None, ) -> Commit: """Commit the current default index file, creating a :class:`~git.objects.commit.Commit` object. @@ -1158,6 +1170,7 @@ def commit( committer=committer, author_date=author_date, commit_date=commit_date, + trailers=trailers, ) if not skip_hooks: run_commit_hook("post-commit", self) @@ -1333,8 +1346,11 @@ def handle_stderr(proc: "Popen[bytes]", iter_checked_out_files: Iterable[PathLik kwargs["as_process"] = True kwargs["istream"] = subprocess.PIPE proc = self.repo.git.checkout_index(args, **kwargs) + # FIXME: Reading from GIL! - make_exc = lambda: GitCommandError(("git-checkout-index",) + tuple(args), 128, proc.stderr.read()) + def make_exc() -> GitCommandError: + return GitCommandError(("git-checkout-index", *args), 128, proc.stderr.read()) + checked_out_files: List[PathLike] = [] for path in paths: @@ -1345,11 +1361,11 @@ def handle_stderr(proc: "Popen[bytes]", iter_checked_out_files: Iterable[PathLik try: self.entries[(co_path, 0)] except KeyError: - folder = str(co_path) + folder = co_path if not folder.endswith("/"): folder += "/" for entry in self.entries.values(): - if str(entry.path).startswith(folder): + if os.fspath(entry.path).startswith(folder): p = entry.path self._write_path_to_stdin(proc, p, p, make_exc, fprogress, read_from_stdout=False) checked_out_files.append(p) @@ -1464,12 +1480,11 @@ def reset( return self - # FIXME: This is documented to accept the same parameters as Diffable.diff, but this - # does not handle NULL_TREE for `other`. (The suppressed mypy error is about this.) 
def diff( self, - other: Union[ # type: ignore[override] + other: Union[ Literal[git_diff.DiffConstants.INDEX], + Literal[git_diff.DiffConstants.NULL_TREE], "Tree", "Commit", str, @@ -1496,6 +1511,44 @@ def diff( if other is self.INDEX: return git_diff.DiffIndex() + if other == git_diff.NULL_TREE or other == git_diff.NULL_TREE_SHA: + args: List[Union[PathLike, str]] = [ + "--cached", + git_diff.NULL_TREE_SHA, + "--abbrev=40", + "--full-index", + ] + + if not any(x in kwargs for x in ("find_renames", "no_renames", "M")): + args.append("-M") + + if create_patch: + args.append("-p") + args.append("--no-ext-diff") + else: + args.append("--raw") + args.append("-z") + + args.append("--no-color") + + if paths is not None and not isinstance(paths, (tuple, list)): + paths = [paths] + + if paths: + args.append("--") + args.extend(paths) + + kwargs["as_process"] = True + proc = self.repo.git.diff(*args, **kwargs) + + diff_method = ( + git_diff.Diff._index_from_patch_format if create_patch else git_diff.Diff._index_from_raw_format + ) + index = diff_method(self.repo, proc) + + proc.wait() + return index + # Index against anything but None is a reverse diff with the respective item. # Handle existing -R flags properly. # Transform strings to the object so that we can call diff on it. 
diff --git a/git/index/fun.py b/git/index/fun.py index 59cce6ae6..629c19b1e 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -36,7 +36,7 @@ ) from git.util import IndexFileSHA1Writer, finalize_process -from .typ import BaseIndexEntry, IndexEntry, CE_NAMEMASK, CE_STAGESHIFT +from .typ import CE_EXTENDED, BaseIndexEntry, IndexEntry, CE_NAMEMASK, CE_STAGESHIFT from .util import pack, unpack # typing ----------------------------------------------------------------------------- @@ -87,7 +87,7 @@ def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None: return env = os.environ.copy() - env["GIT_INDEX_FILE"] = safe_decode(str(index.path)) + env["GIT_INDEX_FILE"] = safe_decode(os.fspath(index.path)) env["GIT_EDITOR"] = ":" cmd = [hp] try: @@ -158,7 +158,7 @@ def write_cache( write = stream_sha.write # Header - version = 2 + version = 3 if any(entry.extended_flags for entry in entries) else 2 write(b"DIRC") write(pack(">LL", version, len(entries))) @@ -172,6 +172,8 @@ def write_cache( plen = len(path) & CE_NAMEMASK # Path length assert plen == len(path), "Path %s too long to fit into index" % entry.path flags = plen | (entry.flags & CE_NAMEMASK_INV) # Clear possible previous values. + if entry.extended_flags: + flags |= CE_EXTENDED write( pack( ">LLLLLL20sH", @@ -185,6 +187,8 @@ def write_cache( flags, ) ) + if entry.extended_flags: + write(pack(">H", entry.extended_flags)) write(path) real_size = (tell() - beginoffset + 8) & ~7 write(b"\0" * ((beginoffset + real_size) - tell())) @@ -206,8 +210,7 @@ def read_header(stream: IO[bytes]) -> Tuple[int, int]: unpacked = cast(Tuple[int, int], unpack(">LL", stream.read(4 * 2))) version, num_entries = unpacked - # TODO: Handle version 3: extended data, see read-cache.c. 
- assert version in (1, 2) + assert version in (1, 2, 3), "Unsupported git index version %i, only 1, 2, and 3 are supported" % version return version, num_entries @@ -260,12 +263,15 @@ def read_cache( ctime = unpack(">8s", read(8))[0] mtime = unpack(">8s", read(8))[0] (dev, ino, mode, uid, gid, size, sha, flags) = unpack(">LLLLLL20sH", read(20 + 4 * 6 + 2)) + extended_flags = 0 + if flags & CE_EXTENDED: + extended_flags = unpack(">H", read(2))[0] path_size = flags & CE_NAMEMASK path = read(path_size).decode(defenc) real_size = (tell() - beginoffset + 8) & ~7 read((beginoffset + real_size) - tell()) - entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size)) + entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size, extended_flags)) # entry_key would be the method to use, but we save the effort. entries[(path, entry.stage)] = entry count += 1 diff --git a/git/index/typ.py b/git/index/typ.py index 974252528..927633a9f 100644 --- a/git/index/typ.py +++ b/git/index/typ.py @@ -32,6 +32,9 @@ CE_VALID = 0x8000 CE_STAGESHIFT = 12 +CE_EXT_SKIP_WORKTREE = 0x4000 +CE_EXT_INTENT_TO_ADD = 0x2000 + # } END invariants @@ -87,6 +90,8 @@ class BaseIndexEntryHelper(NamedTuple): uid: int = 0 gid: int = 0 size: int = 0 + # version 3 extended flags, only when (flags & CE_EXTENDED) is set + extended_flags: int = 0 class BaseIndexEntry(BaseIndexEntryHelper): @@ -102,7 +107,7 @@ def __new__( cls, inp_tuple: Union[ Tuple[int, bytes, int, PathLike], - Tuple[int, bytes, int, PathLike, bytes, bytes, int, int, int, int, int], + Tuple[int, bytes, int, PathLike, bytes, bytes, int, int, int, int, int, int], ], ) -> "BaseIndexEntry": """Override ``__new__`` to allow construction from a tuple for backwards @@ -134,6 +139,14 @@ def stage(self) -> int: """ return (self.flags & CE_STAGEMASK) >> CE_STAGESHIFT + @property + def skip_worktree(self) -> bool: + return (self.extended_flags & CE_EXT_SKIP_WORKTREE) > 0 + + @property + def 
intent_to_add(self) -> bool: + return (self.extended_flags & CE_EXT_INTENT_TO_ADD) > 0 + @classmethod def from_blob(cls, blob: Blob, stage: int = 0) -> "BaseIndexEntry": """:return: Fully equipped BaseIndexEntry at the given stage""" @@ -179,7 +192,7 @@ def from_base(cls, base: "BaseIndexEntry") -> "IndexEntry": Instance of type :class:`BaseIndexEntry`. """ time = pack(">LL", 0, 0) - return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) + return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) # type: ignore[arg-type] @classmethod def from_blob(cls, blob: Blob, stage: int = 0) -> "IndexEntry": @@ -198,5 +211,5 @@ def from_blob(cls, blob: Blob, stage: int = 0) -> "IndexEntry": 0, 0, blob.size, - ) + ) # type: ignore[arg-type] ) diff --git a/git/index/util.py b/git/index/util.py index e59cb609f..982a5afb7 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -15,7 +15,7 @@ # typing ---------------------------------------------------------------------- -from typing import Any, Callable, TYPE_CHECKING, Optional, Type +from typing import Any, Callable, TYPE_CHECKING, Optional, Type, cast from git.types import Literal, PathLike, _T @@ -106,7 +106,7 @@ def git_working_dir(func: Callable[..., _T]) -> Callable[..., _T]: @wraps(func) def set_git_working_dir(self: "IndexFile", *args: Any, **kwargs: Any) -> _T: cur_wd = os.getcwd() - os.chdir(str(self.repo.working_tree_dir)) + os.chdir(cast(PathLike, self.repo.working_tree_dir)) try: return func(self, *args, **kwargs) finally: diff --git a/git/objects/base.py b/git/objects/base.py index eeaebc09b..faf600c6b 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -122,7 +122,7 @@ def new(cls, repo: "Repo", id: Union[str, "Reference"]) -> AnyGitObject: :return: New :class:`Object` instance of a type appropriate to the object type behind `id`. 
The id of the newly created object will be a binsha even though the - input id may have been a `~git.refs.reference.Reference` or rev-spec. + input id may have been a :class:`~git.refs.reference.Reference` or rev-spec. :param id: :class:`~git.refs.reference.Reference`, rev-spec, or hexsha. @@ -218,7 +218,7 @@ class IndexObject(Object): """Base for all objects that can be part of the index file. The classes representing git object types that can be part of the index file are - :class:`~git.objects.tree.Tree and :class:`~git.objects.blob.Blob`. In addition, + :class:`~git.objects.tree.Tree` and :class:`~git.objects.blob.Blob`. In addition, :class:`~git.objects.submodule.base.Submodule`, which is not really a git object type but can be part of an index file, is also a subclass. """ diff --git a/git/objects/blob.py b/git/objects/blob.py index 58de59642..f7d49c9cc 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -6,6 +6,7 @@ __all__ = ["Blob"] from mimetypes import guess_type +import os import sys if sys.version_info >= (3, 8): @@ -44,5 +45,5 @@ def mime_type(self) -> str: """ guesses = None if self.path: - guesses = guess_type(str(self.path)) + guesses = guess_type(os.fspath(self.path)) return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/git/objects/commit.py b/git/objects/commit.py index 0ceb46609..da7677ee0 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -289,7 +289,7 @@ def name_rev(self) -> str: """ :return: String describing the commits hex sha based on the closest - `~git.refs.reference.Reference`. + :class:`~git.refs.reference.Reference`. :note: Mostly useful for UI purposes. @@ -349,7 +349,7 @@ def iter_items( return cls._iter_from_process_or_stream(repo, proc) def iter_parents(self, paths: Union[PathLike, Sequence[PathLike]] = "", **kwargs: Any) -> Iterator["Commit"]: - R"""Iterate _all_ parents of this commit. + R"""Iterate *all* parents of this commit. 
:param paths: Optional path or list of paths limiting the :class:`Commit`\s to those that @@ -450,14 +450,7 @@ def trailers_list(self) -> List[Tuple[str, str]]: :return: List containing key-value tuples of whitespace stripped trailer information. """ - cmd = ["git", "interpret-trailers", "--parse"] - proc: Git.AutoInterrupt = self.repo.git.execute( # type: ignore[call-overload] - cmd, - as_process=True, - istream=PIPE, - ) - trailer: str = proc.communicate(str(self.message).encode())[0].decode("utf8") - trailer = trailer.strip() + trailer = self._interpret_trailers(self.repo, self.message, ["--parse"], encoding=self.encoding).strip() if not trailer: return [] @@ -469,6 +462,27 @@ def trailers_list(self) -> List[Tuple[str, str]]: return trailer_list + @classmethod + def _interpret_trailers( + cls, + repo: "Repo", + message: Union[str, bytes], + trailer_args: Sequence[str], + encoding: str = default_encoding, + ) -> str: + message_bytes = message if isinstance(message, bytes) else message.encode(encoding, errors="strict") + cmd = [repo.git.GIT_PYTHON_GIT_EXECUTABLE, "interpret-trailers", *trailer_args] + proc: Git.AutoInterrupt = repo.git.execute( # type: ignore[call-overload] + cmd, + as_process=True, + istream=PIPE, + ) + try: + stdout_bytes, _ = proc.communicate(message_bytes) + return stdout_bytes.decode(encoding, errors="strict") + finally: + finalize_process(proc) + @property def trailers_dict(self) -> Dict[str, List[str]]: """Get the trailers of the message as a dictionary. @@ -570,6 +584,7 @@ def create_from_tree( committer: Union[None, Actor] = None, author_date: Union[None, str, datetime.datetime] = None, commit_date: Union[None, str, datetime.datetime] = None, + trailers: Union[None, Dict[str, str], List[Tuple[str, str]]] = None, ) -> "Commit": """Commit the given tree, creating a :class:`Commit` object. @@ -609,6 +624,14 @@ def create_from_tree( :param commit_date: The timestamp for the committer field. 
+ :param trailers: + Optional trailer key-value pairs to append to the commit message. + Can be a dictionary mapping trailer keys to values, or a list of + ``(key, value)`` tuples (useful when the same key appears multiple + times, e.g. multiple ``Signed-off-by`` trailers). Trailers are + appended using ``git interpret-trailers``. + See :manpage:`git-interpret-trailers(1)`. + :return: :class:`Commit` object representing the new commit. @@ -678,6 +701,21 @@ def create_from_tree( tree = repo.tree(tree) # END tree conversion + # APPLY TRAILERS + if trailers: + trailer_args: List[str] = [] + if isinstance(trailers, dict): + for key, val in trailers.items(): + trailer_args.append("--trailer") + trailer_args.append(f"{key}: {val}") + else: + for key, val in trailers: + trailer_args.append("--trailer") + trailer_args.append(f"{key}: {val}") + + message = cls._interpret_trailers(repo, str(message), trailer_args) + # END apply trailers + # CREATE NEW COMMIT new_commit = cls( repo, @@ -900,7 +938,7 @@ def co_authors(self) -> List[Actor]: if self.message: results = re.findall( r"^Co-authored-by: (.*) <(.*?)>$", - self.message, + str(self.message), re.MULTILINE, ) for author in results: diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index fa60bcdaf..d183672db 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -11,6 +11,7 @@ import stat import sys import uuid +import urllib import git from git.cmd import Git @@ -65,7 +66,7 @@ if TYPE_CHECKING: from git.index import IndexFile from git.objects.commit import Commit - from git.refs import Head + from git.refs import Head, RemoteReference from git.repo import Repo # ----------------------------------------------------------------------------- @@ -351,7 +352,12 @@ def _clone_repo( module_abspath_dir = osp.dirname(module_abspath) if not osp.isdir(module_abspath_dir): os.makedirs(module_abspath_dir) - module_checkout_path = osp.join(str(repo.working_tree_dir), path) + 
module_checkout_path = osp.join(repo.working_tree_dir, path) # type: ignore[arg-type] + + if url.startswith("../"): + remote_name = cast("RemoteReference", repo.active_branch.tracking_branch()).remote_name + repo_remote_url = repo.remote(remote_name).url + url = os.path.join(repo_remote_url, url) clone = git.Repo.clone_from( url, @@ -535,7 +541,7 @@ def add( if sm.exists(): # Reretrieve submodule from tree. try: - sm = repo.head.commit.tree[str(path)] + sm = repo.head.commit.tree[os.fspath(path)] sm._name = name return sm except KeyError: @@ -794,9 +800,13 @@ def update( + "Cloning url '%s' to '%s' in submodule %r" % (self.url, checkout_module_abspath, self.name), ) if not dry_run: + if self.url.startswith("."): + url = urllib.parse.urljoin(self.repo.remotes.origin.url + "/", self.url) + else: + url = self.url mrepo = self._clone_repo( self.repo, - self.url, + url, self.path, self.name, n=True, diff --git a/git/objects/tree.py b/git/objects/tree.py index 09184a781..a3d611c80 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -5,6 +5,7 @@ __all__ = ["TreeModifier", "Tree"] +import os import sys import git.diff as git_diff @@ -50,7 +51,9 @@ # -------------------------------------------------------- -cmp: Callable[[str, str], int] = lambda a, b: (a > b) - (a < b) + +def cmp(a: str, b: str) -> int: + return (a > b) - (a < b) class TreeModifier: @@ -228,7 +231,7 @@ def _iter_convert_to_object(self, iterable: Iterable[TreeCacheTup]) -> Iterator[ raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) from e # END for each item - def join(self, file: str) -> IndexObjUnion: + def join(self, file: PathLike) -> IndexObjUnion: """Find the named object in this tree's contents. :return: @@ -239,6 +242,7 @@ def join(self, file: str) -> IndexObjUnion: If the given file or tree does not exist in this tree. 
""" msg = "Blob or Tree named %r not found" + file = os.fspath(file) if "/" in file: tree = self item = self @@ -267,7 +271,7 @@ def join(self, file: str) -> IndexObjUnion: raise KeyError(msg % file) # END handle long paths - def __truediv__(self, file: str) -> IndexObjUnion: + def __truediv__(self, file: PathLike) -> IndexObjUnion: """The ``/`` operator is another syntax for joining. See :meth:`join` for details. diff --git a/git/refs/head.py b/git/refs/head.py index 683634451..3c43993e7 100644 --- a/git/refs/head.py +++ b/git/refs/head.py @@ -22,7 +22,6 @@ from git.types import Commit_ish, PathLike if TYPE_CHECKING: - from git.objects import Commit from git.refs import RemoteReference from git.repo import Repo @@ -44,9 +43,6 @@ class HEAD(SymbolicReference): __slots__ = () - # TODO: This can be removed once SymbolicReference.commit has static type hints. - commit: "Commit" - def __init__(self, repo: "Repo", path: PathLike = _HEAD_NAME) -> None: if path != self._HEAD_NAME: raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path)) @@ -149,7 +145,7 @@ class Head(Reference): k_config_remote_ref = "merge" # Branch to merge from remote. @classmethod - def delete(cls, repo: "Repo", *heads: "Union[Head, str]", force: bool = False, **kwargs: Any) -> None: + def delete(cls, repo: "Repo", *heads: "Union[Head, str]", force: bool = False, **kwargs: Any) -> None: # type: ignore[override] """Delete the given heads. :param force: diff --git a/git/refs/log.py b/git/refs/log.py index 17e3a94b3..fbbe66b22 100644 --- a/git/refs/log.py +++ b/git/refs/log.py @@ -4,7 +4,6 @@ __all__ = ["RefLog", "RefLogEntry"] from mmap import mmap -import os.path as osp import re import time as _time @@ -126,7 +125,7 @@ def from_line(cls, line: bytes) -> "RefLogEntry": elif len(fields) == 2: info, msg = fields else: - raise ValueError("Line must have up to two TAB-separated fields." 
" Got %s" % repr(line_str)) + raise ValueError("Line must have up to two TAB-separated fields. Got %s" % repr(line_str)) # END handle first split oldhexsha = info[:40] @@ -145,7 +144,7 @@ def from_line(cls, line: bytes) -> "RefLogEntry": actor = Actor._from_string(info[82 : email_end + 1]) time, tz_offset = parse_date(info[email_end + 2 :]) # skipcq: PYL-W0621 - return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg)) + return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg)) # type: ignore [arg-type] class RefLog(List[RefLogEntry], Serializable): @@ -212,8 +211,11 @@ def path(cls, ref: "SymbolicReference") -> str: :param ref: :class:`~git.refs.symbolic.SymbolicReference` instance + + :raise ValueError: + If `ref.path` is invalid or escapes the repository's reflog directory. """ - return osp.join(ref.repo.git_dir, "logs", to_native_path(ref.path)) + return to_native_path(ref._get_validated_reflog_path(ref.repo, ref.path)) @classmethod def iter_entries(cls, stream: Union[str, "BytesIO", mmap]) -> Iterator[RefLogEntry]: diff --git a/git/refs/reference.py b/git/refs/reference.py index e5d473779..0c4327225 100644 --- a/git/refs/reference.py +++ b/git/refs/reference.py @@ -3,6 +3,7 @@ __all__ = ["Reference"] +import os from git.util import IterableObj, LazyMixin from .symbolic import SymbolicReference, T_References @@ -65,7 +66,7 @@ def __init__(self, repo: "Repo", path: PathLike, check_path: bool = True) -> Non If ``False``, you can provide any path. Otherwise the path must start with the default path prefix of this type. """ - if check_path and not str(path).startswith(self._common_path_default + "/"): + if check_path and not os.fspath(path).startswith(self._common_path_default + "/"): raise ValueError(f"Cannot instantiate {self.__class__.__name__!r} from path {path}") self.path: str # SymbolicReference converts to string at the moment. 
super().__init__(repo, path) diff --git a/git/refs/remote.py b/git/refs/remote.py index b4f4f7b36..e16ae70f8 100644 --- a/git/refs/remote.py +++ b/git/refs/remote.py @@ -58,17 +58,20 @@ def delete(cls, repo: "Repo", *refs: "RemoteReference", **kwargs: Any) -> None: `kwargs` are given for comparability with the base class method as we should not narrow the signature. """ + for ref in refs: + cls._check_ref_name_valid(ref.path) + repo.git.branch("-d", "-r", *refs) # The official deletion method will ignore remote symbolic refs - these are # generally ignored in the refs/ folder. We don't though and delete remainders # manually. for ref in refs: try: - os.remove(os.path.join(repo.common_dir, ref.path)) + os.remove(cls._get_validated_path(repo.common_dir, ref.path)) except OSError: pass try: - os.remove(os.path.join(repo.git_dir, ref.path)) + os.remove(cls._get_validated_path(repo.git_dir, ref.path)) except OSError: pass # END for each ref diff --git a/git/refs/symbolic.py b/git/refs/symbolic.py index 510850b2e..020de5e13 100644 --- a/git/refs/symbolic.py +++ b/git/refs/symbolic.py @@ -4,6 +4,7 @@ __all__ = ["SymbolicReference"] import os +from pathlib import Path from gitdb.exc import BadName, BadObject @@ -39,8 +40,8 @@ if TYPE_CHECKING: from git.config import GitConfigParser from git.objects.commit import Actor - from git.refs import Head, TagReference, RemoteReference, Reference from git.refs.log import RefLogEntry + from git.refs.reference import Reference from git.repo import Repo @@ -76,10 +77,10 @@ class SymbolicReference: def __init__(self, repo: "Repo", path: PathLike, check_path: bool = False) -> None: self.repo = repo - self.path = path + self.path: PathLike = path def __str__(self) -> str: - return str(self.path) + return os.fspath(self.path) def __repr__(self) -> str: return '' % (self.__class__.__name__, self.path) @@ -103,12 +104,38 @@ def name(self) -> str: In case of symbolic references, the shortest assumable name is the path itself. 
""" - return str(self.path) + return os.fspath(self.path) @property def abspath(self) -> PathLike: return join_path_native(_git_dir(self.repo, self.path), self.path) + @staticmethod + def _get_validated_path(base: PathLike, path: PathLike) -> str: + path = os.fspath(path) + base_path = os.path.realpath(os.fspath(base)) + abs_path = os.path.realpath(os.path.join(base_path, path)) + try: + common_path = os.path.commonpath([base_path, abs_path]) + except ValueError as e: + raise ValueError("Reference path %r escapes the repository" % path) from e + if os.path.normcase(common_path) != os.path.normcase(base_path): + raise ValueError("Reference path %r escapes the repository" % path) + return abs_path + + @classmethod + def _get_validated_ref_path(cls, repo: "Repo", path: PathLike) -> str: + """Return the absolute filesystem path for a ref after validating it.""" + cls._check_ref_name_valid(path) + ref_path = os.fspath(path) + return cls._get_validated_path(_git_dir(repo, ref_path), ref_path) + + @classmethod + def _get_validated_reflog_path(cls, repo: "Repo", path: PathLike) -> str: + """Return the absolute filesystem path for a reflog after validating it.""" + cls._check_ref_name_valid(path) + return cls._get_validated_path(os.path.join(repo.git_dir, "logs"), path) + @classmethod def _get_packed_refs_path(cls, repo: "Repo") -> str: return os.path.join(repo.common_dir, "packed-refs") @@ -178,7 +205,7 @@ def _check_ref_name_valid(ref_path: PathLike) -> None: """ previous: Union[str, None] = None one_before_previous: Union[str, None] = None - for c in str(ref_path): + for c in os.fspath(ref_path): if c in " ~^:?*[\\": raise ValueError( f"Invalid reference '{ref_path}': references cannot contain spaces, tildes (~), carets (^)," @@ -212,7 +239,7 @@ def _check_ref_name_valid(ref_path: PathLike) -> None: raise ValueError(f"Invalid reference '{ref_path}': references cannot end with a forward slash (/)") elif previous == "@" and one_before_previous is None: raise 
ValueError(f"Invalid reference '{ref_path}': references cannot be '@'") - elif any(component.endswith(".lock") for component in str(ref_path).split("/")): + elif any(component.endswith(".lock") for component in Path(ref_path).parts): raise ValueError( f"Invalid reference '{ref_path}': references cannot have slash-separated components that end with" " '.lock'" @@ -235,7 +262,7 @@ def _get_ref_info_helper( tokens: Union[None, List[str], Tuple[str, str]] = None repodir = _git_dir(repo, ref_path) try: - with open(os.path.join(repodir, str(ref_path)), "rt", encoding="UTF-8") as fp: + with open(os.path.join(repodir, ref_path), "rt", encoding="UTF-8") as fp: # type: ignore[arg-type] value = fp.read().rstrip() # Don't only split on spaces, but on whitespace, which allows to parse lines like: # 60b64ef992065e2600bfef6187a97f92398a9144 branch 'master' of git-server:/path/to/repo @@ -387,19 +414,25 @@ def set_object( # set the commit on our reference return self._get_reference().set_object(object, logmsg) - commit = property( - _get_commit, - set_commit, # type: ignore[arg-type] - doc="Query or set commits directly", - ) + @property + def commit(self) -> "Commit": + """Query or set commits directly""" + return self._get_commit() + + @commit.setter + def commit(self, commit: Union[Commit, "SymbolicReference", str]) -> "SymbolicReference": + return self.set_commit(commit) + + @property + def object(self) -> AnyGitObject: + """Return the object our ref currently refers to""" + return self._get_object() - object = property( - _get_object, - set_object, # type: ignore[arg-type] - doc="Return the object our ref currently refers to", - ) + @object.setter + def object(self, object: Union[AnyGitObject, "SymbolicReference", str]) -> "SymbolicReference": + return self.set_object(object) - def _get_reference(self) -> "SymbolicReference": + def _get_reference(self) -> "Reference": """ :return: :class:`~git.refs.reference.Reference` object we point to @@ -411,7 +444,7 @@ def 
_get_reference(self) -> "SymbolicReference": sha, target_ref_path = self._get_ref_info(self.repo, self.path) if target_ref_path is None: raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha)) - return self.from_path(self.repo, target_ref_path) + return cast("Reference", self.from_path(self.repo, target_ref_path)) def set_reference( self, @@ -478,7 +511,7 @@ def set_reference( # END handle non-existing # END retrieve old hexsha - fpath = self.abspath + fpath = self._get_validated_ref_path(self.repo, self.path) assure_directory_exists(fpath, is_file=True) lfd = LockedFD(fpath) @@ -496,12 +529,14 @@ def set_reference( return self # Aliased reference - reference: Union["Head", "TagReference", "RemoteReference", "Reference"] - reference = property( # type: ignore[assignment] - _get_reference, - set_reference, # type: ignore[arg-type] - doc="Returns the Reference we point to", - ) + @property + def reference(self) -> "Reference": + return self._get_reference() + + @reference.setter + def reference(self, ref: Union[AnyGitObject, "SymbolicReference", str]) -> "SymbolicReference": + return self.set_reference(ref) + ref = reference def is_valid(self) -> bool: @@ -606,7 +641,7 @@ def to_full_path(cls, path: Union[PathLike, "SymbolicReference"]) -> PathLike: full_ref_path = path if not cls._common_path_default: return full_ref_path - if not str(path).startswith(cls._common_path_default + "/"): + if not os.fspath(path).startswith(cls._common_path_default + "/"): full_ref_path = "%s/%s" % (cls._common_path_default, path) return full_ref_path @@ -623,7 +658,7 @@ def delete(cls, repo: "Repo", path: PathLike) -> None: Alternatively the symbolic reference to be deleted. """ full_ref_path = cls.to_full_path(path) - abs_path = os.path.join(repo.common_dir, full_ref_path) + abs_path = cls._get_validated_ref_path(repo, full_ref_path) if os.path.exists(abs_path): os.remove(abs_path) else: @@ -686,9 +721,8 @@ def _create( symbolic reference. 
Otherwise it will be resolved to the corresponding object and a detached symbolic reference will be created instead. """ - git_dir = _git_dir(repo, path) full_ref_path = cls.to_full_path(path) - abs_ref_path = os.path.join(git_dir, full_ref_path) + abs_ref_path = cls._get_validated_ref_path(repo, full_ref_path) # Figure out target data. target = reference @@ -698,7 +732,7 @@ def _create( if not force and os.path.isfile(abs_ref_path): target_data = str(target) if isinstance(target, SymbolicReference): - target_data = str(target.path) + target_data = os.fspath(target.path) if not resolve: target_data = "ref: " + target_data with open(abs_ref_path, "rb") as fd: @@ -780,8 +814,8 @@ def rename(self, new_path: PathLike, force: bool = False) -> "SymbolicReference" if self.path == new_path: return self - new_abs_path = os.path.join(_git_dir(self.repo, new_path), new_path) - cur_abs_path = os.path.join(_git_dir(self.repo, self.path), self.path) + new_abs_path = self._get_validated_ref_path(self.repo, new_path) + cur_abs_path = self._get_validated_ref_path(self.repo, self.path) if os.path.isfile(new_abs_path): if not force: # If they point to the same file, it's not an error. @@ -834,7 +868,7 @@ def _iter_items( # Read packed refs. 
for _sha, rela_path in cls._iter_packed_refs(repo): - if rela_path.startswith(str(common_path)): + if rela_path.startswith(os.fspath(common_path)): rela_paths.add(rela_path) # END relative path matches common path # END packed refs reading @@ -908,8 +942,7 @@ def from_path(cls: Type[T_References], repo: "Repo", path: PathLike) -> T_Refere SymbolicReference, ): try: - instance: T_References - instance = ref_type(repo, path) + instance = cast(T_References, ref_type(repo, path)) if instance.__class__ is SymbolicReference and instance.is_detached: raise ValueError("SymbolicRef was detached, we drop it") else: @@ -923,4 +956,4 @@ def from_path(cls: Type[T_References], repo: "Repo", path: PathLike) -> T_Refere def is_remote(self) -> bool: """:return: True if this symbolic reference points to a remote branch""" - return str(self.path).startswith(self._remote_common_path_default + "/") + return os.fspath(self.path).startswith(self._remote_common_path_default + "/") diff --git a/git/refs/tag.py b/git/refs/tag.py index 1e38663ae..4525b09cb 100644 --- a/git/refs/tag.py +++ b/git/refs/tag.py @@ -45,8 +45,8 @@ class TagReference(Reference): _common_default = "tags" _common_path_default = Reference._common_path_default + "/" + _common_default - @property - def commit(self) -> "Commit": # type: ignore[override] # LazyMixin has unrelated commit method + @property # type: ignore[misc] + def commit(self) -> "Commit": # LazyMixin has unrelated commit method """:return: Commit object the tag ref points to :raise ValueError: @@ -80,8 +80,8 @@ def tag(self) -> Union["TagObject", None]: return None # Make object read-only. It should be reasonably hard to adjust an existing tag. 
- @property - def object(self) -> AnyGitObject: # type: ignore[override] + @property # type: ignore[misc] + def object(self) -> AnyGitObject: return Reference._get_object(self) @classmethod diff --git a/git/remote.py b/git/remote.py index 20e42b412..18d4829af 100644 --- a/git/remote.py +++ b/git/remote.py @@ -517,6 +517,9 @@ def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> NoReturn: # -> raise NotImplementedError +Progress = Union[RemoteProgress, "UpdateProgress", Callable[..., RemoteProgress], None] + + class Remote(LazyMixin, IterableObj): """Provides easy read and write access to a git remote. @@ -872,7 +875,7 @@ def update(self, **kwargs: Any) -> "Remote": def _get_fetch_info_from_stderr( self, proc: "Git.AutoInterrupt", - progress: Union[Callable[..., Any], RemoteProgress, None], + progress: Progress, kill_after_timeout: Union[None, float] = None, ) -> IterableList["FetchInfo"]: progress = to_progress_instance(progress) @@ -1000,7 +1003,7 @@ def _assert_refspec(self) -> None: def fetch( self, refspec: Union[str, List[str], None] = None, - progress: Union[RemoteProgress, None, "UpdateProgress"] = None, + progress: Progress = None, verbose: bool = True, kill_after_timeout: Union[None, float] = None, allow_unsafe_protocols: bool = False, @@ -1081,7 +1084,7 @@ def fetch( def pull( self, refspec: Union[str, List[str], None] = None, - progress: Union[RemoteProgress, "UpdateProgress", None] = None, + progress: Progress = None, kill_after_timeout: Union[None, float] = None, allow_unsafe_protocols: bool = False, allow_unsafe_options: bool = False, @@ -1135,7 +1138,7 @@ def pull( def push( self, refspec: Union[str, List[str], None] = None, - progress: Union[RemoteProgress, "UpdateProgress", Callable[..., RemoteProgress], None] = None, + progress: Progress = None, kill_after_timeout: Union[None, float] = None, allow_unsafe_protocols: bool = False, allow_unsafe_options: bool = False, diff --git a/git/repo/base.py b/git/repo/base.py index 
db89cdf41..2d3cf24f0 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -126,6 +126,7 @@ class Repo: working_dir: PathLike """The working directory of the git command.""" + # stored as string for easier processing, but annotated as path for clearer intention _working_tree_dir: Optional[PathLike] = None git_dir: PathLike @@ -215,15 +216,13 @@ def __init__( epath = path or os.getenv("GIT_DIR") if not epath: epath = os.getcwd() + epath = os.fspath(epath) if Git.is_cygwin(): # Given how the tests are written, this seems more likely to catch Cygwin # git used from Windows than Windows git used from Cygwin. Therefore # changing to Cygwin-style paths is the relevant operation. - epath = cygpath(str(epath)) + epath = cygpath(epath) - epath = epath or path or os.getcwd() - if not isinstance(epath, str): - epath = str(epath) if expand_vars and re.search(self.re_envvars, epath): warnings.warn( "The use of environment variables in paths is deprecated" @@ -243,6 +242,28 @@ def __init__( # It's important to normalize the paths, as submodules will otherwise # initialize their repo instances with paths that depend on path-portions # that will not exist after being removed. It's just cleaner. + if ( + osp.isfile(osp.join(curpath, "gitdir")) + and osp.isfile(osp.join(curpath, "commondir")) + and osp.isfile(osp.join(curpath, "HEAD")) + ): + git_dir = curpath + + if "GIT_WORK_TREE" in os.environ: + self._working_tree_dir = os.getenv("GIT_WORK_TREE") + else: + # Linked worktree administrative directories store the path to the + # worktree's .git file in their gitdir file (without "gitdir: " prefix). 
+ with open(osp.join(git_dir, "gitdir")) as fp: + worktree_gitfile = fp.read().strip() + + if not osp.isabs(worktree_gitfile): + worktree_gitfile = osp.normpath(osp.join(git_dir, worktree_gitfile)) + + self._working_tree_dir = osp.dirname(worktree_gitfile) + + break + if is_git_dir(curpath): git_dir = curpath # from man git-config : core.worktree @@ -274,7 +295,8 @@ def __init__( sm_gitpath = find_worktree_git_dir(dotgit) if sm_gitpath is not None: - git_dir = expand_path(sm_gitpath, expand_vars) + # worktrees can use relative paths as of Git 2.48, so we join to curpath + git_dir = osp.normpath(osp.join(curpath, sm_gitpath)) self._working_tree_dir = curpath break @@ -354,21 +376,19 @@ def __ne__(self, rhs: object) -> bool: def __hash__(self) -> int: return hash(self.git_dir) - # Description property - def _get_description(self) -> str: + @property + def description(self) -> str: + """The project's description""" filename = osp.join(self.git_dir, "description") with open(filename, "rb") as fp: return fp.read().rstrip().decode(defenc) - def _set_description(self, descr: str) -> None: + @description.setter + def description(self, descr: str) -> None: filename = osp.join(self.git_dir, "description") with open(filename, "wb") as fp: fp.write((descr + "\n").encode(defenc)) - description = property(_get_description, _set_description, doc="the project's description") - del _get_description - del _set_description - @property def working_tree_dir(self) -> Optional[PathLike]: """ @@ -514,7 +534,7 @@ def create_submodule(self, *args: Any, **kwargs: Any) -> Submodule: def iter_submodules(self, *args: Any, **kwargs: Any) -> Iterator[Submodule]: """An iterator yielding Submodule instances. - See the `~git.objects.util.Traversable` interface for a description of `args` + See the :class:`~git.objects.util.Traversable` interface for a description of `args` and `kwargs`. 
:return: @@ -522,7 +542,7 @@ def iter_submodules(self, *args: Any, **kwargs: Any) -> Iterator[Submodule]: """ return RootModule(self).traverse(*args, **kwargs) - def submodule_update(self, *args: Any, **kwargs: Any) -> Iterator[Submodule]: + def submodule_update(self, *args: Any, **kwargs: Any) -> RootModule: """Update the submodules, keeping the repository consistent as it will take the previous state into consideration. @@ -686,11 +706,7 @@ def _config_reader( git_dir: Optional[PathLike] = None, ) -> GitConfigParser: if config_level is None: - files = [ - self._get_config_path(cast(Lit_config_levels, f), git_dir) - for f in self.config_level - if cast(Lit_config_levels, f) - ] + files = [self._get_config_path(f, git_dir) for f in self.config_level if f] else: files = [self._get_config_path(config_level, git_dir)] return GitConfigParser(files, read_only=True, repo=self) @@ -885,13 +901,14 @@ def _set_daemon_export(self, value: object) -> None: elif not value and fileexists: os.unlink(filename) - daemon_export = property( - _get_daemon_export, - _set_daemon_export, - doc="If True, git-daemon may export this repository", - ) - del _get_daemon_export - del _set_daemon_export + @property + def daemon_export(self) -> bool: + """If True, git-daemon may export this repository""" + return self._get_daemon_export() + + @daemon_export.setter + def daemon_export(self, value: object) -> None: + self._set_daemon_export(value) def _get_alternates(self) -> List[str]: """The list of alternates for this repo from which objects can be retrieved. 
@@ -929,11 +946,14 @@ def _set_alternates(self, alts: List[str]) -> None: with open(alternates_path, "wb") as f: f.write("\n".join(alts).encode(defenc)) - alternates = property( - _get_alternates, - _set_alternates, - doc="Retrieve a list of alternates paths or set a list paths to be used as alternates", - ) + @property + def alternates(self) -> List[str]: + """Retrieve a list of alternates paths or set a list paths to be used as alternates""" + return self._get_alternates() + + @alternates.setter + def alternates(self, alts: List[str]) -> None: + self._set_alternates(alts) def is_dirty( self, @@ -959,7 +979,7 @@ def is_dirty( if not submodules: default_args.append("--ignore-submodules") if path: - default_args.extend(["--", str(path)]) + default_args.extend(["--", os.fspath(path)]) if index: # diff index against HEAD. if osp.isfile(self.index.path) and len(self.git.diff("--cached", *default_args)): @@ -1045,11 +1065,19 @@ def active_branch(self) -> Head: :raise TypeError: If HEAD is detached. + :raise ValueError: + If HEAD points to the ``.invalid`` ref Git uses to mark refs as + incompatible with older clients. + :return: :class:`~git.refs.head.Head` to the active branch """ - # reveal_type(self.head.reference) # => Reference - return self.head.reference + active_branch = self.head.reference + if active_branch.name == ".invalid": + raise ValueError( + "HEAD points to 'refs/heads/.invalid', which Git uses to mark refs as incompatible with older clients" + ) + return active_branch def blame_incremental(self, rev: str | HEAD | None, file: str, **kwargs: Any) -> Iterator["BlameEntry"]: """Iterator for blame information for the given file at the given revision. 
@@ -1359,9 +1387,9 @@ def _clone( ) -> "Repo": odbt = kwargs.pop("odbt", odb_default_type) - # When pathlib.Path or other class-based path is passed - if not isinstance(path, str): - path = str(path) + # url may be a path and this has no effect if it is a string + url = os.fspath(url) + path = os.fspath(path) ## A bug win cygwin's Git, when `--bare` or `--separate-git-dir` # it prepends the cwd or(?) the `url` into the `path, so:: @@ -1378,16 +1406,16 @@ def _clone( multi = shlex.split(" ".join(multi_options)) if not allow_unsafe_protocols: - Git.check_unsafe_protocols(str(url)) + Git.check_unsafe_protocols(url) if not allow_unsafe_options: Git.check_unsafe_options(options=list(kwargs.keys()), unsafe_options=cls.unsafe_git_clone_options) - if not allow_unsafe_options and multi_options: - Git.check_unsafe_options(options=multi_options, unsafe_options=cls.unsafe_git_clone_options) + if not allow_unsafe_options and multi: + Git.check_unsafe_options(options=multi, unsafe_options=cls.unsafe_git_clone_options) proc = git.clone( multi, "--", - Git.polish_url(str(url)), + Git.polish_url(url), clone_path, with_extended_output=True, as_process=True, @@ -1482,7 +1510,7 @@ def clone( self.common_dir, path, type(self.odb), - progress, + progress, # type: ignore[arg-type] multi_options, allow_unsafe_protocols=allow_unsafe_protocols, allow_unsafe_options=allow_unsafe_options, @@ -1543,7 +1571,7 @@ def clone_from( url, to_path, GitCmdObjectDB, - progress, + progress, # type: ignore[arg-type] multi_options, allow_unsafe_protocols=allow_unsafe_protocols, allow_unsafe_options=allow_unsafe_options, diff --git a/git/repo/fun.py b/git/repo/fun.py index 182cf82ed..66e7eba69 100644 --- a/git/repo/fun.py +++ b/git/repo/fun.py @@ -20,6 +20,7 @@ import os import os.path as osp from pathlib import Path +import re import stat from string import digits @@ -28,19 +29,21 @@ from git.cmd import Git from git.exc import WorkTreeRepositoryUnsupported from git.objects import Object +from 
git.objects.util import parse_date from git.refs import SymbolicReference from git.util import cygpath, bin_to_hex, hex_to_bin # Typing ---------------------------------------------------------------------- -from typing import Optional, TYPE_CHECKING, Union, cast, overload +from typing import Iterator, Optional, TYPE_CHECKING, Tuple, Union, cast, overload from git.types import AnyGitObject, Literal, PathLike if TYPE_CHECKING: from git.db import GitCmdObjectDB - from git.objects import Commit, TagObject + from git.objects import Commit from git.refs.reference import Reference + from git.refs.log import RefLog, RefLogEntry from git.refs.tag import Tag from .base import Repo @@ -139,6 +142,23 @@ def short_to_long(odb: "GitCmdObjectDB", hexsha: str) -> Optional[bytes]: # END exception handling +def _describe_to_long(repo: "Repo", name: str) -> Optional[bytes]: + """Resolve git-describe style names to the abbreviated object they contain.""" + match = re.match(r"^.+-\d+-g([0-9A-Fa-f]{4,40})(?:-dirty)?$", name) + if match is None: + match = re.match(r"^.+-g([0-9A-Fa-f]{4,40})(?:-dirty)?$", name) + if match is None: + match = re.match(r"^([0-9A-Fa-f]{4,40})-dirty$", name) + if match is None: + return None + # END handle match + + hexsha = match.group(1) + if len(hexsha) == 40: + return hexsha.encode("ascii") + return short_to_long(repo.odb, hexsha) + + @overload def name_to_object(repo: "Repo", name: str, return_ref: Literal[False] = ...) -> AnyGitObject: ... @@ -192,6 +212,10 @@ def name_to_object(repo: "Repo", name: str, return_ref: bool = False) -> Union[A # END for each base # END handle hexsha + if hexsha is None: + hexsha = _describe_to_long(repo, name) + # END handle describe output + # Didn't find any ref, this is an error. 
if return_ref: raise BadObject("Couldn't find reference named %r" % name) @@ -227,6 +251,361 @@ def to_commit(obj: Object) -> "Commit": return obj +def _object_from_hexsha(repo: "Repo", hexsha: str) -> AnyGitObject: + return Object.new_from_sha(repo, hex_to_bin(hexsha)) + + +def _current_reflog_ref(repo: "Repo") -> SymbolicReference: + try: + return repo.head.ref + except TypeError: + return repo.head + # END handle detached head + + +def _common_reflog_path(repo: "Repo", ref: SymbolicReference) -> Optional[str]: + if repo.common_dir == repo.git_dir: + return None + # END handle normal repository + return SymbolicReference._get_validated_path(osp.join(repo.common_dir, "logs"), ref.path) + + +def _ref_log(repo: "Repo", ref: SymbolicReference) -> "RefLog": + try: + return ref.log() + except FileNotFoundError: + common_path = _common_reflog_path(repo, ref) + if common_path and osp.isfile(common_path): + from git.refs.log import RefLog + + return RefLog.from_file(common_path) + # END handle linked-worktree branch logs + try: + if ref.path == repo.head.ref.path: + return repo.head.log() + # END handle linked-worktree current branch logs + except TypeError: + pass + # END handle detached head + raise + # END handle missing branch log + + +def _ref_log_entry(repo: "Repo", ref: SymbolicReference, index: int) -> "RefLogEntry": + try: + return ref.log_entry(index) + except FileNotFoundError: + common_path = _common_reflog_path(repo, ref) + if common_path and osp.isfile(common_path): + from git.refs.log import RefLog + + return RefLog.entry_at(common_path, index) + # END handle linked-worktree branch logs + try: + if ref.path == repo.head.ref.path: + return repo.head.log_entry(index) + # END handle linked-worktree current branch logs + except TypeError: + pass + # END handle detached head + raise + # END handle missing branch log + + +def _find_reflog_entry_by_date(repo: "Repo", ref: SymbolicReference, spec: str) -> str: + try: + timestamp, _offset = parse_date(spec) + except 
ValueError as e: + raise NotImplementedError("Support for additional @{...} modes not implemented") from e + # END handle unsupported dates + log = _ref_log(repo, ref) + if not log: + raise IndexError("Invalid revlog date: %s" % spec) + # END handle empty log + + for entry in reversed(log): + if entry.time[0] <= timestamp: + return entry.newhexsha + # END found candidate + # END for each entry + return log[0].newhexsha + + +def _previous_checked_out_branch(repo: "Repo", nth: int) -> AnyGitObject: + if nth <= 0: + raise ValueError("Invalid previous checkout selector: -%i" % nth) + # END handle invalid input + + seen = 0 + for entry in reversed(_ref_log(repo, repo.head)): + message = entry.message or "" + prefix = "checkout: moving from " + if not message.startswith(prefix): + continue + # END skip non-checkouts + + previous_branch = message[len(prefix) :].split(" to ", 1)[0] + seen += 1 + if seen == nth: + return name_to_object(repo, previous_branch) + # END found selector + # END for each entry + raise IndexError("Invalid previous checkout selector: -%i" % nth) + + +def _tracking_branch_object(repo: "Repo", ref: Optional[SymbolicReference]) -> AnyGitObject: + from git.refs.head import Head + + if ref is None: + try: + head = repo.active_branch + except TypeError as e: + raise BadName("@{upstream}") from e + elif isinstance(ref, Head): + head = ref + elif os.fspath(ref.path).startswith("refs/heads/"): + head = Head(repo, ref.path) + else: + raise BadName("%s@{upstream}" % ref.name) + # END handle head + + tracking_branch = head.tracking_branch() + if tracking_branch is None: + raise BadName("%s@{upstream}" % head.name) + # END handle missing upstream + return tracking_branch.commit + + +def _apply_reflog(repo: "Repo", ref: Optional[SymbolicReference], content: str) -> AnyGitObject: + if content.startswith("+"): + content = content[1:] + # END handle explicit positive sign + + if content.startswith("-"): + if ref is not None: + raise ValueError("Previous checkout 
selectors do not take an explicit ref") + if content == "-0": + raise ValueError("Negative zero is invalid in reflog selector") + # END handle invalid negative zero + try: + return _previous_checked_out_branch(repo, int(content[1:])) + except ValueError as e: + raise ValueError("Invalid previous checkout selector: %s" % content) from e + # END handle previous checkout branch + + content_lower = content.lower() + if content_lower in ("u", "upstream", "push"): + return _tracking_branch_object(repo, ref) + # END handle sibling branches + + ref = ref or _current_reflog_ref(repo) + try: + entry_no = int(content) + except ValueError: + hexsha = _find_reflog_entry_by_date(repo, ref, content) + else: + if entry_no >= 100000000: + hexsha = _find_reflog_entry_by_date(repo, ref, "%s +0000" % entry_no) + elif entry_no == 0: + return ref.commit + else: + try: + entry = _ref_log_entry(repo, ref, -(entry_no + 1)) + except IndexError as e: + raise IndexError("Invalid revlog index: %i" % entry_no) from e + # END handle index out of bound + hexsha = entry.newhexsha + # END handle offset or date-like timestamp + # END handle content + return _object_from_hexsha(repo, hexsha) + + +def _find_closing_brace(rev: str, start: int) -> int: + depth = 1 + escaped = False + for idx in range(start + 1, len(rev)): + char = rev[idx] + if escaped: + escaped = False + elif char == "\\": + escaped = True + elif char == "{": + depth += 1 + elif char == "}": + depth -= 1 + if depth == 0: + return idx + # END found end + # END handle char + # END for each char + raise ValueError("Missing closing brace to define type in %s" % rev) + + +def _parse_search(pattern: str) -> Tuple[str, bool]: + if not pattern: + raise ValueError("Revision search requires a pattern") + # END handle empty pattern + + if pattern.startswith("!-"): + return pattern[2:], True + if pattern.startswith("!!"): + return pattern[1:], False + if pattern.startswith("!"): + raise ValueError("Need one character after /!, typically -") + 
return pattern, False + + +def _unescape_braced_regex(pattern: str) -> str: + out = [] + idx = 0 + while idx < len(pattern): + char = pattern[idx] + if char == "\\" and idx + 1 < len(pattern): + next_char = pattern[idx + 1] + if next_char in "{}\\": + out.append(next_char) + else: + out.append(char) + out.append(next_char) + # END handle escaped char + idx += 2 + continue + # END handle backslash + out.append(char) + idx += 1 + # END for each char + return "".join(out) + + +def _find_commit_by_message( + repo: "Repo", rev: Optional[AnyGitObject], pattern: str, braced: bool = False +) -> AnyGitObject: + pattern, negated = _parse_search(_unescape_braced_regex(pattern) if braced else pattern) + try: + regex = re.compile(pattern) + except re.error as e: + raise ValueError("Invalid commit message regex %r" % pattern) from e + # END handle invalid regex + if rev is None: + commits = _all_ref_commits(repo) + else: + commits = _reachable_commits([to_commit(cast(Object, rev))]) + # END handle starting point + + for commit in commits: + message = commit.message + if isinstance(message, bytes): + message = message.decode(commit.encoding, "replace") + # END handle bytes message + matches = regex.search(message or "") is not None + if matches != negated: + return commit + # END found commit + # END for each commit + raise BadName("No commit found matching message pattern %r" % pattern) + + +def _all_ref_commits(repo: "Repo") -> Iterator["Commit"]: + starts = [] + for ref in repo.references: + try: + starts.append(to_commit(cast(Object, ref.object))) + except (BadName, ValueError): + pass + # END skip refs that do not point to commits + # END for each ref + try: + starts.append(repo.head.commit) + except ValueError: + pass + # END handle unborn head + return _reachable_commits(starts) + + +def _reachable_commits(starts: list["Commit"]) -> Iterator["Commit"]: + seen = set() + pending = starts[:] + while pending: + pending.sort(key=lambda commit: commit.committed_date, 
reverse=True) + commit = pending.pop(0) + if commit.binsha in seen: + continue + # END skip seen commit + seen.add(commit.binsha) + yield commit + pending.extend(commit.parents) + # END while commits remain + + +def _index_lookup(repo: "Repo", spec: str) -> AnyGitObject: + if not spec: + raise ValueError("':' must be followed by a path") + # END handle empty lookup + + stage = 0 + path = spec + if len(spec) >= 2 and spec[1] == ":" and spec[0] in "0123": + stage = int(spec[0]) + path = spec[2:] + # END handle stage + + try: + return repo.index.entries[(path, stage)].to_blob(repo) + except KeyError as e: + raise BadName("Path %r did not exist in the index at stage %i" % (path, stage)) from e + + +def _tree_lookup(obj: AnyGitObject, path: str) -> AnyGitObject: + if obj.type != "tree": + obj = to_commit(cast(Object, obj)).tree + # END get tree + if not path: + return obj + return obj[path] + + +def _peel(obj: AnyGitObject, output_type: str, repo: "Repo", rev: str) -> AnyGitObject: + if output_type.startswith("/"): + return _find_commit_by_message(repo, obj, output_type[1:], braced=True) + if output_type == "": + return deref_tag(obj) if obj.type == "tag" else obj + if output_type == "object": + return obj + if output_type == "commit": + return to_commit(cast(Object, obj)) + if output_type == "tree": + return to_commit(cast(Object, obj)).tree if obj.type != "tree" else obj + if output_type == "blob": + obj = deref_tag(obj) if obj.type == "tag" else obj + if obj.type == output_type: + return obj + # END handle matching type + raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) + if output_type == "tag": + if obj.type == output_type: + return obj + # END handle matching type + raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) + # END handle known types + raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) + + +def _first_rev_token(rev: str) -> 
Optional[int]: + for idx, char in enumerate(rev): + if char in "^~:": + return idx + if char == "@": + next_char = rev[idx + 1] if idx + 1 < len(rev) else None + if idx == 0 and next_char in (None, "^", "~", ":", "{"): + return idx + if next_char == "{": + return idx + # END handle reflog selector + # END handle at symbol + # END for each char + return None + + def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: """Parse a revision string. Like :manpage:`git-rev-parse(1)`. @@ -253,131 +632,82 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: :raise IndexError: If an invalid reflog index is specified. """ - # Are we in colon search mode? if rev.startswith(":/"): - # Colon search mode - raise NotImplementedError("commit by message search (regex)") - # END handle search + return _find_commit_by_message(repo, None, rev[2:]) + if rev.startswith(":"): + return _index_lookup(repo, rev[1:]) + # END handle top-level colon modes obj: Optional[AnyGitObject] = None ref = None - output_type = "commit" - start = 0 - parsed_to = 0 lr = len(rev) - while start < lr: - if rev[start] not in "^~:@": - start += 1 - continue - # END handle start + first_token = _first_rev_token(rev) + if first_token is None: + return name_to_object(repo, rev) + # END handle plain name + + if first_token == 0: + if rev[0] != "@": + raise ValueError("Revision specifier must start with an object name: %s" % rev) + # END handle invalid leading token + ref = _current_reflog_ref(repo) + obj = ref.commit + start = 0 if rev.startswith("@{") else 1 + else: + if rev[first_token] == "@": + ref = cast("Reference", name_to_object(repo, rev[:first_token], return_ref=True)) + obj = ref.commit + else: + obj = name_to_object(repo, rev[:first_token]) + # END handle anchor + start = first_token + # END initialize anchor + while start < lr: token = rev[start] - if obj is None: - # token is a rev name. 
- if start == 0: - ref = repo.head.ref - else: - if token == "@": - ref = cast("Reference", name_to_object(repo, rev[:start], return_ref=True)) - else: - obj = name_to_object(repo, rev[:start]) - # END handle token - # END handle refname - else: - if ref is not None: - obj = cast("Commit", ref.commit) - # END handle ref - # END initialize obj on first token - - start += 1 + if token == "@": + if start + 1 >= lr or rev[start + 1] != "{": + raise ValueError("Invalid @ token in revision specifier: %s" % rev) + # END handle invalid @ + end = _find_closing_brace(rev, start + 1) + obj = _apply_reflog(repo, ref if first_token != 0 and start == first_token else None, rev[start + 2 : end]) + ref = None + start = end + 1 + continue + # END handle reflog - # Try to parse {type}. - if start < lr and rev[start] == "{": - end = rev.find("}", start) - if end == -1: - raise ValueError("Missing closing brace to define type in %s" % rev) - output_type = rev[start + 1 : end] # Exclude brace. - - # Handle type. - if output_type == "commit": - pass # Default. - elif output_type == "tree": - try: - obj = cast(AnyGitObject, obj) - obj = to_commit(obj).tree - except (AttributeError, ValueError): - pass # Error raised later. - # END exception handling - elif output_type in ("", "blob"): - obj = cast("TagObject", obj) - if obj and obj.type == "tag": - obj = deref_tag(obj) - else: - # Cannot do anything for non-tags. - pass - # END handle tag - elif token == "@": - # try single int - assert ref is not None, "Require Reference to access reflog" - revlog_index = None - try: - # Transform reversed index into the format of our revlog. - revlog_index = -(int(output_type) + 1) - except ValueError as e: - # TODO: Try to parse the other date options, using parse_date maybe. 
- raise NotImplementedError("Support for additional @{...} modes not implemented") from e - # END handle revlog index - - try: - entry = ref.log_entry(revlog_index) - except IndexError as e: - raise IndexError("Invalid revlog index: %i" % revlog_index) from e - # END handle index out of bound - - obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha)) - - # Make it pass the following checks. - output_type = "" - else: - raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) - # END handle output type + if token == ":": + return _tree_lookup(obj, rev[start + 1 :]) + # END handle path - # Empty output types don't require any specific type, its just about - # dereferencing tags. - if output_type and obj and obj.type != output_type: - raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) - # END verify output type + start += 1 - start = end + 1 # Skip brace. - parsed_to = start + if token == "^" and start < lr and rev[start] == "{": + end = _find_closing_brace(rev, start) + obj = _peel(obj, rev[start + 1 : end], repo, rev) + ref = None + start = end + 1 continue # END parse type - # Try to parse a number. num = 0 - if token != ":": - found_digit = False - while start < lr: - if rev[start] in digits: - num = num * 10 + int(rev[start]) - start += 1 - found_digit = True - else: - break - # END handle number - # END number parse loop - - # No explicit number given, 1 is the default. It could be 0 though. - if not found_digit: - num = 1 - # END set default num - # END number parsing only if non-blob mode - - parsed_to = start - # Handle hierarchy walk. 
+ found_digit = False + while start < lr: + if rev[start] in digits: + num = num * 10 + int(rev[start]) + start += 1 + found_digit = True + else: + break + # END handle number + # END number parse loop + + if not found_digit: + num = 1 + # END set default num + try: - obj = cast(AnyGitObject, obj) if token == "~": obj = to_commit(obj) for _ in range(num): @@ -385,35 +715,22 @@ def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: # END for each history item to walk elif token == "^": obj = to_commit(obj) - # Must be n'th parent. - if num: + if num == 0: + pass + else: obj = obj.parents[num - 1] - elif token == ":": - if obj.type != "tree": - obj = obj.tree - # END get tree type - obj = obj[rev[start:]] - parsed_to = lr + # END handle parent else: raise ValueError("Invalid token: %r" % token) # END end handle tag except (IndexError, AttributeError) as e: raise BadName( - f"Invalid revision spec '{rev}' - not enough " f"parent commits to reach '{token}{int(num)}'" + f"Invalid revision spec '{rev}' - not enough parent commits to reach '{token}{int(num)}'" ) from e # END exception handling # END parse loop - # Still no obj? It's probably a simple name. - if obj is None: - obj = name_to_object(repo, rev) - parsed_to = lr - # END handle simple name - if obj is None: raise ValueError("Revision specifier could not be parsed: %s" % rev) - if parsed_to != lr: - raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to])) - return obj diff --git a/git/types.py b/git/types.py index cce184530..100fff43f 100644 --- a/git/types.py +++ b/git/types.py @@ -13,7 +13,6 @@ Sequence as Sequence, Tuple, TYPE_CHECKING, - Type, TypeVar, Union, ) @@ -130,7 +129,8 @@ https://git-scm.com/docs/gitglossary#def_object_type """ -Lit_commit_ish: Type[Literal["commit", "tag"]] +if TYPE_CHECKING: + Lit_commit_ish = Literal["commit", "tag"] """Deprecated. Type of literal strings identifying typically-commitish git object types. 
Prior to a bugfix, this type had been defined more broadly. Any usage is in practice diff --git a/git/util.py b/git/util.py index 9e8ac821d..712fabe85 100644 --- a/git/util.py +++ b/git/util.py @@ -36,7 +36,7 @@ import logging import os import os.path as osp -import pathlib +from pathlib import Path import platform import re import shutil @@ -272,9 +272,9 @@ def stream_copy(source: BinaryIO, destination: BinaryIO, chunk_size: int = 512 * def join_path(a: PathLike, *p: PathLike) -> PathLike: R"""Join path tokens together similar to osp.join, but always use ``/`` instead of possibly ``\`` on Windows.""" - path = str(a) + path = os.fspath(a) for b in p: - b = str(b) + b = os.fspath(b) if not b: continue if b.startswith("/"): @@ -289,19 +289,19 @@ def join_path(a: PathLike, *p: PathLike) -> PathLike: if sys.platform == "win32": - def to_native_path_windows(path: PathLike) -> PathLike: - path = str(path) + def to_native_path_windows(path: PathLike) -> str: + path = os.fspath(path) return path.replace("/", "\\") def to_native_path_linux(path: PathLike) -> str: - path = str(path) + path = os.fspath(path) return path.replace("\\", "/") to_native_path = to_native_path_windows else: # No need for any work on Linux. 
def to_native_path_linux(path: PathLike) -> str: - return str(path) + return os.fspath(path) to_native_path = to_native_path_linux @@ -372,7 +372,7 @@ def is_exec(fpath: str) -> bool: progs = [] if not path: path = os.environ["PATH"] - for folder in str(path).split(os.pathsep): + for folder in os.fspath(path).split(os.pathsep): folder = folder.strip('"') if folder: exe_path = osp.join(folder, program) @@ -397,7 +397,7 @@ def _cygexpath(drive: Optional[str], path: str) -> str: p = cygpath(p) elif drive: p = "/proc/cygdrive/%s/%s" % (drive.lower(), p) - p_str = str(p) # ensure it is a str and not AnyPath + p_str = os.fspath(p) # ensure it is a str and not AnyPath return p_str.replace("\\", "/") @@ -418,7 +418,7 @@ def _cygexpath(drive: Optional[str], path: str) -> str: def cygpath(path: str) -> str: """Use :meth:`git.cmd.Git.polish_url` instead, that works on any environment.""" - path = str(path) # Ensure is str and not AnyPath. + path = os.fspath(path) # Ensure is str and not AnyPath. # Fix to use Paths when 3.5 dropped. Or to be just str if only for URLs? if not path.startswith(("/cygdrive", "//", "/proc/cygdrive")): for regex, parser, recurse in _cygpath_parsers: @@ -438,7 +438,7 @@ def cygpath(path: str) -> str: def decygpath(path: PathLike) -> str: - path = str(path) + path = os.fspath(path) m = _decygpath_regex.match(path) if m: drive, rest_path = m.groups() @@ -464,6 +464,12 @@ def _is_cygwin_git(git_executable: str) -> bool: # Just a name given, not a real path. uname_cmd = osp.join(git_dir, "uname") + + if not (Path(uname_cmd).is_file() and os.access(uname_cmd, os.X_OK)): + _logger.debug(f"Failed checking if running in CYGWIN: {uname_cmd} is not an executable") + _is_cygwin_cache[git_executable] = is_cygwin + return is_cygwin + process = subprocess.Popen([uname_cmd], stdout=subprocess.PIPE, universal_newlines=True) uname_out, _ = process.communicate() # retcode = process.poll() @@ -484,7 +490,9 @@ def is_cygwin_git(git_executable: PathLike) -> bool: ... 
def is_cygwin_git(git_executable: Union[None, PathLike]) -> bool: - if sys.platform == "win32": # TODO: See if we can use `sys.platform != "cygwin"`. + # TODO: when py3.7 support is dropped, use the new interpolation f"{variable=}" + _logger.debug(f"sys.platform={sys.platform!r}, git_executable={git_executable!r}") + if sys.platform != "cygwin": return False elif git_executable is None: return False @@ -515,7 +523,7 @@ def expand_path(p: PathLike, expand_vars: bool = ...) -> str: def expand_path(p: Union[None, PathLike], expand_vars: bool = True) -> Optional[PathLike]: - if isinstance(p, pathlib.Path): + if isinstance(p, Path): return p.resolve() try: p = osp.expanduser(p) # type: ignore[arg-type] @@ -1135,7 +1143,7 @@ def _obtain_lock(self) -> None: # END endless loop -class IterableList(List[T_IterableObj]): +class IterableList(List[T_IterableObj]): # type: ignore[type-var] """List of iterable objects allowing to query an object by id or by named index:: heads = repo.heads @@ -1200,24 +1208,20 @@ def __getattr__(self, attr: str) -> T_IterableObj: return list.__getattribute__(self, attr) def __getitem__(self, index: Union[SupportsIndex, int, slice, str]) -> T_IterableObj: # type: ignore[override] - assert isinstance(index, (int, str, slice)), "Index of IterableList should be an int or str" - if isinstance(index, int): return list.__getitem__(self, index) elif isinstance(index, slice): raise ValueError("Index should be an int or str") else: try: - return getattr(self, index) + return getattr(self, cast(str, index)) except AttributeError as e: - raise IndexError("No item found with id %r" % (self._prefix + index)) from e + raise IndexError(f"No item found with id {self._prefix}{index}") from e # END handle getattr def __delitem__(self, index: Union[SupportsIndex, int, slice, str]) -> None: - assert isinstance(index, (int, str)), "Index of IterableList should be an int or str" - delindex = cast(int, index) - if not isinstance(index, int): + if isinstance(index, str): 
delindex = -1 name = self._prefix + index for i, item in enumerate(self): diff --git a/init-tests-after-clone.sh b/init-tests-after-clone.sh index bfada01b0..a88f983fc 100755 --- a/init-tests-after-clone.sh +++ b/init-tests-after-clone.sh @@ -40,6 +40,11 @@ fi git tag __testing_point__ # The tests need a branch called master. +# +# If master is locally absent but more than one remote has it, checkout fails +# by default even if all remotes agree, and we fall back to creating it at +# HEAD. The reflog we populate below then traces HEAD's history rather than +# a remote master's, but master is reset to __testing_point__ either way. git checkout master -- || git checkout -b master # The tests need a reflog history on the master branch. diff --git a/pyproject.toml b/pyproject.toml index 090972eed..149f2dc92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,6 @@ testpaths = "test" # Space separated list of paths from root e.g test tests doc # filterwarnings ignore::WarningType # ignores those warnings [tool.mypy] -python_version = "3.8" files = ["git/", "test/deprecation/"] disallow_untyped_defs = true no_implicit_optional = true @@ -60,16 +59,14 @@ lint.select = [ # "UP", # See: https://docs.astral.sh/ruff/rules/#pyupgrade-up ] lint.extend-select = [ - # "A", # See: https://pypi.org/project/flake8-builtins - "B", # See: https://pypi.org/project/flake8-bugbear - "C4", # See: https://pypi.org/project/flake8-comprehensions - "TCH004", # See: https://docs.astral.sh/ruff/rules/runtime-import-in-type-checking-block/ + # "A", # See: https://pypi.org/project/flake8-builtins + "B", # See: https://pypi.org/project/flake8-bugbear + "C4", # See: https://pypi.org/project/flake8-comprehensions + "TC004", # See: https://docs.astral.sh/ruff/rules/runtime-import-in-type-checking-block/ ] lint.ignore = [ - "E203", # Whitespace before ':' - "E731", # Do not assign a `lambda` expression, use a `def` + # If it becomes necessary to ignore any rules, list them here. 
] -lint.ignore-init-module-imports = true lint.unfixable = [ "F401", # Module imported but unused ] diff --git a/requirements-dev.txt b/requirements-dev.txt index f626644af..066b192b8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,8 +1,8 @@ --r requirements.txt --r test-requirements.txt - -# For additional local testing/linting - to be added elsewhere eventually. -ruff -shellcheck -pytest-icdiff -# pytest-profiling +-r requirements.txt +-r test-requirements.txt + +# For additional local testing/linting - to be added elsewhere eventually. +ruff >=0.8 +shellcheck +pytest-icdiff +# pytest-profiling diff --git a/requirements.txt b/requirements.txt index 7159416a9..61d8403b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ gitdb>=4.0.1,<5 -typing-extensions>=3.7.4.3;python_version<"3.8" +typing-extensions>=3.10.0.2;python_version<"3.10" diff --git a/setup.py b/setup.py index f28fedb85..a7b1eab00 100755 --- a/setup.py +++ b/setup.py @@ -95,7 +95,6 @@ def _stamp_version(filename: str) -> None: # "Development Status :: 7 - Inactive", "Environment :: Console", "Intended Audience :: Developers", - "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Operating System :: POSIX", "Operating System :: Microsoft :: Windows", diff --git a/test-requirements.txt b/test-requirements.txt index 75e9e81fa..e6e01c683 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,7 +1,7 @@ coverage[toml] ddt >= 1.1.1, != 1.4.3 mock ; python_version < "3.8" -mypy +mypy==1.18.2 ; python_version >= "3.9" # pin mypy version to avoid new errors pre-commit pytest >= 7.3.1 pytest-cov diff --git a/test/deprecation/test_types.py b/test/deprecation/test_types.py index f97375a85..d3c6af645 100644 --- a/test/deprecation/test_types.py +++ b/test/deprecation/test_types.py @@ -36,7 +36,7 @@ def test_can_access_lit_commit_ish_but_it_is_not_usable() -> None: assert 'Literal["commit", "tag"]' in message, "Has new definition." 
assert "GitObjectTypeString" in message, "Has new type name for old definition." - _: Lit_commit_ish = "commit" # type: ignore[valid-type] + _: Lit_commit_ish = "commit" # It should be as documented (even though deliberately unusable in static checks). assert Lit_commit_ish == Literal["commit", "tag"] diff --git a/test/fixtures/git_config_with_empty_quotes b/test/fixtures/git_config_with_empty_quotes new file mode 100644 index 000000000..f11fe4248 --- /dev/null +++ b/test/fixtures/git_config_with_empty_quotes @@ -0,0 +1,2 @@ +[core] + filemode = "" diff --git a/test/fixtures/git_config_with_extra_whitespace b/test/fixtures/git_config_with_extra_whitespace new file mode 100644 index 000000000..0f727cb5d --- /dev/null +++ b/test/fixtures/git_config_with_extra_whitespace @@ -0,0 +1,2 @@ +[init] + defaultBranch = trunk diff --git a/test/fixtures/git_config_with_quotes b/test/fixtures/git_config_with_quotes new file mode 100644 index 000000000..40e6710d9 --- /dev/null +++ b/test/fixtures/git_config_with_quotes @@ -0,0 +1,3 @@ +[user] + name = "Cody Veal" + email = "cveal05@gmail.com" diff --git a/test/fixtures/git_config_with_quotes_escapes b/test/fixtures/git_config_with_quotes_escapes new file mode 100644 index 000000000..33332c221 --- /dev/null +++ b/test/fixtures/git_config_with_quotes_escapes @@ -0,0 +1,9 @@ +[custom] + hasnewline = "first\nsecond" + hasbackslash = "foo\\bar" + hasquote = "ab\"cd" + hastrailingbackslash = "word\\" + hasunrecognized = "p\qrs" + hasunescapedquotes = "ab"cd"e" + ordinary = "hello world" + unquoted = good evening diff --git a/test/fixtures/git_config_with_quotes_whitespace_inside b/test/fixtures/git_config_with_quotes_whitespace_inside new file mode 100644 index 000000000..c6014cc61 --- /dev/null +++ b/test/fixtures/git_config_with_quotes_whitespace_inside @@ -0,0 +1,2 @@ +[core] + commentString = "# " diff --git a/test/fixtures/git_config_with_quotes_whitespace_outside b/test/fixtures/git_config_with_quotes_whitespace_outside new file 
mode 100644 index 000000000..4b1615a51 --- /dev/null +++ b/test/fixtures/git_config_with_quotes_whitespace_outside @@ -0,0 +1,2 @@ +[init] + defaultBranch = "trunk" diff --git a/test/fixtures/index_extended_flags b/test/fixtures/index_extended_flags new file mode 100644 index 000000000..f03713b68 Binary files /dev/null and b/test/fixtures/index_extended_flags differ diff --git a/test/lib/helper.py b/test/lib/helper.py index 5d91447ea..1c110e103 100644 --- a/test/lib/helper.py +++ b/test/lib/helper.py @@ -10,6 +10,7 @@ "with_rw_directory", "with_rw_repo", "with_rw_and_rw_remote_repo", + "PathLikeMock", "TestBase", "VirtualEnvironment", "TestCase", @@ -17,9 +18,11 @@ "skipIf", "GIT_REPO", "GIT_DAEMON_PORT", + "xfail_if_raises", ] import contextlib +from dataclasses import dataclass from functools import wraps import gc import io @@ -33,8 +36,10 @@ import time import unittest import venv +from typing import Union, Type, Tuple import gitdb +import pytest from git.util import rmtree, cwd @@ -49,6 +54,15 @@ _logger = logging.getLogger(__name__) + +@dataclass +class PathLikeMock: + path: str + + def __fspath__(self) -> str: + return self.path + + # { Routines @@ -76,7 +90,7 @@ def __init__(self, input_string): self.stdout = io.BytesIO(input_string) self.stderr = io.BytesIO() - def wait(self): + def wait(self, stderr=None): return 0 poll = wait @@ -149,7 +163,7 @@ def repo_creator(self): os.chdir(rw_repo.working_dir) try: return func(self, rw_repo) - except: # noqa: E722 B001 + except: # noqa: E722 _logger.info("Keeping repo after failure: %s", repo_dir) repo_dir = None raise @@ -309,7 +323,7 @@ def remote_repo_creator(self): with cwd(rw_repo.working_dir): try: return func(self, rw_repo, rw_daemon_repo) - except: # noqa: E722 B001 + except: # noqa: E722 _logger.info( "Keeping repos after failure: \n rw_repo_dir: %s \n rw_daemon_repo_dir: %s", rw_repo_dir, @@ -415,9 +429,15 @@ def __init__(self, env_dir, *, with_pip): if with_pip: # The upgrade_deps parameter to venv.create 
is 3.9+ only, so do it this way. - command = [self.python, "-m", "pip", "install", "--upgrade", "pip"] - if sys.version_info < (3, 12): - command.append("setuptools") + command = [ + self.python, + "-m", + "pip", + "install", + "--upgrade", + "pip", + 'setuptools; python_version<"3.12"', + ] subprocess.check_output(command) @property @@ -448,3 +468,27 @@ def _executable(self, basename): if osp.isfile(path) or osp.islink(path): return path raise RuntimeError(f"no regular file or symlink {path!r}") + + +@contextlib.contextmanager +def xfail_if_raises( + condition: bool, + *, + raises: Union[Type[BaseException], Tuple[Type[BaseException], ...]], + reason: str = "", + strict: bool = False, +): + """Approximates the behavior of @pytest.mark.xfail(..., raises=...) as a context + manager that can be used within a test, such as when the condition is complex or has + side effects + + One difference is it will not report XPASS if the test passes, but setting `strict` + simulates it by raising an exception""" + try: + yield + except raises: + if condition: + pytest.xfail(reason) + raise + if strict and condition: + pytest.fail("[XPASS(strict)] " + reason) diff --git a/test/test_autointerrupt.py b/test/test_autointerrupt.py new file mode 100644 index 000000000..645ec402c --- /dev/null +++ b/test/test_autointerrupt.py @@ -0,0 +1,33 @@ +from git.cmd import Git + + +class _DummyProc: + """Minimal stand-in for subprocess.Popen used to exercise AutoInterrupt. + + We deliberately raise AttributeError from terminate() to simulate interpreter + shutdown on Windows where subprocess internals (e.g. subprocess._winapi) may + already be torn down. 
+ """ + + stdin = None + stdout = None + stderr = None + + def poll(self): + return None + + def terminate(self): + raise AttributeError("TerminateProcess") + + def wait(self): # pragma: no cover - should not be reached in this test + raise AssertionError("wait() should not be called if terminate() fails") + + +def test_autointerrupt_terminate_ignores_attributeerror(): + ai = Git.AutoInterrupt(_DummyProc(), args=["git", "rev-list"]) + + # Should not raise, even if terminate() triggers AttributeError. + ai._terminate() + + # Ensure the reference is cleared to avoid repeated attempts. + assert ai.proc is None diff --git a/test/test_clone.py b/test/test_clone.py index 126ef0063..653d50aa3 100644 --- a/test/test_clone.py +++ b/test/test_clone.py @@ -1,12 +1,23 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +import os +import os.path as osp +import pathlib +import sys +import tempfile +from unittest import skip + +from git import GitCommandError, Repo +from git.exc import UnsafeOptionError, UnsafeProtocolError + +from test.lib import TestBase, with_rw_directory, with_rw_repo, PathLikeMock + from pathlib import Path import re import git - -from test.lib import TestBase, with_rw_directory +import pytest class TestClone(TestBase): @@ -29,3 +40,307 @@ def test_checkout_in_non_empty_dir(self, rw_dir): ) else: self.fail("GitCommandError not raised") + + @with_rw_directory + def test_clone_from_pathlib(self, rw_dir): + original_repo = Repo.init(osp.join(rw_dir, "repo")) + + Repo.clone_from(pathlib.Path(original_repo.git_dir), pathlib.Path(rw_dir) / "clone_pathlib") + + @with_rw_directory + def test_clone_from_pathlike(self, rw_dir): + original_repo = Repo.init(osp.join(rw_dir, "repo")) + Repo.clone_from(PathLikeMock(original_repo.git_dir), PathLikeMock(os.path.join(rw_dir, "clone_pathlike"))) + + @with_rw_directory + def test_clone_from_pathlib_withConfig(self, rw_dir): + original_repo = 
Repo.init(osp.join(rw_dir, "repo")) + + cloned = Repo.clone_from( + original_repo.git_dir, + pathlib.Path(rw_dir) / "clone_pathlib_withConfig", + multi_options=[ + "--recurse-submodules=repo", + "--config core.filemode=false", + "--config submodule.repo.update=checkout", + "--config filter.lfs.clean='git-lfs clean -- %f'", + ], + allow_unsafe_options=True, + ) + + self.assertEqual(cloned.config_reader().get_value("submodule", "active"), "repo") + self.assertEqual(cloned.config_reader().get_value("core", "filemode"), False) + self.assertEqual(cloned.config_reader().get_value('submodule "repo"', "update"), "checkout") + self.assertEqual( + cloned.config_reader().get_value('filter "lfs"', "clean"), + "git-lfs clean -- %f", + ) + + def test_clone_from_with_path_contains_unicode(self): + with tempfile.TemporaryDirectory() as tmpdir: + unicode_dir_name = "\u0394" + path_with_unicode = os.path.join(tmpdir, unicode_dir_name) + os.makedirs(path_with_unicode) + + try: + Repo.clone_from( + url=self._small_repo_url(), + to_path=path_with_unicode, + ) + except UnicodeEncodeError: + self.fail("Raised UnicodeEncodeError") + + @with_rw_directory + @skip( + """The referenced repository was removed, and one needs to set up a new + password controlled repo under the org's control.""" + ) + def test_leaking_password_in_clone_logs(self, rw_dir): + password = "fakepassword1234" + try: + Repo.clone_from( + url="https://fakeuser:{}@fakerepo.example.com/testrepo".format(password), + to_path=rw_dir, + ) + except GitCommandError as err: + assert password not in str(err), "The error message '%s' should not contain the password" % err + # Working example from a blank private project. 
+ Repo.clone_from( + url="https://gitlab+deploy-token-392045:mLWhVus7bjLsy8xj8q2V@gitlab.com/mercierm/test_git_python", + to_path=rw_dir, + ) + + @with_rw_repo("HEAD") + def test_clone_unsafe_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + unsafe_options = [ + f"--upload-pack='touch {tmp_file}'", + f"-u 'touch {tmp_file}'", + "--config=protocol.ext.allow=always", + "-c protocol.ext.allow=always", + ] + for unsafe_option in unsafe_options: + with self.assertRaises(UnsafeOptionError): + rw_repo.clone(tmp_dir, multi_options=[unsafe_option]) + assert not tmp_file.exists() + + unsafe_options = [ + {"upload-pack": f"touch {tmp_file}"}, + {"upload_pack": f"touch {tmp_file}"}, + {"u": f"touch {tmp_file}"}, + {"config": "protocol.ext.allow=always"}, + {"c": "protocol.ext.allow=always"}, + ] + for unsafe_option in unsafe_options: + with self.assertRaises(UnsafeOptionError): + rw_repo.clone(tmp_dir, **unsafe_option) + assert not tmp_file.exists() + + @with_rw_repo("HEAD") + def test_clone_unsafe_options_are_checked_after_splitting_multi_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + payload = "--single-branch --config protocol.ext.allow=always" + + with self.assertRaises(UnsafeOptionError): + rw_repo.clone(tmp_dir, multi_options=[payload]) + + @pytest.mark.xfail( + sys.platform == "win32", + reason=( + "File not created. A separate Windows command may be needed. This and the " + "currently passing test test_clone_unsafe_options must be adjusted in the " + "same way. Until then, test_clone_unsafe_options is unreliable on Windows." 
+ ), + raises=AssertionError, + ) + @with_rw_repo("HEAD") + def test_clone_unsafe_options_allowed(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + unsafe_options = [ + f"--upload-pack='touch {tmp_file}'", + f"-u 'touch {tmp_file}'", + ] + for i, unsafe_option in enumerate(unsafe_options): + destination = tmp_dir / str(i) + assert not tmp_file.exists() + # The options will be allowed, but the command will fail. + with self.assertRaises(GitCommandError): + rw_repo.clone(destination, multi_options=[unsafe_option], allow_unsafe_options=True) + assert tmp_file.exists() + tmp_file.unlink() + + unsafe_options = [ + "--config=protocol.ext.allow=always", + "-c protocol.ext.allow=always", + ] + for i, unsafe_option in enumerate(unsafe_options): + destination = tmp_dir / str(i) + assert not destination.exists() + rw_repo.clone(destination, multi_options=[unsafe_option], allow_unsafe_options=True) + assert destination.exists() + + @with_rw_repo("HEAD") + def test_clone_safe_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + options = [ + "--depth=1", + "--single-branch", + "-q", + ] + for option in options: + destination = tmp_dir / option + assert not destination.exists() + rw_repo.clone(destination, multi_options=[option]) + assert destination.exists() + + @with_rw_repo("HEAD") + def test_clone_from_unsafe_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + unsafe_options = [ + f"--upload-pack='touch {tmp_file}'", + f"-u 'touch {tmp_file}'", + "--config=protocol.ext.allow=always", + "-c protocol.ext.allow=always", + ] + for unsafe_option in unsafe_options: + with self.assertRaises(UnsafeOptionError): + Repo.clone_from(rw_repo.working_dir, tmp_dir, multi_options=[unsafe_option]) + assert not tmp_file.exists() + + unsafe_options = [ + {"upload-pack": f"touch {tmp_file}"}, + 
{"upload_pack": f"touch {tmp_file}"}, + {"u": f"touch {tmp_file}"}, + {"config": "protocol.ext.allow=always"}, + {"c": "protocol.ext.allow=always"}, + ] + for unsafe_option in unsafe_options: + with self.assertRaises(UnsafeOptionError): + Repo.clone_from(rw_repo.working_dir, tmp_dir, **unsafe_option) + assert not tmp_file.exists() + + @with_rw_repo("HEAD") + def test_clone_from_unsafe_options_are_checked_after_splitting_multi_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + payload = "--single-branch --config protocol.ext.allow=always" + + with self.assertRaises(UnsafeOptionError): + Repo.clone_from(rw_repo.working_dir, tmp_dir, multi_options=[payload]) + + @pytest.mark.xfail( + sys.platform == "win32", + reason=( + "File not created. A separate Windows command may be needed. This and the " + "currently passing test test_clone_from_unsafe_options must be adjusted in the " + "same way. Until then, test_clone_from_unsafe_options is unreliable on Windows." + ), + raises=AssertionError, + ) + @with_rw_repo("HEAD") + def test_clone_from_unsafe_options_allowed(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + unsafe_options = [ + f"--upload-pack='touch {tmp_file}'", + f"-u 'touch {tmp_file}'", + ] + for i, unsafe_option in enumerate(unsafe_options): + destination = tmp_dir / str(i) + assert not tmp_file.exists() + # The options will be allowed, but the command will fail. 
+ with self.assertRaises(GitCommandError): + Repo.clone_from( + rw_repo.working_dir, destination, multi_options=[unsafe_option], allow_unsafe_options=True + ) + assert tmp_file.exists() + tmp_file.unlink() + + unsafe_options = [ + "--config=protocol.ext.allow=always", + "-c protocol.ext.allow=always", + ] + for i, unsafe_option in enumerate(unsafe_options): + destination = tmp_dir / str(i) + assert not destination.exists() + Repo.clone_from( + rw_repo.working_dir, destination, multi_options=[unsafe_option], allow_unsafe_options=True + ) + assert destination.exists() + + @with_rw_repo("HEAD") + def test_clone_from_safe_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + options = [ + "--depth=1", + "--single-branch", + "-q", + ] + for option in options: + destination = tmp_dir / option + assert not destination.exists() + Repo.clone_from(rw_repo.common_dir, destination, multi_options=[option]) + assert destination.exists() + + def test_clone_from_unsafe_protocol(self): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + urls = [ + f"ext::sh -c touch% {tmp_file}", + "fd::17/foo", + ] + for url in urls: + with self.assertRaises(UnsafeProtocolError): + Repo.clone_from(url, tmp_dir / "repo") + assert not tmp_file.exists() + + def test_clone_from_unsafe_protocol_allowed(self): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + urls = [ + f"ext::sh -c touch% {tmp_file}", + "fd::/foo", + ] + for url in urls: + # The URL will be allowed into the command, but the command will + # fail since we don't have that protocol enabled in the Git config file. 
+ with self.assertRaises(GitCommandError): + Repo.clone_from(url, tmp_dir / "repo", allow_unsafe_protocols=True) + assert not tmp_file.exists() + + def test_clone_from_unsafe_protocol_allowed_and_enabled(self): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = pathlib.Path(tdir) + tmp_file = tmp_dir / "pwn" + urls = [ + f"ext::sh -c touch% {tmp_file}", + ] + allow_ext = [ + "--config=protocol.ext.allow=always", + ] + for url in urls: + # The URL will be allowed into the command, and the protocol is enabled, + # but the command will fail since it can't read from the remote repo. + assert not tmp_file.exists() + with self.assertRaises(GitCommandError): + Repo.clone_from( + url, + tmp_dir / "repo", + multi_options=allow_ext, + allow_unsafe_protocols=True, + allow_unsafe_options=True, + ) + assert tmp_file.exists() + tmp_file.unlink() diff --git a/test/test_commit.py b/test/test_commit.py index 37c66e3e7..b56ad3a18 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -566,3 +566,142 @@ def test_commit_co_authors(self): Actor("test_user_2", "another_user-email@github.com"), Actor("test_user_3", "test_user_3@github.com"), ] + + @with_rw_directory + def test_create_from_tree_with_trailers_dict(self, rw_dir): + """Test that create_from_tree supports adding trailers via a dict.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_trailers_dict")) + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + tree = rw_repo.index.write_tree() + + trailers = {"Issue": "123", "Signed-off-by": "Test User "} + commit = Commit.create_from_tree( + rw_repo, + tree, + "Test commit with trailers", + head=True, + trailers=trailers, + ) + + assert "Issue: 123" in commit.message + assert "Signed-off-by: Test User " in commit.message + assert commit.trailers_dict == { + "Issue": ["123"], + "Signed-off-by": ["Test User "], + } + + @with_rw_directory + def test_create_from_tree_with_trailers_list(self, rw_dir): + """Test that 
create_from_tree supports adding trailers via a list of tuples.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_trailers_list")) + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + tree = rw_repo.index.write_tree() + + trailers = [ + ("Signed-off-by", "Alice "), + ("Signed-off-by", "Bob "), + ("Issue", "456"), + ] + commit = Commit.create_from_tree( + rw_repo, + tree, + "Test commit with multiple trailers", + head=True, + trailers=trailers, + ) + + assert "Signed-off-by: Alice " in commit.message + assert "Signed-off-by: Bob " in commit.message + assert "Issue: 456" in commit.message + assert commit.trailers_dict == { + "Signed-off-by": ["Alice ", "Bob "], + "Issue": ["456"], + } + + @with_rw_directory + def test_create_from_tree_with_non_utf8_trailers(self, rw_dir): + """Test that trailer creation and parsing respect the configured commit encoding.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_trailers_non_utf8")) + with rw_repo.config_writer() as writer: + writer.set_value("i18n", "commitencoding", "ISO-8859-1") + + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + tree = rw_repo.index.write_tree() + + commit = Commit.create_from_tree( + rw_repo, + tree, + "RÊsumÊ", + head=True, + trailers={"Reviewed-by": "AndrÊ "}, + ) + + assert commit.encoding == "ISO-8859-1" + assert "RÊsumÊ" in commit.message + assert "Reviewed-by: AndrÊ " in commit.message + assert commit.trailers_list == [("Reviewed-by", "AndrÊ ")] + + @with_rw_directory + def test_trailers_list_with_non_utf8_message_bytes(self, rw_dir): + """Test that trailer parsing handles non-UTF-8 commit message bytes.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_trailers_non_utf8_bytes")) + with rw_repo.config_writer() as writer: + writer.set_value("i18n", "commitencoding", "ISO-8859-1") + + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + tree = 
rw_repo.index.write_tree() + + commit = Commit.create_from_tree( + rw_repo, + tree, + "RÊsumÊ", + head=True, + trailers={"Reviewed-by": "AndrÊ "}, + ) + + bytes_commit = Commit( + rw_repo, + commit.binsha, + message=commit.message.encode(commit.encoding), + encoding=commit.encoding, + ) + + assert bytes_commit.trailers_list == [("Reviewed-by", "AndrÊ ")] + + def test_interpret_trailers_encodes_before_launching_process(self): + """Test that encoding failures happen before spawning interpret-trailers.""" + repo = Mock() + repo.git = Mock() + repo.git.GIT_PYTHON_GIT_EXECUTABLE = "git" + + with self.assertRaises(UnicodeEncodeError): + Commit._interpret_trailers(repo, "Euro: â‚Ŧ", ["--parse"], encoding="ISO-8859-1") + + repo.git.execute.assert_not_called() + + @with_rw_directory + def test_index_commit_with_trailers(self, rw_dir): + """Test that IndexFile.commit() supports adding trailers.""" + rw_repo = Repo.init(osp.join(rw_dir, "test_index_trailers")) + path = osp.join(str(rw_repo.working_tree_dir), "hello.txt") + touch(path) + rw_repo.index.add([path]) + + trailers = {"Reviewed-by": "Reviewer "} + commit = rw_repo.index.commit( + "Test index commit with trailers", + trailers=trailers, + ) + + assert "Reviewed-by: Reviewer " in commit.message + assert commit.trailers_dict == { + "Reviewed-by": ["Reviewer "], + } diff --git a/test/test_config.py b/test/test_config.py index 92997422d..3ddaf0a4b 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -150,6 +150,70 @@ def test_config_value_with_trailing_new_line(self): git_config = GitConfigParser(config_file) git_config.read() # This should not throw an exception + @with_rw_directory + def test_set_value_rejects_config_injection(self, rw_dir): + config_path = osp.join(rw_dir, "config") + payload = "foo\n[core]\nhooksPath=/tmp/hooks" + + with GitConfigParser(config_path, read_only=False) as git_config: + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value("user", "name", payload) + + 
with GitConfigParser(config_path, read_only=True) as git_config: + self.assertFalse(git_config.has_section("user")) + self.assertFalse(git_config.has_section("core")) + + @with_rw_directory + def test_set_value_rejects_unsafe_section_and_option_names(self, rw_dir): + config_path = osp.join(rw_dir, "config") + bad_keys = ("user]\n[core", "user]\r[core", "user]\x00[core") + + with GitConfigParser(config_path, read_only=False) as git_config: + git_config.add_section("user") + for bad_key in bad_keys: + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.add_section(bad_key) + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set(bad_key, "hooksPath", "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set("user", bad_key, "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value(bad_key, "hooksPath", "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value("user", bad_key, "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.add_value(bad_key, "hooksPath", "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.add_value("user", bad_key, "/tmp/hooks") + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.rename_section("user", bad_key) + + git_config.set_value("user", "name", "safe") + + with GitConfigParser(config_path, read_only=True) as git_config: + self.assertEqual(git_config.get_value("user", "name"), "safe") + self.assertFalse(git_config.has_section("core")) + + @with_rw_directory + def test_set_and_add_value_reject_unsafe_value_characters(self, rw_dir): + config_path = osp.join(rw_dir, "config") + bad_values = ("foo\rbar", "foo\nbar", "foo\x00bar", b"foo\nbar") + + with GitConfigParser(config_path, read_only=False) as git_config: + git_config.add_section("user") + for bad_value in bad_values: + with pytest.raises(ValueError, match="CR, LF, or 
NUL"): + git_config.set("user", "name", bad_value) + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.set_value("user", "name", bad_value) + with pytest.raises(ValueError, match="CR, LF, or NUL"): + git_config.add_value("user", "name", bad_value) + + git_config.set_value("user", "name", "safe") + + with GitConfigParser(config_path, read_only=True) as git_config: + self.assertEqual(git_config.get_value("user", "name"), "safe") + def test_base(self): path_repo = fixture_path("git_config") path_global = fixture_path("git_config_global") @@ -246,6 +310,43 @@ def check_test_value(cr, value): with GitConfigParser(fpa, read_only=True) as cr: check_test_value(cr, tv) + @with_rw_directory + def test_multiple_include_paths_with_same_key(self, rw_dir): + """Test that multiple 'path' entries under [include] are all respected. + + Regression test for https://github.com/gitpython-developers/GitPython/issues/2099. + Git config allows multiple ``path`` values under ``[include]``, e.g.:: + + [include] + path = file1 + path = file2 + + Previously only one of these was included because _OMD.items() returns + only the last value for each key. + """ + # Create two config files to be included. + fp_inc1 = osp.join(rw_dir, "inc1.cfg") + fp_inc2 = osp.join(rw_dir, "inc2.cfg") + fp_main = osp.join(rw_dir, "main.cfg") + + with GitConfigParser(fp_inc1, read_only=False) as cw: + cw.set_value("user", "name", "from-inc1") + + with GitConfigParser(fp_inc2, read_only=False) as cw: + cw.set_value("core", "bar", "from-inc2") + + # Write a config with two path entries under a single [include] section. + # We write it manually because set_value would overwrite the key. + with open(fp_main, "w") as f: + f.write("[include]\n") + f.write(f"\tpath = {fp_inc1}\n") + f.write(f"\tpath = {fp_inc2}\n") + + with GitConfigParser(fp_main, read_only=True) as cr: + # Both included files should be loaded. 
+ assert cr.get_value("user", "name") == "from-inc1" + assert cr.get_value("core", "bar") == "from-inc2" + @pytest.mark.xfail( sys.platform == "win32", reason='Second config._has_includes() assertion fails (for "config is included if path is matching git_dir")', @@ -373,6 +474,41 @@ def test_conditional_includes_from_branch_name_error(self, rw_dir): assert not config._has_includes() assert config._included_paths() == [] + @with_rw_directory + def test_conditional_includes_remote_url(self, rw_dir): + # Initiate mocked repository. + repo = mock.Mock() + repo.remotes = [mock.Mock(url="https://github.com/foo/repo")] + + # Initiate config files. + path1 = osp.join(rw_dir, "config1") + path2 = osp.join(rw_dir, "config2") + template = '[includeIf "hasconfig:remote.*.url:{}"]\n path={}\n' + + # Ensure that config with hasconfig and full url is correct. + with open(path1, "w") as stream: + stream.write(template.format("https://github.com/foo/repo", path2)) + + with GitConfigParser(path1, repo=repo) as config: + assert config._has_includes() + assert config._included_paths() == [("path", path2)] + + # Ensure that config with hasconfig and incorrect url is incorrect. + with open(path1, "w") as stream: + stream.write(template.format("incorrect", path2)) + + with GitConfigParser(path1, repo=repo) as config: + assert not config._has_includes() + assert config._included_paths() == [] + + # Ensure that config with hasconfig and url using glob pattern is correct. 
+ with open(path1, "w") as stream: + stream.write(template.format("**/**github.com*/**", path2)) + + with GitConfigParser(path1, repo=repo) as config: + assert config._has_includes() + assert config._included_paths() == [("path", path2)] + def test_rename(self): file_obj = self._to_memcache(fixture_path("git_config")) with GitConfigParser(file_obj, read_only=False, merge_includes=False) as cw: @@ -391,13 +527,17 @@ def test_complex_aliases(self): with GitConfigParser(file_obj, read_only=False) as w_config: self.assertEqual( w_config.get("alias", "rbi"), - '"!g() { git rebase -i origin/${1:-master} ; } ; g"', + "!g() { git rebase -i origin/${1:-master} ; } ; g", ) self.assertEqual( file_obj.getvalue(), self._to_memcache(fixture_path(".gitconfig")).getvalue(), ) + def test_config_with_extra_whitespace(self): + cr = GitConfigParser(fixture_path("git_config_with_extra_whitespace"), read_only=True) + self.assertEqual(cr.get("init", "defaultBranch"), "trunk") + def test_empty_config_value(self): cr = GitConfigParser(fixture_path("git_config_with_empty_value"), read_only=True) @@ -406,6 +546,44 @@ def test_empty_config_value(self): with self.assertRaises(cp.NoOptionError): cr.get_value("color", "ui") + def test_config_with_quotes(self): + cr = GitConfigParser(fixture_path("git_config_with_quotes"), read_only=True) + + self.assertEqual(cr.get("user", "name"), "Cody Veal") + self.assertEqual(cr.get("user", "email"), "cveal05@gmail.com") + + def test_config_with_empty_quotes(self): + cr = GitConfigParser(fixture_path("git_config_with_empty_quotes"), read_only=True) + self.assertEqual(cr.get("core", "filemode"), "", "quotes can form a literal empty string as value") + + def test_config_with_quotes_with_literal_whitespace(self): + cr = GitConfigParser(fixture_path("git_config_with_quotes_whitespace_inside"), read_only=True) + self.assertEqual(cr.get("core", "commentString"), "# ") + + def test_config_with_quotes_with_whitespace_outside_value(self): + cr = 
GitConfigParser(fixture_path("git_config_with_quotes_whitespace_outside"), read_only=True) + self.assertEqual(cr.get("init", "defaultBranch"), "trunk") + + def test_config_with_quotes_containing_escapes(self): + """For now just suppress quote removal. But it would be good to interpret most of these.""" + cr = GitConfigParser(fixture_path("git_config_with_quotes_escapes"), read_only=True) + + # These can eventually be supported by substituting the represented character. + self.assertEqual(cr.get("custom", "hasnewline"), R'"first\nsecond"') + self.assertEqual(cr.get("custom", "hasbackslash"), R'"foo\\bar"') + self.assertEqual(cr.get("custom", "hasquote"), R'"ab\"cd"') + self.assertEqual(cr.get("custom", "hastrailingbackslash"), R'"word\\"') + self.assertEqual(cr.get("custom", "hasunrecognized"), R'"p\qrs"') + + # It is less obvious whether and what to eventually do with this. + self.assertEqual(cr.get("custom", "hasunescapedquotes"), '"ab"cd"e"') + + # Cases where quote removal is clearly safe should happen even after those. + self.assertEqual(cr.get("custom", "ordinary"), "hello world") + + # Cases without quotes should still parse correctly even after those, too. 
+ self.assertEqual(cr.get("custom", "unquoted"), "good evening") + def test_get_values_works_without_requiring_any_other_calls_first(self): file_obj = self._to_memcache(fixture_path("git_config_multiple")) cr = GitConfigParser(file_obj, read_only=True) diff --git a/test/test_docs.py b/test/test_docs.py index cc0bbf26a..c3426a807 100644 --- a/test/test_docs.py +++ b/test/test_docs.py @@ -6,9 +6,6 @@ import gc import os import os.path -import sys - -import pytest from test.lib import TestBase from test.lib.helper import with_rw_directory @@ -478,11 +475,6 @@ def test_references_and_objects(self, rw_dir): repo.git.clear_cache() - @pytest.mark.xfail( - sys.platform == "cygwin", - reason="Cygwin GitPython can't find SHA for submodule", - raises=ValueError, - ) def test_submodules(self): # [1-test_submodules] repo = self.rorepo diff --git a/test/test_fixture_health.py b/test/test_fixture_health.py new file mode 100644 index 000000000..b18d5e8f9 --- /dev/null +++ b/test/test_fixture_health.py @@ -0,0 +1,131 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Verify that fixture directories are usable by git. + +If a fixture directory is missing, isn't an initialized git repository, +or is rejected by git for "dubious ownership", dependent tests +elsewhere in the suite fail in opaque ways. The checks here name the +preconditions directly so a misconfigured environment is recognizable +from the test output rather than from a cascade of unrelated-seeming +failures. + +These tests do not exercise GitPython's production code. They verify +the conditions under which production code is exercised are valid. +""" + +import subprocess +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent + +# Directories git must trust for the test suite to operate normally. 
The +# current set is the GitPython working tree plus the working trees of its +# gitdb submodule and the smmap submodule nested inside gitdb. New entries +# should be added here whenever the test suite gains a dependency on git +# accepting another directory. +FIXTURE_DIRS = [ + pytest.param(REPO_ROOT, id="repo_root"), + pytest.param(REPO_ROOT / "git" / "ext" / "gitdb", id="gitdb"), + pytest.param( + REPO_ROOT / "git" / "ext" / "gitdb" / "gitdb" / "ext" / "smmap", + id="smmap", + ), +] + +# Submodule working trees that must be present and initialized for the +# test suite to operate normally: gitdb at `git/ext/gitdb`, and smmap +# nested inside gitdb at `git/ext/gitdb/gitdb/ext/smmap`. The paths +# below are anchored at REPO_ROOT (the GitPython source tree), not at +# any rorepo redirection target. +SUBMODULE_DIRS = [ + pytest.param(REPO_ROOT / "git" / "ext" / "gitdb", id="gitdb"), + pytest.param( + REPO_ROOT / "git" / "ext" / "gitdb" / "gitdb" / "ext" / "smmap", + id="smmap", + ), +] + + +@pytest.mark.parametrize("fixture_dir", FIXTURE_DIRS) +def test_fixture_dir_is_trusted_by_git(fixture_dir: Path) -> None: + """git accepts ``fixture_dir`` as its own repository owned by a trusted user. + + Run ``git -C rev-parse --show-toplevel`` and assert it + succeeds and reports ``fixture_dir`` itself as the toplevel. Failure + typically means the directory's on-disk ownership doesn't match the + running user and the CI workflow's ``safe.directory`` list is missing + an entry that would override the check. 
+ """ + if not fixture_dir.exists(): + pytest.skip(f"{fixture_dir} not present (run `git submodule update --init --recursive` from the repo root)") + if not (fixture_dir / ".git").exists(): + pytest.skip( + f"{fixture_dir} has no .git marker " + "(submodule not initialized; run " + "`git submodule update --init --recursive` from the repo root)" + ) + try: + result = subprocess.run( + ["git", "-C", str(fixture_dir), "rev-parse", "--show-toplevel"], + capture_output=True, + text=True, + check=False, + ) + except FileNotFoundError: + pytest.skip("git is not installed or not on PATH") + assert result.returncode == 0, ( + f"git refuses to operate in {fixture_dir}.\n" + f"stderr: {result.stderr.strip()}\n" + "The directory's owner doesn't match the running user and no " + "`safe.directory` entry overrides the check. On CI, the " + "workflow's `safe.directory` list typically needs an entry for " + "this path. Locally, this is unexpected and usually indicates " + "an ownership problem worth investigating." + ) + reported = Path(result.stdout.strip()) + assert reported.samefile(fixture_dir), ( + f"git reports the toplevel as {reported}, " + f"not as {fixture_dir} itself. " + "This usually means the directory is not an initialized git " + "repository (its `.git` marker may be stale or pointing elsewhere)." + ) + + +@pytest.mark.parametrize("submodule_dir", SUBMODULE_DIRS) +def test_required_submodule_is_initialized(submodule_dir: Path) -> None: + """The submodule's working tree is present and initialized. + + Failure means the source tree is a git clone but the submodule's + working tree hasn't been populated. Skipped when the source tree + itself isn't a git clone (e.g. an extracted release tarball), since + ``git submodule update`` cannot operate there; setups that handle + submodules in a separately-prepared tree (via + ``GIT_PYTHON_TEST_GIT_REPO_BASE``) are exempted from this check. 
+ """ + if not (REPO_ROOT / ".git").exists(): + pytest.skip( + "Source tree is not a git clone (no .git in REPO_ROOT); submodules " + "cannot be initialized via `git submodule update` here. Setups " + "that prepare submodules in a separately-pointed tree (via " + "GIT_PYTHON_TEST_GIT_REPO_BASE) are exempted from this check." + ) + # The assertion messages below recommend `git submodule update --init + # --recursive` rather than `init-tests-after-clone.sh`, even though the + # latter is the documented entry point for first-time test setup. Two + # reasons: the script performs `git reset --hard` operations that can + # destroy local work, and #1713 showed the script itself can carry + # submodule-init regressions, in which case recommending it would lead + # developers in a circle. The direct git command is a safe minimal fix + # for this test's specific failure mode and bypasses any such regression. + assert submodule_dir.is_dir(), ( + f"Submodule working tree missing: {submodule_dir}.\n" + "Run `git submodule update --init --recursive` from the repo root." + ) + assert (submodule_dir / ".git").exists(), ( + f"Submodule directory exists but has no .git marker: {submodule_dir}.\n" + "The submodule hasn't been initialized. " + "Run `git submodule update --init --recursive` from the repo root." + ) diff --git a/test/test_fun.py b/test/test_fun.py index b8593b400..a456b8aab 100644 --- a/test/test_fun.py +++ b/test/test_fun.py @@ -243,6 +243,7 @@ def test_tree_traversal(self): B_old = self.rorepo.tree("1f66cfbbce58b4b552b041707a12d437cc5f400a") # old base tree # Two very different trees. + entries = traverse_trees_recursive(odb, [B_old.binsha, H.binsha], "") self._assert_tree_entries(entries, 2) @@ -251,7 +252,10 @@ def test_tree_traversal(self): self._assert_tree_entries(oentries, 2) # Single tree. 
- is_no_tree = lambda i, d: i.type != "tree" + + def is_no_tree(i, _d): + return i.type != "tree" + entries = traverse_trees_recursive(odb, [B.binsha], "") assert len(entries) == len(list(B.traverse(predicate=is_no_tree))) self._assert_tree_entries(entries, 1) diff --git a/test/test_git.py b/test/test_git.py index 94e68ecf0..24b60af9d 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -6,6 +6,7 @@ import contextlib import gc import inspect +import io import logging import os import os.path as osp @@ -26,6 +27,7 @@ import ddt from git import Git, GitCommandError, GitCommandNotFound, Repo, cmd, refresh +from git.exc import UnsafeOptionError from git.util import cwd, finalize_process from test.lib import TestBase, fixture_path, with_rw_directory @@ -153,6 +155,21 @@ def test_it_transforms_kwargs_into_git_command_arguments(self): res = self.git.transform_kwargs(**{"s": True, "t": True}) self.assertEqual({"-s", "-t"}, set(res)) + def test_check_unsafe_options_normalizes_kwargs(self): + cases = [ + (["upload_pack"], ["--upload-pack"]), + (["receive_pack"], ["--receive-pack"]), + (["exec"], ["--exec"]), + (["u"], ["-u"]), + (["c"], ["-c"]), + (["--upload-pack=/tmp/helper"], ["--upload-pack"]), + (["--config core.filemode=false"], ["--config"]), + ] + + for options, unsafe_options in cases: + with self.assertRaises(UnsafeOptionError): + Git.check_unsafe_options(options=options, unsafe_options=unsafe_options) + _shell_cases = ( # value_in_call, value_from_class, expected_popen_arg (None, False, False), @@ -201,6 +218,25 @@ def test_it_logs_istream_summary_for_stdin(self, case): def test_it_executes_git_and_returns_result(self): self.assertRegex(self.git.execute(["git", "version"]), r"^git version [\d\.]{2}.*$") + def test_it_output_stream_with_stdout_is_false(self): + temp_stream = io.BytesIO() + self.git.execute( + ["git", "version"], + output_stream=temp_stream, + with_stdout=False, + ) + self.assertEqual(temp_stream.tell(), 0) + + def 
test_it_executes_git_without_stdout_redirect(self): + returncode, stdout, stderr = self.git.execute( + ["git", "version"], + with_extended_output=True, + with_stdout=False, + ) + self.assertEqual(returncode, 0) + self.assertIsNone(stdout) + self.assertIsNotNone(stderr) + @ddt.data( # chdir_to_repo, shell, command, use_shell_impostor (False, False, ["git", "version"], False), @@ -747,7 +783,7 @@ def test_environment(self, rw_dir): path = osp.join(rw_dir, "failing-script.sh") with open(path, "wt") as stream: - stream.write("#!/usr/bin/env sh\n" "echo FOO\n") + stream.write("#!/usr/bin/env sh\necho FOO\n") os.chmod(path, 0o777) rw_repo = Repo.init(osp.join(rw_dir, "repo")) @@ -762,17 +798,18 @@ def test_environment(self, rw_dir): def test_handle_process_output(self): from git.cmd import handle_process_output, safer_popen - line_count = 5002 - count = [None, 0, 0] + expected_line_count = 5002 + actual_lines = [None, [], []] - def counter_stdout(line): - count[1] += 1 + def stdout_handler(line): + actual_lines[1].append(line) - def counter_stderr(line): - count[2] += 1 + def stderr_handler(line): + actual_lines[2].append(line) cmdline = [ sys.executable, + "-S", # Keep any `CoverageWarning` messages out of the subprocess stderr. 
fixture_path("cat_file.py"), str(fixture_path("issue-301_stderr")), ] @@ -784,10 +821,10 @@ def counter_stderr(line): shell=False, ) - handle_process_output(proc, counter_stdout, counter_stderr, finalize_process) + handle_process_output(proc, stdout_handler, stderr_handler, finalize_process) - self.assertEqual(count[1], line_count) - self.assertEqual(count[2], line_count) + self.assertEqual(len(actual_lines[1]), expected_line_count, repr(actual_lines[1])) + self.assertEqual(len(actual_lines[2]), expected_line_count, repr(actual_lines[2])) def test_execute_kwargs_set_agrees_with_method(self): parameter_names = inspect.signature(cmd.Git.execute).parameters.keys() diff --git a/test/test_index.py b/test/test_index.py index c586a0b5a..3be750dbb 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -16,13 +16,15 @@ import subprocess import sys import tempfile +from unittest import mock from gitdb.base import IStream import ddt import pytest -from git import BlobFilter, Diff, Git, IndexFile, Object, Repo, Tree +from git import BlobFilter, Diff, Git, IndexFile, NULL_TREE, Object, Repo, Tree +from git.diff import NULL_TREE_SHA from git.exc import ( CheckoutError, GitCommandError, @@ -36,14 +38,8 @@ from git.objects import Blob from git.util import Actor, cwd, hex_to_bin, rmtree -from test.lib import ( - TestBase, - VirtualEnvironment, - fixture, - fixture_path, - with_rw_directory, - with_rw_repo, -) +from test.lib import TestBase, VirtualEnvironment, fixture, fixture_path, with_rw_directory, with_rw_repo, PathLikeMock +from test.lib.helper import xfail_if_raises HOOKS_SHEBANG = "#!/usr/bin/env sh\n" @@ -178,6 +174,19 @@ def _decode(stdout): _win_bash_status = WinBashStatus.check() +def _windows_supports_symlinks(): + if sys.platform != "win32": + return False + + with tempfile.TemporaryDirectory(prefix="gitpython-symlink-check-") as temp_dir: + link_path = osp.join(temp_dir, "link") + try: + os.symlink("missing-target", link_path) + except (NotImplementedError, 
OSError): + return False + return S_ISLNK(os.lstat(link_path)[ST_MODE]) + + def _make_hook(git_dir, name, content, make_exec=True): """A helper to create a hook""" hp = hook_path(name, git_dir) @@ -329,7 +338,10 @@ def test_index_file_from_tree(self, rw_repo): assert len([e for e in three_way_index.entries.values() if e.stage != 0]) # ITERATE BLOBS - merge_required = lambda t: t[0] != 0 + + def merge_required(t): + return t[0] != 0 + merge_blobs = list(three_way_index.iter_blobs(merge_required)) assert merge_blobs assert merge_blobs[0][0] in (1, 2, 3) @@ -544,6 +556,39 @@ def test_index_file_diffing(self, rw_repo): rval = index.checkout("lib") assert len(list(rval)) > 1 + @with_rw_directory + def test_index_file_diff_null_tree_with_initial_index(self, rw_dir): + repo = Repo.init(rw_dir) + filename = ".gitkeep" + file_path = osp.join(repo.working_tree_dir, filename) + with open(file_path, "w") as fp: + fp.write("# Initial file\n") + + index = repo.index + index.add([filename]) + index.write() + + index = IndexFile(repo) + self.assertEqual(len(index.diff(None)), 0) + + diff = index.diff(NULL_TREE) + self.assertEqual(len(diff), 1) + self.assertEqual(diff[0].change_type, "A") + assert diff[0].new_file + self.assertEqual(diff[0].b_path, filename) + + self.assertEqual(len(index.diff(NULL_TREE, paths=filename)), 1) + self.assertEqual(len(index.diff(NULL_TREE_SHA, paths=filename)), 1) + self.assertEqual(len(index.diff(NULL_TREE, paths="missing")), 0) + + patch = index.diff(NULL_TREE, create_patch=True) + self.assertEqual(len(patch), 1) + self.assertIn(b"+# Initial file", patch[0].diff) + + with self.assertRaises(GitCommandError) as exc_info: + index.diff(NULL_TREE, bogus_option=True) + self.assertIn("usage: git diff", exc_info.exception.stderr) + def _count_existing(self, repo, files): """Return count of files that actually exist in the repository directory.""" existing = 0 @@ -555,365 +600,369 @@ def _count_existing(self, repo, files): # END num existing helper - 
@pytest.mark.xfail( - sys.platform == "win32" and Git().config("core.symlinks") == "true", - reason="Assumes symlinks are not created on Windows and opens a symlink to a nonexistent target.", - raises=FileNotFoundError, - ) @with_rw_repo("0.1.6") def test_index_mutation(self, rw_repo): - index = rw_repo.index - num_entries = len(index.entries) - cur_head = rw_repo.head - - uname = "Thomas Müller" - umail = "sd@company.com" - with rw_repo.config_writer() as writer: - writer.set_value("user", "name", uname) - writer.set_value("user", "email", umail) - self.assertEqual(writer.get_value("user", "name"), uname) - - # Remove all of the files, provide a wild mix of paths, BaseIndexEntries, - # IndexEntries. - def mixed_iterator(): - count = 0 - for entry in index.entries.values(): - type_id = count % 5 - if type_id == 0: # path (str) - yield entry.path - elif type_id == 1: # path (PathLike) - yield Path(entry.path) - elif type_id == 2: # blob - yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) - elif type_id == 3: # BaseIndexEntry - yield BaseIndexEntry(entry[:4]) - elif type_id == 4: # IndexEntry - yield entry - else: - raise AssertionError("Invalid Type") - count += 1 - # END for each entry - - # END mixed iterator - deleted_files = index.remove(mixed_iterator(), working_tree=False) - assert deleted_files - self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) - self.assertEqual(len(index.entries), 0) - - # Reset the index to undo our changes. - index.reset() - self.assertEqual(len(index.entries), num_entries) - - # Remove with working copy. - deleted_files = index.remove(mixed_iterator(), working_tree=True) - assert deleted_files - self.assertEqual(self._count_existing(rw_repo, deleted_files), 0) - - # Reset everything. - index.reset(working_tree=True) - self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) - - # Invalid type. - self.assertRaises(TypeError, index.remove, [1]) - - # Absolute path. 
- deleted_files = index.remove([osp.join(rw_repo.working_tree_dir, "lib")], r=True) - assert len(deleted_files) > 1 - self.assertRaises(ValueError, index.remove, ["/doesnt/exists"]) - - # TEST COMMITTING - # Commit changed index. - cur_commit = cur_head.commit - commit_message = "commit default head by Frèderic Çaufl€" - - new_commit = index.commit(commit_message, head=False) - assert cur_commit != new_commit - self.assertEqual(new_commit.author.name, uname) - self.assertEqual(new_commit.author.email, umail) - self.assertEqual(new_commit.committer.name, uname) - self.assertEqual(new_commit.committer.email, umail) - self.assertEqual(new_commit.message, commit_message) - self.assertEqual(new_commit.parents[0], cur_commit) - self.assertEqual(len(new_commit.parents), 1) - self.assertEqual(cur_head.commit, cur_commit) - - # Commit with other actor. - cur_commit = cur_head.commit - - my_author = Actor("Frèderic Çaufl€", "author@example.com") - my_committer = Actor("Committing Frèderic Çaufl€", "committer@example.com") - commit_actor = index.commit(commit_message, author=my_author, committer=my_committer) - assert cur_commit != commit_actor - self.assertEqual(commit_actor.author.name, "Frèderic Çaufl€") - self.assertEqual(commit_actor.author.email, "author@example.com") - self.assertEqual(commit_actor.committer.name, "Committing Frèderic Çaufl€") - self.assertEqual(commit_actor.committer.email, "committer@example.com") - self.assertEqual(commit_actor.message, commit_message) - self.assertEqual(commit_actor.parents[0], cur_commit) - self.assertEqual(len(new_commit.parents), 1) - self.assertEqual(cur_head.commit, commit_actor) - self.assertEqual(cur_head.log()[-1].actor, my_committer) - - # Commit with author_date and commit_date. 
- cur_commit = cur_head.commit - commit_message = "commit with dates by Avinash Sajjanshetty" - - new_commit = index.commit( - commit_message, - author_date="2006-04-07T22:13:13", - commit_date="2005-04-07T22:13:13", - ) - assert cur_commit != new_commit - print(new_commit.authored_date, new_commit.committed_date) - self.assertEqual(new_commit.message, commit_message) - self.assertEqual(new_commit.authored_date, 1144447993) - self.assertEqual(new_commit.committed_date, 1112911993) - - # Same index, no parents. - commit_message = "index without parents" - commit_no_parents = index.commit(commit_message, parent_commits=[], head=True) - self.assertEqual(commit_no_parents.message, commit_message) - self.assertEqual(len(commit_no_parents.parents), 0) - self.assertEqual(cur_head.commit, commit_no_parents) - - # same index, multiple parents. - commit_message = "Index with multiple parents\n commit with another line" - commit_multi_parent = index.commit(commit_message, parent_commits=(commit_no_parents, new_commit)) - self.assertEqual(commit_multi_parent.message, commit_message) - self.assertEqual(len(commit_multi_parent.parents), 2) - self.assertEqual(commit_multi_parent.parents[0], commit_no_parents) - self.assertEqual(commit_multi_parent.parents[1], new_commit) - self.assertEqual(cur_head.commit, commit_multi_parent) - - # Re-add all files in lib. - # Get the lib folder back on disk, but get an index without it. - index.reset(new_commit.parents[0], working_tree=True).reset(new_commit, working_tree=False) - lib_file_path = osp.join("lib", "git", "__init__.py") - assert (lib_file_path, 0) not in index.entries - assert osp.isfile(osp.join(rw_repo.working_tree_dir, lib_file_path)) - - # Directory. - entries = index.add(["lib"], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert len(entries) > 1 - - # Glob. 
- entries = index.reset(new_commit).add([osp.join("lib", "git", "*.py")], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - self.assertEqual(len(entries), 14) - - # Same file. - entries = index.reset(new_commit).add( - [osp.join(rw_repo.working_tree_dir, "lib", "git", "head.py")] * 2, - fprogress=self._fprogress_add, - ) - self._assert_entries(entries) - self.assertEqual(entries[0].mode & 0o644, 0o644) - # Would fail, test is too primitive to handle this case. - # self._assert_fprogress(entries) - self._reset_progress() - self.assertEqual(len(entries), 2) - - # Missing path. - self.assertRaises(OSError, index.reset(new_commit).add, ["doesnt/exist/must/raise"]) - - # Blob from older revision overrides current index revision. - old_blob = new_commit.parents[0].tree.blobs[0] - entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - self.assertEqual(index.entries[(old_blob.path, 0)].hexsha, old_blob.hexsha) - self.assertEqual(len(entries), 1) - - # Mode 0 not allowed. - null_hex_sha = Diff.NULL_HEX_SHA - null_bin_sha = b"\0" * 20 - self.assertRaises( - ValueError, - index.reset(new_commit).add, - [BaseIndexEntry((0, null_bin_sha, 0, "doesntmatter"))], - ) - - # Add new file. - new_file_relapath = "my_new_file" - self._make_file(new_file_relapath, "hello world", rw_repo) - entries = index.reset(new_commit).add( - [BaseIndexEntry((0o10644, null_bin_sha, 0, new_file_relapath))], - fprogress=self._fprogress_add, - ) - self._assert_entries(entries) - self._assert_fprogress(entries) - self.assertEqual(len(entries), 1) - self.assertNotEqual(entries[0].hexsha, null_hex_sha) - - # Add symlink. 
- if sys.platform != "win32": - for target in ("/etc/nonexisting", "/etc/passwd", "/etc"): - basename = "my_real_symlink" - - link_file = osp.join(rw_repo.working_tree_dir, basename) - os.symlink(target, link_file) - entries = index.reset(new_commit).add([link_file], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - self.assertEqual(len(entries), 1) - self.assertTrue(S_ISLNK(entries[0].mode)) - self.assertTrue(S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode)) - - # We expect only the target to be written. - self.assertEqual( - index.repo.odb.stream(entries[0].binsha).read().decode("ascii"), - target, - ) - - os.remove(link_file) - # END for each target - # END real symlink test - - # Add fake symlink and assure it checks out as a symlink. - fake_symlink_relapath = "my_fake_symlink" - link_target = "/etc/that" - fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) - fake_entry = BaseIndexEntry((0o120000, null_bin_sha, 0, fake_symlink_relapath)) - entries = index.reset(new_commit).add([fake_entry], fprogress=self._fprogress_add) - self._assert_entries(entries) - self._assert_fprogress(entries) - assert entries[0].hexsha != null_hex_sha - self.assertEqual(len(entries), 1) - self.assertTrue(S_ISLNK(entries[0].mode)) - - # Check that this also works with an alternate method. - full_index_entry = IndexEntry.from_base(BaseIndexEntry((0o120000, entries[0].binsha, 0, entries[0].path))) - entry_key = index.entry_key(full_index_entry) - index.reset(new_commit) - - assert entry_key not in index.entries - index.entries[entry_key] = full_index_entry - index.write() - index.update() # Force reread of entries. 
new_entry = index.entries[entry_key] - assert S_ISLNK(new_entry.mode) + with xfail_if_raises( + sys.platform == "win32" and (Git().config("core.symlinks") == "true" or _windows_supports_symlinks()), + raises=(FileNotFoundError, GitCommandError), + reason="Assumes symlinks are not created on Windows and opens a symlink to a nonexistent target.", + ): + index = rw_repo.index + num_entries = len(index.entries) + cur_head = rw_repo.head + + uname = "Thomas Müller" + umail = "sd@company.com" + with rw_repo.config_writer() as writer: + writer.set_value("user", "name", uname) + writer.set_value("user", "email", umail) + self.assertEqual(writer.get_value("user", "name"), uname) + + # Remove all of the files, provide a wild mix of paths, BaseIndexEntries, + # IndexEntries. + def mixed_iterator(): + count = 0 + for entry in index.entries.values(): + type_id = count % 7 + if type_id == 0: # path (str) + yield entry.path + elif type_id == 1: # path (PathLike) + yield Path(entry.path) + elif type_id == 2: # path mock (PathLike) + yield PathLikeMock(entry.path) + elif type_id == 3: # path mock in a blob + yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) + elif type_id == 4: # blob + yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) + elif type_id == 5: # BaseIndexEntry + yield BaseIndexEntry(entry[:4]) + elif type_id == 6: # IndexEntry + yield entry + else: + raise AssertionError("Invalid Type") + count += 1 + # END for each entry + + # END mixed iterator + deleted_files = index.remove(mixed_iterator(), working_tree=False) + assert deleted_files + self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) + self.assertEqual(len(index.entries), 0) + + # Reset the index to undo our changes. + index.reset() + self.assertEqual(len(index.entries), num_entries) + + # Remove with working copy. 
+ deleted_files = index.remove(mixed_iterator(), working_tree=True) + assert deleted_files + self.assertEqual(self._count_existing(rw_repo, deleted_files), 0) + + # Reset everything. + index.reset(working_tree=True) + self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) + + # Invalid type. + self.assertRaises(TypeError, index.remove, [1]) + + # Absolute path. + deleted_files = index.remove([osp.join(rw_repo.working_tree_dir, "lib")], r=True) + assert len(deleted_files) > 1 + self.assertRaises(ValueError, index.remove, ["/doesnt/exists"]) + + # TEST COMMITTING + # Commit changed index. + cur_commit = cur_head.commit + commit_message = "commit default head by Frèderic Çaufl€" + + new_commit = index.commit(commit_message, head=False) + assert cur_commit != new_commit + self.assertEqual(new_commit.author.name, uname) + self.assertEqual(new_commit.author.email, umail) + self.assertEqual(new_commit.committer.name, uname) + self.assertEqual(new_commit.committer.email, umail) + self.assertEqual(new_commit.message, commit_message) + self.assertEqual(new_commit.parents[0], cur_commit) + self.assertEqual(len(new_commit.parents), 1) + self.assertEqual(cur_head.commit, cur_commit) + + # Commit with other actor. 
+ cur_commit = cur_head.commit + + my_author = Actor("Frèderic Çauflâ‚Ŧ", "author@example.com") + my_committer = Actor("Committing Frèderic Çauflâ‚Ŧ", "committer@example.com") + commit_actor = index.commit(commit_message, author=my_author, committer=my_committer) + assert cur_commit != commit_actor + self.assertEqual(commit_actor.author.name, "Frèderic Çauflâ‚Ŧ") + self.assertEqual(commit_actor.author.email, "author@example.com") + self.assertEqual(commit_actor.committer.name, "Committing Frèderic Çauflâ‚Ŧ") + self.assertEqual(commit_actor.committer.email, "committer@example.com") + self.assertEqual(commit_actor.message, commit_message) + self.assertEqual(commit_actor.parents[0], cur_commit) + self.assertEqual(len(new_commit.parents), 1) + self.assertEqual(cur_head.commit, commit_actor) + self.assertEqual(cur_head.log()[-1].actor, my_committer) + + # Commit with author_date and commit_date. + cur_commit = cur_head.commit + commit_message = "commit with dates by Avinash Sajjanshetty" + + new_commit = index.commit( + commit_message, + author_date="2006-04-07T22:13:13", + commit_date="2005-04-07T22:13:13", + ) + assert cur_commit != new_commit + print(new_commit.authored_date, new_commit.committed_date) + self.assertEqual(new_commit.message, commit_message) + self.assertEqual(new_commit.authored_date, 1144447993) + self.assertEqual(new_commit.committed_date, 1112911993) + + # Same index, no parents. + commit_message = "index without parents" + commit_no_parents = index.commit(commit_message, parent_commits=[], head=True) + self.assertEqual(commit_no_parents.message, commit_message) + self.assertEqual(len(commit_no_parents.parents), 0) + self.assertEqual(cur_head.commit, commit_no_parents) + + # same index, multiple parents. 
+ commit_message = "Index with multiple parents\n commit with another line" + commit_multi_parent = index.commit(commit_message, parent_commits=(commit_no_parents, new_commit)) + self.assertEqual(commit_multi_parent.message, commit_message) + self.assertEqual(len(commit_multi_parent.parents), 2) + self.assertEqual(commit_multi_parent.parents[0], commit_no_parents) + self.assertEqual(commit_multi_parent.parents[1], new_commit) + self.assertEqual(cur_head.commit, commit_multi_parent) + + # Re-add all files in lib. + # Get the lib folder back on disk, but get an index without it. + index.reset(new_commit.parents[0], working_tree=True).reset(new_commit, working_tree=False) + lib_file_path = osp.join("lib", "git", "__init__.py") + assert (lib_file_path, 0) not in index.entries + assert osp.isfile(osp.join(rw_repo.working_tree_dir, lib_file_path)) + + # Directory. + entries = index.add(["lib"], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert len(entries) > 1 + + # Glob. + entries = index.reset(new_commit).add([osp.join("lib", "git", "*.py")], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + self.assertEqual(len(entries), 14) + + # Same file. + entries = index.reset(new_commit).add( + [osp.join(rw_repo.working_tree_dir, "lib", "git", "head.py")] * 2, + fprogress=self._fprogress_add, + ) + self._assert_entries(entries) + self.assertEqual(entries[0].mode & 0o644, 0o644) + # Would fail, test is too primitive to handle this case. + # self._assert_fprogress(entries) + self._reset_progress() + self.assertEqual(len(entries), 2) + + # Missing path. + self.assertRaises(OSError, index.reset(new_commit).add, ["doesnt/exist/must/raise"]) + + # Blob from older revision overrides current index revision. 
+ old_blob = new_commit.parents[0].tree.blobs[0] + entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + self.assertEqual(index.entries[(old_blob.path, 0)].hexsha, old_blob.hexsha) + self.assertEqual(len(entries), 1) + + # Mode 0 not allowed. + null_hex_sha = Diff.NULL_HEX_SHA + null_bin_sha = b"\0" * 20 + self.assertRaises( + ValueError, + index.reset(new_commit).add, + [BaseIndexEntry((0, null_bin_sha, 0, "doesntmatter"))], + ) - # A tree created from this should contain the symlink. - tree = index.write_tree() - assert fake_symlink_relapath in tree - index.write() # Flush our changes for the checkout. - - # Check out the fake link, should be a link then. - assert not S_ISLNK(os.stat(fake_symlink_path)[ST_MODE]) - os.remove(fake_symlink_path) - index.checkout(fake_symlink_path) - - # On Windows, we currently assume we will never get symlinks. - if sys.platform == "win32": - # Symlinks should contain the link as text (which is what a - # symlink actually is). - with open(fake_symlink_path, "rt") as fd: - self.assertEqual(fd.read(), link_target) - else: - self.assertTrue(S_ISLNK(os.lstat(fake_symlink_path)[ST_MODE])) - - # TEST RENAMING - def assert_mv_rval(rval): - for source, dest in rval: - assert not osp.exists(source) and osp.exists(dest) - # END for each renamed item - - # END move assertion utility - - self.assertRaises(ValueError, index.move, ["just_one_path"]) - # Try to move a file onto an existing file. - files = ["AUTHORS", "LICENSE"] - self.assertRaises(GitCommandError, index.move, files) - - # Again, with force. - assert_mv_rval(index.move(files, f=True)) - - # Move files into a directory - dry run. - paths = ["LICENSE", "VERSION", "doc"] - rval = index.move(paths, dry_run=True) - self.assertEqual(len(rval), 2) - assert osp.exists(paths[0]) - - # Again, no dry run. - rval = index.move(paths) - assert_mv_rval(rval) - - # Move dir into dir. 
- rval = index.move(["doc", "test"]) - assert_mv_rval(rval) - - # TEST PATH REWRITING - ###################### - count = [0] - - def rewriter(entry): - rval = str(count[0]) - count[0] += 1 - return rval - - # END rewriter - - def make_paths(): - """Help out the test by yielding two existing paths and one new path.""" - yield "CHANGES" - yield "ez_setup.py" - yield index.entries[index.entry_key("README", 0)] - yield index.entries[index.entry_key(".gitignore", 0)] - - for fid in range(3): - fname = "newfile%i" % fid - with open(fname, "wb") as fd: - fd.write(b"abcd") - yield Blob(rw_repo, Blob.NULL_BIN_SHA, 0o100644, fname) - # END for each new file - - # END path producer - paths = list(make_paths()) - self._assert_entries(index.add(paths, path_rewriter=rewriter)) - - for filenum in range(len(paths)): - assert index.entry_key(str(filenum), 0) in index.entries - - # TEST RESET ON PATHS - ###################### - arela = "aa" - brela = "bb" - afile = self._make_file(arela, "adata", rw_repo) - bfile = self._make_file(brela, "bdata", rw_repo) - akey = index.entry_key(arela, 0) - bkey = index.entry_key(brela, 0) - keys = (akey, bkey) - absfiles = (afile, bfile) - files = (arela, brela) - - for fkey in keys: - assert fkey not in index.entries - - index.add(files, write=True) - nc = index.commit("2 files committed", head=False) - - for fkey in keys: - assert fkey in index.entries - - # Just the index. - index.reset(paths=(arela, afile)) - assert akey not in index.entries - assert bkey in index.entries - - # Now with working tree - files on disk as well as entries must be recreated. - rw_repo.head.commit = nc - for absfile in absfiles: - os.remove(absfile) - - index.reset(working_tree=True, paths=files) - - for fkey in keys: - assert fkey in index.entries - for absfile in absfiles: - assert osp.isfile(absfile) + # Add new file. 
+ new_file_relapath = "my_new_file" + self._make_file(new_file_relapath, "hello world", rw_repo) + entries = index.reset(new_commit).add( + [BaseIndexEntry((0o10644, null_bin_sha, 0, new_file_relapath))], + fprogress=self._fprogress_add, + ) + self._assert_entries(entries) + self._assert_fprogress(entries) + self.assertEqual(len(entries), 1) + self.assertNotEqual(entries[0].hexsha, null_hex_sha) + + # Add symlink. + if sys.platform != "win32": + for target in ("/etc/nonexisting", "/etc/passwd", "/etc"): + basename = "my_real_symlink" + + link_file = osp.join(rw_repo.working_tree_dir, basename) + os.symlink(target, link_file) + entries = index.reset(new_commit).add([link_file], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + self.assertEqual(len(entries), 1) + self.assertTrue(S_ISLNK(entries[0].mode)) + self.assertTrue(S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode)) + + # We expect only the target to be written. + self.assertEqual( + index.repo.odb.stream(entries[0].binsha).read().decode("ascii"), + target, + ) + + os.remove(link_file) + # END for each target + # END real symlink test + + # Add fake symlink and assure it checks out as a symlink. + fake_symlink_relapath = "my_fake_symlink" + link_target = "/etc/that" + fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) + fake_entry = BaseIndexEntry((0o120000, null_bin_sha, 0, fake_symlink_relapath)) + entries = index.reset(new_commit).add([fake_entry], fprogress=self._fprogress_add) + self._assert_entries(entries) + self._assert_fprogress(entries) + assert entries[0].hexsha != null_hex_sha + self.assertEqual(len(entries), 1) + self.assertTrue(S_ISLNK(entries[0].mode)) + + # Check that this also works with an alternate method. 
+ full_index_entry = IndexEntry.from_base(BaseIndexEntry((0o120000, entries[0].binsha, 0, entries[0].path))) + entry_key = index.entry_key(full_index_entry) + index.reset(new_commit) + + assert entry_key not in index.entries + index.entries[entry_key] = full_index_entry + index.write() + index.update() # Force reread of entries. + new_entry = index.entries[entry_key] + assert S_ISLNK(new_entry.mode) + + # A tree created from this should contain the symlink. + tree = index.write_tree() + assert fake_symlink_relapath in tree + index.write() # Flush our changes for the checkout. + + # Check out the fake link, should be a link then. + assert not S_ISLNK(os.stat(fake_symlink_path)[ST_MODE]) + os.remove(fake_symlink_path) + index.checkout(fake_symlink_path) + + # On Windows, we currently assume we will never get symlinks. + if sys.platform == "win32": + # Symlinks should contain the link as text (which is what a + # symlink actually is). + with open(fake_symlink_path, "rt") as fd: + self.assertEqual(fd.read(), link_target) + else: + self.assertTrue(S_ISLNK(os.lstat(fake_symlink_path)[ST_MODE])) + + # TEST RENAMING + def assert_mv_rval(rval): + for source, dest in rval: + assert not osp.exists(source) and osp.exists(dest) + # END for each renamed item + + # END move assertion utility + + self.assertRaises(ValueError, index.move, ["just_one_path"]) + # Try to move a file onto an existing file. + files = ["AUTHORS", "LICENSE"] + self.assertRaises(GitCommandError, index.move, files) + + # Again, with force. + assert_mv_rval(index.move(files, f=True)) + + # Move files into a directory - dry run. + paths = ["LICENSE", "VERSION", "doc"] + rval = index.move(paths, dry_run=True) + self.assertEqual(len(rval), 2) + assert osp.exists(paths[0]) + + # Again, no dry run. + rval = index.move(paths) + assert_mv_rval(rval) + + # Move dir into dir. 
+ rval = index.move(["doc", "test"]) + assert_mv_rval(rval) + + # TEST PATH REWRITING + ###################### + count = [0] + + def rewriter(entry): + rval = str(count[0]) + count[0] += 1 + return rval + + # END rewriter + + def make_paths(): + """Help out the test by yielding two existing paths and one new path.""" + yield "CHANGES" + yield "ez_setup.py" + yield index.entries[index.entry_key("README", 0)] + yield index.entries[index.entry_key(".gitignore", 0)] + + for fid in range(3): + fname = "newfile%i" % fid + with open(fname, "wb") as fd: + fd.write(b"abcd") + yield Blob(rw_repo, Blob.NULL_BIN_SHA, 0o100644, fname) + # END for each new file + + # END path producer + paths = list(make_paths()) + self._assert_entries(index.add(paths, path_rewriter=rewriter)) + + for filenum in range(len(paths)): + assert index.entry_key(str(filenum), 0) in index.entries + + # TEST RESET ON PATHS + ###################### + arela = "aa" + brela = "bb" + afile = self._make_file(arela, "adata", rw_repo) + bfile = self._make_file(brela, "bdata", rw_repo) + akey = index.entry_key(arela, 0) + bkey = index.entry_key(brela, 0) + keys = (akey, bkey) + absfiles = (afile, bfile) + files = (arela, brela) + + for fkey in keys: + assert fkey not in index.entries + + index.add(files, write=True) + nc = index.commit("2 files committed", head=False) + + for fkey in keys: + assert fkey in index.entries + + # Just the index. + index.reset(paths=(arela, afile)) + assert akey not in index.entries + assert bkey in index.entries + + # Now with working tree - files on disk as well as entries must be recreated. 
+ rw_repo.head.commit = nc + for absfile in absfiles: + os.remove(absfile) + + index.reset(working_tree=True, paths=files) + + for fkey in keys: + assert fkey in index.entries + for absfile in absfiles: + assert osp.isfile(absfile) @with_rw_repo("HEAD") def test_compare_write_tree(self, rw_repo): @@ -1015,6 +1064,27 @@ class Mocked: rel = index._to_relative_path(path) self.assertEqual(rel, os.path.relpath(path, root)) + def test__to_relative_path_absolute_trailing_slash(self): + repo_root = os.path.join(osp.abspath(os.sep), "directory1", "repo_root") + + class Mocked: + bare = False + git_dir = repo_root + working_tree_dir = repo_root + + repo = Mocked() + path = os.path.join(repo_root, f"directory2{os.sep}") + index = IndexFile(repo) + + expected_path = f"directory2{os.sep}" + actual_path = index._to_relative_path(path) + self.assertEqual(expected_path, actual_path) + + with mock.patch("git.index.base.os.path") as ospath_mock: + ospath_mock.relpath.return_value = f"directory2{os.sep}" + actual_path = index._to_relative_path(path) + self.assertEqual(expected_path, actual_path) + @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, reason="Can't run a hook on Windows without bash.exe.", @@ -1173,7 +1243,7 @@ def test_commit_msg_hook_fail(self, rw_repo): raise AssertionError("Should have caught a HookExecutionError") @with_rw_repo("HEAD") - def test_index_add_pathlike(self, rw_repo): + def test_index_add_pathlib(self, rw_repo): git_dir = Path(rw_repo.git_dir) file = git_dir / "file.txt" @@ -1181,6 +1251,15 @@ def test_index_add_pathlike(self, rw_repo): rw_repo.index.add(file) + @with_rw_repo("HEAD") + def test_index_add_pathlike(self, rw_repo): + git_dir = Path(rw_repo.git_dir) + + file = git_dir / "file.txt" + file.touch() + + rw_repo.index.add(PathLikeMock(str(file))) + @with_rw_repo("HEAD") def test_index_add_non_normalized_path(self, rw_repo): git_dir = Path(rw_repo.git_dir) @@ -1193,6 +1272,48 @@ def test_index_add_non_normalized_path(self, 
rw_repo): rw_repo.index.add(non_normalized_path) + def test_index_file_v3(self): + index = IndexFile(self.rorepo, fixture_path("index_extended_flags")) + assert index.entries + assert index.version == 3 + assert len(index.entries) == 4 + assert index.entries[("init.t", 0)].skip_worktree + + # Write the data - it must match the original. + with tempfile.NamedTemporaryFile() as tmpfile: + index.write(tmpfile.name) + assert Path(tmpfile.name).read_bytes() == Path(fixture_path("index_extended_flags")).read_bytes() + + @with_rw_directory + def test_index_file_v3_with_git_command(self, tmp_dir): + tmp_dir = Path(tmp_dir) + with cwd(tmp_dir): + git = Git(tmp_dir) + git.init() + + file = tmp_dir / "file.txt" + file.write_text("hello") + git.add("--intent-to-add", "file.txt") # intent-to-add sets extended flag + + repo = Repo(tmp_dir) + index = repo.index + + assert len(index.entries) == 1 + assert index.version == 3 + entry = list(index.entries.values())[0] + assert entry.path == "file.txt" + assert entry.intent_to_add + + file2 = tmp_dir / "file2.txt" + file2.write_text("world") + index.add(["file2.txt"]) + index.write() + + status_str = git.status(porcelain=True) + status_lines = status_str.splitlines() + assert " A file.txt" in status_lines + assert "A file2.txt" in status_lines + class TestIndexUtils: @pytest.mark.parametrize("file_path_type", [str, Path]) diff --git a/test/test_installation.py b/test/test_installation.py index ae6472e98..7c82bd403 100644 --- a/test/test_installation.py +++ b/test/test_installation.py @@ -2,6 +2,7 @@ # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ import ast +import functools import os import subprocess @@ -11,50 +12,22 @@ class TestInstallation(TestBase): @with_rw_directory def test_installation(self, rw_dir): - venv = self._set_up_venv(rw_dir) + venv, run = self._set_up_venv(rw_dir) - result = subprocess.run( - [venv.pip, "install", "."], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - self.assertEqual( - 0, 
- result.returncode, - msg=result.stderr or result.stdout or "Can't install project", - ) + result = run([venv.pip, "install", "."]) + self._check_result(result, "Can't install project") - result = subprocess.run( - [venv.python, "-c", "import git"], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - self.assertEqual( - 0, - result.returncode, - msg=result.stderr or result.stdout or "Self-test failed", - ) + result = run([venv.python, "-c", "import git"]) + self._check_result(result, "Self-test failed") - result = subprocess.run( - [venv.python, "-c", "import gitdb; import smmap"], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - self.assertEqual( - 0, - result.returncode, - msg=result.stderr or result.stdout or "Dependencies not installed", - ) + result = run([venv.python, "-c", "import gitdb; import smmap"]) + self._check_result(result, "Dependencies not installed") # Even IF gitdb or any other dependency is supplied during development by # inserting its location into PYTHONPATH or otherwise patched into sys.path, # make sure it is not wrongly inserted as the *first* entry. - result = subprocess.run( - [venv.python, "-c", "import sys; import git; print(sys.path)"], - stdout=subprocess.PIPE, - cwd=venv.sources, - ) - syspath = result.stdout.decode("utf-8").splitlines()[0] + result = run([venv.python, "-c", "import sys; import git; print(sys.path)"]) + syspath = result.stdout.splitlines()[0] syspath = ast.literal_eval(syspath) self.assertEqual( "", @@ -64,10 +37,37 @@ def test_installation(self, rw_dir): @staticmethod def _set_up_venv(rw_dir): + # Initialize the virtual environment. venv = VirtualEnvironment(rw_dir, with_pip=True) + + # Make its src directory a symlink to our own top-level source tree. os.symlink( os.path.dirname(os.path.dirname(__file__)), venv.sources, target_is_directory=True, ) - return venv + + # Create a convenience function to run commands in it. 
+ run = functools.partial( + subprocess.run, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + cwd=venv.sources, + env={**os.environ, "PYTHONWARNINGS": "error"}, + ) + + return venv, run + + def _check_result(self, result, failure_summary): + self.assertEqual( + 0, + result.returncode, + msg=self._prepare_failure_message(result, failure_summary), + ) + + @staticmethod + def _prepare_failure_message(result, failure_summary): + stdout = result.stdout.rstrip() + stderr = result.stderr.rstrip() + return f"{failure_summary}\n\nstdout:\n{stdout}\n\nstderr:\n{stderr}" diff --git a/test/test_quick_doc.py b/test/test_quick_doc.py index 4ef75f4aa..98658e02f 100644 --- a/test/test_quick_doc.py +++ b/test/test_quick_doc.py @@ -173,7 +173,7 @@ def test_cloned_repo_object(self, local_dir): # [15-test_cloned_repo_object] def print_files_from_git(root, level=0): for entry in root: - print(f'{"-" * 4 * level}| {entry.path}, {entry.type}') + print(f"{'-' * 4 * level}| {entry.path}, {entry.type}") if entry.type == "tree": print_files_from_git(entry, level + 1) diff --git a/test/test_refs.py b/test/test_refs.py index 08096e69e..d77b34eba 100644 --- a/test/test_refs.py +++ b/test/test_refs.py @@ -3,6 +3,7 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +import contextlib from itertools import chain import os.path as osp from pathlib import Path @@ -18,6 +19,7 @@ RefLog, Reference, RemoteReference, + Repo, SymbolicReference, TagReference, ) @@ -25,10 +27,22 @@ import git.refs as refs from git.util import Actor -from test.lib import TestBase, with_rw_repo +from test.lib import TestBase, with_rw_repo, PathLikeMock class TestRefs(TestBase): + @contextlib.contextmanager + def _repo_with_initial_commit(self, base_dir): + repo_dir = base_dir / "repo" + repo = Repo.init(repo_dir) + (repo_dir / "file.txt").write_text("initial\n", encoding="utf-8") + repo.index.add(["file.txt"]) + 
repo.index.commit("initial") + try: + yield repo + finally: + repo.git.clear_cache() + def test_from_path(self): # Should be able to create any reference directly. for ref_type in (Reference, Head, TagReference, RemoteReference): @@ -43,6 +57,25 @@ def test_from_path(self): self.assertRaises(ValueError, TagReference, self.rorepo, "refs/invalid/tag") # Works without path check. TagReference(self.rorepo, "refs/invalid/tag", check_path=False) + # Check remoteness + assert Reference(self.rorepo, "refs/remotes/origin").is_remote() + + def test_from_pathlike(self): + # Should be able to create any reference directly. + for ref_type in (Reference, Head, TagReference, RemoteReference): + for name in ("rela_name", "path/rela_name"): + full_path = ref_type.to_full_path(PathLikeMock(name)) + instance = ref_type.from_path(self.rorepo, PathLikeMock(full_path)) + assert isinstance(instance, ref_type) + # END for each name + # END for each type + + # Invalid path. + self.assertRaises(ValueError, TagReference, self.rorepo, "refs/invalid/tag") + # Works without path check. 
+ TagReference(self.rorepo, PathLikeMock("refs/invalid/tag"), check_path=False) + # Check remoteness + assert Reference(self.rorepo, PathLikeMock("refs/remotes/origin")).is_remote() def test_tag_base(self): tag_object_refs = [] @@ -629,6 +662,115 @@ def test_refs_outside_repo(self): ref_file_name = Path(ref_file.name).name self.assertRaises(BadName, self.rorepo.commit, f"../../{ref_file_name}") + def test_reference_create_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_write.txt" + + self.assertRaises(ValueError, Reference.create, repo, "../../../outside_write.txt", "HEAD") + assert not outside_path.exists() + + def test_symbolic_reference_create_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_write.txt" + + self.assertRaises(ValueError, SymbolicReference.create, repo, "../../outside_write.txt", "HEAD") + assert not outside_path.exists() + + def test_symbolic_reference_set_reference_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_write.txt" + + self.assertRaises(ValueError, SymbolicReference(repo, "../../outside_write.txt").set_reference, "HEAD") + assert not outside_path.exists() + + def test_symbolic_reference_rename_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_move.txt" + ref = SymbolicReference.create(repo, "SAFE_RENAME_SOURCE", "HEAD") + + self.assertRaises(ValueError, ref.rename, "../../outside_move.txt") + assert not outside_path.exists() + assert 
Path(ref.abspath).is_file() + + def test_symbolic_reference_delete_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_delete.txt" + outside_path.write_text("do not delete\n", encoding="utf-8") + + self.assertRaises(ValueError, SymbolicReference.delete, repo, "../../outside_delete.txt") + assert outside_path.read_text(encoding="utf-8") == "do not delete\n" + + def test_symbolic_reference_log_append_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_path = base_dir / "outside_reflog.txt" + + ref = SymbolicReference(repo, "../../../outside_reflog.txt") + self.assertRaises( + ValueError, ref.log_append, Commit.NULL_BIN_SHA, "do not write", repo.head.commit.binsha + ) + assert not outside_path.exists() + + def test_symbolic_reference_set_reference_rejects_symlink_escape(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + with self._repo_with_initial_commit(base_dir) as repo: + outside_dir = base_dir / "outside_refs" + outside_dir.mkdir() + outside_path = outside_dir / "escaped" + + refs_heads_dir = Path(repo.common_dir) / "refs" / "heads" + refs_heads_dir.mkdir(parents=True, exist_ok=True) + symlink_path = refs_heads_dir / "link_out" + try: + symlink_path.symlink_to(outside_dir, target_is_directory=True) + except (OSError, NotImplementedError) as ex: + self.skipTest("symlinks unavailable on this platform: %s" % ex) + if osp.realpath(symlink_path / "escaped") == osp.abspath(symlink_path / "escaped"): + self.skipTest("realpath does not resolve directory symlinks on this platform") + + ref = SymbolicReference(repo, "refs/heads/link_out/escaped") + self.assertRaises(ValueError, ref.set_reference, "HEAD") + assert not outside_path.exists() + + def 
test_remote_reference_delete_cleanup_rejects_path_traversal(self): + with tempfile.TemporaryDirectory() as tmp_dir: + base_dir = Path(tmp_dir) + git_dir = base_dir / "repo" / ".git" + git_dir.mkdir(parents=True) + outside_path = base_dir / "outside_remote_delete.txt" + outside_path.write_text("do not delete\n", encoding="utf-8") + + class GitStub: + branch_called = False + + def branch(self, *args): + self.branch_called = True + + class RepoStub: + pass + + repo = RepoStub() + repo.git = GitStub() + repo.common_dir = str(git_dir) + repo.git_dir = str(git_dir) + ref = RemoteReference(repo, "../../outside_remote_delete.txt", check_path=False) + + self.assertRaises(ValueError, RemoteReference.delete, repo, ref) + assert not repo.git.branch_called + assert outside_path.read_text(encoding="utf-8") == "do not delete\n" + def test_validity_ref_names(self): """Ensure ref names are checked for validity. diff --git a/test/test_remote.py b/test/test_remote.py index 5ddb41bc0..2230c8df4 100644 --- a/test/test_remote.py +++ b/test/test_remote.py @@ -44,7 +44,7 @@ class TestRemoteProgress(RemoteProgress): __slots__ = ("_seen_lines", "_stages_per_op", "_num_progress_messages") - def __init__(self): + def __init__(self) -> None: super().__init__() self._seen_lines = [] self._stages_per_op = {} @@ -103,6 +103,9 @@ def assert_received_message(self): assert self._num_progress_messages +TestRemoteProgress.__test__ = False # type: ignore + + class TestRemote(TestBase): def tearDown(self): gc.collect() @@ -684,7 +687,12 @@ def test_multiple_urls(self, rw_repo): def test_fetch_error(self): rem = self.rorepo.remote("origin") - with self.assertRaisesRegex(GitCommandError, "[Cc]ouldn't find remote ref __BAD_REF__"): + msg = ( + r"[Cc]ouldn't find remote ref __BAD_REF__|" + r"could not read Username|" + r"expected flush after ref listing" + ) + with self.assertRaisesRegex(GitCommandError, msg): rem.fetch("__BAD_REF__") @with_rw_repo("0.1.6", bare=False) @@ -824,7 +832,7 @@ def 
test_fetch_unsafe_options(self, rw_repo): remote = rw_repo.remote("origin") tmp_dir = Path(tdir) tmp_file = tmp_dir / "pwn" - unsafe_options = [{"upload-pack": f"touch {tmp_file}"}] + unsafe_options = [{"upload-pack": f"touch {tmp_file}"}, {"upload_pack": f"touch {tmp_file}"}] for unsafe_option in unsafe_options: with self.assertRaises(UnsafeOptionError): remote.fetch(**unsafe_option) @@ -892,7 +900,7 @@ def test_pull_unsafe_options(self, rw_repo): remote = rw_repo.remote("origin") tmp_dir = Path(tdir) tmp_file = tmp_dir / "pwn" - unsafe_options = [{"upload-pack": f"touch {tmp_file}"}] + unsafe_options = [{"upload-pack": f"touch {tmp_file}"}, {"upload_pack": f"touch {tmp_file}"}] for unsafe_option in unsafe_options: with self.assertRaises(UnsafeOptionError): remote.pull(**unsafe_option) @@ -961,10 +969,9 @@ def test_push_unsafe_options(self, rw_repo): tmp_dir = Path(tdir) tmp_file = tmp_dir / "pwn" unsafe_options = [ - { - "receive-pack": f"touch {tmp_file}", - "exec": f"touch {tmp_file}", - } + {"receive-pack": f"touch {tmp_file}"}, + {"receive_pack": f"touch {tmp_file}"}, + {"exec": f"touch {tmp_file}"}, ] for unsafe_option in unsafe_options: assert not tmp_file.exists() @@ -988,10 +995,9 @@ def test_push_unsafe_options_allowed(self, rw_repo): tmp_dir = Path(tdir) tmp_file = tmp_dir / "pwn" unsafe_options = [ - { - "receive-pack": f"touch {tmp_file}", - "exec": f"touch {tmp_file}", - } + {"receive-pack": f"touch {tmp_file}"}, + {"receive_pack": f"touch {tmp_file}"}, + {"exec": f"touch {tmp_file}"}, ] for unsafe_option in unsafe_options: # The options will be allowed, but the command will fail. 
diff --git a/test/test_repo.py b/test/test_repo.py index e38da5bb6..d2dd1ea5d 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -14,7 +14,8 @@ import pickle import sys import tempfile -from unittest import mock, skip +from unittest import mock +from pathlib import Path import pytest @@ -36,11 +37,11 @@ Submodule, Tree, ) -from git.exc import BadObject, UnsafeOptionError, UnsafeProtocolError +from git.exc import BadObject from git.repo.fun import touch from git.util import bin_to_hex, cwd, cygpath, join_path_native, rmfile, rmtree -from test.lib import TestBase, fixture, with_rw_directory, with_rw_repo +from test.lib import TestBase, fixture, with_rw_directory, with_rw_repo, PathLikeMock def iter_flatten(lol): @@ -105,6 +106,11 @@ def test_repo_creation_pathlib(self, rw_repo): r_from_gitdir = Repo(pathlib.Path(rw_repo.git_dir)) self.assertEqual(r_from_gitdir.git_dir, rw_repo.git_dir) + @with_rw_repo("0.3.2.1") + def test_repo_creation_pathlike(self, rw_repo): + r_from_gitdir = Repo(PathLikeMock(rw_repo.git_dir)) + self.assertEqual(r_from_gitdir.git_dir, rw_repo.git_dir) + def test_description(self): txt = "Test repository" self.rorepo.description = txt @@ -140,6 +146,23 @@ def test_commit_from_revision(self): self.assertEqual(commit.type, "commit") self.assertEqual(self.rorepo.commit(commit), commit) + @with_rw_directory + def test_commit_from_tag_starting_with_at(self, rw_dir): + repo = Repo.init(rw_dir) + with repo.config_writer() as writer: + writer.set_value("user", "name", "GitPython Tests") + writer.set_value("user", "email", "gitpython@example.com") + + tracked_file = Path(rw_dir) / "hello.txt" + tracked_file.write_text("hello") + repo.index.add([str(tracked_file)]) + commit = repo.index.commit("init") + repo.create_tag("@foo") + + self.assertEqual(repo.tags["@foo"].commit, commit) + self.assertEqual(repo.commit("@"), commit) + self.assertEqual(repo.commit("@foo"), commit) + def test_commits(self): mc = 10 commits = 
list(self.rorepo.iter_commits("0.1.6", max_count=mc)) @@ -214,285 +237,6 @@ def test_date_format(self, rw_dir): # @-timestamp is the format used by git commit hooks. repo.index.commit("Commit messages", commit_date="@1400000000 +0000") - @with_rw_directory - def test_clone_from_pathlib(self, rw_dir): - original_repo = Repo.init(osp.join(rw_dir, "repo")) - - Repo.clone_from(original_repo.git_dir, pathlib.Path(rw_dir) / "clone_pathlib") - - @with_rw_directory - def test_clone_from_pathlib_withConfig(self, rw_dir): - original_repo = Repo.init(osp.join(rw_dir, "repo")) - - cloned = Repo.clone_from( - original_repo.git_dir, - pathlib.Path(rw_dir) / "clone_pathlib_withConfig", - multi_options=[ - "--recurse-submodules=repo", - "--config core.filemode=false", - "--config submodule.repo.update=checkout", - "--config filter.lfs.clean='git-lfs clean -- %f'", - ], - allow_unsafe_options=True, - ) - - self.assertEqual(cloned.config_reader().get_value("submodule", "active"), "repo") - self.assertEqual(cloned.config_reader().get_value("core", "filemode"), False) - self.assertEqual(cloned.config_reader().get_value('submodule "repo"', "update"), "checkout") - self.assertEqual( - cloned.config_reader().get_value('filter "lfs"', "clean"), - "git-lfs clean -- %f", - ) - - def test_clone_from_with_path_contains_unicode(self): - with tempfile.TemporaryDirectory() as tmpdir: - unicode_dir_name = "\u0394" - path_with_unicode = os.path.join(tmpdir, unicode_dir_name) - os.makedirs(path_with_unicode) - - try: - Repo.clone_from( - url=self._small_repo_url(), - to_path=path_with_unicode, - ) - except UnicodeEncodeError: - self.fail("Raised UnicodeEncodeError") - - @with_rw_directory - @skip( - """The referenced repository was removed, and one needs to set up a new - password controlled repo under the org's control.""" - ) - def test_leaking_password_in_clone_logs(self, rw_dir): - password = "fakepassword1234" - try: - Repo.clone_from( - 
url="https://fakeuser:{}@fakerepo.example.com/testrepo".format(password), - to_path=rw_dir, - ) - except GitCommandError as err: - assert password not in str(err), "The error message '%s' should not contain the password" % err - # Working example from a blank private project. - Repo.clone_from( - url="https://gitlab+deploy-token-392045:mLWhVus7bjLsy8xj8q2V@gitlab.com/mercierm/test_git_python", - to_path=rw_dir, - ) - - @with_rw_repo("HEAD") - def test_clone_unsafe_options(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - unsafe_options = [ - f"--upload-pack='touch {tmp_file}'", - f"-u 'touch {tmp_file}'", - "--config=protocol.ext.allow=always", - "-c protocol.ext.allow=always", - ] - for unsafe_option in unsafe_options: - with self.assertRaises(UnsafeOptionError): - rw_repo.clone(tmp_dir, multi_options=[unsafe_option]) - assert not tmp_file.exists() - - unsafe_options = [ - {"upload-pack": f"touch {tmp_file}"}, - {"u": f"touch {tmp_file}"}, - {"config": "protocol.ext.allow=always"}, - {"c": "protocol.ext.allow=always"}, - ] - for unsafe_option in unsafe_options: - with self.assertRaises(UnsafeOptionError): - rw_repo.clone(tmp_dir, **unsafe_option) - assert not tmp_file.exists() - - @pytest.mark.xfail( - sys.platform == "win32", - reason=( - "File not created. A separate Windows command may be needed. This and the " - "currently passing test test_clone_unsafe_options must be adjusted in the " - "same way. Until then, test_clone_unsafe_options is unreliable on Windows." 
- ), - raises=AssertionError, - ) - @with_rw_repo("HEAD") - def test_clone_unsafe_options_allowed(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - unsafe_options = [ - f"--upload-pack='touch {tmp_file}'", - f"-u 'touch {tmp_file}'", - ] - for i, unsafe_option in enumerate(unsafe_options): - destination = tmp_dir / str(i) - assert not tmp_file.exists() - # The options will be allowed, but the command will fail. - with self.assertRaises(GitCommandError): - rw_repo.clone(destination, multi_options=[unsafe_option], allow_unsafe_options=True) - assert tmp_file.exists() - tmp_file.unlink() - - unsafe_options = [ - "--config=protocol.ext.allow=always", - "-c protocol.ext.allow=always", - ] - for i, unsafe_option in enumerate(unsafe_options): - destination = tmp_dir / str(i) - assert not destination.exists() - rw_repo.clone(destination, multi_options=[unsafe_option], allow_unsafe_options=True) - assert destination.exists() - - @with_rw_repo("HEAD") - def test_clone_safe_options(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - options = [ - "--depth=1", - "--single-branch", - "-q", - ] - for option in options: - destination = tmp_dir / option - assert not destination.exists() - rw_repo.clone(destination, multi_options=[option]) - assert destination.exists() - - @with_rw_repo("HEAD") - def test_clone_from_unsafe_options(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - unsafe_options = [ - f"--upload-pack='touch {tmp_file}'", - f"-u 'touch {tmp_file}'", - "--config=protocol.ext.allow=always", - "-c protocol.ext.allow=always", - ] - for unsafe_option in unsafe_options: - with self.assertRaises(UnsafeOptionError): - Repo.clone_from(rw_repo.working_dir, tmp_dir, multi_options=[unsafe_option]) - assert not tmp_file.exists() - - unsafe_options = [ - {"upload-pack": f"touch {tmp_file}"}, - 
{"u": f"touch {tmp_file}"}, - {"config": "protocol.ext.allow=always"}, - {"c": "protocol.ext.allow=always"}, - ] - for unsafe_option in unsafe_options: - with self.assertRaises(UnsafeOptionError): - Repo.clone_from(rw_repo.working_dir, tmp_dir, **unsafe_option) - assert not tmp_file.exists() - - @pytest.mark.xfail( - sys.platform == "win32", - reason=( - "File not created. A separate Windows command may be needed. This and the " - "currently passing test test_clone_from_unsafe_options must be adjusted in the " - "same way. Until then, test_clone_from_unsafe_options is unreliable on Windows." - ), - raises=AssertionError, - ) - @with_rw_repo("HEAD") - def test_clone_from_unsafe_options_allowed(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - unsafe_options = [ - f"--upload-pack='touch {tmp_file}'", - f"-u 'touch {tmp_file}'", - ] - for i, unsafe_option in enumerate(unsafe_options): - destination = tmp_dir / str(i) - assert not tmp_file.exists() - # The options will be allowed, but the command will fail. 
- with self.assertRaises(GitCommandError): - Repo.clone_from( - rw_repo.working_dir, destination, multi_options=[unsafe_option], allow_unsafe_options=True - ) - assert tmp_file.exists() - tmp_file.unlink() - - unsafe_options = [ - "--config=protocol.ext.allow=always", - "-c protocol.ext.allow=always", - ] - for i, unsafe_option in enumerate(unsafe_options): - destination = tmp_dir / str(i) - assert not destination.exists() - Repo.clone_from( - rw_repo.working_dir, destination, multi_options=[unsafe_option], allow_unsafe_options=True - ) - assert destination.exists() - - @with_rw_repo("HEAD") - def test_clone_from_safe_options(self, rw_repo): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - options = [ - "--depth=1", - "--single-branch", - "-q", - ] - for option in options: - destination = tmp_dir / option - assert not destination.exists() - Repo.clone_from(rw_repo.common_dir, destination, multi_options=[option]) - assert destination.exists() - - def test_clone_from_unsafe_protocol(self): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - urls = [ - f"ext::sh -c touch% {tmp_file}", - "fd::17/foo", - ] - for url in urls: - with self.assertRaises(UnsafeProtocolError): - Repo.clone_from(url, tmp_dir / "repo") - assert not tmp_file.exists() - - def test_clone_from_unsafe_protocol_allowed(self): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - urls = [ - f"ext::sh -c touch% {tmp_file}", - "fd::/foo", - ] - for url in urls: - # The URL will be allowed into the command, but the command will - # fail since we don't have that protocol enabled in the Git config file. 
- with self.assertRaises(GitCommandError): - Repo.clone_from(url, tmp_dir / "repo", allow_unsafe_protocols=True) - assert not tmp_file.exists() - - def test_clone_from_unsafe_protocol_allowed_and_enabled(self): - with tempfile.TemporaryDirectory() as tdir: - tmp_dir = pathlib.Path(tdir) - tmp_file = tmp_dir / "pwn" - urls = [ - f"ext::sh -c touch% {tmp_file}", - ] - allow_ext = [ - "--config=protocol.ext.allow=always", - ] - for url in urls: - # The URL will be allowed into the command, and the protocol is enabled, - # but the command will fail since it can't read from the remote repo. - assert not tmp_file.exists() - with self.assertRaises(GitCommandError): - Repo.clone_from( - url, - tmp_dir / "repo", - multi_options=allow_ext, - allow_unsafe_protocols=True, - allow_unsafe_options=True, - ) - assert tmp_file.exists() - tmp_file.unlink() - @with_rw_repo("HEAD") def test_max_chunk_size(self, repo): class TestOutputStream(TestBase): @@ -643,6 +387,15 @@ def test_is_dirty_with_path(self, rwrepo): assert rwrepo.is_dirty(path="doc") is False assert rwrepo.is_dirty(untracked_files=True, path="doc") is True + @with_rw_repo("HEAD") + def test_is_dirty_with_pathlib_and_pathlike(self, rwrepo): + with open(osp.join(rwrepo.working_dir, "git", "util.py"), "at") as f: + f.write("junk") + assert rwrepo.is_dirty(path=Path("git")) is True + assert rwrepo.is_dirty(path=PathLikeMock("git")) is True + assert rwrepo.is_dirty(path=Path("doc")) is False + assert rwrepo.is_dirty(path=PathLikeMock("doc")) is False + def test_head(self): self.assertEqual(self.rorepo.head.reference.object, self.rorepo.active_branch.object) @@ -1064,9 +817,9 @@ def test_rev_parse(self): # TODO: Dereference tag into a blob 0.1.7^{blob} - quite a special one. # Needs a tag which points to a blob. 
- # ref^0 returns commit being pointed to, same with ref~0, and ^{} + # ref^0 returns commit being pointed to, same with ref~0, ^{}, and ^{commit} tag = rev_parse("0.1.4") - for token in ("~0", "^0", "^{}"): + for token in ("~0", "^0", "^{}", "^{commit}"): self.assertEqual(tag.object, rev_parse("0.1.4%s" % token)) # END handle multiple tokens @@ -1112,18 +865,18 @@ def test_rev_parse(self): # Currently, nothing more is supported. self.assertRaises(NotImplementedError, rev_parse, "@{1 week ago}") - # The last position. - assert rev_parse("@{1}") != head.commit + # The previous position, if this checkout has enough reflog history. + try: + previous = rev_parse("@{1}") + except IndexError: + pass + else: + self.assertNotEqual(previous, head.commit) def test_repo_odbtype(self): target_type = GitCmdObjectDB self.assertIsInstance(self.rorepo.odb, target_type) - @pytest.mark.xfail( - sys.platform == "cygwin", - reason="Cygwin GitPython can't find submodule SHA", - raises=ValueError, - ) def test_submodules(self): self.assertEqual(len(self.rorepo.submodules), 1) # non-recursive self.assertGreaterEqual(len(list(self.rorepo.iter_submodules())), 2) @@ -1226,6 +979,46 @@ def test_empty_repo(self, rw_dir): assert "BAD MESSAGE" not in contents, "log is corrupt" + @with_rw_directory + def test_active_branch_raises_value_error_when_head_ref_is_invalid(self, rw_dir): + repo = Repo.init(rw_dir) + with open(osp.join(rw_dir, ".git", "HEAD"), "w") as f: + f.write("ref: refs/heads/.invalid\n") + + self.assertRaisesRegex( + ValueError, + r"refs/heads/\.invalid.*older clients", + lambda: repo.active_branch, + ) + + @with_rw_directory + def test_empty_repo_reftable_active_branch(self, rw_dir): + git = Git(rw_dir) + try: + git.init(ref_format="reftable") + except GitCommandError as err: + if err.status == 129: + pytest.skip("git init --ref-format is not supported by this git version") + raise + + repo = Repo(rw_dir) + self.assertEqual(repo.head.reference.name, ".invalid") + 
self.assertRaisesRegex( + ValueError, + r"refs/heads/\.invalid.*older clients", + lambda: repo.active_branch, + ) + + @with_rw_directory + def test_active_branch_raises_type_error_when_head_is_detached(self, rw_dir): + repo = Repo.init(rw_dir) + with open(osp.join(rw_dir, "a.txt"), "w") as f: + f.write("a") + repo.index.add(["a.txt"]) + repo.index.commit("initial commit") + repo.git.checkout(repo.head.commit.hexsha) + self.assertRaisesRegex(TypeError, "detached symbolic reference", lambda: repo.active_branch) + def test_merge_base(self): repo = self.rorepo c1 = "f6aa8d1" @@ -1296,7 +1089,7 @@ def test_is_valid_object(self): self.assertFalse(repo.is_valid_object(tag_sha, "commit")) @with_rw_directory - def test_git_work_tree_dotgit(self, rw_dir): + def test_git_work_tree_dotgit(self, rw_dir, use_relative_paths=False): """Check that we find .git as a worktree file and find the worktree based on it.""" git = Git(rw_dir) @@ -1308,7 +1101,11 @@ def test_git_work_tree_dotgit(self, rw_dir): worktree_path = join_path_native(rw_dir, "worktree_repo") if Git.is_cygwin(): worktree_path = cygpath(worktree_path) - rw_master.git.worktree("add", worktree_path, branch.name) + wt_add_kwargs = {"insert_kwargs_after": "add"} + # relative worktree paths introduced in git 2.48.0 + if use_relative_paths and git.version_info[:3] >= (2, 48, 0): + wt_add_kwargs["relative_paths"] = True + rw_master.git.worktree("add", worktree_path, branch.name, **wt_add_kwargs) # This ensures that we can read the repo's gitdir correctly. 
repo = Repo(worktree_path) @@ -1326,6 +1123,15 @@ def test_git_work_tree_dotgit(self, rw_dir): self.assertIsInstance(repo.heads["aaaaaaaa"], Head) + def test_git_work_tree_dotgit_relative(self): + """Check that we find .git as a worktree file containing a relative path + and find the worktree based on it.""" + if Git().version_info[:3] < (2, 48, 0): + pytest.skip("relative worktree feature unsupported, needs git 2.48.0 or later") + # this class inherits from TestCase so we can't use pytest.mark.parametrize on + # test_git_work_tree_dotgit; delegate instead + self.test_git_work_tree_dotgit(use_relative_paths=True) + @with_rw_directory def test_git_work_tree_env(self, rw_dir): """Check that we yield to GIT_WORK_TREE.""" @@ -1350,6 +1156,42 @@ def test_git_work_tree_env(self, rw_dir): self.assertEqual(r.working_tree_dir, repo_dir) self.assertEqual(r.working_dir, repo_dir) + @with_rw_directory + def test_git_work_tree_env_in_linked_worktree(self, rw_dir): + """Check that Repo() autodiscovers a linked worktree when GIT_DIR is set.""" + git = Git(rw_dir) + if git.version_info[:3] < (2, 5, 1): + raise RuntimeError("worktree feature unsupported (test needs git 2.5.1 or later)") + + rw_master = self.rorepo.clone(join_path_native(rw_dir, "master_repo")) + branch = rw_master.create_head("bbbbbbbb") + worktree_path = join_path_native(rw_dir, "worktree_repo") + if Git.is_cygwin(): + worktree_path = cygpath(worktree_path) + + rw_master.git.worktree("add", worktree_path, branch.name) + + git_dir = Git(worktree_path).rev_parse("--git-dir") + + patched_env = dict(os.environ) + patched_env["GIT_DIR"] = git_dir + patched_env.pop("GIT_WORK_TREE", None) + patched_env.pop("GIT_COMMON_DIR", None) + + with mock.patch.dict(os.environ, patched_env, clear=True): + old_cwd = os.getcwd() + try: + os.chdir(worktree_path) + + explicit = Repo(os.getcwd()) + autodiscovered = Repo() + + self.assertTrue(osp.samefile(explicit.working_tree_dir, worktree_path)) + 
self.assertTrue(osp.samefile(autodiscovered.working_tree_dir, worktree_path)) + self.assertTrue(osp.samefile(autodiscovered.working_tree_dir, explicit.working_tree_dir)) + finally: + os.chdir(old_cwd) + @with_rw_directory def test_rebasing(self, rw_dir): r = Repo.init(rw_dir) diff --git a/test/test_rev_parse.py b/test/test_rev_parse.py new file mode 100644 index 000000000..b00347668 --- /dev/null +++ b/test/test_rev_parse.py @@ -0,0 +1,159 @@ +# Copyright (C) 2026 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +from pathlib import Path + +import pytest + +from git import Repo +from git.refs import RemoteReference +from git.refs import SymbolicReference +from gitdb.exc import BadName + + +def _write(repo, path, content): + full_path = Path(repo.working_tree_dir) / path + full_path.parent.mkdir(parents=True, exist_ok=True) + full_path.write_text(content) + repo.index.add([str(full_path)]) + + +@pytest.fixture +def rev_parse_repo(tmp_path): + repo = Repo.init(tmp_path) + with repo.config_writer() as writer: + writer.set_value("user", "name", "GitPython Tests") + writer.set_value("user", "email", "gitpython@example.com") + + _write(repo, "README.md", "root\n") + _write(repo, "CHANGES", "root changes\n") + _write(repo, "dir/file.txt", "root file\n") + root = repo.index.commit("root commit") + repo.create_tag("ann", ref=root, message="annotated tag") + + _write(repo, "README.md", "release\n") + release = repo.index.commit("release candidate") + repo.create_tag("v1.0", ref=release) + main = repo.active_branch + + _write(repo, "side.txt", "side\n") + side_commit = repo.index.commit("side branch", parent_commits=[root], head=False, skip_hooks=True) + repo.create_head("side", side_commit) + + merge = repo.index.commit("merge side", parent_commits=[release, side_commit], skip_hooks=True) + repo.head.log_append(side_commit.binsha, 
"checkout: moving from side to main", merge.binsha) + + repo.create_head("aaaaaaaa", merge) + repo.create_tag("@foo", ref=merge) + + return { + "repo": repo, + "root": root, + "release": release, + "side": side_commit, + "merge": merge, + "main": main, + } + + +def test_rev_parse_names_hex_and_describe_forms(rev_parse_repo): + repo = rev_parse_repo["repo"] + release = rev_parse_repo["release"] + merge = rev_parse_repo["merge"] + + assert repo.rev_parse("@") == merge + assert repo.rev_parse("@foo") == merge + assert repo.rev_parse("aaaaaaaa") == merge + assert repo.rev_parse(merge.hexsha[:7]) == merge + describe_name = "anything-9-g%s" % merge.hexsha[:7] + assert repo.rev_parse("v1.0-1-g%s" % merge.hexsha[:7]) == merge + assert repo.rev_parse(describe_name) == merge + assert repo.rev_parse("%s-dirty" % merge.hexsha[:7]) == merge + + repo.create_tag(describe_name, ref=release) + assert repo.rev_parse(describe_name) == release + + +def test_rev_parse_navigation_and_peeling(rev_parse_repo): + repo = rev_parse_repo["repo"] + root = rev_parse_repo["root"] + release = rev_parse_repo["release"] + side = rev_parse_repo["side"] + merge = rev_parse_repo["merge"] + tag = repo.rev_parse("ann") + + assert repo.rev_parse("HEAD^0") == merge + assert repo.rev_parse("HEAD~0") == merge + assert repo.rev_parse("HEAD^1") == release + assert repo.rev_parse("HEAD^2") == side + assert repo.rev_parse("HEAD~") == release + assert repo.rev_parse("HEAD^^") == root + + assert tag.type == "tag" + assert repo.rev_parse("ann^{object}") == tag + assert repo.rev_parse("ann^{tag}") == tag + assert repo.rev_parse("ann^{}") == root + assert repo.rev_parse("ann^{commit}") == root + assert repo.rev_parse("HEAD^{tree}") == merge.tree + with pytest.raises(ValueError): + repo.rev_parse("HEAD^{/}") + + +def test_rev_parse_tree_and_index_paths(rev_parse_repo): + repo = rev_parse_repo["repo"] + merge = rev_parse_repo["merge"] + + assert repo.rev_parse("HEAD:") == merge.tree + assert 
repo.rev_parse("HEAD:README.md") == merge.tree["README.md"] + assert repo.rev_parse("HEAD^{tree}:README.md") == merge.tree["README.md"] + assert repo.rev_parse(":README.md").binsha == merge.tree["README.md"].binsha + assert repo.rev_parse(":0:README.md").binsha == merge.tree["README.md"].binsha + + +def test_rev_parse_reflog_selectors(rev_parse_repo): + repo = rev_parse_repo["repo"] + merge = rev_parse_repo["merge"] + side = rev_parse_repo["side"] + main = rev_parse_repo["main"] + release = rev_parse_repo["release"] + + assert repo.rev_parse("@{0}") == merge + assert repo.rev_parse("@{+0}") == merge + assert repo.rev_parse("@{1}") == release + assert repo.rev_parse("%s@{0}" % main.name) == merge + assert repo.rev_parse("@{-1}") == side + + SymbolicReference.create(repo, "refs/remotes/origin/%s" % main.name, merge) + main.set_tracking_branch(RemoteReference(repo, "refs/remotes/origin/%s" % main.name)) + assert repo.rev_parse("%s@{upstream}" % main.name) == merge + + +def test_rev_parse_commit_message_search(rev_parse_repo): + repo = rev_parse_repo["repo"] + release = rev_parse_repo["release"] + merge = rev_parse_repo["merge"] + + assert repo.rev_parse(":/release") == release + assert repo.rev_parse("HEAD^{/release}") == release + assert repo.rev_parse("HEAD^{/!-release}") == merge + + +def test_rev_parse_rejects_invalid_object_specs(rev_parse_repo): + repo = rev_parse_repo["repo"] + + with pytest.raises(ValueError): + repo.rev_parse(":") + with pytest.raises(ValueError): + repo.rev_parse(":/") + with pytest.raises(ValueError): + repo.rev_parse(":/[") + with pytest.raises(ValueError): + repo.rev_parse("HEAD^{/[}") + with pytest.raises(ValueError): + repo.rev_parse("@{-0}") + with pytest.raises(ValueError): + repo.rev_parse("HEAD^{invalid}") + with pytest.raises(BadName): + repo.rev_parse(":missing") diff --git a/test/test_submodule.py b/test/test_submodule.py index d88f9dab0..778d22e3f 100644 --- a/test/test_submodule.py +++ b/test/test_submodule.py @@ -28,7 +28,7 @@ 
from git.repo.fun import find_submodule_git_dir, touch from git.util import HIDE_WINDOWS_KNOWN_ERRORS, join_path_native, to_native_path_linux -from test.lib import TestBase, with_rw_directory, with_rw_repo +from test.lib import TestBase, with_rw_directory, with_rw_repo, PathLikeMock @contextlib.contextmanager @@ -58,6 +58,7 @@ def update(self, op, cur_count, max_count, message=""): print(op, cur_count, max_count, message) +TestRootProgress.__test__ = False prog = TestRootProgress() @@ -174,6 +175,10 @@ def _do_base_tests(self, rwrepo): sma = Submodule.add(rwrepo, sm.name, sm.path) assert sma.path == sm.path + # Adding existing as pathlike + sma = Submodule.add(rwrepo, sm.name, PathLikeMock(sm.path)) + assert sma.path == sm.path + # No url and no module at path fails. self.assertRaises(ValueError, Submodule.add, rwrepo, "newsubm", "pathtorepo", url=None) @@ -475,11 +480,6 @@ def test_base_rw(self, rwrepo): def test_base_bare(self, rwrepo): self._do_base_tests(rwrepo) - @pytest.mark.xfail( - sys.platform == "cygwin", - reason="Cygwin GitPython can't find submodule SHA", - raises=ValueError, - ) @pytest.mark.xfail( HIDE_WINDOWS_KNOWN_ERRORS, reason=( @@ -508,9 +508,9 @@ def test_root_module(self, rwrepo): with rm.config_writer(): pass - # Deep traversal gitdb / async. + # Deep traversal yields gitdb and its nested smmap. rsmsp = [sm.path for sm in rm.traverse()] - assert len(rsmsp) >= 2 # gitdb and async [and smmap], async being a child of gitdb. + assert rsmsp == ["git/ext/gitdb", "gitdb/ext/smmap"] # Cannot set the parent commit as root module's path didn't exist. 
self.assertRaises(ValueError, rm.set_parent_commit, "HEAD") @@ -753,6 +753,22 @@ def test_add_empty_repo(self, rwdir): ) # END for each checkout mode + @with_rw_directory + @_patch_git_config("protocol.file.allow", "always") + def test_update_submodule_with_relative_path(self, rwdir): + repo_path = osp.join(rwdir, "parent") + repo = git.Repo.init(repo_path) + module_repo_path = osp.join(rwdir, "module") + module_repo = git.Repo.init(module_repo_path) + module_repo.git.commit(m="test", allow_empty=True) + repo.git.submodule("add", "../module", "module") + repo.index.commit("add submodule") + + cloned_repo_path = osp.join(rwdir, "cloned_repo") + cloned_repo = git.Repo.clone_from(repo_path, cloned_repo_path) + + cloned_repo.submodule_update(init=True, recursive=True) + @with_rw_directory @_patch_git_config("protocol.file.allow", "always") def test_list_only_valid_submodules(self, rwdir): @@ -916,7 +932,7 @@ def assert_exists(sm, value=True): csm.repo.index.commit("Have to commit submodule change for algorithm to pick it up") assert csm.url == "bar" - self.assertRaises( + self.assertRaises( # noqa: B017 Exception, rsm.update, recursive=True, @@ -990,6 +1006,7 @@ def test_rename(self, rwdir): # garbage collector detailed in https://github.com/python/cpython/issues/97922.) if sys.platform == "win32" and sys.version_info >= (3, 12): gc.collect() + gc.collect() # Some finalizer scenarios need two collections, at least in theory. 
new_path = "renamed/myname" assert sm.move(new_path).name == new_path @@ -1310,6 +1327,17 @@ def test_submodule_update_unsafe_options(self, rw_repo): submodule.update(clone_multi_options=[unsafe_option]) assert not tmp_file.exists() + @with_rw_repo("HEAD") + def test_submodule_update_unsafe_options_are_checked_after_splitting_multi_options(self, rw_repo): + with tempfile.TemporaryDirectory() as tdir: + tmp_dir = Path(tdir) + payload = "--single-branch --config protocol.ext.allow=always" + submodule = Submodule(rw_repo, b"\0" * 20, name="new", path="new", url=str(tmp_dir)) + + with self.assertRaises(UnsafeOptionError): + submodule.update(clone_multi_options=[payload]) + assert not submodule.module_exists() + @with_rw_repo("HEAD") def test_submodule_update_unsafe_options_allowed(self, rw_repo): with tempfile.TemporaryDirectory() as tdir: @@ -1334,3 +1362,23 @@ def test_submodule_update_unsafe_options_allowed(self, rw_repo): for unsafe_option in unsafe_options: with self.assertRaises(GitCommandError): submodule.update(clone_multi_options=[unsafe_option], allow_unsafe_options=True) + + @with_rw_directory + @_patch_git_config("protocol.file.allow", "always") + def test_submodule_update_relative_url(self, rwdir): + parent_path = osp.join(rwdir, "parent") + parent_repo = git.Repo.init(parent_path) + submodule_path = osp.join(rwdir, "module") + submodule_repo = git.Repo.init(submodule_path) + submodule_repo.git.commit(m="initial commit", allow_empty=True) + + parent_repo.git.submodule("add", "../module", "module") + parent_repo.index.commit("add submodule with relative URL") + + cloned_path = osp.join(rwdir, "cloned_repo") + cloned_repo = git.Repo.clone_from(parent_path, cloned_path) + + cloned_repo.submodule_update(init=True, recursive=True) + + has_module = any(sm.name == "module" for sm in cloned_repo.submodules) + assert has_module, "Relative submodule was not updated properly" diff --git a/test/test_tree.py b/test/test_tree.py index 73158113d..629fd4d32 100644 --- 
a/test/test_tree.py +++ b/test/test_tree.py @@ -8,10 +8,14 @@ from pathlib import Path import subprocess +import pytest + from git.objects import Blob, Tree +from git.repo import Repo from git.util import cwd from test.lib import TestBase, with_rw_directory +from .lib.helper import PathLikeMock, with_rw_repo class TestTree(TestBase): @@ -126,12 +130,18 @@ def test_traverse(self): assert len(list(root)) == len(list(root.traverse(depth=1))) # Only choose trees. - trees_only = lambda i, d: i.type == "tree" + + def trees_only(i, _d): + return i.type == "tree" + trees = list(root.traverse(predicate=trees_only)) assert len(trees) == len([i for i in root.traverse() if trees_only(i, 0)]) # Test prune. - lib_folder = lambda t, d: t.path == "lib" + + def lib_folder(t, _d): + return t.path == "lib" + pruned_trees = list(root.traverse(predicate=trees_only, prune=lib_folder)) assert len(pruned_trees) < len(trees) @@ -155,3 +165,57 @@ def test_traverse(self): assert root[item.path] == item == root / item.path # END for each item assert found_slash + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_string_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + blob = repo.tree() / ".gitignore" + assert isinstance(blob, Blob) + assert blob.hexsha == "787b3d442a113b78e343deb585ab5531eb7187fa" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_pathlike_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + blob = repo.tree() / PathLikeMock(".gitignore") + assert isinstance(blob, Blob) + assert blob.hexsha == "787b3d442a113b78e343deb585ab5531eb7187fa" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_invalid_string_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + with pytest.raises(KeyError): + repo.tree() / "doesnotexist" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_invalid_pathlike_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + with pytest.raises(KeyError): + repo.tree() / PathLikeMock("doesnotexist") + + @with_rw_repo("0.3.2.1") + def 
test_repo_lookup_nested_string_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + blob = repo.tree() / "git/__init__.py" + assert isinstance(blob, Blob) + assert blob.hexsha == "d87dcbdbb65d2782e14eea27e7f833a209c052f3" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_nested_pathlike_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + blob = repo.tree() / PathLikeMock("git/__init__.py") + assert isinstance(blob, Blob) + assert blob.hexsha == "d87dcbdbb65d2782e14eea27e7f833a209c052f3" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_folder_string_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + tree = repo.tree() / "git" + assert isinstance(tree, Tree) + assert tree.hexsha == "ec8ae429156d65afde4bbb3455570193b56f0977" + + @with_rw_repo("0.3.2.1") + def test_repo_lookup_folder_pathlike_path(self, rw_repo): + repo = Repo(rw_repo.git_dir) + tree = repo.tree() / PathLikeMock("git") + assert isinstance(tree, Tree) + assert tree.hexsha == "ec8ae429156d65afde4bbb3455570193b56f0977" diff --git a/test/test_util.py b/test/test_util.py index dad2f3dcd..e7453769a 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -34,6 +34,7 @@ LockFile, cygpath, decygpath, + is_cygwin_git, get_user_id, remove_password_if_present, rmtree, @@ -112,7 +113,7 @@ def test_deletes_dir_with_readonly_files(self, tmp_path): sys.platform == "cygwin", reason="Cygwin can't set the permissions that make the test meaningful.", ) - def test_avoids_changing_permissions_outside_tree(self, tmp_path): + def test_avoids_changing_permissions_outside_tree(self, tmp_path, request): # Automatically works on Windows, but on Unix requires either special handling # or refraining from attempting to fix PermissionError by making chmod calls. 
@@ -124,9 +125,32 @@ def test_avoids_changing_permissions_outside_tree(self, tmp_path): dir2 = tmp_path / "dir2" dir2.mkdir() - (dir2 / "symlink").symlink_to(dir1 / "file") + symlink = dir2 / "symlink" + symlink.symlink_to(dir1 / "file") dir2.chmod(stat.S_IRUSR | stat.S_IXUSR) + def preen_dir2(): + """Don't leave unwritable directories behind. + + pytest has difficulties cleaning up after the fact on some platforms, + e.g., macOS, and whines incessantly until the issue is resolved--regardless + of the pytest session. + """ + rwx = stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR + if not dir2.exists(): + return + if symlink.exists(): + try: + # Try lchmod first, if the platform supports it. + symlink.lchmod(rwx) + except NotImplementedError: + # The platform (probably win32) doesn't support lchmod; fall back to chmod. + symlink.chmod(rwx) + dir2.chmod(rwx) + rmtree(dir2) + + request.addfinalizer(preen_dir2) + try: rmtree(dir2) except PermissionError: @@ -349,6 +373,24 @@ def test_decygpath(self, wpath, cpath): assert wcpath == wpath.replace("/", "\\"), cpath +class TestIsCygwinGit: + """Tests for :func:`is_cygwin_git`""" + + def test_on_path_executable(self): + # Currently we assume tests run on Cygwin use Cygwin git. See #533 and #1455 for background. + if sys.platform == "cygwin": + assert is_cygwin_git("git") + else: + assert not is_cygwin_git("git") + + def test_none_executable(self): + assert not is_cygwin_git(None) + + def test_with_missing_uname(self): + """Test for handling when `uname` isn't in the same directory as `git`""" + assert not is_cygwin_git("/bogus_path/git") + + class _Member: """A member of an IterableList."""