Skip to content

Commit a5ec845

Browse files
committed
Merge remote-tracking branch 'upstream/main' into feat/pep770-sbom
2 parents c256aa1 + 04a5f97 commit a5ec845

328 files changed

Lines changed: 7382 additions & 7858 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitattributes

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
*.dta binary
1414
*.xls binary
1515
*.xlsx binary
16-
pandas/_version.py export-subst
1716

1817

1918
*.bz2 export-ignore
@@ -72,7 +71,6 @@ web export-ignore
7271
CITATION.cff export-ignore
7372
codecov.yml export-ignore
7473
environment.yml export-ignore
75-
setup.py export-ignore
7674

7775

7876
# GH 39321

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33
- [ ] All [code checks passed](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#pre-commit).
44
- [ ] Added [type annotations](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#type-hints) to new arguments/methods/functions.
55
- [ ] Added an entry in the latest `doc/source/whatsnew/vX.X.X.rst` file if fixing a bug or adding a new feature.
6+
- [ ] I have reviewed and followed all the [contribution guidelines](https://pandas.pydata.org/docs/dev/development/contributing.html)
67
- [ ] If I used AI to develop this pull request, I prompted it to follow `AGENTS.md`.

.github/workflows/code-checks.yml

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,8 @@ jobs:
4343
- name: Build Pandas
4444
id: build
4545
uses: ./.github/actions/build_pandas
46-
with:
47-
editable: false
4846

4947
# The following checks are independent of each other and should still be run if one fails
50-
51-
# TODO: The doctests have to be run first right now, since the Cython doctests only work
52-
# with pandas installed in non-editable mode
53-
# This can be removed once pytest-cython doesn't require C extensions to be installed inplace
54-
5548
- name: Extra installs
5649
# https://pytest-qt.readthedocs.io/en/latest/troubleshooting.html#github-actions-azure-pipelines-travis-ci-and-gitlab-ci-cd
5750
run: sudo apt-get update && sudo apt-get install -y libegl1 libopengl0
@@ -60,13 +53,6 @@ jobs:
6053
run: cd ci && ./code_checks.sh doctests
6154
if: ${{ steps.build.outcome == 'success' && always() }}
6255

63-
- name: Install pandas in editable mode
64-
id: build-editable
65-
if: ${{ steps.build.outcome == 'success' && always() }}
66-
uses: ./.github/actions/build_pandas
67-
with:
68-
editable: true
69-
7056
- name: Check for no warnings when building single-page docs
7157
run: ci/code_checks.sh single-docs
7258
if: ${{ steps.build.outcome == 'success' && always() }}

.github/workflows/docbuild-and-upload.yml

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@ on:
1717
description: 'The pandas version to override'
1818
required: false
1919
type: string
20+
publish_prod:
21+
description: 'Publish to production'
22+
type: boolean
23+
default: false
2024

2125
env:
2226
ENV_FILE: environment.yml
2327
PANDAS_CI: 1
2428
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
25-
PANDAS_VERSION: "${{ github.event_name == 'workflow_dispatch' && github.event.inputs.version || '' }}"
26-
PANDAS_VERSION_OVERRIDE: "${{ github.event_name == 'workflow_dispatch' && github.event.inputs.version || '' }}"
2729

2830
permissions:
2931
contents: read
@@ -45,13 +47,34 @@ jobs:
4547
steps:
4648
- name: Set pandas version
4749
run: |
48-
echo "PANDAS_VERSION=${GITHUB_REF_NAME:1}" >> "$GITHUB_ENV"
49-
if: github.event_name != 'workflow_dispatch'
50-
51-
- name: Show environment variables
52-
run: |
53-
echo "PANDAS_VERSION=${{ env.PANDAS_VERSION }}"
54-
echo "PANDAS_VERSION_OVERRIDE=${{ env.PANDAS_VERSION_OVERRIDE }}"
50+
# tags include a `v` prefix.
51+
tag_version_pat="^v[0-9]+\.[0-9]+\.[0-9]+(rc[0-9]+)?$"
52+
version_pat="^[0-9]+\.[0-9]+\.[0-9]+(rc[0-9]+)?$"
53+
54+
if [[ -n "${{ github.event.inputs.version }}" ]]; then
55+
PANDAS_VERSION="${{ github.event.inputs.version }}"
56+
elif [[ "${{ github.event_name }}" == "push" && "$GITHUB_REF_NAME" =~ $tag_version_pat ]]; then
57+
PANDAS_VERSION="${GITHUB_REF_NAME:1}"
58+
else
59+
PANDAS_VERSION=""
60+
fi
61+
echo "PANDAS_VERSION=$PANDAS_VERSION" >> "$GITHUB_ENV"
62+
63+
if [[ "${{ github.event_name }}" == "push" && -n "$PANDAS_VERSION" ]]; then
64+
PUBLISH_PROD="true"
65+
elif [[ "${{ github.event.inputs.publish_prod }}" == "true" ]]; then
66+
PUBLISH_PROD="true"
67+
else
68+
PUBLISH_PROD="false"
69+
fi
70+
echo "PUBLISH_PROD=$PUBLISH_PROD" >> "$GITHUB_ENV"
71+
72+
if [[ "$PUBLISH_PROD" == "true" ]] &&
73+
[[ ! "$PANDAS_VERSION" =~ $version_pat ]]
74+
then
75+
echo "Invalid version $PANDAS_VERSION for publishing to prod."
76+
exit 1
77+
fi
5578
5679
- name: Checkout
5780
uses: actions/checkout@v6
@@ -105,7 +128,7 @@ jobs:
105128

106129
- name: Upload prod docs
107130
run: rsync -az --delete doc/build/html/ web@${{ secrets.server_ip }}:/var/www/html/pandas-docs/version/${{ env.PANDAS_VERSION }}
108-
if: (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')) || (github.event_name == 'workflow_dispatch' && env.PANDAS_VERSION_OVERRIDE != '')
131+
if: ${{ env.PUBLISH_PROD == 'true' }}
109132

110133
- name: Move docs into site directory
111134
run: mv doc/build/html web/build/docs

.github/workflows/unit-tests.yml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
strategy:
2828
matrix:
2929
platform: [ubuntu-24.04, ubuntu-24.04-arm]
30-
env_file: [actions-311.yaml, actions-312.yaml, actions-313.yaml]
30+
env_file: [actions-311.yaml, actions-312.yaml, actions-313.yaml, actions-314.yaml]
3131
# Prevent the include jobs from overriding other jobs
3232
pattern: [""]
3333
pandas_future_infer_string: ["1"]
@@ -190,7 +190,7 @@ jobs:
190190
strategy:
191191
matrix:
192192
os: [macos-15-intel, macos-15, windows-2025]
193-
env_file: [actions-311.yaml, actions-312.yaml, actions-313.yaml]
193+
env_file: [actions-311.yaml, actions-312.yaml, actions-313.yaml, actions-314.yaml]
194194
fail-fast: false
195195
runs-on: ${{ matrix.os }}
196196
name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }}
@@ -246,7 +246,7 @@ jobs:
246246
run: |
247247
/opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev
248248
. ~/virtualenvs/pandas-dev/bin/activate
249-
python -m pip install --no-cache-dir -U pip wheel setuptools meson[ninja]==1.2.1 meson-python==0.13.1
249+
python -m pip install --no-cache-dir -U pip wheel meson[ninja]==1.2.1 meson-python==0.13.1
250250
python -m pip install numpy -Csetup-args="-Dallow-noblas=true"
251251
python -m pip install --no-cache-dir versioneer[toml] cython python-dateutil "pytest>=8.3.4" "pytest-xdist>=3.6.1" "hypothesis>=6.116.0"
252252
python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror"
@@ -284,7 +284,7 @@ jobs:
284284
run: |
285285
/opt/python/cp313-cp313/bin/python -m venv ~/virtualenvs/pandas-dev
286286
. ~/virtualenvs/pandas-dev/bin/activate
287-
python -m pip install --no-cache-dir -U pip wheel setuptools meson-python==0.13.1 meson[ninja]==1.2.1
287+
python -m pip install --no-cache-dir -U pip wheel meson-python==0.13.1 meson[ninja]==1.2.1
288288
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil "pytest>=8.3.4" "pytest-xdist>=3.6.1" "hypothesis>=6.116.0"
289289
python -m pip install --no-cache-dir --no-build-isolation -e . -Csetup-args="--werror"
290290
python -m pip list --no-cache-dir
@@ -318,7 +318,7 @@ jobs:
318318
# To freeze this file, uncomment the ``if: false`` condition, and migrate the jobs
319319
# to the corresponding posix/windows-macos/sdist etc. workflows.
320320
# Feel free to modify this comment as necessary.
321-
# if: false
321+
if: false
322322
defaults:
323323
run:
324324
shell: bash -eou pipefail {0}
@@ -349,12 +349,12 @@ jobs:
349349
- name: Set up Python Dev Version
350350
uses: actions/setup-python@v6
351351
with:
352-
python-version: '3.14-dev'
352+
python-version: '3.15-dev'
353353

354354
- name: Build Environment
355355
run: |
356356
python --version
357-
python -m pip install --upgrade pip setuptools wheel meson[ninja]==1.2.1 meson-python==0.13.1
357+
python -m pip install --upgrade pip wheel meson[ninja]==1.2.1 meson-python==0.13.1
358358
python -m pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy
359359
python -m pip install versioneer[toml] python-dateutil tzdata cython "hypothesis>=6.116.0" "pytest>=8.3.4" "pytest-xdist>=3.6.1" pytest-cov
360360
python -m pip install -ve . --no-build-isolation --no-index --no-deps -Csetup-args="--werror"

.github/workflows/wheels.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ jobs:
116116
# BackendUnavailable: Cannot import 'mesonpy'
117117
- buildplat: [windows-11-arm, win_arm64]
118118
python: ["cp313t", "3.13"]
119+
concurrency:
120+
# https://github.community/t/concurrecy-not-work-for-push/183068/7
121+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.buildplat[0] }}-${{ matrix.buildplat[1] }}-${{ matrix.python[0]}}-build-wheels
122+
cancel-in-progress: true
119123

120124
env:
121125
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
@@ -192,7 +196,8 @@ jobs:
192196
# installing wheel here because micromamba step was skipped
193197
if: matrix.buildplat[1] == 'win_arm64'
194198
shell: bash -el {0}
195-
run: python -m pip install wheel anaconda-client
199+
# cryptography pin due to https://github.com/pyca/cryptography/pull/14216
200+
run: python -m pip install wheel anaconda-client cryptography==46.0.3
196201

197202
- name: Validate wheel RECORD
198203
shell: bash -el {0}

.gitignore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ debug
4545

4646
# Python files #
4747
################
48-
# setup.py working directory
48+
# meson working directory
4949
build
5050
# sphinx build directory
5151
doc/_build
52-
# setup.py dist directory
52+
# dist directory
5353
dist
5454
# Egg metadata
5555
*.egg-info

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.14.10
22+
rev: v0.14.14
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -46,7 +46,7 @@ repos:
4646
- id: codespell
4747
types_or: [python, rst, markdown, cython, c]
4848
- repo: https://github.com/MarcoGorelli/cython-lint
49-
rev: v0.18.1
49+
rev: v0.19.0
5050
hooks:
5151
- id: cython-lint
5252
- id: double-quote-cython-strings
@@ -99,7 +99,7 @@ repos:
9999
args: ['-i', '--style', '{IndentPPDirectives: AfterHash}']
100100
types_or: [c, c++]
101101
- repo: https://github.com/trim21/pre-commit-mirror-meson
102-
rev: v1.10.0
102+
rev: v1.10.1
103103
hooks:
104104
- id: meson-fmt
105105
args: ['--inplace']
@@ -300,7 +300,7 @@ repos:
300300
entry: python scripts/validate_exception_location.py
301301
language: python
302302
files: ^pandas/
303-
exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py)
303+
exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__\.py$)
304304
types: [python]
305305
- id: check-test-naming
306306
name: check that test names start with 'test'

asv_bench/benchmarks/indexing.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,28 @@ def time_frame_getitem_single_column_int(self):
438438
self.df_int_col[0]
439439

440440

441+
class DataFrameGetitemDuplicateColumns:
442+
"""
443+
Benchmark df[key] when columns have duplicate names but key is unique.
444+
445+
Previously each access called columns.drop_duplicates(keep=False), which
446+
built a new Index (O(n)). Now we use get_loc(key), so this path is O(1)
447+
for hash-based indexes.
448+
"""
449+
450+
params = [1_000, 10_000, 100_000, 1_000_000]
451+
param_names = ["ncols"]
452+
453+
def setup(self, ncols):
454+
# ncols-1 duplicate names + one unique column we access
455+
cols = ["a"] * (ncols - 1) + ["key"]
456+
self.df = DataFrame(0, index=range(100), columns=cols)
457+
458+
def time_getitem_single_column_with_duplicate_columns(self, ncols):
459+
for _ in range(100):
460+
self.df["key"]
461+
462+
441463
class IndexSingleRow:
442464
params = [True, False]
443465
param_names = ["unique_cols"]

0 commit comments

Comments
 (0)