diff --git a/.github/workflows/mine-cargo-packageurls.yml b/.github/workflows/mine-cargo-packageurls.yml new file mode 100644 index 00000000..22791805 --- /dev/null +++ b/.github/workflows/mine-cargo-packageurls.yml @@ -0,0 +1,16 @@ +on: [workflow_dispatch] + +jobs: + mine-cargo-purls: + runs-on: ubuntu-24.04 + name: Mine cargo PackageURLs + steps: + - uses: aboutcode-org/scancode-action@beta + with: + scancodeio-repo-branch: "collect-purl-metadata#egg=scancodeio[mining]" + pipelines: "mine_cargo" + env: + FEDERATEDCODE_GIT_ACCOUNT_URL: https://github.com/aboutcode-data/minecode-data-cargo-test + FEDERATEDCODE_GIT_SERVICE_TOKEN: ${{ secrets.MINING_GITHUB_TOKEN }} + FEDERATEDCODE_GIT_SERVICE_NAME: "AboutCode Automation" + FEDERATEDCODE_GIT_SERVICE_EMAIL: "automation@aboutcode.org" diff --git a/.github/workflows/mine-pypi-packageurls.yml b/.github/workflows/mine-pypi-packageurls.yml index e06d945d..72f7a89d 100644 --- a/.github/workflows/mine-pypi-packageurls.yml +++ b/.github/workflows/mine-pypi-packageurls.yml @@ -12,5 +12,5 @@ jobs: env: FEDERATEDCODE_GIT_ACCOUNT_URL: https://github.com/aboutcode-data/minecode-data-pypi-test FEDERATEDCODE_GIT_SERVICE_TOKEN: ${{ secrets.MINING_GITHUB_TOKEN }} - FEDERATEDCODE_GIT_SERVICE_NAME: "the AboutCode bot" + FEDERATEDCODE_GIT_SERVICE_NAME: "AboutCode Automation" FEDERATEDCODE_GIT_SERVICE_EMAIL: "automation@aboutcode.org" diff --git a/README.rst b/README.rst index 359eb266..d2d92414 100644 --- a/README.rst +++ b/README.rst @@ -21,16 +21,22 @@ Configuration format * last serial number processed (used in indexes at pypi, npm etc) * last processed commit (where the data is stored in git repos) - * directory to store las fetched index data (like the JSON fetched from pypi simple with package names and last updated info) + * directory to store last fetched index data + (like the JSON fetched from pypi simple with package names and last updated info) * state information in ``state``: * ``null``: mining has not started. 
- * ``initital-sync`` : at the start of mining we need to mine a huge amount of packages for packageURL to catch up. - This is typically very large and could take several hours to several days dependening on the ecosystem size. - We fetch and save an index state and mine all packageURLs till there. Once we reach a state where remaining - new packageURLs can be mined in a couple hours, we can move on to the next state where we mine new packageURLs - added in a periodic manner. - * ``periodic-sync`` : This is a periodic update of new packageURLs added in the index in a period, and typically this + * ``initital-sync`` : at the start of mining we need to mine a huge + amount of packages for packageURL to catch up. + This is typically very large and could take several hours to several days + depending on the ecosystem size. + We fetch and save an index state and mine all packageURLs till there. + Once we reach a state where remaining + new packageURLs can be mined in a couple hours, we can move on to + the next state where we mine new packageURLs + added in a periodic manner. + * ``periodic-sync`` : This is a periodic update of new packageURLs + added in the index in a period, and typically this should not take more than a couple hours. * optional elements to improve readability/debugging: @@ -38,5 +44,5 @@ Configuration format * ``last_updated``: date and time of last checkpoint update * ``packages_checkpoints.json``: stores checkpoint related to: - + * ``packages_mined``: which packages have been mined in the ``initital-sync`` state. diff --git a/cargo/checkpoints.json b/cargo/checkpoints.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/cargo/checkpoints.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 9ff88976..1fb7649c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,5 +1,5 @@ Welcome to miencode-pipelines documentation! 
-========================================= +============================================= This is released at pypi: https://pypi.org/project/minecode-pipelines/ diff --git a/etc/.gitkeep b/etc/.gitkeep new file mode 100644 index 00000000..e69de29b