Skip to content

Commit 83e0e2d

Browse files
committed
Merge remote-tracking branch 'upstream/main' into update-lazy-trt-compile
2 parents 9edc9c5 + 501947c commit 83e0e2d

167 files changed

Lines changed: 3614 additions & 1147 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/cicd-main.yml

Lines changed: 37 additions & 104 deletions
Large diffs are not rendered by default.

.github/workflows/code-formatting.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ jobs:
3535
ref: ${{ github.event.pull_request.head.ref }}
3636
# custom token is required to trigger actions after reformatting + pushing
3737
token: ${{ secrets.NEMO_REFORMAT_TOKEN }}
38+
fetch-depth: 0
3839

3940
- name: Get changed files
4041
id: changed-files

.github/workflows/gh-docs.yml

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ on:
88
# Set the access for individual scopes
99
permissions: write-all
1010

11+
env:
12+
PYTHON_VERSION: "3.11"
13+
1114
jobs:
1215
deploy:
1316
runs-on: ubuntu-latest
@@ -16,7 +19,7 @@ jobs:
1619
image: squidfunk/mkdocs-material
1720

1821
steps:
19-
- uses: actions/checkout@v3
22+
- uses: actions/checkout@v4
2023
if: github.event.repository.fork == false
2124
with:
2225
ref: gh-pages-src
@@ -36,3 +39,43 @@ jobs:
3639
continue-on-error: true
3740
run: mkdocs gh-deploy --force
3841

42+
# Job: build the docs with the linkcheck target and fail when the
# repository's filter script reports broken links. Every step after
# "Get changed files" is gated on that step's `any_changed` output.
linkcheck:
43+
runs-on: ubuntu-latest
44+
steps:
45+
- name: Checkout
46+
uses: actions/checkout@v4
47+
48+
# Only paths matching `docs/**` are considered by this step; its
# `any_changed` output is what the later `if:` conditions test.
# (presumably 'true' when at least one matching file changed — confirm
# against the changed-files action documentation)
- name: Get changed files
49+
id: changed-files
50+
uses: step-security/changed-files@v45.0.1
51+
with:
52+
files: docs/**
53+
files_separator: ","
54+
separator: " "
55+
56+
# The interpreter version comes from the workflow-level PYTHON_VERSION env value.
- name: Set up Python ${{ env.PYTHON_VERSION }}
57+
if: steps.changed-files.outputs.any_changed == 'true'
58+
uses: actions/setup-python@v5
59+
with:
60+
python-version: ${{ env.PYTHON_VERSION }}
61+
62+
- name: Install Sphinx dependencies
63+
if: steps.changed-files.outputs.any_changed == 'true'
64+
run: python3 -m pip install -r requirements/requirements_docs.txt
65+
66+
# `|| true` discards make's exit status on purpose: the pass/fail decision
# is left to the next step, which runs docs/check_for_broken_links.sh.
- name: Linkcheck docs build
67+
if: steps.changed-files.outputs.any_changed == 'true'
68+
run: make -C docs linkcheck || true
69+
70+
# The script exits non-zero when it finds broken links that are not listed
# in its allow-list files, which fails this step.
- name: Eliminate false positives
71+
if: steps.changed-files.outputs.any_changed == 'true'
72+
run: ./docs/check_for_broken_links.sh
73+
74+
# NOTE(review): this `if:` has no status-check function such as always(),
# so the step appears to be skipped when the previous step fails, i.e.
# exactly when broken links were found — confirm that this is intended.
- name: Upload linkcheck output
75+
if: steps.changed-files.outputs.any_changed == 'true'
76+
uses: actions/upload-artifact@v4
77+
with:
78+
name: linkcheck-artifact
79+
path: docs/build/linkcheck
80+
# Fail the step instead of silently uploading nothing when the path is missing.
if-no-files-found: error
81+
retention-days: 7

docs/README.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Documentation Process for NeMo
2+
3+
## Building the Documentation
4+
5+
1. Create and activate a virtual environment.
6+
7+
1. Install the documentation dependencies:
8+
9+
```console
10+
$ python3 -m pip install -r requirements/requirements_docs.txt
11+
```
12+
13+
1. Build the documentation:
14+
15+
```console
16+
$ make -C docs html
17+
```
18+
19+
## Checking for Broken Links
20+
21+
1. Build the documentation, as described in the preceding section, but use the following command:
22+
23+
```shell
24+
make -C docs clean linkcheck
25+
```
26+
27+
1. Run the link-checking script:
28+
29+
```shell
30+
./docs/check_for_broken_links.sh
31+
```
32+
33+
If there are no broken links, then the script exits with `0`.
34+
35+
If the script produces any output, copy the `uri` value and paste it into your browser to confirm
36+
that the link is broken.
37+
38+
```json
39+
{
40+
"filename": "nlp/text_normalization/nn_text_normalization.rst",
41+
"lineno": 247,
42+
"status": "broken",
43+
"code": 0,
44+
"uri": "https://research.fb.com/wp-content/uploads/2019/03/Neural-Models-of-Text-Normalization-for-Speech-Applications.pdf",
45+
"info": "400 Client Error: Bad Request for url: https://research.facebook.com/wp-content/uploads/2019/03/Neural-Models-of-Text-Normalization-for-Speech-Applications.pdf"
46+
}
47+
```
48+
49+
If the link is OK, and this is the case with many URLs that reference GitHub repository file headings,
50+
then copy the JSON output and add it to `docs/false_positives.json`.
51+
Run the script again to confirm that the URL is no longer reported as a broken link.
52+
53+
There may be false positives due to Sphinx not being able to detect links from built html files.
54+
Instead of adding those to the `docs/false_positives.json` file, it would be best to rewrite the
55+
reference using the [`:ref:` role](https://www.sphinx-doc.org/en/master/usage/referencing.html#role-ref).
56+
57+
For example, instead of writing `Modules <../api.html#modules>` to link to the modules section of
58+
an `api.rst` file, write it as ``:ref:`Modules <asr-api-modules>` ``. And in the `api.rst` file, add
59+
this label before the section being linked to:
60+
61+
```
62+
.. _asr-api-modules:
63+
```

docs/check_for_broken_links.sh

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#!/usr/bin/env bash
2+
3+
# Paths used by the link checker. Everything is resolved relative to the
# directory that contains this script, so the script can be started from any
# working directory. The values are constants and are therefore read-only.

# Directory holding this script (the docs directory).
DOCS_DIR=$(dirname -- "${BASH_SOURCE[0]}")
readonly DOCS_DIR

# Linkcheck entries that were verified by hand to be working links.
readonly FALSE_POSITIVES_JSON="${DOCS_DIR}/false_positives.json"

# Linkcheck entries that are known and still awaiting review.
readonly NEEDS_REVIEW_JSON="${DOCS_DIR}/links_needing_review.json"

# Output written by `make -C docs linkcheck`.
readonly LINKCHECK_JSON="${DOCS_DIR}/build/linkcheck/output.json"
7+
8+
#######################################
# Verify that everything the link check needs is available.
# All checks are evaluated before the function gives up, so a single run
# reports every missing prerequisite.
# Globals:
#   FALSE_POSITIVES_JSON (read), LINKCHECK_JSON (read)
# Arguments:
#   None
# Outputs:
#   One diagnostic per problem, written to stderr.
# Returns:
#   0 when all prerequisites are present; otherwise the script is terminated
#   with exit status 2.
#######################################
function check_environment {
  local err=0

  # `command -v` is the portable "is this tool runnable?" test.
  if ! command -v jq >/dev/null 2>&1; then
    >&2 echo "jq is required but is not found."
    # Plain assignment instead of ((err++)): the latter returns status 1 when
    # err is 0, which would abort the script early under `set -e`.
    err=$((err + 1))
  fi

  if [[ ! -f "${FALSE_POSITIVES_JSON}" ]]; then
    >&2 echo "A JSON file with false positives is required: ${FALSE_POSITIVES_JSON}"
    err=$((err + 1))
  fi

  if [[ ! -f "${LINKCHECK_JSON}" ]]; then
    >&2 echo "Did not find linkcheck output JSON file: ${LINKCHECK_JSON}."
    >&2 echo "Run Sphinx with the linkcheck arg: make -C docs clean linkcheck"
    err=$((err + 1))
  fi

  if ((err > 0)); then
    exit 2
  fi
}
27+
28+
#######################################
# Print every broken link from the linkcheck output that is not acknowledged
# in the false-positives file or in the needs-review file, then terminate the
# script.
# Globals:
#   LINKCHECK_JSON (read)       - stream of JSON objects written by linkcheck
#   FALSE_POSITIVES_JSON (read) - stream of JSON objects with a "uri" key
#   NEEDS_REVIEW_JSON (read)    - same format; optional, a missing file is
#                                 treated as an empty list
#   DEBUG (read)                - when non-empty, each checked URI is logged
#                                 to stderr
# Arguments:
#   None
# Outputs:
#   The pretty-printed JSON entry of each unacknowledged broken link on stdout.
# Returns:
#   Never returns. Exits with the number of unacknowledged broken links,
#   capped at 255 so that a large count cannot wrap around to 0; exits with 2
#   when the linkcheck output cannot be parsed.
#######################################
function check_links {
  local err=0
  local broken count i entry link
  local false_positive_resp needs_review_resp

  # Slurp the object stream and keep only the broken entries, as one array.
  if ! broken=$(jq -s 'map(select(.status == "broken"))' "${LINKCHECK_JSON}"); then
    >&2 echo "Failed to parse linkcheck output: ${LINKCHECK_JSON}"
    exit 2
  fi
  count=$(jq 'length' <<<"${broken}")

  # Arithmetic loop rather than `seq 0 $((count - 1))`: BSD seq counts
  # downwards for `seq 0 -1`, which would yield bogus iterations when there
  # are no broken links.
  for ((i = 0; i < count; i++)); do
    entry=$(jq ".[${i}]" <<<"${broken}")
    link=$(jq -r '.uri' <<<"${entry}")
    if [[ -n "${DEBUG:-}" ]]; then
      echo >&2 "Checking for false positive: ${link}"
    fi

    false_positive_resp=$(jq --arg check "${link}" -s 'any(.uri == $check)' \
      <"${FALSE_POSITIVES_JSON}")

    needs_review_resp="false"
    if [[ -f "${NEEDS_REVIEW_JSON}" ]]; then
      needs_review_resp=$(jq --arg check "${link}" -s 'any(.uri == $check)' \
        <"${NEEDS_REVIEW_JSON}")
    fi

    # Only an explicit "true" acknowledges a link. Any other response,
    # including the empty string left behind by a failed jq call, keeps the
    # link reported instead of silently hiding it.
    if [[ "${false_positive_resp}" != "true" && "${needs_review_resp}" != "true" ]]; then
      err=$((err + 1))
      printf '%s\n' "${entry}"
    fi
  done

  # Exit statuses are taken modulo 256; without the cap 256 broken links
  # would be reported as success.
  exit "$((err > 255 ? 255 : err))"
}
50+
51+
# Script entry point: validate the prerequisites first, then scan the
# linkcheck output. check_links ends the script with its own exit status.
function main {
  check_environment
  check_links
}

main

docs/combined.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[]

docs/false_positives.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"filename": "nlp/text_normalization/nn_text_normalization.rst",
3+
"lineno": 247,
4+
"status": "broken",
5+
"code": 0,
6+
"uri": "https://research.fb.com/wp-content/uploads/2019/03/Neural-Models-of-Text-Normalization-for-Speech-Applications.pdf",
7+
"info": "400 Client Error: Bad Request for url: https://research.facebook.com/wp-content/uploads/2019/03/Neural-Models-of-Text-Normalization-for-Speech-Applications.pdf"
8+
}

0 commit comments

Comments
 (0)