From 5233751cf98636c2cf0a8fad5a9a2d1599c38aa9 Mon Sep 17 00:00:00 2001
From: RiskeyL <7a8y@163.com>
Date: Fri, 10 Apr 2026 12:45:53 +0800
Subject: [PATCH] fix: handle non-ASCII URLs and false positives in external
 link checker

---
 .github/workflows/check_external_links.yml    |  2 +-
 .../build-ai-image-generation-app.mdx         |  3 +-
 tools/check-links.py                          | 32 ++++++++++++++-----
 3 files changed, 27 insertions(+), 10 deletions(-)
diff --git a/.github/workflows/check_external_links.yml b/.github/workflows/check_external_links.yml
index 247c1f061..71f292e42 100644
--- a/.github/workflows/check_external_links.yml
+++ b/.github/workflows/check_external_links.yml
@@ -10,7 +10,7 @@ jobs:
     name: Check external links
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
 
       - uses: actions/setup-python@v5
         with:
diff --git a/ja/use-dify/tutorials/build-ai-image-generation-app.mdx b/ja/use-dify/tutorials/build-ai-image-generation-app.mdx
index 8dbd179e2..0e4074293 100644
--- a/ja/use-dify/tutorials/build-ai-image-generation-app.mdx
+++ b/ja/use-dify/tutorials/build-ai-image-generation-app.mdx
@@ -8,7 +8,8 @@ title: ゼロからAI画像生成アプリの構築方法
 
 <div class="image-side-by-side">
   <img
-src="https://assets-docs.dify.ai/dify-enterprise-mintlify/jp/workshop/basic/05ff829cf382e82c9ece2676032d2383.png)
+    src="https://assets-docs.dify.ai/dify-enterprise-mintlify/jp/workshop/basic/05ff829cf382e82c9ece2676032d2383.png"
+  />
 
 ## 今回の学ぶポイント
 
diff --git a/tools/check-links.py b/tools/check-links.py
index 770459d10..0814fd478 100644
--- a/tools/check-links.py
+++ b/tools/check-links.py
@@ -12,6 +12,7 @@
 import re
 import sys
 import urllib.error
+import urllib.parse
 import urllib.request
 from pathlib import Path
 
@@ -66,9 +67,8 @@ def classify_link(url: str) -> str:
     """Classify a link as internal, external, anchor, or skip."""
     if url.startswith(("http://", "https://")):
         # Skip localhost/loopback URLs
-        from urllib.parse import urlparse
         try:
-            host = urlparse(url).hostname or ""
+            host = urllib.parse.urlparse(url).hostname or ""
             if host in ("localhost", "127.0.0.1", "0.0.0.0", "::1"):
                 return "skip"
         except Exception:
@@ -246,18 +246,34 @@ def check_external_links():
     broken = []
     skipped = 0
 
+    # Skip the asset CDN (reliable, many URLs) and domains that block bots or are geo-restricted
+    skip_domains = {"assets-docs.dify.ai", "volcengine.com", "twitter.com", "x.com"}
+
     for i, url in enumerate(unique_urls):
         if (i + 1) % 50 == 0:
             print(f"  Progress: {i + 1}/{len(unique_urls)}")
 
-        # Skip asset CDN URLs (usually reliable, many of them)
-        if "assets-docs.dify.ai" in url:
-            skipped += 1
-            continue
+        # Skip URLs whose parsed hostname is a listed domain or one of its subdomains
+        try:
+            host = urllib.parse.urlparse(url).hostname or ""
+            if any(host == d or host.endswith("." + d) for d in skip_domains):
+                skipped += 1
+                continue
+        except Exception:
+            pass
+
+        # Encode non-ASCII characters in URL path, preserving existing percent-escapes
+        try:
+            parsed = urllib.parse.urlparse(url)
+            encoded_url = urllib.parse.urlunparse(parsed._replace(
+                path=urllib.parse.quote(parsed.path, safe="/:@!$&'()*+,;=-._~%")
+            ))
+        except Exception:
+            encoded_url = url
 
         try:
             req = urllib.request.Request(
-                url,
+                encoded_url,
                 method="HEAD",
                 headers={"User-Agent": "Mozilla/5.0 (Dify-Docs-LinkChecker/1.0)"}
             )
@@ -270,7 +286,7 @@ def check_external_links():
             if e.code == 405:
                 try:
                     req = urllib.request.Request(
-                        url,
+                        encoded_url,
                         method="GET",
                         headers={"User-Agent": "Mozilla/5.0 (Dify-Docs-LinkChecker/1.0)"}
                     )