<?php

declare(strict_types=1);

/**
 * @file submit_sitemap.php
 * @description Submits sitemaps to the Google Search Console API.
 *  - Takes one or more sitemap / sitemap index URLs as command-line arguments.
 *  - Tells <sitemapindex> and <urlset> documents apart by their root element:
 *    index files are expanded recursively, urlset files are queued for submission.
 *  - Authenticates with a service account key file.
 *  - Set the VERBOSE=1 environment variable for detailed logging.
 *
 * Exit status: 0 when every sitemap was accepted (HTTP 204), 1 on any failure.
 */

// Site URL exactly as registered in Search Console (including the trailing "/").
$siteUrl = "https://breadmotion.github.io/";
// Service account key file. NOTE(review): this path is resolved against the
// current working directory, while the autoloader below is resolved against
// __DIR__ — confirm the script is always launched from the expected directory.
$credentialFile = "./tools/service_account.json";

require_once __DIR__ . "/../vendor/autoload.php";

/**
 * Picks the http:// and https:// URLs out of a list of command-line arguments.
 *
 * Arguments are trimmed before the scheme test and duplicates are dropped.
 *
 * @param array $args Raw arguments (without the script name).
 * @return string[] Unique sitemap / sitemap index URLs in first-seen order.
 */
function extractSitemapUrls(array $args): array
{
    $urls = [];
    foreach ($args as $arg) {
        $candidate = trim((string) $arg);
        if (
            strpos($candidate, "http://") === 0 ||
            strpos($candidate, "https://") === 0
        ) {
            $urls[] = $candidate;
        }
    }
    return array_values(array_unique($urls));
}

/**
 * Fetches every queued URL and returns the URLs whose root element is <urlset>.
 *
 * <sitemapindex> documents are expanded: each child <loc> is queued and
 * processed in turn. Every URL is fetched at most once, so an index that
 * references itself (directly or through a cycle) cannot loop forever.
 * Fetch and parse failures are reported and skipped.
 *
 * @param object   $http    HTTP client exposing request($method, $uri).
 * @param string[] $queue   Seed sitemap / sitemap index URLs.
 * @param bool     $verbose Whether to print per-URL progress.
 * @return string[] Sitemap URLs to submit.
 */
function collectSitemapsToSubmit($http, array $queue, bool $verbose): array
{
    $toSubmit = [];
    $visited = [];

    // Keep libxml parse errors internal instead of emitting PHP warnings;
    // the previous setting is restored before returning.
    $previousLibxmlSetting = libxml_use_internal_errors(true);

    while (!empty($queue)) {
        $url = array_shift($queue);
        if (isset($visited[$url])) {
            continue;
        }
        $visited[$url] = true;

        if ($verbose) {
            echo "[INFO] Fetching sitemap: {$url}" . PHP_EOL;
        }

        try {
            $response = $http->request("GET", $url);
        } catch (Exception $e) {
            echo "[ERROR] Failed to GET {$url}: " .
                $e->getMessage() .
                PHP_EOL;
            continue;
        }

        // HTTP errors do not throw (http_errors is disabled on the client),
        // so reject non-2xx responses here rather than parsing an error page.
        $status = $response->getStatusCode();
        if ($status < 200 || $status >= 300) {
            echo "[ERROR] Failed to GET {$url}: HTTP {$status}" . PHP_EOL;
            continue;
        }
        $body = (string) $response->getBody();

        try {
            // LIBXML_NONET: never touch the network while parsing remote XML.
            $xml = new SimpleXMLElement($body, LIBXML_NONET);
        } catch (Exception $e) {
            libxml_clear_errors();
            echo "[ERROR] Invalid XML from {$url}: " .
                $e->getMessage() .
                PHP_EOL;
            if ($verbose) {
                echo $body . PHP_EOL;
            }
            continue;
        }

        $rootName = $xml->getName();
        if ($rootName === "sitemapindex") {
            // Queue the child sitemaps for processing.
            foreach ($xml->sitemap as $sitemap) {
                $loc = trim((string) $sitemap->loc);
                if ($loc !== "") {
                    $queue[] = $loc;
                    if ($verbose) {
                        echo "[INFO] Found child sitemap: {$loc}" . PHP_EOL;
                    }
                }
            }
        } elseif ($rootName === "urlset") {
            // This document is itself a sitemap (a list of URLs): submit it.
            $toSubmit[] = $url;
            if ($verbose) {
                echo "[INFO] Added urlset for submission: {$url}" . PHP_EOL;
            }
        } elseif ($verbose) {
            echo "[WARN] Unknown root element '{$rootName}' in {$url}" .
                PHP_EOL;
        }
    }

    libxml_use_internal_errors($previousLibxmlSetting);

    return $toSubmit;
}

$verbose = getenv("VERBOSE") === "1";

// Sitemap(s) passed on the command line; index 0 is the script name.
$sitemapOrIndexUrls = extractSitemapUrls(array_slice($argv ?? [], 1));
if (empty($sitemapOrIndexUrls)) {
    echo "[ERROR] Put sitemap or sitemap index URL as commandline parameter" .
        PHP_EOL;
    exit(1);
}

// Fail fast, before any network traffic, when the credentials are missing.
if (!is_readable($credentialFile)) {
    echo "[ERROR] Service account file not found or unreadable: {$credentialFile}" .
        PHP_EOL;
    exit(1);
}

// "http_errors" is the Guzzle 6+ name of the old "exceptions" option; with it
// disabled, HTTP status codes are checked explicitly after each request.
$options = ["http_errors" => false, "debug" => false];
$http = new GuzzleHttp\Client($options);

// Sitemap URLs that will actually be PUT to Search Console.
$toSubmit = collectSitemapsToSubmit($http, $sitemapOrIndexUrls, $verbose);

if (empty($toSubmit)) {
    echo "[ERROR] No sitemap files detected to submit." . PHP_EOL;
    exit(1);
}

echo "[INFO] Sitemap URLs to submit: " . count($toSubmit) . PHP_EOL;

// Google client setup.
try {
    $client = new Google_Client();
    $client->setAuthConfig($credentialFile);
    $client->addScope(
        "https://www.googleapis.com/auth/webmasters",
    );
    $httpClient = $client->authorize();
} catch (Exception $e) {
    echo "[ERROR] Failed to initialise Google client: " .
        $e->getMessage() .
        PHP_EOL;
    exit(1);
}

$endpointBase =
    "https://www.googleapis.com/webmasters/v3/sites/" .
    urlencode($siteUrl) .
    "/sitemaps/";

$results = []; // HTTP status code => number of responses
$failures = 0; // non-204 responses plus requests that threw
$remaining = count($toSubmit);

foreach ($toSubmit as $sitemap) {
    $endpoint = $endpointBase . urlencode($sitemap);
    if ($verbose) {
        echo "[INFO] PUT {$endpoint}" . PHP_EOL;
    }

    try {
        $response = $httpClient->put($endpoint);
        $status = $response->getStatusCode();
        $body = (string) $response->getBody();
        $results[$status] = ($results[$status] ?? 0) + 1;

        if ($status !== 204) {
            $failures++;
            $json = json_decode($body, true);
            $message =
                $json["error"]["message"] ??
                $response->getReasonPhrase();
            echo "[ERROR] {$status}: {$message}" . PHP_EOL;
            if ($verbose) {
                echo $body . PHP_EOL;
            }
        } elseif ($verbose) {
            echo "[INFO] 204 OK for {$sitemap}" . PHP_EOL;
        }
    } catch (Exception $e) {
        $failures++;
        echo "[ERROR] Exception while submitting {$sitemap}: " .
            $e->getMessage() .
            PHP_EOL;
    }

    // Short pause between API calls; no need to wait after the last one.
    if (--$remaining > 0) {
        sleep(1);
    }
}

// Result summary.
if ($failures > 0) {
    echo "[ERROR] Submission finished with {$failures} failure(s). Results:" .
        PHP_EOL;
} else {
    echo "[SUCCESS] Submission complete. Results:" . PHP_EOL;
}
foreach ($results as $status => $count) {
    echo " Status " . $status . ": " . $count . " sitemaps" . PHP_EOL;
}

exit($failures > 0 ? 1 : 0);