11<?php
2+
23/**
34 * Get Page Body
45 */
5- function get_url_contents ($ site_url , $ type = '' ){
6+ function run_curl ($ site_url , $ type = '' ){
67 $ curl = curl_init ($ site_url );
78 curl_setopt ($ curl , CURLOPT_URL , $ site_url );
89 curl_setopt ($ curl , CURLOPT_RETURNTRANSFER , true );
10+ curl_setopt ($ curl , CURLOPT_FOLLOWLOCATION , true );
911 curl_setopt ($ curl , CURLOPT_SSL_VERIFYHOST , false );
1012 curl_setopt ($ curl , CURLOPT_SSL_VERIFYPEER , false );
13+ curl_setopt ($ curl , CURLOPT_PROTOCOLS , CURLPROTO_HTTP | CURLPROTO_HTTPS );
14+ curl_setopt ($ curl , CURLOPT_REDIR_PROTOCOLS , CURLPROTO_HTTP | CURLPROTO_HTTPS );
1115 curl_setopt ($ curl , CURLOPT_USERAGENT , 'Equalify ' );
1216
1317 // Restrict CURL to the type of what you want to add.
@@ -16,12 +20,40 @@ function get_url_contents($site_url, $type = ''){
1620 if ($ type == 'xml ' )
1721 curl_setopt ($ curl , CURLOPT_HTTPHEADER , array ('Accept: application/xml ' ));
1822
19- // Execute CURL or fallback.
23+ // Execute CURL
2024 $ url_contents = curl_exec ($ curl );
25+
26+ // Add in DB info so we can see if URL is unique.
27+ require_once '../config.php ' ;
28+ require_once 'db.php ' ;
29+ $ db = connect (
30+ DB_HOST ,
31+ DB_USERNAME ,
32+ DB_PASSWORD ,
33+ DB_NAME
34+ );
35+
36+ // The curled URL is the URL we use as an ID.
37+ $ curled_url = curl_getinfo ($ curl , CURLINFO_EFFECTIVE_URL );
38+
39+ // We don't include added endpoints in the URL.
40+ $ json_endpoints = '/wp-json/wp/v2/pages?per_page=100 ' ;
41+ $ curled_url = str_replace ($ json_endpoints , '' , $ curled_url );
42+
43+ // Make sure URL is unique to minimize scans.
44+ if (!is_unique_site ($ db , $ curled_url ))
45+ throw new Exception ('" ' .$ curled_url .'" already exists ' );
46+
47+ // Fallback if no contents exist.
2148 if ($ url_contents == false )
22- throw new Exception ('Contents of " ' .$ site_url .'" cannot be loaded ' );
49+ throw new Exception ('Contents of " ' .$ curled_url .'" cannot be loaded ' );
2350 curl_close ($ curl );
24- return $ url_contents ;
51+
52+ // We use the curled URL as the unique ID.
53+ return array (
54+ 'url ' => $ curled_url ,
55+ 'contents ' => $ url_contents
56+ );
2557
2658}
2759
@@ -30,29 +62,26 @@ function get_url_contents($site_url, $type = ''){
3062 */
function single_page_adder($site_url){

    // A single page needs no discovery step: fetching it through
    // run_curl() is what proves the URL can be scanned. run_curl()
    // throws an Exception when the URL already exists or its contents
    // cannot be loaded; otherwise its result (an array with 'url' and
    // 'contents' keys) is handed straight back to the caller.
    return run_curl($site_url);

}
4270
4371/**
4472 * WordPress Pages Adder
4573 */
4674function wordpress_site_adder ($ site_url ){
4775
48- // Reformat URL for JSON request.
49- $ json_url = $ site_url .'wp-json/wp/v2/pages?per_page=100 ' ;
76+ // Add WP JSON URL endpoints for request.
77+ $ json_endpoints = '/wp-json/wp/v2/pages?per_page=100 ' ;
78+ $ json_url = $ site_url .$ json_endpoints ;
5079
5180 // Get URL contents.
52- $ url_contents = get_url_contents ($ json_url , 'wordpress ' );
81+ $ curled_site = run_curl ($ json_url , 'wordpress ' );
5382
5483 // Create JSON.
55- $ wp_api_json = json_decode ($ url_contents , true );
84+ $ wp_api_json = json_decode ($ curled_site [ ' contents ' ] , true );
5685 if (empty ($ wp_api_json [0 ]))
5786 throw new Exception ('The URL " ' .$ site_url .'" is not valid output ' );
5887
@@ -61,7 +90,17 @@ function wordpress_site_adder($site_url){
6190 foreach ($ wp_api_json as $ page ):
6291 array_push ($ pages , array ('url ' => $ page ['link ' ]));
6392 endforeach ;
64- return $ pages ;
93+
94+ // Remove WP JSON endpoints.
95+ $ clean_curled_url = str_replace ($ json_endpoints , '' , $ curled_site ['url ' ]);
96+
97+ // Reformat the curled contents to be an array we can
98+ // work with.
99+ return array (
100+ 'url ' => $ clean_curled_url ,
101+ 'contents ' => $ pages
102+ );
103+
65104}
66105
67106/**
@@ -70,14 +109,15 @@ function wordpress_site_adder($site_url){
70109function xml_site_adder ($ site_url ){
71110
72111 // Get URL contents.
73- $ url_contents = get_url_contents ($ site_url , 'xml ' );
112+ $ curled_site = run_curl ($ site_url , 'xml ' );
74113
75114 // Valid XML files are only allowed!
76- if (!str_starts_with ($ url_contents , '<?xml ' ))
77- throw new Exception ('" ' .$ site_url .'" is not valid XML ' );
115+ $ xml_contents = $ curled_site ['contents ' ];
116+ if (!str_starts_with ($ xml_contents , '<?xml ' ))
117+ throw new Exception ('" ' .$ curled_site ['url ' ].'" is not a readable XML format ' );
78118
79119 // Convert XML to JSON, so we can use it later
80- $ xml = simplexml_load_string ($ url_contents );
120+ $ xml = simplexml_load_string ($ xml_contents );
81121 $ json = json_encode ($ xml );
82122 $ json_entries = json_decode ($ json ,TRUE );
83123
@@ -86,6 +126,12 @@ function xml_site_adder($site_url){
86126 foreach ($ json_entries ['url ' ] as $ page ):
87127 array_push ($ pages , array ('url ' => $ page ['loc ' ]));
88128 endforeach ;
89- return $ pages ;
90129
130+ // Reformat the curled contents to be an array we can
131+ // work with.
132+ return array (
133+ 'url ' => $ curled_site ['url ' ],
134+ 'contents ' => $ pages
135+ );
136+
91137}
0 commit comments