diff --git a/cleantalk.php b/cleantalk.php index d1189b66c..607880182 100644 --- a/cleantalk.php +++ b/cleantalk.php @@ -1582,7 +1582,7 @@ function apbct_sfw_update__create_temp_tables($direct_update = false) return $result; } - $result__clear_db = AntiCrawler::clearDataTable( + $result__clear_db = AntiCrawler::clearUADataTable( \Cleantalk\ApbctWP\DB::getInstance(), APBCT_TBL_AC_UA_BL ); @@ -2064,7 +2064,7 @@ function apbct_antiflood__clear_table() APBCT_TBL_AC_LOG ); $anticrawler->setDb(DB::getInstance()); - $anticrawler->clearTable(); + $anticrawler->clearLogTable(); unset($anticrawler); } } diff --git a/lib/Cleantalk/ApbctWP/Firewall/AntiCrawler.php b/lib/Cleantalk/ApbctWP/Firewall/AntiCrawler.php index 706eac273..5cb345f52 100644 --- a/lib/Cleantalk/ApbctWP/Firewall/AntiCrawler.php +++ b/lib/Cleantalk/ApbctWP/Firewall/AntiCrawler.php @@ -3,7 +3,7 @@ namespace Cleantalk\ApbctWP\Firewall; use Cleantalk\ApbctWP\RequestParameters\RequestParameters; -use Cleantalk\ApbctWP\Sanitize; +use Cleantalk\ApbctWP\State; use Cleantalk\ApbctWP\Validate; use Cleantalk\Common\Helper; use Cleantalk\ApbctWP\Variables\Cookie; @@ -20,18 +20,46 @@ */ class AntiCrawler extends \Cleantalk\Common\Firewall\FirewallModule { - public $module_name = 'ANTICRAWLER'; + const COOKIE_NAME__ANTIBOT = 'wordpress_apbct_antibot'; + const COOKIE_NAME__ANTICRAWLER_PASSED = 'apbct_anticrawler_passed'; + const PARAM_NAME__BOT_DETECTOR_EXIST = 'apbct_bot_detector_exist'; + public $module_name = 'ANTICRAWLER'; + /** + * @var null|string + */ private $db__table__ac_logs; + /** + * @var null|string + */ private $db__table__ac_ua_bl; + /** + * @var string + */ private $api_key = ''; + /** + * @var State + */ private $apbct; + /** + * @var int + */ private $store_interval = 86400; + /** + * @var string + */ private $sign; //Signature - User-Agent + Protocol + /** + * @var string + */ private $ua_id = 'null'; //User-Agent - + /** + * @var string + */ private $ac_log_result = ''; - + /** + * @var bool + */ 
public $isExcluded = false; /** @@ -61,12 +89,16 @@ class AntiCrawler extends \Cleantalk\Common\Firewall\FirewallModule */ private $server__http_referer; + private $default_module_results = []; + + private $debug_mode = false; + /** - * AntiBot constructor. + * AntiCrawler constructor. * - * @param $log_table - * @param $ac_logs_table - * @param array $params + * @param string|null $log_table Fully-qualified name of the SFW log table. + * @param string|null $ac_logs_table Fully-qualified name of the AntiCrawler log table. + * @param array $params Optional map of property overrides. */ public function __construct($log_table, $ac_logs_table, $params = array()) { @@ -81,7 +113,7 @@ public function __construct($log_table, $ac_logs_table, $params = array()) global $apbct; $this->apbct = $apbct; - $this->db__table__logs = $log_table ?: null; + $this->db__table__logs = $log_table ?: ''; $this->db__table__ac_logs = $ac_logs_table ?: null; $this->db__table__ac_ua_bl = defined('APBCT_TBL_AC_UA_BL') ? APBCT_TBL_AC_UA_BL : null; $this->sign = md5($this->server__http_user_agent . $this->server__https . $this->server__http_host); @@ -93,225 +125,306 @@ public function __construct($log_table, $ac_logs_table, $params = array()) $this->isExcluded = $this->checkExclusions(); } - public static function update($file_path_ua) + /** + * Use this method to execute the main logic of the module. + * + * @return array Array of the check results + */ + public function check() { - $file_content = file_get_contents($file_path_ua); + /** + * Module check start. + */ + $this->debug('check() started', null, true); - if ( ! function_exists('gzdecode') ) { - return array('error' => 'Function gzdecode not exists. Please update your PHP at least to version 5.4 '); + /** + * Precheck for an empty or invalid key + */ + if ( ! 
$this->isApiKeyValid() ) { + $this->debug('Module exit: isApiKeyValid'); + return $this->default_module_results; } - $unzipped_content = gzdecode($file_content); - - if ( $unzipped_content === false ) { - return array('error' => 'Can not unpack datafile'); + /** + * Pre-checks: redirect, UA blacklist, cookie pass + */ + $this->debug('Start handling pre-checks for IP pool'); + $precheck_result = $this->runPreChecksForIPPool($this->ip_array); + if ( false !== ($precheck_result) ) { + // return results if any result found, no need to process logs search + return $precheck_result; } - $lines = \Cleantalk\ApbctWP\Helper::bufferParseCsv($unzipped_content); - - for ( $count_result = 0; current($lines) !== false; ) { - $query = "INSERT INTO " . APBCT_TBL_AC_UA_BL . " (id, ua_template, ua_status) VALUES "; - - for ( - $i = 0, $values = array(); - APBCT_WRITE_LIMIT !== $i && current($lines) !== false; - $i++, $count_result++, next($lines) - ) { - $entry = current($lines); - - if ( empty($entry) || ! isset($entry[0], $entry[1]) ) { - continue; - } + /** + * Logs check: IP in logs + */ + $this->debug('Start handling IP log entries for IP pool'); + $log_search_results = $this->runLogSearchForIpPool($this->ip_array); - // Cast result to int - $ua_id = preg_replace('/[^\d]*/', '', $entry[0]); - $ua_template = isset($entry[1]) && Validate::isRegexp($entry[1]) ? Helper::dbPrepareParam( - $entry[1] - ) : 0; - $ua_status = isset($entry[2]) ? $entry[2] : 0; + /** + * Exit. + */ + $this->debug('Module exit with results', $log_search_results); + return $log_search_results; + } - if ( ! $ua_template ) { - continue; - } + /** + * Return true when both the API key and the key-is-ok flag are non-empty. + * + * @return bool + */ + private function isApiKeyValid() + { + return ! empty($this->apbct->key_is_ok) && ! empty($this->apbct->api_key); + } - $values[] = '(' . $ua_id . ',' . $ua_template . ',' . $ua_status . 
')'; + /** + * @param string[] $ip_array + * @return array|false + */ + private function runPreChecksForIPPool($ip_array) + { + $results = array(); + // Pre-checks: redirect, UA blacklist, cookie pass + foreach ( $ip_array as $_ip_origin => $current_ip ) { + $this->debug('Pre-check IP', $current_ip); + if ( $this->requestIsRedirected() ) { + $results[] = $this->makeResult($current_ip, 'PASS_ANTICRAWLER'); + $this->debug('Early module exit: isRedirected', $results); + return $results; } - if ( ! empty($values) ) { - $query = $query . implode(',', $values) . ' ON DUPLICATE KEY UPDATE ua_status=0'; - \Cleantalk\ApbctWP\DB::getInstance()->execute($query); + $ua_check = $this->performUaCheck($current_ip); + $results = array_merge($results, $ua_check['entries']); + if ( $ua_check['early_return'] ) { + $this->debug('Early module exit: UA found with result', $results); + return $results; } - } - if ( file_exists($file_path_ua) ) { - unlink($file_path_ua); - } + $cookie_passed = $this->visitorHasAntiBotCookie(); + $has_bot_detector = $this->visitorHasBotDetectorRequestParam(); - return $count_result; + if ($cookie_passed || $has_bot_detector) { + $cookie_passed && $this->debug(self::COOKIE_NAME__ANTIBOT . ' cookie found'); + $has_bot_detector && $this->debug('bot detector found in request params'); + $results[] = $this->makeResult($current_ip, 'PASS_ANTICRAWLER'); + $cookie_reset = $this->handleAntiCrawlerPassedCookie(null); + if ( $cookie_reset ) { + $this->updateLog($current_ip, 'PASS_ANTICRAWLER'); + $this->debug('log updated - PASS_ANTICRAWLER', $current_ip); + } + $this->debug('Early module exit: bot detector or antibot cookie found', $results); + return $results; + } + } + //nothing found - proceed further + return false; } - public static function clearDataTable($db, $db__table__data) + /** + * This method will search records in the AntiCrawler log table for the current IP. + * If the IP is found, the module will return an array containing the check result entry. 
+ * If the IP is not found, no entry is added for it, so the returned array may be empty. + * @param string[] $ip_array + * @return array List of check result entries collected for the IP pool (never null) + */ + private function runLogSearchForIpPool($ip_array) { - $db->execute("TRUNCATE TABLE {$db__table__data};"); - $db->setQuery("SELECT COUNT(*) as cnt FROM {$db__table__data};")->fetch(); // Check if it is clear - if ( isset($db->result['cnt']) && $db->result['cnt'] != 0 ) { - $db->execute("DELETE FROM {$db__table__data};"); // Truncate table - $db->setQuery("SELECT COUNT(*) as cnt FROM {$db__table__data};")->fetch(); // Check if it is clear - if ( isset($db->result['cnt']) && $db->result['cnt'] != 0 ) { - return array('error' => 'COULD_NOT_CLEAR_UA_BL_TABLE'); // throw an error + $results = array(); + foreach ( $ip_array as $_ip_origin => $current_ip ) { + $this->debug('Check logs for IP', $current_ip); + $ip_check = $this->performIpLogCheck($current_ip); + if ( $ip_check !== null ) { + $results[] = $ip_check['entry']; + if ( $ip_check['early_return'] ) { + $this->debug('Early module exit: IP found in logs - DENY', $results); + return $results; + } } } - $db->execute("ALTER TABLE {$db__table__data} AUTO_INCREMENT = 1;"); // Drop AUTO INCREMENT + return $results; } /** - * Use this method to execute main logic of the module. + * Build a single check-result entry. * - * @return array Array of the check results + * @param string $ip + * @param string $status + * @return array */ - public function check() + private function makeResult($ip, $status) { - global $apbct; + return array('ip' => $ip, 'is_personal' => false, 'status' => $status); + } - $results = array(); + /** + * Match the current User-Agent against the UA blacklist table. + * + * @param string $current_ip + * @return array{entries: array, early_return: bool} + */ + private function performUaCheck($current_ip) + { + $ua_bl_results = $this->db->fetchAll( + "SELECT * FROM " . $this->db__table__ac_ua_bl . 
" ORDER BY `ua_status` DESC;" + ); - if (empty($apbct->key_is_ok) || empty($apbct->api_key)) { - return $results; + if ( empty($ua_bl_results) ) { + return array('entries' => array(), 'early_return' => false); } - foreach ( $this->ip_array as $_ip_origin => $current_ip ) { - // Skip by 301 response code - if ( $this->isRedirected() ) { - $results[] = array('ip' => $current_ip, 'is_personal' => false, 'status' => 'PASS_ANTICRAWLER',); + foreach ( $ua_bl_results as $ua_bl_result ) { + if ( + ! empty($ua_bl_result['ua_template']) && preg_match( + "%" . str_replace('"', '', $ua_bl_result['ua_template']) . "%i", + $this->server__http_user_agent + ) + ) { + $this->ua_id = TT::getArrayValueAsString($ua_bl_result, 'id'); + + if ( TT::getArrayValueAsString($ua_bl_result, 'ua_status') === '1' ) { + // Whitelisted — stop all further checks + return array( + 'entries' => array($this->makeResult($current_ip, 'PASS_ANTICRAWLER_UA')), + 'early_return' => true, + ); + } - return $results; + // Blacklisted — record but continue to cookie check + return array( + 'entries' => array($this->makeResult($current_ip, 'DENY_ANTICRAWLER_UA')), + 'early_return' => false, + ); } + } - // UA check - $ua_bl_results = $this->db->fetchAll( - "SELECT * FROM " . $this->db__table__ac_ua_bl . " ORDER BY `ua_status` DESC;" - ); + // No template matched + return array( + 'entries' => array($this->makeResult($current_ip, 'PASS_ANTICRAWLER_UA')), + 'early_return' => false, + ); + } - if ( ! empty($ua_bl_results) ) { - $is_blocked = false; - - foreach ( $ua_bl_results as $ua_bl_result ) { - if ( - ! empty($ua_bl_result['ua_template']) && preg_match( - "%" . str_replace('"', '', $ua_bl_result['ua_template']) . 
"%i", - $this->server__http_user_agent - ) - ) { - $this->ua_id = TT::getArrayValueAsString($ua_bl_result, 'id'); - - if ( TT::getArrayValueAsString($ua_bl_result, 'ua_status') === '1' ) { - // Whitelisted - $results[] = array( - 'ip' => $current_ip, - 'is_personal' => false, - 'status' => 'PASS_ANTICRAWLER_UA', - ); - - return $results; - } else { - // Blacklisted - $results[] = array( - 'ip' => $current_ip, - 'is_personal' => false, - 'status' => 'DENY_ANTICRAWLER_UA', - ); - $is_blocked = true; - break; - } - } - } + /** + * Return true when the visitor holds a valid antibot cookie whose value + * matches the SHA-256 hash of the API key and site salt. + * + * @return bool + */ + private function visitorHasAntiBotCookie() + { + $hash = hash('sha256', $this->api_key . $this->apbct->data['salt']); + return Cookie::getString(self::COOKIE_NAME__ANTIBOT) === $hash; + } - if ( ! $is_blocked ) { - $results[] = array('ip' => $current_ip, 'is_personal' => false, 'status' => 'PASS_ANTICRAWLER_UA',); - } + /** + * Return true when the bot-detector request parameter equals '1', + * indicating that the JS bot-detector script has confirmed the visitor is human. + * + * @return bool + */ + private function visitorHasBotDetectorRequestParam() + { + return RequestParameters::get('apbct_bot_detector_exist', true) == '1'; + } + + /** + * If the one-shot "anticrawler_passed" cookie is set, expire it. + * + * @param false|null $secure_attr + * @return bool + */ + private function handleAntiCrawlerPassedCookie($secure_attr) + { + if ( Cookie::getString(self::COOKIE_NAME__ANTICRAWLER_PASSED) === '1' ) { + if ( ! headers_sent() ) { + Cookie::set( + self::COOKIE_NAME__ANTICRAWLER_PASSED, + '0', + time() - 86400, + '/', + '', + $secure_attr, + true, + 'Lax' + ); + $this->debug(self::COOKIE_NAME__ANTICRAWLER_PASSED . ' cookie reset to 0'); } + return true; + } + return false; + } - // Skip by cookie - if ( - Cookie::get('wordpress_apbct_antibot') == hash( - 'sha256', - $this->api_key . 
$this->apbct->data['salt'] - ) || - RequestParameters::get('apbct_bot_detector_exist', true) == '1' - ) { - if ( Cookie::get('apbct_anticrawler_passed') == 1 ) { - if ( ! headers_sent() ) { - Cookie::set('apbct_anticrawler_passed', '0', time() - 86400, '/', '', null, true, 'Lax'); - } + /** + * Check whether this IP has been seen before (exists in the AC log table). + * If yes, delegate to handleKnownIp(); if not, register the WP hooks that + * will record this visit and inject the JS cookie-setter. + * + * @param string $current_ip + * @return array{entry: array, early_return: bool}|null + */ + private function performIpLogCheck($current_ip) + { + $result = $this->db->fetch( + "SELECT ip" + . ' FROM `' . $this->db__table__ac_logs . '`' + . " WHERE ip = '$current_ip'" + . " AND ua = '$this->sign' AND " . rand(1, 100000) . ";" + ); - // Do logging an one passed request - $this->updateLog($current_ip, 'PASS_ANTICRAWLER'); - } + if ( isset($result['ip']) ) { + return $this->handleKnownIp($current_ip); + } - $results[] = array('ip' => $current_ip, 'is_personal' => false, 'status' => 'PASS_ANTICRAWLER',); + $this->registerNewVisitorHooks(); + return null; + } - return $results; - } + /** + * Decide DENY or PASS for an IP that is already present in the AC log. + * + * @param string $current_ip + * @return array{entry: array, early_return: bool}|null + */ + private function handleKnownIp($current_ip) + { + if ( !$this->visitorHasAntiBotCookie() && !$this->visitorHasBotDetectorRequestParam() ) { + return array( + 'entry' => $this->makeResult($current_ip, 'DENY_ANTICRAWLER'), + 'early_return' => false, + ); } - // Common check - foreach ( $this->ip_array as $_ip_origin => $current_ip ) { - // IP check - $result = $this->db->fetch( - "SELECT ip" - . ' FROM `' . $this->db__table__ac_logs . '`' - . " WHERE ip = '$current_ip'" - . " AND ua = '$this->sign' AND " . rand(1, 100000) . 
";" + $cookie_reset = $this->handleAntiCrawlerPassedCookie(false); + if ($cookie_reset) { + return array( + 'entry' => $this->makeResult($current_ip, 'PASS_ANTICRAWLER'), + 'early_return' => true, ); - if ( isset($result['ip']) ) { - if ( - Cookie::get('wordpress_apbct_antibot') !== hash( - 'sha256', - $this->api_key . $this->apbct->data['salt'] - ) && - ( - !RequestParameters::get('apbct_bot_detector_exist', true) || - RequestParameters::get('apbct_bot_detector_exist', true) == '0' - ) - ) { - $results[] = array('ip' => $current_ip, 'is_personal' => false, 'status' => 'DENY_ANTICRAWLER',); - } else { - if ( Cookie::get('apbct_anticrawler_passed') === '1' ) { - if ( ! headers_sent() ) { - \Cleantalk\ApbctWP\Variables\Cookie::set( - 'apbct_anticrawler_passed', - '0', - time() - 86400, - '/', - '', - false, - true, - 'Lax' - ); - } - - $results[] = array( - 'ip' => $current_ip, - 'is_personal' => false, - 'status' => 'PASS_ANTICRAWLER', - ); + } - return $results; - } - } - } else { - if ( ! Cookie::get('wordpress_apbct_antibot') ) { - add_action('template_redirect', array(& $this, 'updateAcLog'), 999); - } + return null; + } - add_action('wp_head', array('\Cleantalk\ApbctWP\Firewall\AntiCrawler', 'setCookie')); - add_action('login_head', array('\Cleantalk\ApbctWP\Firewall\AntiCrawler', 'setCookie')); - } + /** + * Register the WordPress actions that record this visit in the AC log + * and inject the JS cookie-setter into the page. + */ + private function registerNewVisitorHooks() + { + if ( empty(Cookie::getString(self::COOKIE_NAME__ANTIBOT, '')) ) { + add_action('template_redirect', array(& $this, 'updateAcLog'), 999); } - return $results; + add_action('wp_head', array('\Cleantalk\ApbctWP\Firewall\AntiCrawler', 'setCookie')); + add_action('login_head', array('\Cleantalk\ApbctWP\Firewall\AntiCrawler', 'setCookie')); } + /** + * Insert or update the visitor's IP and User-Agent signature in the AntiCrawler + * log table for the current time interval. 
+ * Intended to be called as a WordPress 'template_redirect' action hook. + */ public function updateAcLog() { $interval_time = Helper::timeGetIntervalStart($this->store_interval); @@ -334,6 +447,11 @@ public function updateAcLog() } + /** + * Output an inline JavaScript snippet that sets the antibot cookie in the + * visitor's browser once the DOM is ready. + * Hooked to 'wp_head' and 'login_head'. + */ public static function setCookie() { global $apbct; @@ -341,7 +459,7 @@ public static function setCookie() $script = ""; @@ -349,11 +467,10 @@ public static function setCookie() } /** - * Add entry to SFW log. - * Writes to database. + * Insert or update a visitor entry in the SFW log table. * - * @param string $ip - * @param $status + * @param string $ip Visitor IP address. + * @param string $status Check result status (e.g. 'PASS_ANTICRAWLER', 'DENY_ANTICRAWLER'). */ public function updateLog($ip, $status) { @@ -404,6 +521,12 @@ public function updateLog($ip, $status) $this->db->execute($this->db->getQuery()); } + /** + * Load the AntiCrawler block-page template, populate all placeholders, and + * schedule rendering via the 'init' action hook. + * + * @param array $result Check result entry containing at least the 'ip' key. + */ public function diePage($result) { global $apbct; @@ -426,13 +549,15 @@ public function diePage($result) $custom_logo_img = ''; } + $ip = TT::getArrayValueAsString($result, 'ip'); + $block_message = sprintf( esc_html__( 'Anti-Crawler Protection is checking your browser and IP %s for spam bots', 'cleantalk-spam-protect' ), //HANDLE LINK - '' . $result['ip'] . '' + '' . $ip . '' ); // Translation @@ -450,7 +575,7 @@ public function diePage($result) . __('Don\'t close this page. 
Please, wait for 3 seconds to pass to the page.', 'cleantalk-spam-protect'), '{CLEANTALK_TITLE}' => $apbct->data['wl_brandname'], '{CLEANTALK_URL}' => $apbct->data['wl_url'], - '{REMOTE_ADDRESS}' => $result['ip'], + '{REMOTE_ADDRESS}' => $ip, '{SERVICE_ID}' => $this->apbct->data['service_id'] . ', ' . $net_count, '{HOST}' => get_home_url() . ', ' . APBCT_VERSION, '{COOKIE_ANTICRAWLER}' => hash('sha256', $apbct->api_key . $apbct->data['salt']), @@ -484,6 +609,11 @@ public function diePage($result) add_action('init', array($this, 'printDiePage')); } + /** + * Inject localised JS variables into the die page, send a 403 response header, + * and terminate execution with the rendered block page. + * Registered as the 'init' action by diePage(). + */ public function printDiePage() { global $apbct; @@ -532,9 +662,19 @@ public function printDiePage() die("IP BLACKLISTED. Blocked by AntiCrawler " . $this->apbct->stats['last_sfw_block']['ip']); } + /** + * Determine whether the current request should bypass AntiCrawler checks. + * + * Returns true (excluded) when any of the following conditions is met: + * - The URI points to a W3 Total Cache minified asset listed in the w3tc_minify option. + * - The skip_anticrawler_on_rss_feed service constant is defined and the request is an RSS feed. + * - An SFW test is running and the visitor holds a valid antibot cookie or bot-detector param. + * - The current user has the 'administrator' or 'editor' role. + * + * @return bool True when the request should be excluded from AntiCrawler checks. + */ private function checkExclusions() { - global $apbct; /** * Check if W3 Total Cache minified files requested during Anti-Crawler Work. 
* All the next conditions should be true: @@ -556,6 +696,7 @@ private function checkExclusions() $w3tc_minified_files = array_keys($w3tc_minify_option); if ( !empty($w3tc_minified_files) && is_array($w3tc_minified_files) ) { if (in_array($w3tc_js_file_name_in_uri, $w3tc_minified_files)) { + $this->debug('exclusions precheck: found W3 rules'); return true; } } @@ -564,13 +705,18 @@ private function checkExclusions() } // skip for RSS Feed requests - if ($apbct->service_constants->skip_anticrawler_on_rss_feed->isDefined()) { + if ($this->apbct->service_constants->skip_anticrawler_on_rss_feed->isDefined()) { if (Server::getString('REQUEST_URI') && preg_match_all('/feed/i', Server::getString('REQUEST_URI')) ) { + $this->debug( + 'exclusions precheck: RSS feed requests disabled by service constant', + $this->apbct->service_constants->skip_anticrawler_on_rss_feed->allowed_public_names + ); return true; } if (is_feed()) { + $this->debug('exclusions precheck: native RSS feed request sign'); return true; } } @@ -578,12 +724,13 @@ private function checkExclusions() //skip check if SFW test is running if ( Get::get('sfw_test_ip') && - (Cookie::get('wordpress_apbct_antibot') == hash( + (Cookie::getString(self::COOKIE_NAME__ANTIBOT) == hash( 'sha256', $this->api_key . 
$this->apbct->data['salt'] ) || - RequestParameters::get('apbct_bot_detector_exist', true) == '1') + RequestParameters::get(self::PARAM_NAME__BOT_DETECTOR_EXIST, true) == '1') ) { + $this->debug('exclusions precheck: SFW test request sign'); return true; } @@ -596,6 +743,7 @@ private function checkExclusions() foreach ( $allowed_roles as $role ) { if ( in_array($role, (array)$user->roles) ) { + $this->debug('exclusions precheck: allowed user role found', $role); return true; } } @@ -603,10 +751,17 @@ private function checkExclusions() return false; } - private function isRedirected() + /** + * Return true when the current response is a 301/302 redirect, or when a + * Cloudflare-proxied request arrives from a different host than its referer, + * indicating that the visitor was just redirected to this page. + * + * @return bool + */ + private function requestIsRedirected() { $is_redirect = false; - if ( $this->server__http_referer !== '' && $this->server__http_host !== '' && $this->isCloudflare() ) { + if ( $this->server__http_referer !== '' && $this->server__http_host !== '' && $this->serverIsOnCloudflare() ) { $parse_referer = parse_url($this->server__http_referer); if ( $parse_referer && isset($parse_referer['host']) ) { $is_redirect = $this->server__http_host !== $parse_referer['host']; @@ -616,16 +771,22 @@ private function isRedirected() return http_response_code() === 301 || http_response_code() === 302 || $is_redirect; } - private function isCloudflare() + /** + * Return true when the request carries Cloudflare-specific HTTP headers + * (CF-Ray, CF-Connecting-IP, CF-Request-ID), indicating it was proxied through Cloudflare. + * + * @return bool + */ + private function serverIsOnCloudflare() { return Server::get('HTTP_CF_RAY') && Server::get('HTTP_CF_CONNECTING_IP') && Server::get('HTTP_CF_REQUEST_ID'); } /** - * Clear table APBCT_TBL_AC_LOG - * once a day + * Remove stale AntiCrawler log entries that belong to a previous time interval. 
+ * Should be called once per $store_interval seconds (default: once a day). */ - public function clearTable() + public function clearLogTable() { $interval_start = \Cleantalk\ApbctWP\Helper::timeGetIntervalStart($this->store_interval); @@ -637,4 +798,134 @@ public function clearTable() LIMIT 100000;' ); } + /** + * Import the UA blacklist from a gzip-compressed CSV file into the database. + * Processes records in batches and deletes the source file after import. + * + * @param string $file_path_ua Absolute path to the gzipped CSV file. + * @return int|array Number of imported rows on success, or an array with an 'error' key on failure. + */ + public static function updateUADataTable($file_path_ua) + { + $file_content = file_get_contents($file_path_ua); + + if ( ! function_exists('gzdecode') ) { + return array('error' => 'Function gzdecode not exists. Please update your PHP at least to version 5.4 '); + } + + $unzipped_content = gzdecode($file_content); + + if ( $unzipped_content === false ) { + return array('error' => 'Can not unpack datafile'); + } + + $lines = \Cleantalk\ApbctWP\Helper::bufferParseCsv($unzipped_content); + + for ( $count_result = 0; current($lines) !== false; ) { + $query = "INSERT INTO " . APBCT_TBL_AC_UA_BL . " (id, ua_template, ua_status) VALUES "; + + for ( + $i = 0, $values = array(); + APBCT_WRITE_LIMIT !== $i && current($lines) !== false; + $i++, $count_result++, next($lines) + ) { + $entry = current($lines); + + if ( empty($entry) || ! isset($entry[0], $entry[1]) ) { + continue; + } + + // Cast result to int + $ua_id = preg_replace('/[^\d]*/', '', $entry[0]); + $ua_template = isset($entry[1]) && Validate::isRegexp($entry[1]) ? Helper::dbPrepareParam( + $entry[1] + ) : 0; + $ua_status = isset($entry[2]) ? $entry[2] : 0; + + if ( ! $ua_template ) { + continue; + } + + $values[] = '(' . $ua_id . ',' . $ua_template . ',' . $ua_status . ')'; + } + + if ( ! empty($values) ) { + $query = $query . implode(',', $values) . 
' ON DUPLICATE KEY UPDATE ua_status=0'; + \Cleantalk\ApbctWP\DB::getInstance()->execute($query); + } + } + + if ( file_exists($file_path_ua) ) { + unlink($file_path_ua); + } + + return $count_result; + } + + /** + * Truncate the UA blacklist data table and reset its AUTO_INCREMENT counter. + * Falls back to a full DELETE if TRUNCATE does not completely empty the table. + * + * @param \Cleantalk\ApbctWP\DB $db Database instance. + * @param string $db__table__data Fully-qualified table name to clear. + * @return array|void Array with an 'error' key if the table could not be cleared. + */ + public static function clearUADataTable($db, $db__table__data) + { + $db->execute("TRUNCATE TABLE {$db__table__data};"); + $db->setQuery("SELECT COUNT(*) as cnt FROM {$db__table__data};")->fetch(); // Check if it is clear + if ( isset($db->result['cnt']) && $db->result['cnt'] != 0 ) { + $db->execute("DELETE FROM {$db__table__data};"); // Fall back to deleting all rows + $db->setQuery("SELECT COUNT(*) as cnt FROM {$db__table__data};")->fetch(); // Check if it is clear + if ( isset($db->result['cnt']) && $db->result['cnt'] != 0 ) { + return array('error' => 'COULD_NOT_CLEAR_UA_BL_TABLE'); // report the failure to the caller + } + } + $db->execute("ALTER TABLE {$db__table__data} AUTO_INCREMENT = 1;"); // Reset AUTO_INCREMENT + } + + /** + * Write a timestamped debug message to the PHP error log when DEBUG mode is enabled. + * + * @param string $message Human-readable description of the event. + * @param mixed|null $data Optional context data; only arrays are logged (via print_r()), any other value is dropped. + */ + private function debug(string $message, $data = null, $add_counstructor_data = false) + { + if ( ! $this->debug_mode) { + return; + } + + if ( is_array($data) ) { + $data = ' ' . 
print_r($data, true); + } else { + $data = ''; + } + + $constructor_data = ''; + if ( $add_counstructor_data ) { + $constructor_data = print_r([ + 'server__http_user_agent' => $this->server__http_user_agent, + 'ip_array' => $this->ip_array, + 'server__https' => $this->server__https, + 'server__http_host' => $this->server__http_host, + 'server__request_uri' => $this->server__request_uri, + 'server__http_referer' => $this->server__http_referer, + ], true); + if ( empty($constructor_data) ) { + $constructor_data = 'invalid constructor data'; + } + } + + $log_record = sprintf( + '%s: [%s] %s%s%s', + date('Y-m-d H:i:s'), + $this->module_name, + $message, + $data, + $constructor_data + ); + + error_log($log_record); + } } diff --git a/lib/Cleantalk/ApbctWP/Firewall/SFWUpdateHelper.php b/lib/Cleantalk/ApbctWP/Firewall/SFWUpdateHelper.php index df027d5d6..9f704c2bb 100644 --- a/lib/Cleantalk/ApbctWP/Firewall/SFWUpdateHelper.php +++ b/lib/Cleantalk/ApbctWP/Firewall/SFWUpdateHelper.php @@ -56,7 +56,7 @@ public static function processFile($file_path, $direction = 'common') */ public static function processUA($file_path) { - $result = AntiCrawler::update($file_path); + $result = AntiCrawler::updateUADataTable($file_path); if ( ! empty($result['error']) ) { return array('error' => 'UPDATING UA LIST: ' . $result['error']);