403Webshell
Server IP : 66.29.132.122  /  Your IP : 18.119.163.69
Web Server : LiteSpeed
System : Linux business142.web-hosting.com 4.18.0-553.lve.el8.x86_64 #1 SMP Mon May 27 15:27:34 UTC 2024 x86_64
User : admazpex ( 531)
PHP Version : 7.2.34
Disable Function : NONE
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : OFF  |  Pkexec : OFF
Directory :  /proc/thread-self/root/proc/self/root/usr/src/litespeed-wp-plugin/6.3.0.1/litespeed-cache/src/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /proc/thread-self/root/proc/self/root/usr/src/litespeed-wp-plugin/6.3.0.1/litespeed-cache/src/crawler.cls.php
<?php

/**
 * The crawler class
 *
 * @since      	1.1.0
 */

namespace LiteSpeed;

defined('WPINC') || exit();

class Crawler extends Root
{
	const LOG_TAG = 'πŸ•ΈοΈ';

	const TYPE_REFRESH_MAP = 'refresh_map';
	const TYPE_EMPTY = 'empty';
	const TYPE_BLACKLIST_EMPTY = 'blacklist_empty';
	const TYPE_BLACKLIST_DEL = 'blacklist_del';
	const TYPE_BLACKLIST_ADD = 'blacklist_add';
	const TYPE_START = 'start';
	const TYPE_RESET = 'reset';

	const USER_AGENT = 'lscache_walker';
	const FAST_USER_AGENT = 'lscache_runner';
	const CHUNKS = 10000;

	private $_sitemeta = 'meta.data';
	private $_resetfile;
	private $_end_reason;
	private $_ncpu = 1;

	private $_crawler_conf = array(
		'cookies' => array(),
		'headers' => array(),
		'ua' => '',
	);
	private $_crawlers = array();
	private $_cur_threads = -1;
	private $_max_run_time;
	private $_cur_thread_time;
	private $_map_status_list = array(
		'H' => array(),
		'M' => array(),
		'B' => array(),
		'N' => array(),
	);
	protected $_summary;

	/**
	 * Initialize crawler, assign sitemap path
	 *
	 * @since    1.1.0
	 */
	public function __construct()
	{
		// Resolve the per-site meta file name, derive the reset-flag path from it,
		// then load the stored summary and detect the CPU core count.
		if (is_multisite()) {
			// One meta file per blog on multisite installs.
			$this->_sitemeta = sprintf('meta%s.data', get_current_blog_id());
		}

		$crawler_dir = LITESPEED_STATIC_DIR . '/crawler/';
		$this->_resetfile = $crawler_dir . $this->_sitemeta . '.reset';

		$this->_summary = self::get_summary();
		$this->_ncpu = $this->_get_server_cpu();

		self::debug('Init w/ CPU cores=' . $this->_ncpu);
	}

	/**
	 * Try get server CPUs
	 * @since 5.2
	 */
	private function _get_server_cpu()
	{
		// Count the CPU cores by counting "processor" lines in /proc/cpuinfo.
		// Returns an int >= 1; falls back to 1 whenever the count cannot be determined
		// (open_basedir in effect, file missing/unreadable, read failure, no match).
		$cpuinfo_file = '/proc/cpuinfo';

		// Server has limit: with open_basedir set we do not attempt to read outside paths.
		if (ini_get('open_basedir')) {
			return 1;
		}

		try {
			if (!@is_file($cpuinfo_file)) {
				return 1;
			}

			// The read itself can fail even when the file exists; treat that as "unknown".
			$cpuinfo = @file_get_contents($cpuinfo_file);
		} catch (\Exception $e) {
			return 1;
		}

		if (!is_string($cpuinfo) || $cpuinfo === '') {
			return 1;
		}

		// preg_match_all() returns the number of full matches, or false on error.
		$count = preg_match_all('/^processor/m', $cpuinfo, $matches);

		return $count ? (int) $count : 1;
	}

	/**
	 * Check whether the given crawler is active (enabled and allowed to run), i.e. not in the bypass list
	 *
	 * @since  4.3
	 */
	public function is_active($curr)
	{
		// A crawler counts as active unless its index is found in the stored bypass list.
		// $curr: crawler index. Returns bool.
		$bypassed = self::get_option('bypass_list', array());
		$is_bypassed = in_array($curr, $bypassed);

		return !$is_bypassed;
	}

	/**
	 * Toggle the given crawler between active and bypassed, and return the updated state (true = active, false = bypassed)
	 *
	 * @since  4.3
	 */
	public function toggle_activeness($curr)
	{
		// Flips the bypass state of crawler $curr (an int index) and persists the list.
		// Returns true when the crawler ends up active, false when it ends up bypassed.
		$bypassed = self::get_option('bypass_list', array());
		$position = array_search($curr, $bypassed);

		if ($position === false) {
			// Was active: append it to the bypass list to turn it off.
			$bypassed[] = (int) $curr;
			self::update_option('bypass_list', $bypassed);
			return false;
		}

		// Was bypassed: remove the first matching entry and reindex so the list stays sequential.
		unset($bypassed[$position]);
		$bypassed = array_values($bypassed);
		self::update_option('bypass_list', $bypassed);
		return true;
	}

	/**
	 * Clear bypassed list
	 *
	 * @since  4.3
	 * @access public
	 */
	public function clear_disabled_list()
	{
		// Empties the stored bypass list, shows an admin notice and logs the change.
		$empty_list = array();
		self::update_option('bypass_list', $empty_list);

		Admin_Display::note(__('Crawler disabled list is cleared! All crawlers are set to active! ', 'litespeed-cache'));

		self::debug('All crawlers are set to active...... ');
	}

	/**
	 * Overwrite get_summary to init elements
	 *
	 * @since  3.0
	 * @access public
	 */
	public static function get_summary($field = false)
	{
		// Returns the stored summary merged over a full set of default keys.
		// With a truthy $field, returns just that entry, or null when the key is absent.
		$defaults = array(
			'list_size' => 0,
			'last_update_time' => 0,
			'curr_crawler' => 0,
			'curr_crawler_beginning_time' => 0,
			'last_pos' => 0,
			'last_count' => 0,
			'last_crawled' => 0,
			'last_start_time' => 0,
			'last_status' => '',
			'is_running' => 0,
			'end_reason' => '',
			'meta_save_time' => 0,
			'pos_reset_check' => 0,
			'done' => 0,
			'this_full_beginning_time' => 0,
			'last_full_time_cost' => 0,
			'last_crawler_total_cost' => 0,
			'crawler_stats' => array(), // per-crawler hit/miss crawl counters
		);

		// Drop the cached options first so the summary read below is current.
		wp_cache_delete('alloptions', 'options');
		$stored = parent::get_summary();
		$merged = array_merge($defaults, $stored);

		if ($field) {
			return array_key_exists($field, $merged) ? $merged[$field] : null;
		}

		return $merged;
	}

	/**
	 * Overwrite save_summary
	 *
	 * @since  3.0
	 * @access public
	 */
	public static function save_summary($data = false, $reload = false, $overwrite = false)
	{
		// Stamps the save time on the instance summary, delegates to the parent save,
		// then writes the same payload as JSON to the crawler meta file.
		// When $data is falsy the whole instance summary is the payload.
		$crawler = self::cls();
		$crawler->_summary['meta_save_time'] = time();

		$payload = $data ? $data : $crawler->_summary;

		parent::save_summary($payload, $reload, $overwrite);

		$meta_file = LITESPEED_STATIC_DIR . '/crawler/' . $crawler->_sitemeta;
		File::save($meta_file, \json_encode($payload), true);
	}

	/**
	 * Cron start async crawling
	 *
	 * @since 5.5
	 */
	public static function start_async_cron()
	{
		// Cron entry point: queue the async 'crawler' task.
		$task = 'crawler';
		Task::async_call($task);
	}

	/**
	 * Manually start async crawling
	 *
	 * @since 5.5
	 */
	public static function start_async()
	{
		// Manual entry point: queue the async 'crawler_force' task and show a success notice.
		$task = 'crawler_force';
		Task::async_call($task);

		Admin_Display::success(__('Started async crawling', 'litespeed-cache'));
	}

	/**
	 * Ajax crawl handler
	 *
	 * @since 5.5
	 */
	public static function async_handler($manually_run = false)
	{
		// Async (ajax) entry point: log the hand-off and forward to start().
		// $manually_run is passed through unchanged.
		$marker = '------------async-------------start_async_handler';
		self::debug($marker);

		self::start($manually_run);
	}

	/**
	 * Proceed crawling
	 *
	 * @since    1.1.0
	 * @access public
	 */
	public static function start($manually_run = false)
	{
		// Runs one crawl pass if Router::can_crawl() allows it.
		// Returns false when crawling is not allowed; otherwise returns nothing.
		if (Router::can_crawl()) {
			if ($manually_run) {
				self::debug('......crawler manually ran......');
			}

			self::cls()->_crawl_data($manually_run);
			return;
		}

		self::debug('......crawler is NOT allowed by the server admin......');
		return false;
	}

	/**
	 * Crawling start
	 *
	 * @since    1.1.0
	 * @access   private
	 */
	private function _crawl_data($manually_run)
	{
		// Orchestrates one crawl pass: claim the lane, (re)generate the map when needed,
		// pick the next active crawler, load its config and run the engine.
		// $manually_run: when truthy, the whole-crawl interval check below is skipped.

		// Each process identifies itself with a random hash used as the lane owner id.
		if (!defined('LITESPEED_LANE_HASH')) {
			define('LITESPEED_LANE_HASH', Str::rrand(8));
		}
		if ($this->_check_valid_lane()) {
			$this->_take_over_lane();
		} else {
			// Another process owns the lane; leave without touching any state.
			self::debug('⚠️ lane in use');
			return;
			// if ($manually_run) {
			// 	self::debug('......crawler started (manually_rund)......');
			// 	// Log pid to prevent from multi running
			// 	if (defined('LITESPEED_CLI')) {
			// 		// Take over lane
			// 		self::debug('⚠️⚠️⚠️ Forced take over lane (CLI)');
			// 		$this->_take_over_lane();
			// 	}
			// }
		}
		self::debug('......crawler started......');

		// First run: no summary yet, or one of the crawler tables is missing -> build the map.
		if (!$this->_summary || !Data::cls()->tb_exist('crawler') || !Data::cls()->tb_exist('crawler_blacklist')) {
			$this->cls('Crawler_Map')->gen();
		}

		// The previous full crawl finished: honour the crawl interval, then regenerate the sitemap.
		if ($this->_summary['done'] === 'touchedEnd') {
			// Finish time of the last full crawl = its start time + its duration.
			$last_fnished_at = $this->_summary['last_full_time_cost'] + $this->_summary['this_full_beginning_time'];
			if (!$manually_run && time() - $last_fnished_at < $this->conf(Base::O_CRAWLER_CRAWL_INTERVAL)) {
				self::debug('Cron abort: cache warmed already.');
				// Interval not reached yet: release the lane and exit.
				$this->Release_lane();
				return;
			}
			self::debug('TouchedEnd. regenerate sitemap....');
			$this->cls('Crawler_Map')->gen();
		}

		// Populates $this->_crawlers.
		$this->list_crawlers();

		// Skip the crawlers that are in the bypass list.
		while (!$this->is_active($this->_summary['curr_crawler']) && $this->_summary['curr_crawler'] < count($this->_crawlers)) {
			self::debug('Skipped the Crawler #' . $this->_summary['curr_crawler'] . ' ......');
			$this->_summary['curr_crawler']++;
		}
		// Ran past the last crawler: finish as 'end' so _terminate_running() handles the wrap-up.
		if ($this->_summary['curr_crawler'] >= count($this->_crawlers)) {
			$this->_end_reason = 'end';
			$this->_terminate_running();
			$this->Release_lane();
			return;
		}

		// Crawler index is 0/empty or points at a missing crawler: restart from crawler 0
		// and clear its stats. Note that empty() is also true for index 0 itself.
		if (empty($this->_summary['curr_crawler']) || empty($this->_crawlers[$this->_summary['curr_crawler']])) {
			$this->_summary['curr_crawler'] = 0;
			$this->_summary['crawler_stats'][$this->_summary['curr_crawler']] = array();
		}

		$this->load_conf();

		try {
			$this->_engine_start();
			$this->Release_lane();
		} catch (\Exception $e) {
			// On exception the lane is NOT released here; the only throw visible in this
			// file is the 'invalid crawler lane' case raised in _do_running().
			self::debug('πŸ›‘ ' . $e->getMessage());
		}
	}

	/**
	 * Load conf before running crawler
	 *
	 * @since  3.0
	 * @access private
	 */
	private function load_conf()
	{
		// Fills $this->_crawler_conf for the crawler selected by _summary['curr_crawler']:
		// base URL, simulation cookies/headers/UA, run delay, run duration and load limit.
		$this->_crawler_conf['base'] = home_url();

		$crawler = $this->_crawlers[$this->_summary['curr_crawler']];

		// Role simulation: send the vary cookie plus the role id of the simulated user.
		if (!empty($crawler['uid'])) {
			$vary_cookie = $this->cls('Vary')->get_vary_name();
			$this->_crawler_conf['cookies'][$vary_cookie] = $this->cls('Vary')->finalize_default_vary($crawler['uid']);
			$this->_crawler_conf['cookies']['litespeed_role'] = $crawler['uid'];
		}

		// Cookie crawlers: every "cookie:<name>" factor becomes a cookie, except the '_null' placeholder.
		foreach ($crawler as $factor => $value) {
			$is_cookie_factor = strpos($factor, 'cookie:') === 0;
			if ($is_cookie_factor && $value != '_null') {
				$this->_crawler_conf['cookies'][substr($factor, 7)] = $value;
			}
		}

		// WebP simulation.
		if (!empty($crawler['webp'])) {
			$this->_crawler_conf['headers'][] = 'Accept: image/webp,*/*';
		}

		// Mobile crawler.
		if (!empty($crawler['mobile'])) {
			$this->_crawler_conf['ua'] = 'Mobile iPhone';
		}

		// Delay between batches in microseconds; a larger server-provided value wins.
		$delay = $this->conf(Base::O_CRAWLER_USLEEP);
		if (!empty($_SERVER[Base::ENV_CRAWLER_USLEEP]) && $_SERVER[Base::ENV_CRAWLER_USLEEP] > $delay) {
			$delay = $_SERVER[Base::ENV_CRAWLER_USLEEP];
		}
		$this->_crawler_conf['run_delay'] = $delay;

		$this->_crawler_conf['run_duration'] = $this->conf(Base::O_CRAWLER_RUN_DURATION);

		// Load limit: an enforced server value always wins; otherwise a smaller server value wins.
		$limit = $this->conf(Base::O_CRAWLER_LOAD_LIMIT);
		if (!empty($_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT_ENFORCE])) {
			$limit = $_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT_ENFORCE];
		} elseif (!empty($_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT]) && $_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT] < $limit) {
			$limit = $_SERVER[Base::ENV_CRAWLER_LOAD_LIMIT];
		}
		$this->_crawler_conf['load_limit'] = $limit;
	}

	/**
	 * Start crawler
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _engine_start()
	{
		// Checks the server load, works out the time budget for this run, then runs the
		// prepare -> crawl -> terminate sequence. Returns early (without crawling) when
		// the load check leaves zero threads.
		$this->_adjust_current_threads();
		if ($this->_cur_threads == 0) {
			$this->_end_reason = 'stopped_highload';
			self::debug('Stopped due to heavy load.');
			return;
		}

		// Record when this run started.
		self::save_summary(array('last_start_time' => time()));

		// Time budget: PHP's limit minus a 5s margin, or 300s when PHP has no limit.
		$ini_limit = (int) ini_get('max_execution_time');
		self::debug('ini_get max_execution_time=' . $ini_limit);
		$budget = $ini_limit == 0 ? 300 : $ini_limit - 5;

		$run_duration = $this->_crawler_conf['run_duration'];
		if ($budget >= $run_duration) {
			// PHP allows at least the configured duration: use the configured duration.
			$budget = $run_duration;
			self::debug('Use run_duration setting as max_execution_time=' . $budget);
		} elseif (ini_set('max_execution_time', $run_duration + 15) !== false) {
			// PHP's limit was too short but could be raised (with a 15s margin).
			$budget = $run_duration;
			self::debug('ini_set max_execution_time=' . $budget);
		}
		self::debug('final max_execution_time=' . $budget);
		$this->_max_run_time = $budget + time();

		$this->_prepare_running(); // mark running
		$this->_do_running(); // run crawler
		$this->_terminate_running();
	}

	/**
	 * Get server load
	 *
	 * @since 5.5
	 */
	public function get_server_load()
	{
		// Returns the 1-minute system load average, or -1 when it cannot be determined.
		// Callers in this class treat -1 as "unknown" and stop crawling.

		/**
		 * sys_getloadavg() is missing on Windows
		 * @see  https://wordpress.org/support/topic/crawler-keeps-causing-crashes/
		 */
		if (!function_exists('sys_getloadavg')) {
			return -1;
		}

		// sys_getloadavg() returns false on failure; indexing that would yield null, which
		// would later be read as a load of zero. Report it as unknown instead.
		$loads = sys_getloadavg();
		if (!is_array($loads) || !isset($loads[0])) {
			self::debug('Server load: unavailable');
			return -1;
		}

		$curload = $loads[0];
		self::debug('Server load: ' . $curload);
		return $curload;
	}

	/**
	 * Adjust threads dynamically
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _adjust_current_threads()
	{
		// Sets $this->_cur_threads from the per-core server load versus the configured load limit,
		// and records when the adjustment happened. A result of 0 means "do not crawl".
		$load = $this->get_server_load();
		if ($load == -1) {
			self::debug('set threads=0 due to func sys_getloadavg not exist!');
			$this->_cur_threads = 0;
			return;
		}

		// Compare the load per CPU core against the limit.
		$load /= $this->_ncpu;
		$limit = $this->_crawler_conf['load_limit'];

		if ($this->_cur_threads != -1) {
			// Adjustment of an already running crawl.
			$threads = $this->_cur_threads;
			if ($load >= $limit + 1) {
				// Well over the limit: pause 5 seconds, then drop a thread if there is one.
				sleep(5);
				if ($threads >= 1) {
					$threads--;
				}
			} elseif ($load >= $limit) {
				// At or just over the limit: always drop one thread.
				$threads--;
			} elseif ($load + 1 < $limit && $threads < $this->conf(Base::O_CRAWLER_THREADS)) {
				// Comfortable headroom and still below the configured maximum: add one thread.
				$threads++;
			}
		} elseif ($load > $limit) {
			// First call (-1 marks "not initialised"): over the limit, no threads.
			$threads = 0;
		} elseif ($load >= $limit - 1) {
			// First call: within 1 of the limit, a single thread.
			$threads = 1;
		} else {
			// First call: one thread per unit of headroom, capped at the configured maximum.
			$threads = intval($limit - $load);
			$max_threads = $this->conf(Base::O_CRAWLER_THREADS);
			if ($threads > $max_threads) {
				$threads = $max_threads;
			}
		}

		$this->_cur_threads = $threads;
		$this->_cur_thread_time = time();
	}

	/**
	 * Mark running status
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _prepare_running()
	{
		// Marks the crawler as running in the summary, resets per-run counters, stamps
		// start times when a crawler (or the full cycle) begins, and saves the summary.
		$this->_summary['is_running'] = time();
		$this->_summary['done'] = 0; // reset done status
		$this->_summary['last_status'] = 'prepare running';
		$this->_summary['last_crawled'] = 0;

		$at_list_start = $this->_summary['last_pos'] == 0;

		// Current crawler starts from the top of the list.
		if ($at_list_start) {
			$this->_summary['curr_crawler_beginning_time'] = time();
		}

		// First crawler starting from the top: a new full cycle begins.
		if ($at_list_start && $this->_summary['curr_crawler'] == 0) {
			$this->_summary['this_full_beginning_time'] = time();
			$this->_summary['list_size'] = $this->cls('Crawler_Map')->count_map();
		}

		// Previous run ended normally and this crawler starts over: clear its stats.
		if ($at_list_start && $this->_summary['end_reason'] == 'end') {
			$this->_summary['crawler_stats'][$this->_summary['curr_crawler']] = array();
		}

		self::save_summary();
	}

	/**
	 * Take over lane
	 * @since 6.1
	 */
	private function _take_over_lane()
	{
		// Claims the crawler lane by writing this process's lane hash into the ".pid" lane file.
		// NOTE(review): json_local_path() returns false while the meta file does not exist,
		// in which case this path collapses to just ".pid" - confirm that is intended.
		$lane_file = $this->json_local_path() . '.pid';

		self::debug('Take over lane as lane is free: ' . $lane_file);
		// Class name spelled as in save_summary() for consistency.
		File::save($lane_file, LITESPEED_LANE_HASH);
	}

	/**
	 * Update lane file
	 * @since 6.1
	 */
	private function _touch_lane()
	{
		// Refreshes the lane file's modification time so the lane is not seen as stale.
		$lane_file = $this->json_local_path() . '.pid';
		touch($lane_file);
	}

	/**
	 * Release lane file
	 * @since 6.1
	 */
	public function Release_lane()
	{
		// Deletes the lane file if it exists; does nothing otherwise.
		$lane_file = $this->json_local_path() . '.pid';

		if (file_exists($lane_file)) {
			self::debug('Release lane');
			unlink($lane_file);
		}
	}

	/**
	 * Check if lane is used by other crawlers
	 * @since 6.1
	 */
	private function _check_valid_lane($strict_mode = false)
	{
		// Tells whether this process may use the crawler lane.
		// $strict_mode: when true, a missing lane file counts as invalid.
		// Returns true when the lane is free, owned by this process, or stale (older than 1h,
		// in which case it is released first); false when another process owns it.
		$lane_file = $this->json_local_path() . '.pid';

		if ($strict_mode && !file_exists($lane_file)) {
			self::debug("lane file not existed, strict mode is true [file] $lane_file");
			return false;
		}

		$pid = File::read($lane_file);

		// Empty/unreadable lane file, or the stored hash is ours. Compared strictly as
		// strings so two numeric-looking hashes can never be treated as equal.
		if (!$pid || LITESPEED_LANE_HASH === (string) $pid) {
			return true;
		}

		// Owned by someone else: only usable when the lane file is older than 1h.
		// A failed filemtime() is treated as stale, as before.
		$modified_at = @filemtime($lane_file);
		if ($modified_at === false || time() - $modified_at > 3600) {
			self::debug('Lane file is older than 1h, releasing lane');
			$this->Release_lane();
			return true;
		}

		return false;
	}

	/**
	 * Run crawler
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _do_running()
	{
		// Main crawl loop for the current crawler. Fetches the URL map in pages of self::CHUNKS
		// starting at _summary['last_pos'], requests the URLs in batches of _cur_threads, records
		// each result, and keeps the summary up to date. Returns early with _end_reason set when
		// the crawler is disabled (HTTP 428), the time budget is used up, a reset file is found,
		// or the load gets too high. Throws \Exception when the lane was taken by another process.
		$options = $this->_get_curl_options(true);

		while ($urlChunks = $this->cls('Crawler_Map')->list_map(self::CHUNKS, $this->_summary['last_pos'])) {
			// self::debug('$urlChunks=' . count($urlChunks) . ' $this->_cur_threads=' . $this->_cur_threads);
			// Split the page into batches; one batch is fetched concurrently.
			$urlChunks = array_chunk($urlChunks, $this->_cur_threads);
			// self::debug('$urlChunks after array_chunk: ' . count($urlChunks));
			foreach ($urlChunks as $rows) {
				// Strict check: the lane file must exist and must not belong to another process.
				if (!$this->_check_valid_lane(true)) {
					$this->_end_reason = 'lane_invalid';
					self::debug('πŸ›‘ The crawler lane is used by newer crawler.');
					throw new \Exception('invalid crawler lane');
				}
				// Refresh the lane file's mtime.
				$this->_touch_lane();

				// self::debug('chunk fetching count($rows)= ' . count($rows));
				// multi curl
				$rets = $this->_multi_request($rows, $options);

				// check result headers
				foreach ($rows as $row) {
					// self::debug('chunk fetching 553');
					if (empty($rets[$row['id']])) {
						// If already in blacklist, no curl happened, no corresponding record
						continue;
					}
					// self::debug('chunk fetching 557');
					// check response
					if ($rets[$row['id']]['code'] == 428) {
						// HTTP/1.1 428 Precondition Required (need to test)
						$this->_end_reason = 'crawler_disabled';
						self::debug('crawler_disabled');
						return;
					}

					$status = $this->_status_parse($rets[$row['id']]['header'], $rets[$row['id']]['code'], $row['url']); // B or H or M or N(nocache)
					self::debug('[status] ' . $this->_status2title($status) . "\t\t [url] " . $row['url']);
					// Queue the result per status; flushed through Crawler_Map::save_map_status().
					$this->_map_status_list[$status][$row['id']] = array(
						'url' => $row['url'],
						'code' => $rets[$row['id']]['code'], // 201 or 200 or 404
					);
					// Per-crawler counter for this status.
					if (empty($this->_summary['crawler_stats'][$this->_summary['curr_crawler']][$status])) {
						$this->_summary['crawler_stats'][$this->_summary['curr_crawler']][$status] = 0;
					}
					$this->_summary['crawler_stats'][$this->_summary['curr_crawler']][$status]++;
				}

				// update offset position
				$_time = time();
				$this->_summary['last_count'] = count($rows);
				$this->_summary['last_pos'] += $this->_summary['last_count'];
				$this->_summary['last_crawled'] += $this->_summary['last_count'];
				$this->_summary['last_update_time'] = $_time;
				$this->_summary['last_status'] = 'updated position';
				// self::debug("chunk fetching 604 last_pos:{$this->_summary['last_pos']} last_count:{$this->_summary['last_count']} last_crawled:{$this->_summary['last_crawled']}");
				// check duration
				if ($this->_summary['last_update_time'] > $this->_max_run_time) {
					$this->_end_reason = 'stopped_maxtime';
					self::debug('Terminated due to maxtime');
					return;
					// return __('Stopped due to exceeding defined Maximum Run Time', 'litespeed-cache');
				}

				// Save meta & map status at least once every 10s.
				if ($_time - $this->_summary['meta_save_time'] > 10) {
					$this->_map_status_list = $this->cls('Crawler_Map')->save_map_status($this->_map_status_list, $this->_summary['curr_crawler']);
					self::save_summary();
				}
				// self::debug('chunk fetching 597');
				// Check every 5s whether a reset was requested via the reset file.
				if ($_time > $this->_summary['pos_reset_check']) {
					$this->_summary['pos_reset_check'] = $_time + 5;
					if (file_exists($this->_resetfile) && unlink($this->_resetfile)) {
						self::debug('Terminated due to reset file');

						// Restart from the first crawler at position 0.
						$this->_summary['last_pos'] = 0;
						$this->_summary['curr_crawler'] = 0;
						$this->_summary['crawler_stats'][$this->_summary['curr_crawler']] = array();
						// reset done status
						$this->_summary['done'] = 0;
						$this->_summary['this_full_beginning_time'] = 0;
						$this->_end_reason = 'stopped_reset';
						return;
						// return __('Stopped due to reset meta position', 'litespeed-cache');
					}
				}
				// self::debug('chunk fetching 615');
				// Re-check the load once more than 60s have passed since the last thread adjustment.
				if ($this->_summary['last_update_time'] - $this->_cur_thread_time > 60) {
					$this->_adjust_current_threads();
					if ($this->_cur_threads == 0) {
						$this->_end_reason = 'stopped_highload';
						self::debug('πŸ›‘ Terminated due to highload');
						return;
						// return __('Stopped due to load over limit', 'litespeed-cache');
					}
				}

				// NOTE(review): run_delay is passed to usleep() (microseconds) although the status text says "ms".
				$this->_summary['last_status'] = 'sleeping ' . $this->_crawler_conf['run_delay'] . 'ms';

				usleep($this->_crawler_conf['run_delay']);
			}
			// self::debug('chunk fetching done');
		}

		// All URLs are done for current crawler
		$this->_end_reason = 'end';
		$this->_summary['crawler_stats'][$this->_summary['curr_crawler']]['W'] = 0;
		self::debug('Crawler #' . $this->_summary['curr_crawler'] . ' touched end');
	}

	/**
	 * Send multi curl requests
	 * If res=B, bypass request and won't return
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _multi_request($rows, $options)
	{
		// Requests all given rows concurrently with curl_multi.
		// $rows: list of arrays with 'id', 'url' and 'res' (per-crawler status string).
		// $options: curl options applied to every handle.
		// Returns array keyed by row id with 'header' (raw response header) and 'code' (HTTP status).
		// Rows whose status for the current crawler is 'B' or 'N' are not requested and not returned.
		if (!function_exists('curl_multi_init')) {
			exit('curl_multi_init disabled');
		}

		$crawler_index = $this->_summary['curr_crawler'];
		$is_skipped = function ($row) use ($crawler_index) {
			$flag = substr($row['res'], $crawler_index, 1);
			return $flag == 'B' || $flag == 'N';
		};

		$multi = curl_multi_init();
		$handles = array();
		foreach ($rows as $row) {
			if ($is_skipped($row)) {
				continue;
			}

			if (!function_exists('curl_init')) {
				exit('curl_init disabled');
			}

			$handle = curl_init();
			$handles[$row['id']] = $handle;

			// Stored URLs lack the domain when the drop-domain option is on; prepend the base.
			$target = $this->conf(Base::O_CRAWLER_DROP_DOMAIN) ? $this->_crawler_conf['base'] . $row['url'] : $row['url'];
			curl_setopt($handle, CURLOPT_URL, $target);
			self::debug('Crawling [url] ' . $target . ($target == $row['url'] ? '' : ' [ori] ' . $row['url']));

			curl_setopt_array($handle, $options);

			curl_multi_add_handle($multi, $handle);
		}

		// Drive all transfers until none is active or curl reports an error.
		if ($handles) {
			do {
				$status = curl_multi_exec($multi, $active);
				if ($active) {
					curl_multi_select($multi);
				}
			} while ($active && $status == CURLM_OK);
		}

		// Collect header + status code per row, then release each handle.
		$results = array();
		foreach ($rows as $row) {
			if ($is_skipped($row)) {
				continue;
			}

			$handle = $handles[$row['id']];

			$header_length = curl_getinfo($handle, CURLINFO_HEADER_SIZE);
			$response = curl_multi_getcontent($handle);

			$results[$row['id']] = array(
				'header' => substr($response, 0, $header_length),
				'code' => curl_getinfo($handle, CURLINFO_HTTP_CODE),
			);

			curl_multi_remove_handle($multi, $handle);
			curl_close($handle);
		}

		curl_multi_close($multi);

		return $results;
	}

	/**
	 * Translate the status to title
	 * @since 6.0
	 */
	private function _status2title($status)
	{
		// Maps a crawl status code (H, M, B, N) to its log title; anything else is "Unknown".
		// B and N share the same title.
		switch ($status) {
			case 'H':
				return 'βœ… Hit';
			case 'M':
				return '😊 Miss';
			case 'B':
			case 'N':
				return 'πŸ˜… Blacklisted';
			default:
				return 'πŸ›Έ Unknown';
		}
	}

	/**
	 * Check returned curl header to find if cached or not
	 *
	 * @since  2.0
	 * @access private
	 */
	private function _status_parse($header, $code, $url)
	{
		// Classifies a crawl response from its raw header and HTTP code.
		// Returns 'H' (hit), 'M' (miss), 'N' (no-cache, to be blocklisted) or 'B' (blocklist).
		// $url is only passed along to the 'litespeed_crawler_disable_blocklist' filter.
		if ($code == 201) {
			return 'H';
		}

		// Blocklisting is off when the constant is truthy or the filter returns a truthy value.
		// The filter default must be boolean false: the string '__return_false' is truthy and
		// would switch blocklisting off whenever no filter is attached.
		$blocklist_disabled = function () use ($url) {
			if (defined('LITESPEED_CRAWLER_DISABLE_BLOCKLIST') && LITESPEED_CRAWLER_DISABLE_BLOCKLIST) {
				return true;
			}
			return (bool) apply_filters('litespeed_crawler_disable_blocklist', false, $url);
		};

		if (stripos($header, 'X-Litespeed-Cache-Control: no-cache') !== false) {
			// Non-cacheable responses are counted as a miss when this constant is set.
			if (defined('LITESPEED_CRAWLER_IGNORE_NONCACHEABLE') && LITESPEED_CRAWLER_IGNORE_NONCACHEABLE) {
				return 'M';
			}

			return $blocklist_disabled() ? 'M' : 'N';
		}

		// Any known cache header present: "miss" means miss, everything else is a hit.
		$_cache_headers = array('x-litespeed-cache', 'x-lsadc-cache', 'x-qc-cache');
		foreach ($_cache_headers as $_header) {
			if (stripos($header, $_header) !== false) {
				if (stripos($header, $_header . ': miss') !== false) {
					return 'M'; // Miss
				}
				return 'H'; // Hit
			}
		}

		// No cache header at all.
		return $blocklist_disabled() ? 'M' : 'B';
	}

	/**
	 * Get curl_options
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _get_curl_options($crawler_only = false)
	{
		// Builds the curl option array shared by all requests.
		// $crawler_only: true for crawler runs; this prefixes the fast crawler user agent and adds
		// the validation hash cookie (both are also written back into $this->_crawler_conf).
		// Returns array of CURLOPT_* => value.
		$request_headers = $this->_crawler_conf['headers'];
		$request_headers[] = 'Cache-Control: max-age=0';

		$options = array(
			CURLOPT_RETURNTRANSFER => true,
			CURLOPT_HEADER => true,
			CURLOPT_CUSTOMREQUEST => 'GET',
			CURLOPT_FOLLOWLOCATION => false,
			CURLOPT_ENCODING => 'gzip',
			CURLOPT_CONNECTTIMEOUT => 10,
			CURLOPT_TIMEOUT => $this->conf(Base::O_CRAWLER_TIMEOUT), // Larger timeout to avoid incorrect blacklist addition #900171
			CURLOPT_SSL_VERIFYHOST => 0,
			CURLOPT_SSL_VERIFYPEER => false,
			CURLOPT_NOBODY => false,
			CURLOPT_HTTPHEADER => $request_headers,
			// HTTP/1.1 is forced; HTTP/2 was dropped (see the @since 2.2.7 note: no-cache issue).
			CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
		);

		// Pin the site host to the configured server IP.
		$server_ip = $this->conf(Base::O_SERVER_IP);
		if ($server_ip) {
			Utility::compatibility();
			$use_base = ($this->conf(Base::O_CRAWLER_DROP_DOMAIN) || !$crawler_only) && $this->_crawler_conf['base'];
			if ($use_base) {
				$parts = parse_url($this->_crawler_conf['base']);
				if (!empty($parts['host'])) {
					$port = $parts['scheme'] == 'https' ? '443' : '80';

					// CURLOPT_RESOLVE entries have the form host:port:ip.
					$options[CURLOPT_RESOLVE] = array($parts['host'] . ':' . $port . ':' . $server_ip);
					$options[CURLOPT_DNS_USE_GLOBAL_CACHE] = false;
				}
			}
		}

		// Referer: the current request, when there is one.
		if (isset($_SERVER['HTTP_HOST']) && isset($_SERVER['REQUEST_URI'])) {
			$options[CURLOPT_REFERER] = 'http://' . $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];
		}

		// User agent: crawler runs get the fast-crawler prefix exactly once.
		if ($crawler_only && strpos($this->_crawler_conf['ua'], self::FAST_USER_AGENT) !== 0) {
			$this->_crawler_conf['ua'] = self::FAST_USER_AGENT . ' ' . $this->_crawler_conf['ua'];
		}
		$options[CURLOPT_USERAGENT] = $this->_crawler_conf['ua'];

		// Crawler runs carry the validation hash as a cookie.
		if ($crawler_only) {
			$this->_crawler_conf['cookies']['litespeed_hash'] = Router::get_hash();
		}

		// Serialise all non-empty cookies into a single Cookie header value.
		$pairs = array();
		foreach ($this->_crawler_conf['cookies'] as $name => $value) {
			if ($value) {
				$pairs[] = $name . '=' . urlencode($value);
			}
		}
		if ($pairs) {
			$options[CURLOPT_COOKIE] = implode('; ', $pairs);
		}

		return $options;
	}

	/**
	 * Self curl to get HTML content
	 *
	 * @since  3.3
	 */
	public function self_curl($url, $ua, $uid = false, $accept = false)
	{
		// Fetches $url from this site and returns the response body.
		// $ua: user agent string; $uid: optional user id to simulate (adds role + hash cookies);
		// $accept: optional Accept header value.
		// Returns the body on HTTP 200, otherwise false. Redirects are followed.
		$this->_crawler_conf['base'] = home_url();
		$this->_crawler_conf['ua'] = $ua;
		if ($accept) {
			$this->_crawler_conf['headers'] = array('Accept: ' . $accept);
		}
		if ($uid) {
			$this->_crawler_conf['cookies']['litespeed_role'] = $uid;
			$this->_crawler_conf['cookies']['litespeed_hash'] = Router::get_hash();
		}

		// Start from the shared options, but return the body only and follow redirects.
		$options = $this->_get_curl_options();
		$options[CURLOPT_HEADER] = false;
		$options[CURLOPT_FOLLOWLOCATION] = true;
		$options[CURLOPT_URL] = $url;

		$handle = curl_init();
		curl_setopt_array($handle, $options);
		$body = curl_exec($handle);
		$http_code = curl_getinfo($handle, CURLINFO_HTTP_CODE);
		curl_close($handle);

		if ($http_code == 200) {
			return $body;
		}

		self::debug('❌ Response code is not 200 in self_curl() [code] ' . var_export($http_code, true));
		return false;
	}

	/**
	 * Terminate crawling
	 *
	 * @since  1.1.0
	 * @access private
	 */
	private function _terminate_running()
	{
		// Flushes the queued map statuses, advances to the next crawler when the current one
		// finished ('end'), and stores the stopped state plus the end reason in the summary.
		$this->_map_status_list = $this->cls('Crawler_Map')->save_map_status($this->_map_status_list, $this->_summary['curr_crawler']);

		$crawler_finished = $this->_end_reason == 'end';
		if ($crawler_finished) {
			// Move on to the next crawler, starting from the top of the list.
			$this->_summary['curr_crawler']++;
			$this->_summary['last_pos'] = 0;
			$this->_summary['last_crawler_total_cost'] = time() - $this->_summary['curr_crawler_beginning_time'];

			$total_crawlers = count($this->list_crawlers());
			$cycle_finished = $this->_summary['curr_crawler'] >= $total_crawlers;
			if ($cycle_finished) {
				// Every crawler has run: wrap around and mark the full cycle as done.
				self::debug('_terminate_running Touched end, whole crawled. Reload crawler!');
				$this->_summary['curr_crawler'] = 0;
				$this->_summary['done'] = 'touchedEnd';
				$this->_summary['last_full_time_cost'] = time() - $this->_summary['this_full_beginning_time'];
			}
		}

		$this->_summary['last_status'] = 'stopped';
		$this->_summary['is_running'] = 0;
		$this->_summary['end_reason'] = $this->_end_reason;
		self::save_summary();
	}

	/**
	 * List all crawlers ( tagA => [ valueA => titleA, ... ] ...)
	 *
	 * @since    1.9.1
	 * @access   public
	 */
	public function list_crawlers()
	{
		// Returns the list of crawlers: every combination of the enabled factors
		// (user role, WebP, guest mode, mobile, custom cookies). The list is built once
		// and then served from $this->_crawlers.
		if ($this->_crawlers) {
			return $this->_crawlers;
		}

		// factor name => [ factor value => title fragment ]
		$factors = array(
			// The Guest crawler is always present.
			'uid' => array(0 => __('Guest', 'litespeed-cache')),
		);

		// WebP on/off
		$guest_optm = $this->conf(Base::O_GUEST) && $this->conf(Base::O_GUEST_OPTM);
		if ($guest_optm || $this->conf(Base::O_IMG_OPTM_WEBP)) {
			$factors['webp'] = array(1 => 'WebP', 0 => '');
		}

		// Guest Mode on/off
		if ($this->conf(Base::O_GUEST)) {
			$vary_name = $this->cls('Vary')->get_vary_name();
			$vary_val = 'guest_mode:1';
			if (!defined('LSCWP_LOG')) {
				// The vary value is hashed unless LSCWP_LOG is defined.
				$vary_val = md5($this->conf(Base::HASH) . $vary_val);
			}
			$factors['cookie:' . $vary_name] = array($vary_val => '', '_null' => '<font data-balloon-pos="up" aria-label="Guest Mode">πŸ‘’</font>');
		}

		// Mobile crawler
		if ($this->conf(Base::O_CACHE_MOBILE)) {
			$factors['mobile'] = array(1 => '<font data-balloon-pos="up" aria-label="Mobile">πŸ“±</font>', 0 => '');
		}

		// Role simulation: one uid entry per configured user, titled by that user's first role.
		foreach ($this->conf(Base::O_CRAWLER_ROLES) as $uid) {
			$user = get_userdata($uid);
			if (!isset($user->roles) || !is_array($user->roles)) {
				continue;
			}

			$roles = array_values($user->roles);
			$first_role = array_shift($roles);
			if ($first_role) {
				$factors['uid'][$uid] = ucfirst($first_role);
			}
		}

		// Cookie crawlers: one factor per named cookie, one entry per configured value.
		foreach ($this->conf(Base::O_CRAWLER_COOKIES) as $cookie) {
			if (empty($cookie['name'])) {
				continue;
			}

			$factor = 'cookie:' . $cookie['name'];
			$factors[$factor] = array();

			foreach ($cookie['vals'] as $val) {
				if ($val == '_null') {
					$factors[$factor][$val] = '';
				} else {
					$factors[$factor][$val] = '<font data-balloon-pos="up" aria-label="Cookie">πŸͺ</font>' . esc_html($cookie['name']) . '=' . esc_html($val);
				}
			}
		}

		// Cross all factors into the final crawler list.
		$this->_crawlers = $this->_recursive_build_crawler($factors);

		return $this->_crawlers;
	}

	/**
	 * Build a crawler list recursively
	 *
	 * @since 2.8
	 * @access private
	 */
	private function _recursive_build_crawler($crawler_factors, $group = array(), $i = 0)
	{
		// Pick the factor handled at this recursion depth
		$factor_names = array_keys($crawler_factors);
		$factor = $factor_names[$i];

		$is_last_factor = count($crawler_factors) <= $i + 1;

		// Title accumulated by the outer layers (empty at the top level)
		$base_title = empty($group['title']) ? '' : $group['title'];

		$crawlers = array();

		foreach ($crawler_factors[$factor] as $value => $label) {
			// Extend the inherited title with this value's label, if it has one
			$title = $base_title;
			if ($label) {
				$title = ($title ? $title . ' - ' : '') . $label;
			}

			// Work on a copy so sibling iterations keep seeing the untouched $group
			$crawler = $group;
			$crawler['title'] = $title;
			$crawler[$factor] = $value;

			if (!$is_last_factor) {
				// Descend into the next factor and collect every combination it yields
				foreach ($this->_recursive_build_crawler($crawler_factors, $crawler, $i + 1) as $combo) {
					$crawlers[] = $combo;
				}
				continue;
			}

			$crawlers[] = $crawler;
		}

		return $crawlers;
	}

	/**
	 * Return crawler meta file local path
	 *
	 * @since    6.1
	 * @access public
	 */
	public function json_local_path()
	{
		// Filesystem location of this site's crawler meta file
		$meta_file = LITESPEED_STATIC_DIR . '/crawler/' . $this->_sitemeta;

		// Report false when the file has not been created
		return file_exists($meta_file) ? $meta_file : false;
	}

	/**
	 * Return crawler meta file URL, or false if the file does not exist
	 *
	 * @since    1.1.0
	 * @access public
	 */
	public function json_path()
	{
		// Path of the meta file relative to the static dir / static URL roots
		$relative = '/crawler/' . $this->_sitemeta;

		// The URL is only handed out when the file exists on disk
		if (file_exists(LITESPEED_STATIC_DIR . $relative)) {
			return LITESPEED_STATIC_URL . $relative;
		}

		return false;
	}

	/**
	 * Create reset pos file
	 *
	 * @since    1.1.0
	 * @access public
	 */
	public function reset_pos()
	{
		// Write the current Unix timestamp into the reset file
		$now = time();
		File::save($this->_resetfile, $now, true);

		// Persist is_running = 0 through save_summary()
		$summary_patch = array('is_running' => 0);
		self::save_summary($summary_patch);
	}

	/**
	 * Display one status dot per crawler, in the order of the matching crawlers
	 *
	 * @since  3.0
	 * @access public
	 */
	public function display_status($status_row, $reason_set)
	{
		// $status_row: string of status characters, rendered one dot per character
		//              (presumably one character per crawler - confirm against caller)
		// $reason_set: comma separated reasons, matched to $status_row by position
		// Returns the concatenated HTML of all dots, or '' when there is no status
		if (!$status_row) {
			return '';
		}

		// Status character => CSS class suffix
		$_status_list = array(
			'-' => 'default',
			'M' => 'primary',
			'H' => 'success',
			'B' => 'danger',
			'N' => 'warning',
		);

		$reason_set = explode(',', (string) $reason_set);

		$status = '';
		foreach (str_split($status_row) as $k => $v) {
			// The reason list may be shorter than the status row
			$reason = isset($reason_set[$k]) ? $reason_set[$k] : '';
			if ($reason === 'Man') {
				$reason = __('Manually added to blocklist', 'litespeed-cache');
			}
			if ($reason === 'Existed') {
				$reason = __('Previously existed in blocklist', 'litespeed-cache');
			}
			if ($reason) {
				// The reason lands inside a double-quoted HTML attribute, so it must be attribute-escaped
				$reason = 'data-balloon-pos="up" aria-label="' . esc_attr($reason) . '"';
			}

			// Unknown status characters fall back to the neutral style instead of raising a notice
			$css_suffix = isset($_status_list[$v]) ? $_status_list[$v] : 'default';

			$status .= '<i class="litespeed-dot litespeed-bg-' . $css_suffix . '" ' . $reason . '>' . ($k + 1) . '</i>';
		}

		return $status;
	}

	/**
	 * Output info (does not exit)
	 *
	 * @since    1.1.0
	 * @access protected
	 * @param  string $msg Message to output
	 */
	protected function output($msg)
	{
		if (defined('DOING_CRON')) {
			// DOING_CRON is defined: print the message as-is
			echo $msg;
		} else {
			// Otherwise wrap the message in a JS alert. json_encode() yields a valid JS string literal
			// (quotes, backslashes and newlines escaped); the HEX flags keep `<`, `>`, `&`, `'` and `"`
			// out of the markup so the message cannot close the <script> element.
			$js_msg = json_encode((string) $msg, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
			if ($js_msg === false) {
				// Encoding failed (e.g. invalid UTF-8): show an empty alert rather than emit broken script
				$js_msg = '""';
			}
			echo '<script>alert(' . $js_msg . ');</script>';
		}
	}

	/**
	 * Handle all request actions from main cls
	 *
	 * @since  3.0
	 * @access public
	 */
	public function handler()
	{
		// Action type as reported by Router::verify_type()
		$action = Router::verify_type();

		// Comparisons stay loose (==) on purpose: they mirror how a switch statement matches its cases
		if ($action == self::TYPE_REFRESH_MAP) {
			$this->cls('Crawler_Map')->gen(true);
		} elseif ($action == self::TYPE_EMPTY) {
			$this->cls('Crawler_Map')->empty_map();
		} elseif ($action == self::TYPE_BLACKLIST_EMPTY) {
			$this->cls('Crawler_Map')->blacklist_empty();
		} elseif ($action == self::TYPE_BLACKLIST_DEL) {
			// Needs a non-empty `id` query arg, otherwise nothing is done
			if (!empty($_GET['id'])) {
				$this->cls('Crawler_Map')->blacklist_del($_GET['id']);
			}
		} elseif ($action == self::TYPE_BLACKLIST_ADD) {
			// Needs a non-empty `id` query arg, otherwise nothing is done
			if (!empty($_GET['id'])) {
				$this->cls('Crawler_Map')->blacklist_add($_GET['id']);
			}
		} elseif ($action == self::TYPE_START) {
			// Handle the ajax request to proceed crawler manually by admin
			self::start_async();
		} elseif ($action == self::TYPE_RESET) {
			$this->reset_pos();
		}

		// Every action, recognised or not, ends with the admin redirect
		Admin::redirect();
	}
}

Youez - 2016 - github.com/yon3zu
LinuXploit