From 719b82d2305f9364c1cd184d6b6aaad6ca048290 Mon Sep 17 00:00:00 2001
From: Kovah
Date: Mon, 11 Feb 2019 16:46:52 +0100
Subject: [PATCH] Implement first version for automated link checks (#20)

---
Review notes (not part of the commit message):

* CheckLinksCommand::handle(): the cached offset and checked-link counter are
  now cleared when a check cycle is complete (and when no links are found).
  Before, the next cycle resumed at the stale offset and only re-checked the
  last batch of links.
* CheckLinksCommand::checkLink(): redirects are no longer followed by the HTTP
  client. Guzzle follows them by default, so the 301/302 "moved" branch could
  not be reached. 303, 307 and 308 are treated as "moved" as well.
* CronController::run(): the cron token is compared with hash_equals() and the
  parameter got the null default its docblock already announced.
* Open: Kernel.php imports App\Console\Commands\CheckLinksCommand, but this
  patch creates app/Console/CheckLinksCommand.php. The namespace declaration
  of the new file is not visible in this copy of the patch; please confirm
  that path and namespace agree.
* Open: the beginning of the CheckLinksCommand.php hunk (everything before
  "client = new Client();") is missing from this copy of the patch and was
  left untouched. Line counts assume the missing lines are restored; the blob
  hashes in the index lines were not recomputed.

 app/Console/CheckLinksCommand.php           | 205 ++++++++++++++++++++
 app/Console/Kernel.php                      |  12 +-
 app/Http/Controllers/API/CronController.php |  20 +-
 3 files changed, 231 insertions(+), 6 deletions(-)
 create mode 100644 app/Console/CheckLinksCommand.php

diff --git a/app/Console/CheckLinksCommand.php b/app/Console/CheckLinksCommand.php
new file mode 100644
index 00000000..99028654
--- /dev/null
+++ b/app/Console/CheckLinksCommand.php
@@ -0,0 +1,205 @@
+client = new Client();
+
+        parent::__construct();
+    }
+
+    /**
+     * Execute the console command.
+     *
+     * Checks one batch of links per run. The position inside the link list is
+     * kept in the cache between runs; once every link was checked, further
+     * runs are skipped for five days and the next cycle starts at the first link.
+     *
+     * @return void
+     */
+    public function handle(): void
+    {
+        // Check if the command should skip the execution
+        $skip_timestamp = Cache::get($this->cache_key_skip_timestamp);
+        $this->offset = Cache::get($this->cache_key_offset, 0);
+        $this->checked_link_count = Cache::get($this->cache_key_checked_count, 0);
+
+        if (now()->timestamp < $skip_timestamp) {
+            return;
+        }
+
+        $links = $this->getLinks();
+
+        // Cancel if there are no links to check
+        if ($links->isEmpty()) {
+            // Drop the whole progress state so the next run starts from scratch
+            Cache::forget($this->cache_key_offset);
+            Cache::forget($this->cache_key_checked_count);
+            Cache::forget($this->cache_key_skip_timestamp);
+
+            $this->comment('No links found, aborting...');
+            return;
+        }
+
+        // Check all provided links
+        $this->comment('Checking ' . $links->count() . ' links now.');
+
+        $links->each(function ($link) {
+            $this->checkLink($link);
+
+            // Prevent spam-ish behaviour by limiting outgoing HTTP requests
+            sleep(1);
+        });
+
+        // Check if there are more links to check
+        $checked_count = $this->checked_link_count + $links->count();
+        Cache::forever($this->cache_key_checked_count, $checked_count);
+
+        if ($this->total > $checked_count) {
+
+            // If yes, simply save the offset to the cache.
+            // The next link check will pick it up and continue the check
+            $next_offset = $this->offset + $this->limit;
+            Cache::forever($this->cache_key_offset, $next_offset);
+
+            $this->comment('Saving offset for next link check.');
+
+        } else {
+
+            // If not, all links have been successfully checked.
+            // Reset the progress, otherwise the next cycle would resume at the
+            // stale offset and only ever re-check the last batch of links.
+            Cache::forget($this->cache_key_offset);
+            Cache::forget($this->cache_key_checked_count);
+
+            // Save a cache flag that prevents link checks for the next days.
+            $next_check = now()->addDays(5);
+            Cache::forever($this->cache_key_skip_timestamp, $next_check->timestamp);
+
+            $this->comment('All existing links checked, next link check scheduled for ' . $next_check->toDateTimeString());
+
+        }
+    }
+
+    /**
+     * Get links but limit the results to a fixed number of links
+     * If there is an offset saved, use this instead of beginning from the first entry
+     *
+     * @return \Illuminate\Support\Collection
+     */
+    protected function getLinks()
+    {
+        // Get the total amount of links, used to detect the end of a check cycle
+        $this->total = Link::count();
+
+        // Get a portion of the links based on the saved offset and the limit
+        return Link::orderBy('id', 'ASC')->offset($this->offset)->limit($this->limit)->get();
+    }
+
+    /**
+     * Check the URL of a link and set the status accordingly
+     *
+     * Redirects are not followed, so that a 3xx response of the URL itself is
+     * visible here and the link can be flagged as moved.
+     *
+     * @param Link $link
+     * @return void
+     */
+    protected function checkLink(Link $link): void
+    {
+        $this->comment('Checking link ' . $link->url);
+
+        $options = [
+            'http_errors' => false, // Do not throw exceptions for 4xx and 5xx errors
+            'timeout' => 5, // wait a maximum of 5 seconds for the request to finish
+            'allow_redirects' => false, // Guzzle follows redirects by default, which would hide 3xx codes
+        ];
+
+        try {
+            $res = $this->client->request('GET', $link->url, $options);
+            $status_code = $res->getStatusCode();
+        } catch (\Exception $e) {
+            // Just abort now, may be a temporary issue
+            $this->warn('› Unknown error while trying to check the URL, trying again later.');
+            return;
+        }
+
+        // Check if the status code is not 200
+        if ($status_code !== 200) {
+            // If the link target is a redirect, set the status to 2 (moved)
+            // else set the status to 3 (broken)
+            if (in_array($status_code, [301, 302, 303, 307, 308], true)) {
+                $link->status = 2;
+                $this->warn('› Link moved to another URL!');
+            } else {
+                $link->status = 3;
+                $this->error('› Link seems to be broken!');
+            }
+
+            $link->save();
+
+        } else {
+            // NOTE(review): a link that was flagged as moved or broken earlier keeps
+            // that status here - confirm whether it should be reset to an "ok" value.
+            $this->info('› Link looks okay.');
+        }
+    }
+}
diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php
index dc448328..77a5a495 100644
--- a/app/Console/Kernel.php
+++ b/app/Console/Kernel.php
@@ -2,10 +2,16 @@
 
 namespace App\Console;
 
+use App\Console\Commands\CheckLinksCommand;
 use App\Console\Commands\RegisterUserCommand;
 use Illuminate\Console\Scheduling\Schedule;
 use Illuminate\Foundation\Console\Kernel as ConsoleKernel;
 
+/**
+ * Class Kernel
+ *
+ * @package App\Console
+ */
 class Kernel extends ConsoleKernel
 {
     /**
@@ -14,7 +20,8 @@ class Kernel extends ConsoleKernel
      * @var array
      */
     protected $commands = [
-        RegisterUserCommand::class
+        RegisterUserCommand::class,
+        CheckLinksCommand::class,
     ];
 
     /**
@@ -25,8 +32,7 @@ class Kernel extends ConsoleKernel
      */
     protected function schedule(Schedule $schedule)
     {
-        // $schedule->command('inspire')
-        //          ->hourly();
+        $schedule->command('links:check')->hourly();
     }
 
     /**
diff --git a/app/Http/Controllers/API/CronController.php b/app/Http/Controllers/API/CronController.php
index 4f257621..b83b3cbb 100644
--- a/app/Http/Controllers/API/CronController.php
+++ b/app/Http/Controllers/API/CronController.php
@@ -4,6 +4,7 @@ namespace App\Http\Controllers\API;
 
 use App\Http\Controllers\Controller;
 use Illuminate\Http\Request;
+use Illuminate\Support\Facades\Artisan;
 
 /**
  * Class CronController
@@ -13,11 +14,24 @@ use Illuminate\Http\Request;
 class CronController extends Controller
 {
     /**
-     * @param Request $request
+     * Run the scheduled tasks if the request carries the valid cron token,
+     * otherwise abort with a 403 response.
+     *
+     * @param Request     $request
+     * @param null|string $cron_token
      * @return void
      */
-    public function run(Request $request)
+    public function run(Request $request, $cron_token = null)
     {
-        //
+        // Verify the cron token. hash_equals() compares in constant time,
+        // so the response time does not reveal how much of the token matched.
+        $expected_token = systemsettings('cron_token');
+
+        if (!$cron_token || !is_string($expected_token) || !hash_equals($expected_token, (string) $cron_token)) {
+            abort(403);
+        }
+
+        // Run all cron tasks
+        Artisan::call('schedule:run');
     }
 }