2020-06-09 20:21:34 +02:00
|
|
|
<?php
|
|
|
|
|
|
|
|
class MarktplaatsBridge extends BridgeAbstract
|
|
|
|
{
|
|
|
|
// Bridge metadata.
const NAME = 'Marktplaats';
const URI = 'https://marktplaats.nl';
const DESCRIPTION = 'Read search queries from marktplaats.nl';

// Input definitions for the single "Search" context. The short keys (q, c, z, ...)
// are the names that collectData() reads through getInput().
const PARAMETERS = [
    'Search' => [
        'q' => [
            'name' => 'query',
            'type' => 'text',
            'exampleValue' => 'lamp',
            'required' => true,
            'title' => 'The search string for marktplaats',
        ],
        'c' => [
            'name' => 'Category',
            'type' => 'list',
            // Label => main category id; collectData() sends the id as "l1CategoryId".
            // The empty value stands for "no category filter".
            // Apostrophes in labels must be escaped inside single-quoted literals.
            'values' => [
                'Select a category' => '',
                'Antiek en Kunst' => '1',
                'Audio, Tv en Foto' => '31',
                'Auto\'s' => '91',
                'Auto-onderdelen' => '2600',
                'Auto diversen' => '48',
                'Boeken' => '201',
                'Caravans en Kamperen' => '289',
                'Cd\'s en Dvd\'s' => '1744',
                'Computers en Software' => '322',
                'Contacten en Berichten' => '378',
                'Diensten en Vakmensen' => '1098',
                'Dieren en Toebehoren' => '395',
                'Doe-het-zelf en Verbouw' => '239',
                'Fietsen en Brommers' => '445',
                'Hobby en Vrije tijd' => '1099',
                'Huis en Inrichting' => '504',
                'Huizen en Kamers' => '1032',
                'Kinderen en Baby\'s' => '565',
                'Kleding | Dames' => '621',
                'Kleding | Heren' => '1776',
                'Motoren' => '678',
                'Muziek en Instrumenten' => '728',
                'Postzegels en Munten' => '1784',
                'Sieraden, Tassen en Uiterlijk' => '1826',
                'Spelcomputers en Games' => '356',
                'Sport en Fitness' => '784',
                'Telecommunicatie' => '820',
                'Tickets en Kaartjes' => '1984',
                'Tuin en Terras' => '1847',
                'Vacatures' => '167',
                'Vakantie' => '856',
                'Verzamelen' => '895',
                'Watersport en Boten' => '976',
                'Witgoed en Apparatuur' => '537',
                'Zakelijke goederen' => '1085',
                'Diversen' => '428',
            ],
            'required' => false,
            'title' => 'The category to search in',
        ],
        'z' => [
            'name' => 'zipcode',
            'type' => 'text',
            'required' => false,
            'exampleValue' => '1013AA',
            'title' => 'Zip code for location limited searches',
        ],
        'd' => [
            'name' => 'distance',
            'type' => 'number',
            'required' => false,
            'exampleValue' => '100000',
            'title' => 'The distance in meters from the zipcode',
        ],
        'f' => [
            'name' => 'priceFrom',
            'type' => 'number',
            'required' => false,
            'title' => 'The minimal price in cents',
        ],
        't' => [
            'name' => 'priceTo',
            'type' => 'number',
            'required' => false,
            'title' => 'The maximal price in cents',
        ],
        's' => [
            'name' => 'showGlobal',
            'type' => 'checkbox',
            'required' => false,
            'title' => 'Include result with negative distance',
        ],
        'i' => [
            'name' => 'includeImage',
            'type' => 'checkbox',
            'required' => false,
            'title' => 'Include the image at the end of the content',
        ],
        'r' => [
            'name' => 'includeRaw',
            'type' => 'checkbox',
            'required' => false,
            'title' => 'Include the raw data behind the content',
        ],
        'sc' => [
            'name' => 'Sub category',
            'type' => 'number',
            'required' => false,
            'exampleValue' => '12345',
            // Two-line tooltip; the line break is part of the text.
            'title' => "Sub category has to be given by id as the list is too big to show here.\n"
                . 'Only use subcategories that belong to the main category. Both have to be correct',
        ],
    ],
];

// Cache lifetime of the feed in seconds (900 s = 15 minutes).
const CACHE_TIMEOUT = 900;
|
|
|
|
|
|
|
|
/**
 * Query the Marktplaats search API with the configured inputs and append one
 * feed item per returned listing to $this->items.
 *
 * Listings with a negative distance are skipped only when the "showGlobal"
 * input is present and unchecked.
 *
 * @return void
 * @throws \Exception when the response is not JSON containing a "listings" entry
 */
public function collectData()
{
    $url = $this->buildSearchUrl();

    // The endpoint answers with JSON, so read it as plain text (the same way
    // scrapeSubCategories() does) instead of routing it through the HTML parser.
    $jsonObj = json_decode((string) getContents($url));
    if (!is_object($jsonObj) || !isset($jsonObj->listings)) {
        throw new \Exception('Unexpected response from the Marktplaats search API: ' . $url);
    }

    $showGlobal = $this->getInput('s');
    $excludeGlobal = !is_null($showGlobal) && !$showGlobal;
    $includeImages = (bool) $this->getInput('i');
    $includeRaw = (bool) $this->getInput('r');

    foreach ($jsonObj->listings as $listing) {
        if ($excludeGlobal && $listing->location->distanceMeters < 0) {
            continue;
        }
        $this->items[] = $this->buildItem($listing, $url, $includeImages, $includeRaw);
    }
}

/**
 * Build the search API URL from the bridge inputs, URL-encoding every value.
 *
 * The location filter is only added when both zipcode and distance are given.
 *
 * @return string
 */
private function buildSearchUrl(): string
{
    $params = ['query' => (string) $this->getInput('q')];

    $zipcode = $this->getInput('z');
    $distance = $this->getInput('d');
    if (self::isProvided($zipcode) && self::isProvided($distance)) {
        $params['postcode'] = $zipcode;
        $params['distanceMeters'] = $distance;
    }

    // API parameter name => bridge input name, in the order they are sent.
    $optionalFilters = [
        'PriceCentsFrom' => 'f',
        'PriceCentsTo' => 't',
        'l1CategoryId' => 'c',
        'l2CategoryId' => 'sc',
    ];
    foreach ($optionalFilters as $apiName => $inputName) {
        $value = $this->getInput($inputName);
        if (self::isProvided($value)) {
            $params[$apiName] = $value;
        }
    }

    // Explicit '&' so the result does not depend on the arg_separator.output ini setting.
    return 'https://www.marktplaats.nl/lrp/api/search?' . http_build_query($params, '', '&');
}

/**
 * Tell whether an optional input carries a value. A literal 0 (e.g. a minimum
 * price of zero) counts as provided; null and the empty string do not.
 *
 * @param mixed $value
 * @return bool
 */
private static function isProvided($value): bool
{
    return $value !== null && $value !== '';
}

/**
 * Convert one decoded listing into a feed item array.
 *
 * Content order: description, images (optional), price, city (if known),
 * raw JSON plus request URL (optional).
 *
 * @param object $listing       one entry of the "listings" array of the API response
 * @param string $url           the request URL, shown next to the raw data
 * @param bool   $includeImages whether to add enclosures and <img> tags
 * @param bool   $includeRaw    whether to append the raw JSON of the listing
 * @return array
 */
private function buildItem(object $listing, string $url, bool $includeImages, bool $includeRaw): array
{
    $item = [
        'uri' => 'https://marktplaats.nl' . $listing->vipUrl,
        'title' => $listing->title,
        'timestamp' => $listing->date,
        'author' => $listing->sellerInformation->sellerName,
        'content' => $listing->description,
        'categories' => $listing->verticals,
        'uid' => $listing->itemId,
    ];

    if ($includeImages && !empty($listing->imageUrls)) {
        // imageUrls may be a single string or a list; always work with a list.
        $rawUrls = is_array($listing->imageUrls) ? $listing->imageUrls : [$listing->imageUrls];
        // NOTE(review): the URLs are presumably protocol-relative ("//host/path");
        // only those get the scheme prepended - confirm against a live response.
        $imageUrls = array_map(
            static fn ($imageUrl) => str_starts_with((string) $imageUrl, '//')
                ? 'https:' . $imageUrl
                : (string) $imageUrl,
            $rawUrls
        );

        $item['enclosures'] = $imageUrls;
        foreach ($imageUrls as $imageUrl) {
            $item['content'] .= "<br />\n<img alt='' src='" . $imageUrl . "' />";
        }
    }

    // priceCents is divided by 100 before concatenation ("/" binds tighter than ".").
    $item['content'] .= "<br>\n<br>\nPrice: " . $listing->priceInfo->priceCents / 100;
    $item['content'] .= ' (' . $listing->priceInfo->priceType . ')';

    if (!empty($listing->location->cityName)) {
        $item['content'] .= "<br><br>\n" . $listing->location->cityName;
    }

    if ($includeRaw) {
        // Appended once, behind the regular content.
        $item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing) . "<br />$url";
    }

    return $item;
}
|
2020-10-15 12:53:19 +05:00
|
|
|
|
2020-09-03 07:44:32 +02:00
|
|
|
/**
 * Feed title: the search query followed by " - Marktplaats", or the parent's
 * name while no query input is available.
 *
 * @return string
 */
public function getName()
{
    $searchTerm = $this->getInput('q');

    if (is_null($searchTerm)) {
        return parent::getName();
    }

    return $searchTerm . ' - Marktplaats';
}
|
2023-10-22 17:36:36 +02:00
|
|
|
|
|
|
|
/**
 * Method can be used to scrape the main categories and their subcategories from marktplaats.
 *
 * Main categories are read from the "categoryId" select on the home page; for each
 * of them the search API is queried and its "searchCategoryOptions" are collected.
 * Responses without "searchCategoryOptions" are printed as-is for inspection.
 *
 * @return array ['main' => [label => id], 'sub' => [main category fullName => [sub fullName => id]]]
 * @throws \Exception when the home page cannot be loaded
 */
private static function scrapeSubCategories()
{
    $main = ['Select a category' => ''];
    // Initialised up front so the loop below is safe when no option matches.
    $ids = [];

    $marktplaatsHTML = file_get_html('https://www.marktplaats.nl');
    if (!$marktplaatsHTML) {
        throw new \Exception('Could not load the Marktplaats home page');
    }

    foreach ($marktplaatsHTML->find('select[id=categoryId] option') as $opt) {
        // Options whose text contains "categorie" are skipped
        // (presumably the placeholder entry - TODO confirm).
        if (!str_contains($opt->innertext, 'categorie')) {
            $main[$opt->innertext] = $opt->value;
            $ids[] = $opt->value;
        }
    }

    $result = [];
    foreach ($ids as $id) {
        $url = 'https://www.marktplaats.nl/lrp/api/search?l1CategoryId=' . $id;
        $jsonstring = getContents($url);
        $jsondata = json_decode((string) $jsonstring);

        if (!isset($jsondata->searchCategoryOptions)) {
            print($jsonstring);
            continue;
        }

        // Without the main category entry there is no name to file the subcategories under.
        if (!isset($jsondata->categoriesById->$id)) {
            continue;
        }

        $maincategory = $jsondata->categoriesById->$id;
        $subcategories = [];
        foreach ($jsondata->searchCategoryOptions as $categorie) {
            $subcategories[$categorie->fullName] = $categorie->id;
        }
        $result[$maincategory->fullName] = $subcategories;
    }

    return [
        'main' => $main,
        'sub' => $result,
    ];
}
|
|
|
|
|
|
|
|
/**
 * Helper method to construct the array that could be used for categories.
 *
 * Echoes the given array as PHP array-literal entries ('key' => 'value',),
 * recursing into nested arrays. Keys and values are emitted as single-quoted
 * string literals with quotes and backslashes escaped, so labels such as
 * "Auto's" yield valid code at every nesting level.
 *
 * @param array $array  the (possibly nested) array to print
 * @param int   $indent nesting depth; one space is emitted per level
 * @return void
 */
private static function printArrayAsCode($array, $indent = 0)
{
    $padding = str_repeat(' ', $indent);

    foreach ($array as $key => $value) {
        // var_export() produces a correctly escaped single-quoted literal.
        $keyLiteral = var_export((string) $key, true);

        if (is_array($value)) {
            echo $padding . $keyLiteral . ' => [' . PHP_EOL;
            self::printArrayAsCode($value, $indent + 1);
            echo $padding . '],' . PHP_EOL;
        } else {
            echo $padding . $keyLiteral . ' => ' . var_export((string) $value, true) . ',' . PHP_EOL;
        }
    }
}
|
|
|
|
|
|
|
|
/**
 * Scrape the categories and echo them as two PHP array assignments:
 * first the main categories, then the subcategories per main category.
 *
 * @return void
 */
private static function printScrapeArray()
{
    $scraped = self::scrapeSubCategories();

    foreach (['main', 'sub'] as $section) {
        echo '$myArray = [' . PHP_EOL;
        self::printArrayAsCode($scraped[$section], 1);
        echo '];' . PHP_EOL;
    }
}
|
2020-06-09 20:21:34 +02:00
|
|
|
}
|