Skip to content

Commit 18ab64b

Browse files
committed
issue:120 exclude url field map and test
1 parent be7589c commit 18ab64b

File tree

2 files changed

+39
-1
lines changed

2 files changed

+39
-1
lines changed

classes/robot/crawler.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOO
335335
if (!self::is_external($url)) {
336336
$excludes = str_replace(PHP_EOL, ',', self::get_config()->excludemdlurl);
337337
} else {
338-
$excludes = str_replace(PHP_EOL, ',', self::get_config()->excludemdlurl);
338+
$excludes = str_replace(PHP_EOL, ',', self::get_config()->excludeexturl);
339339
}
340340

341341
$isexcluded = profiling_string_matches($url, $excludes);

tests/phpunit/robot_crawler_test.php

+38
Original file line numberDiff line numberDiff line change
@@ -389,4 +389,42 @@ public function test_parse_html_priority_inheritance($parentpriority) {
389389
// Indirect child nodes should not be able to have a high priority.
390390
$this->assertLessThan(TOOL_CRAWLER_PRIORITY_HIGH, $indirectchildnode->priority);
391391
}
392+
393+
/**
 * Data provider for test_should_be_crawled() (Issue #120).
 *
 * Every dataset is a pair [url, expected]: the URL handed to mark_for_crawl() and
 * whether the test expects it to be accepted (true) or rejected (false) under the
 * exclusion settings that test_should_be_crawled() configures.
 *
 * The provider is static because it reads no instance state; datasets are named so
 * that a failing case is identifiable in the PHPUnit output.
 *
 * @return array[] Named datasets of the form [string $url, bool $expected].
 */
public static function should_be_crawled_provider() {
    return [
        'external url listed in excludeexturl (moodle.org)' => ['http://moodle.org/', false],
        'external url listed in excludeexturl (validator.w3.org)' => ['http://validator.w3.org/', false],
        'external url not listed in excludeexturl' => ['https://www.facebook.com/crawler_au', true],
        'internal url not matching excludemdlurl' => ['/moodle/course/view.php?id=1&section=2', true],
        'internal admin settings url matching excludemdlurl' => ['/moodle/admin/settings.php?section=tool_crawler', false],
        'internal admin root url matching excludemdlurl' => ['/moodle/admin', false],
    ];
}
406+
407+
/**
 * Test whether a given URL is accepted by mark_for_crawl() once the URL exclusion
 * settings are in place (Issue #120).
 *
 * The 'excludeexturl' setting is filled with two external URLs and 'excludemdlurl'
 * with "/admin"; each dataset then states whether its URL should still be accepted.
 *
 * @dataProvider should_be_crawled_provider
 * @param string $url URL passed to mark_for_crawl().
 * @param bool $expected True if the URL is expected to be accepted, false if it is expected to be excluded.
 */
public function test_should_be_crawled($url, $expected) {
    $this->resetAfterTest(true);

    $baseurl = 'https://www.example.com/moodle';

    // One excluded URL per line. The crawler splits these settings on PHP_EOL,
    // so the list is joined with PHP_EOL rather than a hard-coded "\n".
    $excludedexternal = implode(PHP_EOL, ['http://moodle.org/', 'http://validator.w3.org/']);
    set_config('excludeexturl', $excludedexternal, 'tool_crawler');
    set_config('excludemdlurl', '/admin', 'tool_crawler');

    $result = $this->robot->mark_for_crawl($baseurl, $url);

    // Collapse any object result to true so it can be compared strictly against
    // the boolean expectation; non-object results are compared as returned.
    $marked = is_object($result) ? true : $result;

    // PHPUnit's signature is assertSame($expected, $actual).
    $this->assertSame($expected, $marked);
}
392430
}

0 commit comments

Comments
 (0)