@@ -389,4 +389,42 @@ public function test_parse_html_priority_inheritance($parentpriority) {
389
389
// Indirect child nodes should not be able to have a high priority.
390
390
$ this ->assertLessThan (TOOL_CRAWLER_PRIORITY_HIGH , $ indirectchildnode ->priority );
391
391
}
392
+
393
/**
 * Data provider for test_should_be_crawled (issue #120: specified URLs should be excluded).
 *
 * Each dataset is a pair: the URL handed to the crawler, and whether the test
 * expects that URL to be accepted for crawling. The expectations rely on the
 * exclusion lists configured by test_should_be_crawled itself: the external
 * URLs http://moodle.org/ and http://validator.w3.org/, and the local path /admin.
 *
 * Declared static so it works with every PHPUnit version (PHPUnit 10+ requires
 * static data providers; older versions accept them as well).
 *
 * @return array[] List of [string $url, bool $expected] datasets, keyed by a readable name.
 */
public static function should_be_crawled_provider() {
    return [
        'excluded external url: moodle.org' => ['http://moodle.org/', false],
        'excluded external url: validator.w3.org' => ['http://validator.w3.org/', false],
        'external url not on the exclusion list' => ['https://www.facebook.com/crawler_au', true],
        'local course page' => ['/moodle/course/view.php?id=1&section=2', true],
        'local page below excluded /admin path' => ['/moodle/admin/settings.php?section=tool_crawler', false],
        'excluded local /admin path itself' => ['/moodle/admin', false],
    ];
}
406
+
407
/**
 * Checks whether a given URL is accepted for crawling once exclusion lists are configured.
 *
 * The test stores two plugin settings for tool_crawler before asking the robot:
 * 'excludeexturl' (newline-separated external URLs) and 'excludemdlurl' (a local path).
 * It then calls mark_for_crawl() and compares the outcome with the expectation
 * supplied by the data provider.
 *
 * @dataProvider should_be_crawled_provider
 * @param string $url URL (absolute, or relative to the site) passed to the robot.
 * @param bool $expected True when the URL should be accepted for crawling, false otherwise.
 */
public function test_should_be_crawled($url, $expected) {
    // set_config() modifies stored configuration, so ask the framework to roll it back.
    $this->resetAfterTest(true);

    $baseurl = 'https://www.example.com/moodle';

    // External URLs to exclude, one per line.
    $urltoexclude = "http://moodle.org/\nhttp://validator.w3.org/";
    set_config('excludeexturl', $urltoexclude, 'tool_crawler');

    // Local (site-relative) path to exclude.
    $urlexcludemdl = "/admin";
    set_config('excludemdlurl', $urlexcludemdl, 'tool_crawler');

    $result = $this->robot->mark_for_crawl($baseurl, $url);

    // Any object result is treated as "accepted"; every other value is compared as-is.
    // NOTE(review): presumably mark_for_crawl() returns a queue record on success and
    // false on rejection - confirm against the robot implementation.
    if (is_object($result)) {
        $result = true;
    }

    // Expected value goes first so PHPUnit reports expected/actual the right way round.
    $this->assertSame($expected, $result, "Unexpected crawl decision for URL '{$url}'.");
}
392
430
}
0 commit comments