Issue #120: read external URL exclusions from the excludeexturl setting and add a test

hdagheda · hdagheda · commit 18ab64b03fa2 · 2020-03-17T12:21:04.000+11:00
diff --git a/classes/robot/crawler.php b/classes/robot/crawler.php
@@ -335,7 +335,7 @@ public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOO
         if (!self::is_external($url)) {
             $excludes = str_replace(PHP_EOL, ',', self::get_config()->excludemdlurl);
         } else {
-            $excludes = str_replace(PHP_EOL, ',', self::get_config()->excludemdlurl);
+            $excludes = str_replace(PHP_EOL, ',', self::get_config()->excludeexturl);
         }
 
         $isexcluded = profiling_string_matches($url, $excludes);
diff --git a/tests/phpunit/robot_crawler_test.php b/tests/phpunit/robot_crawler_test.php
@@ -389,4 +389,42 @@ public function test_parse_html_priority_inheritance($parentpriority) {
         // Indirect child nodes should not be able to have a high priority.
         $this->assertLessThan(TOOL_CRAWLER_PRIORITY_HIGH, $indirectchildnode->priority);
     }
+
+    /**
+     * Data provider for issue #120: [url, expected crawl decision] pairs, keyed by a readable label.
+     */
+    public function should_be_crawled_provider() {
+        return [
+            'moodle.org, listed in excludeexturl' => ['http://moodle.org/', false],
+            'validator.w3.org, listed in excludeexturl' => ['http://validator.w3.org/', false],
+            'facebook.com, not listed in excludeexturl' => ['https://www.facebook.com/crawler_au', true],
+            'course view, no /admin in path' => ['/moodle/course/view.php?id=1&section=2', true],
+            'admin settings page, contains /admin' => ['/moodle/admin/settings.php?section=tool_crawler', false],
+            'admin root, contains /admin' => ['/moodle/admin', false],
+        ];
+    }
+
+    /**
+     * Test whether mark_for_crawl accepts or rejects a URL given the configured exclude lists.
+     *
+     * @dataProvider should_be_crawled_provider
+     * @param   string $url      URL passed to mark_for_crawl.
+     * @param   bool   $expected True if the URL should be accepted, false if it should be excluded.
+     */
+    public function test_should_be_crawled($url, $expected) {
+        $this->resetAfterTest(true);
+        $baseurl = 'https://www.example.com/moodle';
+
+        // The crawler turns PHP_EOL separators into commas, so build the list with PHP_EOL, not "\n".
+        $urltoexclude = implode(PHP_EOL, ['http://moodle.org/', 'http://validator.w3.org/']);
+        set_config('excludeexturl', $urltoexclude, 'tool_crawler');
+        set_config('excludemdlurl', '/admin', 'tool_crawler');
+
+        // An accepted URL may come back as an object; normalise that to true for the comparison.
+        $result = $this->robot->mark_for_crawl($baseurl, $url);
+        $result = is_object($result) ? true : $result;
+
+        // PHPUnit takes (expected, actual); this order keeps failure messages the right way round.
+        self::assertSame($expected, $result);
+    }
 }

Original file line number	Diff line number	Diff line change
`@@ -335,7 +335,7 @@ public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOO`
`335`	`335`	`if (!self::is_external($url)) {`
`336`	`336`	`$excludes = str_replace(PHP_EOL, ',', self::get_config()->excludemdlurl);`
`337`	`337`	`} else {`
`338`		`- $excludes = str_replace(PHP_EOL, ',', self::get_config()->excludemdlurl);`
	`338`	`+ $excludes = str_replace(PHP_EOL, ',', self::get_config()->excludeexturl);`
`339`	`339`	`}`
`340`	`340`
`341`	`341`	`$isexcluded = profiling_string_matches($url, $excludes);`