Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
integration
prechecker
Commits
34c6ec18
Commit
34c6ec18
authored
Aug 05, 2015
by
Andrew Nicols
Browse files
MDL-50891 useragent: Move web crawler checks to useragent class
parent
6d392b30
Changes
6
Hide whitespace changes
Inline
Side-by-side
lib/classes/session/manager.php
View file @
34c6ec18
...
...
@@ -377,7 +377,7 @@ class manager {
$user
=
null
;
if
(
!
empty
(
$CFG
->
opentogoogle
))
{
if
(
is_web_crawler
())
{
if
(
\
core_useragent
::
is_web_crawler
())
{
$user
=
guest_user
();
}
$referer
=
get_local_referer
(
false
);
...
...
lib/classes/useragent.php
View file @
34c6ec18
...
...
@@ -71,7 +71,7 @@ class core_useragent {
self
::
DEVICETYPE_DEFAULT
,
self
::
DEVICETYPE_LEGACY
,
self
::
DEVICETYPE_MOBILE
,
self
::
DEVICETYPE_TABLET
self
::
DEVICETYPE_TABLET
,
);
/**
...
...
@@ -201,6 +201,7 @@ class core_useragent {
/**
* Returns true if the user appears to be on a tablet.
*
* @return int
*/
protected
function
is_useragent_tablet
()
{
...
...
@@ -208,6 +209,16 @@ class core_useragent {
return
(
preg_match
(
$tabletregex
,
$this
->
useragent
));
}
/**
 * Whether the user agent relates to a web crawler.
 *
 * This includes all types of web crawler. The "msnbot" and "Baiduspider"
 * signatures are matched case-insensitively, so that agents such as
 * "BaiDuSpider" are still recognised; every other signature is matched
 * case-sensitively.
 *
 * @return bool True when the stored user agent string contains a known crawler signature.
 */
protected function is_useragent_web_crawler() {
    // The (?i:...) groups enable case-insensitive matching for those two alternatives only.
    $regex = '/Googlebot|google\.com|Yahoo! Slurp|\[ZSEBOT\]|(?i:msnbot)|bingbot|BingPreview|Yandex|AltaVista|(?i:Baiduspider)|Teoma/';
    // preg_match() yields 1 on a match, 0 on no match and false on error; normalise to a strict bool.
    return (preg_match($regex, $this->useragent) === 1);
}
/**
* Gets a list of known device types.
*
...
...
@@ -926,4 +937,15 @@ class core_useragent {
// This browser does not support json.
return
false
;
}
/**
 * Reports whether the current client looks like a web crawler of some kind.
 * Other types of crawler may be included as well.
 *
 * @return bool
 */
public static function is_web_crawler() {
    // Ask the shared instance and coerce its answer to a strict boolean.
    return (bool) self::instance()->is_useragent_web_crawler();
}
}
lib/deprecatedlib.php
View file @
34c6ec18
...
...
@@ -1151,7 +1151,7 @@ function navmenu($course, $cm=NULL, $targetwindow='self') {
/**
* @deprecated please use calendar_event::create() instead.
*/
function
add_event
(
$event
)
{
function
add_event
(
$event
)
{
throw
new
coding_exception
(
'add_event() can not be used any more, please use calendar_event::create() instead.'
);
}
...
...
@@ -2378,4 +2378,21 @@ function get_referer($stripquery = true) {
}
else
{
return
''
;
}
}
\ No newline at end of file
}
/**
 * Checks if current user is a web crawler.
 *
 * This list can not be made complete, this is not a security
 * restriction, we make the list only to help these sites
 * especially when automatic guest login is disabled.
 *
 * If admin needs security they should enable forcelogin
 * and disable guest access!!
 *
 * @return bool
 * @deprecated since Moodle 3.0 use \core_useragent::is_web_crawler instead.
 */
function is_web_crawler() {
    // Emit a developer-level notice that names the replacement as a static method call.
    debugging('is_web_crawler() has been deprecated, please use \core_useragent::is_web_crawler() instead.',
        DEBUG_DEVELOPER);
    // Delegate to the replacement API named in the deprecation notice.
    return core_useragent::is_web_crawler();
}
lib/setuplib.php
View file @
34c6ec18
...
...
@@ -1692,45 +1692,6 @@ function make_localcache_directory($directory, $exceptiononerror = true) {
return
make_writable_directory
(
"
$CFG->localcachedir
/
$directory
"
,
$exceptiononerror
);
}
/**
 * Tells whether the current request appears to come from a web crawler.
 *
 * The signature list cannot be made complete and is not a security
 * restriction; it only exists to help crawled sites, especially when
 * automatic guest login is disabled.
 *
 * Admins who need security should enable forcelogin and disable guest access.
 *
 * @return bool True when $_SERVER['HTTP_USER_AGENT'] contains a known crawler signature.
 */
function is_web_crawler() {
    // No (or an empty) user agent header can never match a signature.
    if (empty($_SERVER['HTTP_USER_AGENT'])) {
        return false;
    }
    $agent = $_SERVER['HTTP_USER_AGENT'];

    // Signature => whether it is compared case-insensitively.
    $signatures = array(
        'Googlebot'    => false,
        'google.com'   => false, // Google.
        'Yahoo! Slurp' => false, // Yahoo.
        '[ZSEBOT]'     => false, // Zoomspider.
        'msnbot'       => true,  // MSN Search.
        'bingbot'      => false, // Bing.
        'Yandex'       => false,
        'AltaVista'    => false,
        'baiduspider'  => true,  // Baidu.
        'Teoma'        => false, // Ask.com.
    );

    foreach ($signatures as $needle => $ignorecase) {
        $position = $ignorecase ? stripos($agent, $needle) : strpos($agent, $needle);
        if ($position !== false) {
            return true;
        }
    }

    return false;
}
/**
* This class solves the problem of how to initialise $OUTPUT.
*
...
...
lib/tests/setuplib_test.php
View file @
34c6ec18
...
...
@@ -73,53 +73,6 @@ class core_setuplib_testcase extends advanced_testcase {
get_docs_url
(
'%%WWWROOT%%/lib/tests/setuplib_test.php'
));
}
/**
 * Verifies that is_web_crawler() rejects ordinary browser user agents
 * and accepts the user agents of known search engine crawlers.
 */
public function test_is_web_crawler() {
    // User agents of regular browsers; none may be reported as a crawler.
    $humanagents = array(
        'Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/18.0 Firefox/18.0',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/412 (KHTML, like Gecko) Safari/412',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.215 Safari/534.10',
        'Opera/9.0 (Windows NT 5.1; U; en)',
        'Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17 –Nexus',
        'Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5',
    );

    // User agents of search engine crawlers; all must be reported as a crawler.
    $crawleragents = array(
        // Google.
        'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'Googlebot/2.1 (+http://www.googlebot.com/bot.html)',
        'Googlebot-Image/1.0',
        // Yahoo.
        'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
        // Bing.
        'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
        'Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)',
        // MSN.
        'msnbot/2.1',
        // Yandex.
        'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)',
        'Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)',
        // AltaVista.
        'AltaVista V2.0B crawler@evreka.com',
        // ZoomSpider.
        'ZoomSpider - wrensoft.com [ZSEBOT]',
        // Baidu.
        'Baiduspider+(+http://www.baidu.com/search/spider_jp.html)',
        'Baiduspider+(+http://www.baidu.com/search/spider.htm)',
        'BaiDuSpider',
        // Ask.com.
        'User-Agent: Mozilla/2.0 (compatible; Ask Jeeves/Teoma)',
    );

    foreach ($humanagents as $useragent) {
        // is_web_crawler() reads the agent from the request superglobal.
        $_SERVER['HTTP_USER_AGENT'] = $useragent;
        $this->assertFalse(is_web_crawler());
    }

    foreach ($crawleragents as $useragent) {
        $_SERVER['HTTP_USER_AGENT'] = $useragent;
        $this->assertTrue(is_web_crawler(), "{$useragent} should be considered a search engine");
    }
}
/**
* Test if get_exception_info() removes file system paths.
*/
...
...
lib/tests/useragent_test.php
View file @
34c6ec18
...
...
@@ -1091,6 +1091,235 @@ class core_useragent_testcase extends basic_testcase {
'supports_svg'
=>
false
,
),
),
// Google web crawlers.
array
(
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
array
(
'Googlebot/2.1 (+http://www.googlebot.com/bot.html)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
array
(
'Googlebot-Image/1.0'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
// Yahoo crawlers.
// See https://help.yahoo.com/kb/slurp-crawling-page-sln22600.html.
array
(
'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
// Bing / MSN / AdIdx crawlers.
// See http://www.bing.com/webmaster/help/which-crawlers-does-bing-use-8c184ec0.
array
(
'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
array
(
'Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
array
(
'Mozilla/5.0 (iPhone; CPU iPhone OS 7_0 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11A465 Safari/9537.53 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)'
,
array
(
'is_web_crawler'
=>
true
,
'is_webkit'
=>
true
,
'is_safari_ios'
=>
true
,
'check_safari_ios_version'
=>
array
(
'527'
=>
true
,
),
'versionclasses'
=>
array
(
'safari'
,
'ios'
,
),
'devicetype'
=>
'mobile'
,
),
),
array
(
'Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; NOKIA; Lumia 530) like Gecko (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)'
,
array
(
'is_web_crawler'
=>
true
,
'is_ie'
=>
true
,
'check_ie_version'
=>
array
(
'0'
=>
true
,
'5.0'
=>
true
,
'5.5'
=>
true
,
'6.0'
=>
true
,
'7.0'
=>
true
,
'8.0'
=>
true
,
'9.0'
=>
true
,
'10'
=>
true
,
'11'
=>
true
,
),
'versionclasses'
=>
array
(
'ie'
,
'ie11'
,
),
'devicetype'
=>
'mobile'
,
),
),
array
(
'msnbot/2.0b (+http://search.msn.com/msnbot.htm)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
array
(
'msnbot/2.1'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
array
(
'msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
array
(
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b'
,
array
(
'is_web_crawler'
=>
true
,
'is_webkit'
=>
true
,
'is_safari'
=>
true
,
'check_safari_version'
=>
array
(
'1'
=>
true
,
'312'
=>
true
,
'500'
=>
true
,
),
'versionclasses'
=>
array
(
'safari'
,
),
),
),
array
(
'Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; NOKIA; Lumia 530) like Gecko BingPreview/1.0b'
,
array
(
'is_web_crawler'
=>
true
,
'is_ie'
=>
true
,
'check_ie_version'
=>
array
(
'0'
=>
true
,
'5.0'
=>
true
,
'5.5'
=>
true
,
'6.0'
=>
true
,
'7.0'
=>
true
,
'8.0'
=>
true
,
'9.0'
=>
true
,
'10'
=>
true
,
'11'
=>
true
,
),
'versionclasses'
=>
array
(
'ie'
,
'ie11'
,
),
'devicetype'
=>
'mobile'
,
),
),
// Yandex.
// See http://help.yandex.com/search/robots/agent.xml.
array
(
'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
array
(
'Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
// AltaVista.
array
(
'AltaVista V2.0B crawler@evreka.com'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
// ZoomSpider.
array
(
'ZoomSpider - wrensoft.com [ZSEBOT]'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
// Baidu.
array
(
'Baiduspider+(+http://www.baidu.com/search/spider_jp.html)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
array
(
'Baiduspider+(+http://www.baidu.com/search/spider.htm)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
// Ask.com.
array
(
'User-Agent: Mozilla/2.0 (compatible; Ask Jeeves/Teoma)'
,
array
(
'is_web_crawler'
=>
true
,
'versionclasses'
=>
array
(
),
),
),
);
}
...
...
@@ -1525,4 +1754,15 @@ class core_useragent_testcase extends basic_testcase {
}
$this
->
assertCount
(
count
(
$tests
[
'versionclasses'
]),
$actual
);
}
/**
 * Checks core_useragent::is_web_crawler() for one user agent from the provider.
 *
 * A data set without an 'is_web_crawler' entry is expected not to be a crawler.
 *
 * @dataProvider user_agents_providers
 * @param string $useragent The user agent string to load into core_useragent.
 * @param array $tests The expectations for this user agent, keyed by check name.
 */
public function test_useragent_web_crawler($useragent, $tests) {
    // Setup the core_useragent instance.
    core_useragent::instance(true, $useragent);

    // Default to "not a crawler" unless the data set says otherwise.
    $expectation = false;
    if (isset($tests['is_web_crawler'])) {
        $expectation = $tests['is_web_crawler'];
    }

    $this->assertSame($expectation, core_useragent::is_web_crawler());
}
}
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment