1
0
mirror of https://github.com/vichan-devel/vichan.git synced 2025-01-19 01:24:05 +01:00

Add a spam filter that unshortens urls

This commit is contained in:
seisatsu 2024-07-31 12:44:39 -05:00
parent a20f618d80
commit 098edb9cd7
3 changed files with 45 additions and 1 deletions

View File

@ -511,6 +511,17 @@
// 'action' => 'reject'
// );
// Example: Expand shortened links in a post, looking for and blocking URLs that lead to an unwanted
// endpoint. Many botspam posts include a variety of shortened URLs which all point to the same few
// webhosts. You can use this filter to block the endpoint webhost instead of just the apparent URL.
// $config['filters'][] = array(
// 'condition' => array(
// 'unshorten' => '/endpoint.net/i',
// ),
// 'action' => 'reject',
// 'message' => 'None of that, please.'
// );
// Filter flood prevention conditions ("flood-match") depend on a table which contains a cache of recent
// posts across all boards. This table is automatically purged of older posts, determining the maximum
// "age" by looking at each filter. However, when determining the maximum age, vichan does not look

View File

@ -136,6 +136,14 @@ class Filter {
return $post['board'] == $match;
case 'password':
return $post['password'] == $match;
case 'unshorten':
$extracted_urls = get_urls($post['body_nomarkup']);
foreach ($extracted_urls as $url) {
if (preg_match($match, trace_url($url))) {
return true;
}
}
return false;
default:
error('Unknown filter condition: ' . $condition);
}

View File

@ -3028,3 +3028,28 @@ function hashPassword($password) {
return hash('sha3-256', $password . $config['secure_password_salt']);
}
// Thanks to https://gist.github.com/marijn/3901938
function trace_url($url) {
$ch = curl_init($url);
curl_setopt_array($ch, array(
CURLOPT_FOLLOWLOCATION => TRUE, // the magic sauce
CURLOPT_RETURNTRANSFER => TRUE,
CURLOPT_SSL_VERIFYHOST => FALSE, // suppress certain SSL errors
CURLOPT_SSL_VERIFYPEER => FALSE,
CURLOPT_TIMEOUT => 30,
));
curl_exec($ch);
$url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
curl_close($ch);
return $url;
}
// Thanks to https://stackoverflow.com/questions/10002227/linkify-regex-function-php-daring-fireball-method/10002262#10002262
function get_urls($body) {
$regex = '(?xi)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))';
$result = preg_match_all("#$regex#i", $body, $match);
return $match[0];
}