Improved domain name regex
This commit is contained in:
parent
bf833991bf
commit
73823e3eb9
@ -4,7 +4,6 @@ node_js:
|
||||
install: npm install
|
||||
before_script:
|
||||
- npm install -g grunt
|
||||
- if [ "$TRAVIS_TAG" ]; then git checkout -b travis-build; fi
|
||||
script:
|
||||
- grunt lint
|
||||
- grunt test
|
||||
|
@ -2140,7 +2140,7 @@ const OperationConfig = {
|
||||
]
|
||||
},
|
||||
"Extract domains": {
|
||||
description: "Extracts domain names with common Top-Level Domains (TLDs).<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
|
||||
description: "Extracts domain names.<br>Note that this will not include paths. Use <strong>Extract URLs</strong> to find entire URLs.",
|
||||
run: Extract.runDomains,
|
||||
inputType: "string",
|
||||
outputType: "string",
|
||||
|
@ -187,11 +187,8 @@ const Extract = {
|
||||
* @returns {string}
|
||||
*/
|
||||
runDomains: function(input, args) {
|
||||
let displayTotal = args[0],
|
||||
protocol = "https?://",
|
||||
hostname = "[-\\w\\.]+",
|
||||
tld = "\\.(?:com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+",
|
||||
regex = new RegExp("(?:" + protocol + ")?" + hostname + tld, "ig");
|
||||
const displayTotal = args[0],
|
||||
regex = /\b((?=[a-z0-9-]{1,63}\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\.)+[a-z]{2,63}\b/ig;
|
||||
|
||||
return Extract._search(input, regex, null, displayTotal);
|
||||
},
|
||||
|
@ -40,7 +40,7 @@ const StrUtils = {
|
||||
},
|
||||
{
|
||||
name: "Domain",
|
||||
value: "(?:(https?):\\/\\/)?([-\\w.]+)\\.(com|net|org|biz|info|co|uk|onion|int|mobi|name|edu|gov|mil|eu|ac|ae|af|de|ca|ch|cn|cy|es|gb|hk|il|in|io|tv|me|nl|no|nz|ro|ru|tr|us|az|ir|kz|uz|pk)+"
|
||||
value: "\\b((?=[a-z0-9-]{1,63}\\.)(xn--)?[a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,63}\\b"
|
||||
},
|
||||
{
|
||||
name: "Windows file path",
|
||||
|
Loading…
x
Reference in New Issue
Block a user