diff --git a/src/core/Utils.mjs b/src/core/Utils.mjs
index 9c687ee4..698af68c 100755
--- a/src/core/Utils.mjs
+++ b/src/core/Utils.mjs
@@ -704,8 +704,28 @@ class Utils {
* Utils.stripHtmlTags("
Test
");
*/
static stripHtmlTags(htmlStr, removeScriptAndStyle=false) {
+ /**
+ * Repeatedly remove a pattern from a string until a pass removes nothing more.
+ * Avoids incomplete sanitization e.g. "aabcbc".replace(/abc/g, "") === "abc"
+ * Loops instead of recursing, so heavily nested input cannot exhaust the call stack.
+ *
+ * @param {RegExp} pattern - global pattern whose matches are deleted
+ * @param {string} str
+ * @returns {string}
+ */
+ function removeRepeatedly(pattern, str) {
+ let current = str;
+ let previousLength;
+ do {
+ previousLength = current.length;
+ current = current.replace(pattern, "");
+ } while (current.length !== previousLength);
+ return current;
+ }
+
if (removeScriptAndStyle) {
- htmlStr = htmlStr.replace(/<(script|style)[^>]*>.*?<\/(script|style)>/gi, "");
+ // [\s\S] lets a block span several lines; \1 pairs each opening tag with its own closing tag
+ htmlStr = removeRepeatedly(/<(script|style)[^>]*>[\s\S]*?<\/\1[^>]*>/gi, htmlStr);
}
return htmlStr.replace(/<[^>]+>/g, "");
}
diff --git a/src/web/HTMLOperation.mjs b/src/web/HTMLOperation.mjs
index df5de5a8..04e9b3cc 100755
--- a/src/web/HTMLOperation.mjs
+++ b/src/web/HTMLOperation.mjs
@@ -6,6 +6,7 @@
import HTMLIngredient from "./HTMLIngredient.mjs";
import Utils from "../core/Utils.mjs";
+import url from "url";
/**
@@ -147,22 +148,39 @@ class HTMLOperation {
/**
* Given a URL for a Wikipedia (or other wiki) page, this function returns a link to that page.
*
- * @param {string} url
+ * The page title is percent-decoded and its underscores are shown as spaces. Any host other
+ * than "wikipedia.org" or "forensicswiki.xyz" gets the generic label instead of a title.
+ *
+ * @param {string} urlStr - URL of the page to link to
* @returns {string}
*/
-function titleFromWikiLink(url) {
- const splitURL = url.split("/");
- if (!splitURL.includes("wikipedia.org") && !splitURL.includes("forensicswiki.xyz")) {
- // Not a wiki link, return full URL
- return `More Informationopen_in_new`;
+function titleFromWikiLink(urlStr) {
+ const urlObj = url.parse(urlStr);
+ let wikiName = "",
+ pageTitle = "";
+
+ switch (urlObj.host) {
+ case "forensicswiki.xyz":
+ wikiName = "Forensics Wiki";
+ // url.parse leaves query null when the URL has no "?"; URLSearchParams also percent-decodes
+ pageTitle = new URLSearchParams(urlObj.query || "").get("title") || "";
+ break;
+ case "wikipedia.org":
+ wikiName = "Wikipedia";
+ pageTitle = (urlObj.pathname || "").replace(/^\/wiki\//, ""); // Chop off '/wiki/'
+ try {
+ pageTitle = decodeURIComponent(pageTitle);
+ } catch (err) {
+ // Malformed percent-escape: keep the raw title rather than throwing
+ }
+ break;
+ default:
+ // Not a wiki link, return full URL
+ return `More Informationopen_in_new`;
}
- const wikiName = splitURL.includes("forensicswiki.xyz") ? "Forensics Wiki" : "Wikipedia";
-
- const pageTitle = decodeURIComponent(splitURL[splitURL.length - 1])
- .replace(/_/g, " ")
- .replace(/index\.php\?title=/g, "");
- return `${pageTitle}open_in_new on ${wikiName}`;
+ pageTitle = pageTitle.replace(/_/g, " ");
+ return `${pageTitle}open_in_new on ${wikiName}`;
}
export default HTMLOperation;