Added 'Fuzzy Match' operation
This commit is contained in:
parent
5893ac1a37
commit
8ad18bc7db
2
.github/workflows/master.yml
vendored
2
.github/workflows/master.yml
vendored
@ -19,7 +19,7 @@ jobs:
|
|||||||
- name: Install
|
- name: Install
|
||||||
run: |
|
run: |
|
||||||
npm install
|
npm install
|
||||||
export NODE_OPTIONS=--max_old_space_size=2048
|
npm run setheapsize
|
||||||
|
|
||||||
- name: Lint
|
- name: Lint
|
||||||
run: npx grunt lint
|
run: npx grunt lint
|
||||||
|
2
.github/workflows/pull_requests.yml
vendored
2
.github/workflows/pull_requests.yml
vendored
@ -18,7 +18,7 @@ jobs:
|
|||||||
- name: Install
|
- name: Install
|
||||||
run: |
|
run: |
|
||||||
npm install
|
npm install
|
||||||
export NODE_OPTIONS=--max_old_space_size=2048
|
npm run setheapsize
|
||||||
|
|
||||||
- name: Lint
|
- name: Lint
|
||||||
run: npx grunt lint
|
run: npx grunt lint
|
||||||
|
2
.github/workflows/releases.yml
vendored
2
.github/workflows/releases.yml
vendored
@ -19,7 +19,7 @@ jobs:
|
|||||||
- name: Install
|
- name: Install
|
||||||
run: |
|
run: |
|
||||||
npm install
|
npm install
|
||||||
export NODE_OPTIONS=--max_old_space_size=2048
|
npm run setheapsize
|
||||||
|
|
||||||
- name: Lint
|
- name: Lint
|
||||||
run: npx grunt lint
|
run: npx grunt lint
|
||||||
|
@ -173,6 +173,8 @@
|
|||||||
"testuidev": "npx nightwatch --env=dev",
|
"testuidev": "npx nightwatch --env=dev",
|
||||||
"lint": "npx grunt lint",
|
"lint": "npx grunt lint",
|
||||||
"postinstall": "npx grunt exec:fixCryptoApiImports",
|
"postinstall": "npx grunt exec:fixCryptoApiImports",
|
||||||
"newop": "node --experimental-modules src/core/config/scripts/newOperation.mjs"
|
"newop": "node --experimental-modules src/core/config/scripts/newOperation.mjs",
|
||||||
|
"getheapsize": "node -e 'console.log(`node heap limit = ${require(\"v8\").getHeapStatistics().heap_size_limit / (1024 * 1024)} Mb`)'",
|
||||||
|
"setheapsize": "export NODE_OPTIONS=--max_old_space_size=2048"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -238,6 +238,7 @@
|
|||||||
"Pad lines",
|
"Pad lines",
|
||||||
"Find / Replace",
|
"Find / Replace",
|
||||||
"Regular expression",
|
"Regular expression",
|
||||||
|
"Fuzzy Match",
|
||||||
"Offset checker",
|
"Offset checker",
|
||||||
"Hamming Distance",
|
"Hamming Distance",
|
||||||
"Convert distance",
|
"Convert distance",
|
||||||
|
@ -16,40 +16,72 @@
|
|||||||
* Anurag Awasthi - updated to 0.2.0
|
* Anurag Awasthi - updated to 0.2.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const SEQUENTIAL_BONUS = 15; // bonus for adjacent matches
|
export const DEFAULT_WEIGHTS = {
|
||||||
const SEPARATOR_BONUS = 30; // bonus if match occurs after a separator
|
sequentialBonus: 15, // bonus for adjacent matches
|
||||||
const CAMEL_BONUS = 30; // bonus if match is uppercase and prev is lower
|
separatorBonus: 30, // bonus if match occurs after a separator
|
||||||
const FIRST_LETTER_BONUS = 15; // bonus if the first letter is matched
|
camelBonus: 30, // bonus if match is uppercase and prev is lower
|
||||||
|
firstLetterBonus: 15, // bonus if the first letter is matched
|
||||||
|
|
||||||
const LEADING_LETTER_PENALTY = -5; // penalty applied for every letter in str before the first match
|
leadingLetterPenalty: -5, // penalty applied for every letter in str before the first match
|
||||||
const MAX_LEADING_LETTER_PENALTY = -15; // maximum penalty for leading letters
|
maxLeadingLetterPenalty: -15, // maximum penalty for leading letters
|
||||||
const UNMATCHED_LETTER_PENALTY = -1;
|
unmatchedLetterPenalty: -1
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Does a fuzzy search to find pattern inside a string.
|
* Does a fuzzy search to find pattern inside a string.
|
||||||
* @param {*} pattern string pattern to search for
|
* @param {string} pattern pattern to search for
|
||||||
* @param {*} str string string which is being searched
|
* @param {string} str string which is being searched
|
||||||
|
* @param {boolean} global whether to search for all matches or just one
|
||||||
* @returns [boolean, number] a boolean which tells if pattern was
|
* @returns [boolean, number] a boolean which tells if pattern was
|
||||||
* found or not and a search score
|
* found or not and a search score
|
||||||
*/
|
*/
|
||||||
export function fuzzyMatch(pattern, str) {
|
export function fuzzyMatch(pattern, str, global=false, weights=DEFAULT_WEIGHTS) {
|
||||||
const recursionCount = 0;
|
const recursionCount = 0;
|
||||||
const recursionLimit = 10;
|
const recursionLimit = 10;
|
||||||
const matches = [];
|
const matches = [];
|
||||||
const maxMatches = 256;
|
const maxMatches = 256;
|
||||||
|
|
||||||
|
if (!global) {
|
||||||
return fuzzyMatchRecursive(
|
return fuzzyMatchRecursive(
|
||||||
pattern,
|
pattern,
|
||||||
str,
|
str,
|
||||||
0 /* patternCurIndex */,
|
0 /* patternCurIndex */,
|
||||||
0 /* strCurrIndex */,
|
0 /* strCurrIndex */,
|
||||||
null /* srcMatces */,
|
null /* srcMatches */,
|
||||||
matches,
|
matches,
|
||||||
maxMatches,
|
maxMatches,
|
||||||
0 /* nextMatch */,
|
0 /* nextMatch */,
|
||||||
recursionCount,
|
recursionCount,
|
||||||
recursionLimit
|
recursionLimit,
|
||||||
|
weights
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return all matches
|
||||||
|
let foundMatch = true,
|
||||||
|
score,
|
||||||
|
idxs,
|
||||||
|
strCurrIndex = 0;
|
||||||
|
const results = [];
|
||||||
|
|
||||||
|
while (foundMatch) {
|
||||||
|
[foundMatch, score, idxs] = fuzzyMatchRecursive(
|
||||||
|
pattern,
|
||||||
|
str,
|
||||||
|
0 /* patternCurIndex */,
|
||||||
|
strCurrIndex,
|
||||||
|
null /* srcMatches */,
|
||||||
|
matches,
|
||||||
|
maxMatches,
|
||||||
|
0 /* nextMatch */,
|
||||||
|
recursionCount,
|
||||||
|
recursionLimit,
|
||||||
|
weights
|
||||||
|
);
|
||||||
|
if (foundMatch) results.push([foundMatch, score, [...idxs]]);
|
||||||
|
strCurrIndex = idxs[idxs.length - 1] + 1;
|
||||||
|
}
|
||||||
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -65,7 +97,8 @@ function fuzzyMatchRecursive(
|
|||||||
maxMatches,
|
maxMatches,
|
||||||
nextMatch,
|
nextMatch,
|
||||||
recursionCount,
|
recursionCount,
|
||||||
recursionLimit
|
recursionLimit,
|
||||||
|
weights
|
||||||
) {
|
) {
|
||||||
let outScore = 0;
|
let outScore = 0;
|
||||||
|
|
||||||
@ -110,7 +143,8 @@ function fuzzyMatchRecursive(
|
|||||||
maxMatches,
|
maxMatches,
|
||||||
nextMatch,
|
nextMatch,
|
||||||
recursionCount,
|
recursionCount,
|
||||||
recursionLimit
|
recursionLimit,
|
||||||
|
weights
|
||||||
);
|
);
|
||||||
|
|
||||||
if (matched) {
|
if (matched) {
|
||||||
@ -134,16 +168,16 @@ function fuzzyMatchRecursive(
|
|||||||
outScore = 100;
|
outScore = 100;
|
||||||
|
|
||||||
// Apply leading letter penalty
|
// Apply leading letter penalty
|
||||||
let penalty = LEADING_LETTER_PENALTY * matches[0];
|
let penalty = weights.leadingLetterPenalty * matches[0];
|
||||||
penalty =
|
penalty =
|
||||||
penalty < MAX_LEADING_LETTER_PENALTY ?
|
penalty < weights.maxLeadingLetterPenalty ?
|
||||||
MAX_LEADING_LETTER_PENALTY :
|
weights.maxLeadingLetterPenalty :
|
||||||
penalty;
|
penalty;
|
||||||
outScore += penalty;
|
outScore += penalty;
|
||||||
|
|
||||||
// Apply unmatched penalty
|
// Apply unmatched penalty
|
||||||
const unmatched = str.length - nextMatch;
|
const unmatched = str.length - nextMatch;
|
||||||
outScore += UNMATCHED_LETTER_PENALTY * unmatched;
|
outScore += weights.unmatchedLetterPenalty * unmatched;
|
||||||
|
|
||||||
// Apply ordering bonuses
|
// Apply ordering bonuses
|
||||||
for (let i = 0; i < nextMatch; i++) {
|
for (let i = 0; i < nextMatch; i++) {
|
||||||
@ -152,7 +186,7 @@ function fuzzyMatchRecursive(
|
|||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
const prevIdx = matches[i - 1];
|
const prevIdx = matches[i - 1];
|
||||||
if (currIdx === prevIdx + 1) {
|
if (currIdx === prevIdx + 1) {
|
||||||
outScore += SEQUENTIAL_BONUS;
|
outScore += weights.sequentialBonus;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -165,15 +199,15 @@ function fuzzyMatchRecursive(
|
|||||||
neighbor !== neighbor.toUpperCase() &&
|
neighbor !== neighbor.toUpperCase() &&
|
||||||
curr !== curr.toLowerCase()
|
curr !== curr.toLowerCase()
|
||||||
) {
|
) {
|
||||||
outScore += CAMEL_BONUS;
|
outScore += weights.camelBonus;
|
||||||
}
|
}
|
||||||
const isNeighbourSeparator = neighbor === "_" || neighbor === " ";
|
const isNeighbourSeparator = neighbor === "_" || neighbor === " ";
|
||||||
if (isNeighbourSeparator) {
|
if (isNeighbourSeparator) {
|
||||||
outScore += SEPARATOR_BONUS;
|
outScore += weights.separatorBonus;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// First letter
|
// First letter
|
||||||
outScore += FIRST_LETTER_BONUS;
|
outScore += weights.firstLetterBonus;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
120
src/core/operations/FuzzyMatch.mjs
Normal file
120
src/core/operations/FuzzyMatch.mjs
Normal file
@ -0,0 +1,120 @@
|
|||||||
|
/**
|
||||||
|
* @author n1474335 [n1474335@gmail.com]
|
||||||
|
* @copyright Crown Copyright 2021
|
||||||
|
* @license Apache-2.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
import Operation from "../Operation.mjs";
|
||||||
|
import {fuzzyMatch, calcMatchRanges, DEFAULT_WEIGHTS} from "../lib/FuzzyMatch.mjs";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fuzzy Match operation
|
||||||
|
*/
|
||||||
|
class FuzzyMatch extends Operation {

    /**
     * FuzzyMatch constructor
     *
     * Sets the operation metadata and declares its arguments: the search
     * pattern followed by the seven scoring weights, each defaulting to the
     * corresponding entry of DEFAULT_WEIGHTS.
     */
    constructor() {
        super();

        this.name = "Fuzzy Match";
        this.module = "Default";
        this.description = "Conducts a fuzzy search to find a pattern within the input based on weighted criteria.<br><br>e.g. A search for <code>dpan</code> will match on <code><b>D</b>on't <b>Pan</b>ic</code>";
        this.infoURL = "https://wikipedia.org/wiki/Fuzzy_matching_(computer-assisted_translation)";
        this.inputType = "string";
        this.outputType = "html";
        // NOTE: run() reads these arguments by position, so the order here
        // must stay in sync with the destructuring in run().
        this.args = [
            {
                name: "Search",
                type: "binaryString",
                value: ""
            },
            {
                name: "Sequential bonus",
                type: "number",
                value: DEFAULT_WEIGHTS.sequentialBonus,
                hint: "Bonus for adjacent matches"
            },
            {
                name: "Separator bonus",
                type: "number",
                value: DEFAULT_WEIGHTS.separatorBonus,
                hint: "Bonus if match occurs after a separator"
            },
            {
                name: "Camel bonus",
                type: "number",
                value: DEFAULT_WEIGHTS.camelBonus,
                hint: "Bonus if match is uppercase and previous is lower"
            },
            {
                name: "First letter bonus",
                type: "number",
                value: DEFAULT_WEIGHTS.firstLetterBonus,
                hint: "Bonus if the first letter is matched"
            },
            {
                name: "Leading letter penalty",
                type: "number",
                value: DEFAULT_WEIGHTS.leadingLetterPenalty,
                hint: "Penalty applied for every letter in the input before the first match"
            },
            {
                name: "Max leading letter penalty",
                type: "number",
                value: DEFAULT_WEIGHTS.maxLeadingLetterPenalty,
                hint: "Maximum penalty for leading letters"
            },
            {
                name: "Unmatched letter penalty",
                type: "number",
                value: DEFAULT_WEIGHTS.unmatchedLetterPenalty,
                hint: "Penalty for every letter that doesn't match"
            },
        ];
    }

    /**
     * Runs a global fuzzy search for the pattern over the input and returns
     * the input as HTML with every match highlighted. Each match is wrapped
     * in a <span> whose class alternates between "hl1" and "hl2", and each
     * contiguous run of matched characters inside it is wrapped in <b>.
     * Returns the plain message "No matches." when nothing was found.
     *
     * @param {string} input
     * @param {Object[]} args - search pattern followed by the seven weights,
     *     in the order declared in the constructor
     * @returns {html}
     */
    run(input, args) {
        const [
            searchStr,
            sequentialBonus,
            separatorBonus,
            camelBonus,
            firstLetterBonus,
            leadingLetterPenalty,
            maxLeadingLetterPenalty,
            unmatchedLetterPenalty
        ] = args;
        const weights = {
            sequentialBonus,
            separatorBonus,
            camelBonus,
            firstLetterBonus,
            leadingLetterPenalty,
            maxLeadingLetterPenalty,
            unmatchedLetterPenalty
        };

        const matches = fuzzyMatch(searchStr, input, true, weights);

        // In global mode fuzzyMatch returns an array of results, and an empty
        // array is truthy, so "nothing found" must be detected via its length.
        if (!matches || matches.length === 0) {
            return "No matches.";
        }

        let result = "",
            pos = 0,
            hlClass = "hl1";

        // Each result is [found, score, idxs]; only the matched indexes are
        // needed to build the highlighting.
        for (const [, , idxs] of matches) {
            const matchRanges = calcMatchRanges(idxs);
            // Without ranges no <span> would be opened, so emit nothing
            // rather than an unbalanced closing tag.
            if (matchRanges.length === 0) continue;

            matchRanges.forEach(([start, length], i) => {
                // Text between the previous range and this one. The input is
                // arbitrary user data, so it is escaped before being placed
                // in the HTML output.
                result += escapeHtmlText(input.slice(pos, start));
                if (i === 0) result += `<span class="${hlClass}">`;
                pos = start + length;
                result += `<b>${escapeHtmlText(input.slice(start, pos))}</b>`;
            });
            result += "</span>";
            hlClass = hlClass === "hl1" ? "hl2" : "hl1";
        }

        // Remainder of the input after the final match
        result += escapeHtmlText(input.slice(pos));

        return result;
    }

}

/**
 * Characters with special meaning in HTML and their entity replacements.
 */
const HTML_ENTITIES = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    "\"": "&quot;",
    "'": "&#x27;"
};

/**
 * Escapes HTML-significant characters so that text taken from the input is
 * displayed literally instead of being interpreted as markup.
 * NOTE(review): if the project already provides a shared HTML-escaping
 * utility, prefer it over this local helper.
 *
 * @param {string} str
 * @returns {string}
 */
function escapeHtmlText(str) {
    return str.replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);
}
|
||||||
|
|
||||||
|
export default FuzzyMatch;
|
@ -185,7 +185,7 @@ class RegularExpression extends Operation {
|
|||||||
* @param {boolean} captureGroups - Display each of the capture groups separately
|
* @param {boolean} captureGroups - Display each of the capture groups separately
|
||||||
* @returns {string}
|
* @returns {string}
|
||||||
*/
|
*/
|
||||||
function regexList (input, regex, displayTotal, matches, captureGroups) {
|
function regexList(input, regex, displayTotal, matches, captureGroups) {
|
||||||
let output = "",
|
let output = "",
|
||||||
total = 0,
|
total = 0,
|
||||||
match;
|
match;
|
||||||
@ -225,7 +225,7 @@ function regexList (input, regex, displayTotal, matches, captureGroups) {
|
|||||||
* @param {boolean} displayTotal
|
* @param {boolean} displayTotal
|
||||||
* @returns {string}
|
* @returns {string}
|
||||||
*/
|
*/
|
||||||
function regexHighlight (input, regex, displayTotal) {
|
function regexHighlight(input, regex, displayTotal) {
|
||||||
let output = "",
|
let output = "",
|
||||||
title = "",
|
title = "",
|
||||||
hl = 1,
|
hl = 1,
|
||||||
|
@ -6,7 +6,7 @@
|
|||||||
|
|
||||||
import HTMLOperation from "../HTMLOperation.mjs";
|
import HTMLOperation from "../HTMLOperation.mjs";
|
||||||
import Sortable from "sortablejs";
|
import Sortable from "sortablejs";
|
||||||
import {fuzzyMatch, calcMatchRanges} from "../../core/lib/FuzzySearch.mjs";
|
import {fuzzyMatch, calcMatchRanges} from "../../core/lib/FuzzyMatch.mjs";
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
Reference in New Issue
Block a user