Add operation to normalise unicode
This commit is contained in:
parent
610d46a1a4
commit
a6fa0628f2
5
package-lock.json
generated
5
package-lock.json
generated
@ -14403,6 +14403,11 @@
|
||||
"normalize-path": "^2.1.1"
|
||||
}
|
||||
},
|
||||
"unorm": {
|
||||
"version": "1.6.0",
|
||||
"resolved": "https://registry.npmjs.org/unorm/-/unorm-1.6.0.tgz",
|
||||
"integrity": "sha512-b2/KCUlYZUeA7JFUuRJZPUtr4gZvBh7tavtv4fvk4+KV9pfGiR6CQAQAWl49ZpR3ts2dk4FYkP7EIgDJoiOLDA=="
|
||||
},
|
||||
"unpipe": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
|
||||
|
@ -147,6 +147,7 @@
|
||||
"ssdeep.js": "0.0.2",
|
||||
"tesseract.js": "^2.0.0-alpha.15",
|
||||
"ua-parser-js": "^0.7.20",
|
||||
"unorm": "^1.6.0",
|
||||
"utf8": "^3.0.0",
|
||||
"vkbeautify": "^0.99.3",
|
||||
"xmldom": "^0.1.27",
|
||||
|
@ -39,6 +39,7 @@
|
||||
"URL Decode",
|
||||
"Escape Unicode Characters",
|
||||
"Unescape Unicode Characters",
|
||||
"Normalise Unicode",
|
||||
"To Quoted Printable",
|
||||
"From Quoted Printable",
|
||||
"To Punycode",
|
||||
|
@ -164,3 +164,15 @@ export const IO_FORMAT = {
|
||||
"Simplified Chinese GB18030 (54936)": 54936,
|
||||
};
|
||||
|
||||
/**
|
||||
* Unicode Normalisation Forms
|
||||
*
|
||||
* @author Matthieu [m@tthieu.xyz]
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
 * Names of the Unicode Normalisation Forms offered as options by the Normalise Unicode operation.
|
||||
*/
|
||||
export const UNICODE_NORMALISATION_FORMS = ["NFD", "NFC", "NFKD", "NFKC"];
|
||||
|
60
src/core/operations/NormaliseUnicode.mjs
Normal file
60
src/core/operations/NormaliseUnicode.mjs
Normal file
@ -0,0 +1,60 @@
|
||||
/**
|
||||
* @author Matthieu [m@tthieu.xyz]
|
||||
* @copyright Crown Copyright 2019
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation.mjs";
|
||||
import OperationError from "../errors/OperationError.mjs";
|
||||
import unorm from "unorm";
|
||||
import {UNICODE_NORMALISATION_FORMS} from "../lib/ChrEnc";
|
||||
|
||||
/**
 * Normalise Unicode operation
 *
 * Converts the input string to one of the Unicode Normalisation Forms
 * listed in UNICODE_NORMALISATION_FORMS (NFD, NFC, NFKD, NFKC) using the
 * unorm library.
 */
class NormaliseUnicode extends Operation {

    /**
     * NormaliseUnicode constructor
     */
    constructor() {
        super();

        this.name = "Normalise Unicode";
        this.module = "UnicodeNormalisation";
        this.description = "Transform Unicode to one of the Normalisation Form";
        this.infoURL = "http://www.unicode.org/reports/tr15/";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [
            {
                name: "Normal Form",
                type: "option",
                value: UNICODE_NORMALISATION_FORMS
            }
        ];
    }

    /**
     * Normalises the input to the requested form.
     *
     * @param {string} input - Text to normalise.
     * @param {Object[]} args - Single-element list holding the name of the
     *     target form: "NFD", "NFC", "NFKD" or "NFKC".
     * @returns {string} The normalised text.
     * @throws {OperationError} If the requested form is not one of the four
     *     names above.
     */
    run(input, args) {
        const [normalForm] = args;

        switch (normalForm) {
            case "NFD":
                return unorm.nfd(input);
            case "NFC":
                return unorm.nfc(input);
            case "NFKD":
                return unorm.nfkd(input);
            case "NFKC":
                // Must be the compatibility composition (nfkc), not the
                // canonical one (nfc), otherwise compatibility characters
                // are left unfolded.
                return unorm.nfkc(input);
            default:
                throw new OperationError("Unknown Normalisation Form");
        }
    }

}
|
||||
|
||||
export default NormaliseUnicode;
|
@ -57,6 +57,7 @@ import "./tests/MS.mjs";
|
||||
import "./tests/Magic.mjs";
|
||||
import "./tests/MorseCode.mjs";
|
||||
import "./tests/NetBIOS.mjs";
|
||||
import "./tests/NormaliseUnicode.mjs";
|
||||
import "./tests/OTP.mjs";
|
||||
import "./tests/PGP.mjs";
|
||||
import "./tests/PHP.mjs";
|
||||
|
54
tests/operations/tests/NormaliseUnicode.mjs
Normal file
54
tests/operations/tests/NormaliseUnicode.mjs
Normal file
@ -0,0 +1,54 @@
|
||||
/**
|
||||
 * Normalise Unicode tests.
|
||||
*
|
||||
 * @author Matthieu [m@tthieu.xyz]
|
||||
*
|
||||
* @copyright Crown Copyright 2018
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
import TestRegister from "../../lib/TestRegister.mjs";
|
||||
|
||||
// Every case feeds the same input: a precomposed C-cedilla (U+00C7), a
// decomposed C + combining cedilla (U+0043 U+0327) and ROMAN NUMERAL ONE
// (U+2160). The canonical forms (NFD/NFC) leave U+2160 untouched, while the
// compatibility forms (NFKD/NFKC) fold it to the Latin capital letter "I".
TestRegister.addTests([
    {
        name: "Normalise Unicode - NFD",
        input: "\u00c7\u0043\u0327\u2160",
        expectedMatch: /C\u0327C\u0327\u2160/,
        recipeConfig: [
            {
                op: "Normalise Unicode",
                args: ["NFD"],
            },
        ],
    }, {
        name: "Normalise Unicode - NFC",
        input: "\u00c7\u0043\u0327\u2160",
        expectedMatch: /\u00C7\u00C7\u2160/,
        recipeConfig: [
            {
                op: "Normalise Unicode",
                args: ["NFC"],
            },
        ],
    }, {
        name: "Normalise Unicode - NFKD",
        input: "\u00c7\u0043\u0327\u2160",
        expectedMatch: /C\u0327C\u0327I/,
        recipeConfig: [
            {
                op: "Normalise Unicode",
                args: ["NFKD"],
            },
        ],
    }, {
        name: "Normalise Unicode - NFKC",
        input: "\u00c7\u0043\u0327\u2160",
        // NFKC composes both cedilla sequences and applies the compatibility
        // mapping U+2160 -> "I"; expecting U+2160 here would only match the
        // NFC result.
        expectedMatch: /\u00C7\u00C7I/,
        recipeConfig: [
            {
                op: "Normalise Unicode",
                args: ["NFKC"],
            },
        ],
    },
]);
|
||||
|
Loading…
x
Reference in New Issue
Block a user