1
0
mirror of synced 2024-11-15 02:37:40 +01:00

add Shuffle operation

This commit is contained in:
MikeCAT 2022-11-11 22:26:41 +09:00
parent ed8bd34915
commit 39143fa6a1
4 changed files with 251 additions and 0 deletions

View File

@ -249,6 +249,7 @@
"To Table",
"Reverse",
"Sort",
"Shuffle",
"Unique",
"Split",
"Filter",

View File

@ -0,0 +1,157 @@
/**
* @author mikecat
* @copyright Crown Copyright 2022
* @license Apache-2.0
*/
import Operation from "../Operation.mjs";
/**
* Shuffle operation
*/
class Shuffle extends Operation {
/**
* Shuffle constructor
*/
constructor() {
super();
this.name = "Shuffle";
this.module = "Default";
this.description = "Randomly reorders input elements.";
this.infoURL = "https://wikipedia.org/wiki/Shuffling";
this.inputType = "ArrayBuffer";
this.outputType = "ArrayBuffer";
this.args = [
{
"name": "By",
"type": "option",
"value": ["Byte", "Character", "Line"],
"defaultIndex": 1
}
];
}
/**
* @param {ArrayBuffer} input
* @param {Object[]} args
* @returns {ArrayBuffer}
*/
run(input, args) {
const type = args[0];
if (input.byteLength === 0) return input;
if (ArrayBuffer.isView(input)) {
if (input.byteOffset === 0 && input.byteLength === input.buffer.byteLength) {
input = input.buffer;
} else {
input = input.buffer.slice(input.byteOffset, input.byteOffset + input.byteLength);
}
}
const inputBytes = new Uint8Array(input);
// return a random number in [0, 1)
const rng = (typeof crypto) !== "undefined" && crypto.getRandomValues ? (function() {
const buf = new Uint32Array(2);
return function() {
// generate 53-bit random integer: 21 + 32 bits
crypto.getRandomValues(buf);
const value = (buf[0] >>> (32 - 21)) * ((1 << 30) * 4) + buf[1];
return value / ((1 << 23) * (1 << 30));
};
})() : Math.random;
// return a random integer in [0, max)
const randint = function(max) {
return Math.floor(rng() * max);
};
const toShuffle = [];
let addLastNewLine = false;
switch (type) {
case "Character":
// split input into UTF-8 code points
for (let i = 0; i < inputBytes.length;) {
const charLength = (function() {
if (inputBytes[i] < 0xc0) return 1;
if (inputBytes[i] < 0xe0) return 2;
if (inputBytes[i] < 0xf0) return 3;
if (inputBytes[i] < 0xf8) return 4;
return 1;
})();
if (i + charLength <= inputBytes.length) {
let elementLength = charLength;
for (let j = 1; j < charLength; j++) {
if ((inputBytes[i + j] & 0xc0) !== 0x80) {
elementLength = 1;
break;
}
}
toShuffle.push([i, elementLength]);
i += elementLength;
} else {
toShuffle.push([i, 1]);
i++;
}
}
break;
case "Line":
{
// split input by newline characters
let lineBegin = 0;
for (let i = 0; i < inputBytes.length; i++) {
if (inputBytes[i] === 0xd || inputBytes[i] === 0xa) {
if (i + 1 < inputBytes.length && inputBytes[i] === 0xd && inputBytes[i + 1] === 0xa) {
i++;
}
toShuffle.push([lineBegin, i - lineBegin + 1]);
lineBegin = i + 1;
}
}
if (lineBegin < inputBytes.length) {
toShuffle.push([lineBegin, inputBytes.length - lineBegin]);
addLastNewLine = true;
}
}
break;
default:
{
// Creating element information for each bytes looks very wasteful.
// Therefore, directly shuffle here.
const outputBytes = new Uint8Array(inputBytes);
for (let i = outputBytes.length - 1; i > 0; i--) {
const idx = randint(i + 1);
const tmp = outputBytes[idx];
outputBytes[idx] = outputBytes[i];
outputBytes[i] = tmp;
}
return outputBytes.buffer;
}
}
// shuffle elements
const lastStart = toShuffle[toShuffle.length - 1][0];
for (let i = toShuffle.length - 1; i > 0; i--) {
const idx = randint(i + 1);
const tmp = toShuffle[idx];
toShuffle[idx] = toShuffle[i];
toShuffle[i] = tmp;
}
// place shuffled elements
const outputBytes = new Uint8Array(inputBytes.length + (addLastNewLine ? 1 : 0));
let outputPos = 0;
for (let i = 0; i < toShuffle.length; i++) {
outputBytes.set(new Uint8Array(input, toShuffle[i][0], toShuffle[i][1]), outputPos);
outputPos += toShuffle[i][1];
if (addLastNewLine && toShuffle[i][0] === lastStart) {
outputBytes[outputPos] = 0xa;
outputPos++;
}
}
return outputBytes.buffer;
}
}
export default Shuffle;

View File

@ -124,6 +124,7 @@ import "./tests/UnescapeString.mjs";
import "./tests/LS47.mjs";
import "./tests/LZString.mjs";
import "./tests/NTLM.mjs";
import "./tests/Shuffle.mjs";
// Cannot test operations that use the File type yet
// import "./tests/SplitColourChannels.mjs";

View File

@ -0,0 +1,92 @@
/**
* @author mikecat
* @copyright Crown Copyright 2022
* @license Apache-2.0
*/
import TestRegister from "../../lib/TestRegister.mjs";
TestRegister.addTests([
{
"name": "Shuffle empty",
"input": "",
"expectedOutput": "",
"recipeConfig": [
{
"op": "Shuffle",
"args": ["Character"]
}
]
},
{
"name": "Shuffle bytes",
"input": "12345678",
"expectedOutput": "31 32 33 34 35 36 37 38",
"recipeConfig": [
{
"op": "Shuffle",
"args": ["Byte"]
},
{
"op": "To Hex",
"args": ["Space", 0]
},
{
"op": "Sort",
"args": ["Space", false, "Alphabetical (case sensitive)"]
}
]
},
{
"name": "Shuffle characters",
"input": "1234\uff15\uff16\uff17\uff18",
"expectedOutput": " 0031 0032 0033 0034 FF15 FF16 FF17 FF18",
"recipeConfig": [
{
"op": "Shuffle",
"args": ["Character"]
},
{
"op": "Escape Unicode Characters",
"args": ["%u", true, 4, true]
},
{
"op": "Split",
"args": ["%u", " "]
},
{
"op": "Sort",
"args": ["Space", false, "Alphabetical (case sensitive)"]
}
]
},
{
"name": "Shuffle lines",
"input": "1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\n",
"expectedOutput": "\n1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf",
"recipeConfig": [
{
"op": "Shuffle",
"args": ["Line"]
},
{
"op": "Sort",
"args": ["Line feed", false, "Alphabetical (case sensitive)"]
}
]
},
{
"name": "Shuffle lines (last character is not newline)",
"input": "1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf",
"expectedOutput": "\n1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf",
"recipeConfig": [
{
"op": "Shuffle",
"args": ["Line"]
},
{
"op": "Sort",
"args": ["Line feed", false, "Alphabetical (case sensitive)"]
}
]
},
]);