1
0
mirror of synced 2024-11-17 11:37:13 +01:00

Refactored scanning for file types to be more than twice as fast.

This commit is contained in:
n1474335 2019-01-01 15:12:01 +00:00
parent ede75530d0
commit 4c285bce57
3 changed files with 83 additions and 56 deletions

View File

@ -16,13 +16,22 @@ import {FILE_SIGNATURES} from "./FileSignatures";
* These values can be numbers for static checks, arrays of potential valid matches, * These values can be numbers for static checks, arrays of potential valid matches,
* or bespoke functions to check the validity of the buffer value at that offset. * or bespoke functions to check the validity of the buffer value at that offset.
* @param {Uint8Array} buf * @param {Uint8Array} buf
* @param {number} [offset=0] Where in the buffer to start searching from
* @returns {boolean} * @returns {boolean}
*/ */
function signatureMatches(sig, buf) { function signatureMatches(sig, buf, offset=0) {
if (sig instanceof Array) { // Using a length check seems to be more performant than `sig instanceof Array`
return sig.reduce((acc, s) => acc || bytesMatch(s, buf), false); if (sig.length) {
// sig is an Array - return true if any of them match
// The following `reduce` method is nice, but performance matters here, so we
// opt for a faster, if less elegant, for loop.
// return sig.reduce((acc, s) => acc || bytesMatch(s, buf, offset), false);
for (let i = 0; i < sig.length; i++) {
if (bytesMatch(sig[i], buf, offset)) return true;
}
return false;
} else { } else {
return bytesMatch(sig, buf); return bytesMatch(sig, buf, offset);
} }
} }
@ -34,25 +43,27 @@ function signatureMatches(sig, buf) {
* These values can be numbers for static checks, arrays of potential valid matches, * These values can be numbers for static checks, arrays of potential valid matches,
* or bespoke functions to check the validity of the buffer value at that offset. * or bespoke functions to check the validity of the buffer value at that offset.
* @param {Uint8Array} buf * @param {Uint8Array} buf
* @param {number} [offset=0] Where in the buffer to start searching from
* @returns {boolean} * @returns {boolean}
*/ */
function bytesMatch(sig, buf) { function bytesMatch(sig, buf, offset=0) {
for (const offset in sig) { for (const sigoffset in sig) {
switch (typeof sig[offset]) { const pos = parseInt(sigoffset, 10) + offset;
switch (typeof sig[sigoffset]) {
case "number": // Static check case "number": // Static check
if (buf[offset] !== sig[offset]) if (buf[pos] !== sig[sigoffset])
return false; return false;
break; break;
case "object": // Array of options case "object": // Array of options
if (sig[offset].indexOf(buf[offset]) < 0) if (sig[sigoffset].indexOf(buf[pos]) < 0)
return false; return false;
break; break;
case "function": // More complex calculation case "function": // More complex calculation
if (!sig[offset](buf[offset])) if (!sig[sigoffset](buf[pos]))
return false; return false;
break; break;
default: default:
throw new Error(`Unrecognised signature type at offset ${offset}`); throw new Error(`Unrecognised signature type at offset ${sigoffset}`);
} }
} }
return true; return true;
@ -91,6 +102,46 @@ export function detectFileType(buf) {
} }
/**
 * Given a buffer, searches for magic byte sequences at all possible positions and returns
 * the details of every file type whose signature matches, along with where it matched.
 *
 * Every signature in every category of FILE_SIGNATURES is tested at every position in the
 * buffer. The results are returned in order of increasing offset, so callers can report or
 * extract the files in the order in which they appear in the data.
 *
 * @param {Uint8Array} buf
 * @returns {Object[]} foundFiles - Empty if buf is missing or shorter than two bytes
 * @returns {number} foundFiles.offset - The position in the buffer at which this file was found
 * @returns {Object} foundFiles.fileDetails - The matching entry from FILE_SIGNATURES
 * @returns {string} foundFiles.fileDetails.name - Name of file type
 * @returns {string} foundFiles.fileDetails.extension - File extension
 * @returns {string} foundFiles.fileDetails.mime - Mime type
 * @returns {string} [foundFiles.fileDetails.description] - Description
 */
export function scanForFileTypes(buf) {
    if (!(buf && buf.length > 1)) {
        return [];
    }

    const foundFiles = [];

    // TODO allow user to select which categories to check
    for (const cat in FILE_SIGNATURES) {
        const category = FILE_SIGNATURES[cat];

        for (let i = 0; i < category.length; i++) {
            const filetype = category[i];

            // Try this signature at every position in the buffer
            for (let pos = 0; pos < buf.length; pos++) {
                if (signatureMatches(filetype.signature, buf, pos)) {
                    foundFiles.push({
                        offset: pos,
                        fileDetails: filetype
                    });
                }
            }
        }
    }

    // The loops above group matches by signature rather than by position. Sort them so
    // that results are reported by increasing offset. Array.prototype.sort is stable
    // (ES2019+), so several matches at the same offset keep their signature order.
    return foundFiles.sort((a, b) => a.offset - b.offset);
}
/** /**
* Detects whether the given buffer is a file of the type specified. * Detects whether the given buffer is a file of the type specified.
* *

View File

@ -7,7 +7,7 @@
import Operation from "../Operation"; import Operation from "../Operation";
// import OperationError from "../errors/OperationError"; // import OperationError from "../errors/OperationError";
import Utils from "../Utils"; import Utils from "../Utils";
import {detectFileType, extractFile} from "../lib/FileType"; import {scanForFileTypes, extractFile} from "../lib/FileType";
/** /**
* Extract Files operation * Extract Files operation
@ -39,7 +39,7 @@ class ExtractFiles extends Operation {
const bytes = new Uint8Array(input); const bytes = new Uint8Array(input);
// Scan for embedded files // Scan for embedded files
const detectedFiles = scanForEmbeddedFiles(bytes); const detectedFiles = scanForFileTypes(bytes);
// Extract each file that we support // Extract each file that we support
const files = []; const files = [];
@ -64,26 +64,4 @@ class ExtractFiles extends Operation {
} }
/**
 * Scans the data for embedded files by running file type detection on the remainder of
 * the data starting at each successive position.
 *
 * TODO refactor
 *
 * @param data - Sliceable data with a length; the caller passes a Uint8Array
 * @returns {Object[]} One {offset, fileDetails} entry per detected file type, in order
 *     of increasing offset
 */
function scanForEmbeddedFiles(data) {
    const found = [];
    let offset = 0;

    while (offset < data.length) {
        // Detection only looks at the data from the current position onwards
        const matches = detectFileType(data.slice(offset));

        if (matches.length) {
            for (const fileDetails of matches) {
                found.push({offset, fileDetails});
            }
        }

        offset++;
    }

    return found;
}
export default ExtractFiles; export default ExtractFiles;

View File

@ -6,7 +6,7 @@
import Operation from "../Operation"; import Operation from "../Operation";
import Utils from "../Utils"; import Utils from "../Utils";
import {detectFileType} from "../lib/FileType"; import {scanForFileTypes} from "../lib/FileType";
/** /**
* Scan for Embedded Files operation * Scan for Embedded Files operation
@ -41,32 +41,30 @@ class ScanForEmbeddedFiles extends Operation {
*/ */
run(input, args) { run(input, args) {
let output = "Scanning data for 'magic bytes' which may indicate embedded files. The following results may be false positives and should not be treat as reliable. Any suffiently long file is likely to contain these magic bytes coincidentally.\n", let output = "Scanning data for 'magic bytes' which may indicate embedded files. The following results may be false positives and should not be treat as reliable. Any suffiently long file is likely to contain these magic bytes coincidentally.\n",
types,
numFound = 0, numFound = 0,
numCommonFound = 0; numCommonFound = 0;
const ignoreCommon = args[0], const ignoreCommon = args[0],
commonExts = ["ico", "ttf", ""], commonExts = ["ttf", "utf16le", ""],
data = new Uint8Array(input); data = new Uint8Array(input),
types = scanForFileTypes(data);
for (let i = 0; i < data.length; i++) {
types = detectFileType(data.slice(i));
if (types.length) {
types.forEach(type => {
if (ignoreCommon && commonExts.indexOf(type.extension) > -1) {
numCommonFound++;
return;
}
numFound++; if (types.length) {
output += "\nOffset " + i + " (0x" + Utils.hex(i) + "):\n" + types.forEach(type => {
" File extension: " + type.extension + "\n" + if (ignoreCommon && commonExts.indexOf(type.fileDetails.extension) > -1) {
" MIME type: " + type.mime + "\n"; numCommonFound++;
return;
}
if (type.description && type.description.length) { numFound++;
output += " Description: " + type.description + "\n"; output += "\nOffset " + type.offset + " (0x" + Utils.hex(type.offset) + "):\n" +
} " File extension: " + type.fileDetails.extension + "\n" +
}); " MIME type: " + type.fileDetails.mime + "\n";
}
if (type.fileDetails.description && type.fileDetails.description.length) {
output += " Description: " + type.fileDetails.description + "\n";
}
});
} }
if (numFound === 0) { if (numFound === 0) {