// diff --git a/src/core/lib/FileSignatures.mjs b/src/core/lib/FileSignatures.mjs new file mode 100644 index 00000000..69699d7d --- /dev/null +++ b/src/core/lib/FileSignatures.mjs @@ -0,0 +1,1136 @@
/**
 * File signatures and extractor functions
 *
 * @author n1474335 [n1474335@gmail.com]
 * @copyright Crown Copyright 2018
 * @license Apache-2.0
 *
 */
import Stream from "./Stream";

/**
 * Table of known file types, grouped by category.
 *
 * Every entry carries:
 *  - name, extension, mime, description: descriptive strings. `extension` and
 *    `mime` may hold several comma-separated alternatives.
 *  - signature: either one map or an array of alternative maps. Each map goes
 *    from a byte offset to the value expected at that offset; an array value
 *    lists several acceptable bytes for that offset.
 *  - extractor: the carving function for this type, or null when none exists.
 *
 * Some entries are deliberately placed ahead of a more general entry whose
 * signature they also satisfy; those are marked with a "Place before" note.
 */
export const FILE_SIGNATURES = {
    Images: [
        {
            name: "Joint Photographic Experts Group image",
            extension: "jpg",
            mime: "image/jpeg",
            description: "",
            signature: {0: 0xff, 1: 0xd8, 2: 0xff},
            extractor: extractJPEG,
        },
        {
            name: "Graphics Interchange Format image",
            extension: "gif",
            mime: "image/gif",
            description: "",
            signature: {0: 0x47, 1: 0x49, 2: 0x46},
            extractor: null,
        },
        {
            name: "Portable Network Graphics image",
            extension: "png",
            mime: "image/png",
            description: "",
            signature: {0: 0x89, 1: 0x50, 2: 0x4e, 3: 0x47},
            extractor: null,
        },
        {
            name: "WEBP Image",
            extension: "webp",
            mime: "image/webp",
            description: "",
            signature: {8: 0x57, 9: 0x45, 10: 0x42, 11: 0x50},
            extractor: null,
        },
        { // Place before tiff check
            name: "Canon CR2 raw image",
            extension: "cr2",
            mime: "image/x-canon-cr2",
            description: "",
            signature: [
                {0: 0x49, 1: 0x49, 2: 0x2a, 3: 0x0, 8: 0x43, 9: 0x52},
                {0: 0x4d, 1: 0x4d, 2: 0x0, 3: 0x2a, 8: 0x43, 9: 0x52},
            ],
            extractor: null,
        },
        {
            name: "Tagged Image File Format image",
            extension: "tif",
            mime: "image/tiff",
            description: "",
            signature: [
                {0: 0x49, 1: 0x49, 2: 0x2a, 3: 0x0},
                {0: 0x4d, 1: 0x4d, 2: 0x0, 3: 0x2a},
            ],
            extractor: null,
        },
        {
            name: "Bitmap image",
            extension: "bmp",
            mime: "image/bmp",
            description: "",
            signature: {0: 0x42, 1: 0x4d},
            extractor: null,
        },
        {
            name: "JPEG Extended Range image",
            extension: "jxr",
            mime: "image/vnd.ms-photo",
            description: "",
            signature: {0: 0x49, 1: 0x49, 2: 0xbc},
            extractor: null,
        },
        {
            name: "Photoshop image",
            extension: "psd",
            mime: "image/vnd.adobe.photoshop",
            description: "",
            signature: {0: 0x38, 1: 0x42, 2: 0x50, 3: 0x53},
            extractor: null,
        },
        {
            name: "Icon image",
            extension: "ico",
            mime: "image/x-icon",
            description: "",
            signature: {0: 0x0, 1: 0x0, 2: 0x1, 3: 0x0},
            extractor: null,
        },
    ],
    Video: [
        { // Place before webm
            name: "Matroska Multimedia Container",
            extension: "mkv",
            mime: "video/x-matroska",
            description: "",
            // "matroska"
            signature: {31: 0x6d, 32: 0x61, 33: 0x74, 34: 0x72, 35: 0x6f, 36: 0x73, 37: 0x6b, 38: 0x61},
            extractor: null,
        },
        {
            name: "WEBM video",
            extension: "webm",
            mime: "video/webm",
            description: "",
            signature: {0: 0x1a, 1: 0x45, 2: 0xdf, 3: 0xa3},
            extractor: null,
        },
        {
            name: "MPEG-4 video",
            extension: "mp4",
            mime: "video/mp4",
            description: "",
            signature: [
                {0: 0x0, 1: 0x0, 2: 0x0, 3: [0x18, 0x20], 4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70},
                // "3gp5"
                {0: 0x33, 1: 0x67, 2: 0x70, 3: 0x35},
                {
                    0: 0x0, 1: 0x0, 2: 0x0, 3: 0x1c,
                    4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70,
                    8: 0x6d, 9: 0x70, 10: 0x34, 11: 0x32,
                    // "mp41mp42isom"
                    16: 0x6d, 17: 0x70, 18: 0x34, 19: 0x31,
                    20: 0x6d, 21: 0x70, 22: 0x34, 23: 0x32,
                    24: 0x69, 25: 0x73, 26: 0x6f, 27: 0x6d,
                },
            ],
            extractor: null,
        },
        {
            name: "M4V video",
            extension: "m4v",
            mime: "video/x-m4v",
            description: "",
            signature: {
                0: 0x0, 1: 0x0, 2: 0x0, 3: 0x1c,
                4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70,
                8: 0x4d, 9: 0x34, 10: 0x56,
            },
            extractor: null,
        },
        {
            name: "Quicktime video",
            extension: "mov",
            mime: "video/quicktime",
            description: "",
            signature: {0: 0x0, 1: 0x0, 2: 0x0, 3: 0x14, 4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70},
            extractor: null,
        },
        {
            name: "Audio Video Interleave",
            extension: "avi",
            mime: "video/x-msvideo",
            description: "",
            signature: {0: 0x52, 1: 0x49, 2: 0x46, 3: 0x46, 8: 0x41, 9: 0x56, 10: 0x49},
            extractor: null,
        },
        {
            name: "Windows Media Video",
            extension: "wmv",
            mime: "video/x-ms-wmv",
            description: "",
            signature: {
                0: 0x30, 1: 0x26, 2: 0xb2, 3: 0x75, 4: 0x8e,
                5: 0x66, 6: 0xcf, 7: 0x11, 8: 0xa6, 9: 0xd9,
            },
            extractor: null,
        },
        {
            name: "MPEG video",
            extension: "mpg",
            mime: "video/mpeg",
            description: "",
            signature: {0: 0x0, 1: 0x0, 2: 0x1, 3: 0xba},
            extractor: null,
        },
        {
            name: "Flash Video",
            extension: "flv",
            mime: "video/x-flv",
            description: "",
            signature: {0: 0x46, 1: 0x4c, 2: 0x56, 3: 0x1},
            extractor: null,
        },
    ],
    Audio: [
        {
            name: "Waveform Audio",
            extension: "wav",
            mime: "audio/x-wav",
            description: "",
            signature: {0: 0x52, 1: 0x49, 2: 0x46, 3: 0x46, 8: 0x57, 9: 0x41, 10: 0x56, 11: 0x45},
            extractor: null,
        },
        {
            name: "OGG audio",
            extension: "ogg",
            mime: "audio/ogg",
            description: "",
            signature: {0: 0x4f, 1: 0x67, 2: 0x67, 3: 0x53},
            extractor: null,
        },
        {
            name: "Musical Instrument Digital Interface audio",
            extension: "midi",
            mime: "audio/midi",
            description: "",
            signature: {0: 0x4d, 1: 0x54, 2: 0x68, 3: 0x64},
            extractor: null,
        },
        {
            name: "MPEG-3 audio",
            extension: "mp3",
            mime: "audio/mpeg",
            description: "",
            signature: [
                {0: 0x49, 1: 0x44, 2: 0x33},
                {0: 0xff, 1: 0xfb},
            ],
            extractor: null,
        },
        {
            name: "MPEG-4 Part 14 audio",
            extension: "m4a",
            mime: "audio/m4a",
            description: "",
            signature: [
                {4: 0x66, 5: 0x74, 6: 0x79, 7: 0x70, 8: 0x4d, 9: 0x34, 10: 0x41},
                {0: 0x4d, 1: 0x34, 2: 0x41, 3: 0x20},
            ],
            extractor: null,
        },
        {
            name: "Free Lossless Audio Codec",
            extension: "flac",
            mime: "audio/x-flac",
            description: "",
            signature: {0: 0x66, 1: 0x4c, 2: 0x61, 3: 0x43},
            extractor: null,
        },
        {
            name: "Adaptive Multi-Rate audio codec",
            extension: "amr",
            mime: "audio/amr",
            description: "",
            signature: {0: 0x23, 1: 0x21, 2: 0x41, 3: 0x4d, 4: 0x52, 5: 0x0a},
            extractor: null,
        },
    ],
    Documents: [
        {
            name: "Portable Document Format",
            extension: "pdf",
            mime: "application/pdf",
            description: "",
            signature: {0: 0x25, 1: 0x50, 2: 0x44, 3: 0x46},
            extractor: extractPDF,
        },
        {
            name: "PostScript",
            extension: "ps",
            mime: "application/postscript",
            description: "",
            signature: {0: 0x25, 1: 0x21},
            extractor: null,
        },
        {
            name: "Rich Text Format",
            extension: "rtf",
            mime: "application/rtf",
            description: "",
            signature: {0: 0x7b, 1: 0x5c, 2: 0x72, 3: 0x74, 4: 0x66},
            extractor: null,
        },
        {
            name: "Microsoft Office documents/OLE2",
            extension: "ole2,doc,xls,dot,ppt,xla,ppa,pps,pot,msi,sdw,db,vsd,msg",
            mime: "application/msword,application/vnd.ms-excel,application/vnd.ms-powerpoint",
            description: "Microsoft Office documents",
            signature: {0: 0xd0, 1: 0xcf, 2: 0x11, 3: 0xe0, 4: 0xa1, 5: 0xb1, 6: 0x1a, 7: 0xe1},
            extractor: null,
        },
        {
            name: "EPUB e-book",
            extension: "epub",
            mime: "application/epub+zip",
            description: "",
            signature: {
                0: 0x50, 1: 0x4b, 2: 0x3, 3: 0x4,
                // "mimetypeapplication/epub+zip"
                30: 0x6d, 31: 0x69, 32: 0x6d, 33: 0x65, 34: 0x74, 35: 0x79, 36: 0x70, 37: 0x65,
                38: 0x61, 39: 0x70, 40: 0x70, 41: 0x6c, 42: 0x69, 43: 0x63, 44: 0x61, 45: 0x74,
                46: 0x69, 47: 0x6f, 48: 0x6e, 49: 0x2f, 50: 0x65, 51: 0x70, 52: 0x75, 53: 0x62,
                54: 0x2b, 55: 0x7a, 56: 0x69, 57: 0x70,
            },
            extractor: null,
        },
    ],
    Applications: [
        {
            name: "Windows Portable Executable",
            extension: "exe,dll,drv,vxd,sys,ocx,vbx,com,fon,scr",
            mime: "application/x-msdownload",
            description: "",
            signature: {0: 0x4d, 1: 0x5a, 3: [0x0, 0x1, 0x2], 5: [0x0, 0x1, 0x2]},
            extractor: extractMZPE,
        },
        {
            name: "Executable and Linkable Format file",
            extension: "elf,bin,axf,o,prx,so",
            mime: "application/x-executable",
            description: "Executable and Linkable Format file. No standard file extension.",
            signature: {0: 0x7f, 1: 0x45, 2: 0x4c, 3: 0x46},
            extractor: null,
        },
        {
            name: "Adobe Flash",
            extension: "swf",
            mime: "application/x-shockwave-flash",
            description: "",
            signature: {0: [0x43, 0x46], 1: 0x57, 2: 0x53},
            extractor: null,
        },
        {
            name: "Java Class",
            extension: "class",
            mime: "application/java-vm",
            description: "",
            signature: {0: 0xca, 1: 0xfe, 2: 0xba, 3: 0xbe},
            extractor: null,
        },
        {
            name: "Dalvik Executable",
            extension: "dex",
            mime: "application/octet-stream",
            description: "Dalvik Executable as used by Android",
            signature: {0: 0x64, 1: 0x65, 2: 0x78, 3: 0x0a, 4: 0x30, 5: 0x33, 6: 0x35, 7: 0x0},
            extractor: null,
        },
        {
            name: "Google Chrome Extension",
            extension: "crx",
            mime: "application/crx",
            description: "Google Chrome extension or packaged app",
            signature: {0: 0x43, 1: 0x72, 2: 0x32, 3: 0x34},
            extractor: null,
        },
    ],
    Archives: [
        {
            name: "PKZIP archive",
            extension: "zip",
            mime: "application/zip",
            description: "",
            signature: {0: 0x50, 1: 0x4b, 2: [0x3, 0x5, 0x7], 3: [0x4, 0x6, 0x8]},
            extractor: extractZIP,
        },
        {
            name: "TAR archive",
            extension: "tar",
            mime: "application/x-tar",
            description: "",
            signature: {257: 0x75, 258: 0x73, 259: 0x74, 260: 0x61, 261: 0x72},
            extractor: null,
        },
        {
            name: "Roshal Archive",
            extension: "rar",
            mime: "application/x-rar-compressed",
            description: "",
            signature: {0: 0x52, 1: 0x61, 2: 0x72, 3: 0x21, 4: 0x1a, 5: 0x7, 6: [0x0, 0x1]},
            extractor: null,
        },
        {
            name: "Gzip",
            extension: "gz",
            mime: "application/gzip",
            description: "",
            signature: {0: 0x1f, 1: 0x8b, 2: 0x8},
            extractor: null,
        },
        {
            name: "Bzip2",
            extension: "bz2",
            mime: "application/x-bzip2",
            description: "",
            signature: {0: 0x42, 1: 0x5a, 2: 0x68},
            extractor: null,
        },
        {
            name: "7zip",
            extension: "7z",
            mime: "application/x-7z-compressed",
            description: "",
            signature: {0: 0x37, 1: 0x7a, 2: 0xbc, 3: 0xaf, 4: 0x27, 5: 0x1c},
            extractor: null,
        },
        {
            name: "Zlib Deflate",
            extension: "zlib",
            mime: "application/x-deflate",
            description: "",
            signature: {0: 0x78, 1: [0x1, 0x9c, 0xda, 0x5e]},
            extractor: null,
        },
        {
            name: "xz compression",
            extension: "xz",
            mime: "application/x-xz",
            description: "",
            signature: {0: 0xfd, 1: 0x37, 2: 0x7a, 3: 0x58, 4: 0x5a, 5: 0x0},
            extractor: null,
        },
        {
            name: "Tarball",
            extension: "tar.z",
            mime: "application/x-gtar",
            description: "",
            signature: {0: 0x1f, 1: [0x9d, 0xa0]},
            extractor: null,
        },
        {
            name: "ISO disk image",
            extension: "iso",
            mime: "application/octet-stream",
            description: "ISO 9660 CD/DVD image file",
            // "CD001" at one of three possible offsets
            signature: [
                {0x8001: 0x43, 0x8002: 0x44, 0x8003: 0x30, 0x8004: 0x30, 0x8005: 0x31},
                {0x8801: 0x43, 0x8802: 0x44, 0x8803: 0x30, 0x8804: 0x30, 0x8805: 0x31},
                {0x9001: 0x43, 0x9002: 0x44, 0x9003: 0x30, 0x9004: 0x30, 0x9005: 0x31},
            ],
            extractor: null,
        },
        {
            name: "Virtual Machine Disk",
            extension: "vmdk",
            mime: "application/vmdk,application/x-virtualbox-vmdk",
            description: "",
            signature: {0: 0x4b, 1: 0x44, 2: 0x4d},
            extractor: null,
        },
    ],
    Miscellaneous: [
        {
            name: "UTF-8 text file",
            extension: "txt",
            mime: "text/plain",
            description: "UTF-8 encoded Unicode byte order mark, commonly but not exclusively seen in text files.",
            signature: {0: 0xef, 1: 0xbb, 2: 0xbf},
            extractor: null,
        },
        { // Place before UTF-16 LE file
            name: "UTF-32 LE file",
            extension: "utf32le",
            mime: "charset/utf32le",
            description: "Little-endian UTF-32 encoded Unicode byte order mark.",
            signature: {0: 0xff, 1: 0xfe, 2: 0x00, 3: 0x00},
            extractor: null,
        },
        {
            name: "UTF-16 LE file",
            extension: "utf16le",
            mime: "charset/utf16le",
            description: "Little-endian UTF-16 encoded Unicode byte order mark.",
            signature: {0: 0xff, 1: 0xfe},
            extractor: null,
        },
        {
            name: "Web Open Font Format",
            extension: "woff",
            mime: "application/font-woff",
            description: "",
            signature: {0: 0x77, 1: 0x4f, 2: 0x46, 3: 0x46, 4: 0x0, 5: 0x1, 6: 0x0, 7: 0x0},
            extractor: null,
        },
        {
            name: "Web Open Font Format 2",
            extension: "woff2",
            mime: "application/font-woff",
            description: "",
            signature: {0: 0x77, 1: 0x4f, 2: 0x46, 3: 0x32, 4: 0x0, 5: 0x1, 6: 0x0, 7: 0x0},
            extractor: null,
        },
        {
            name: "Embedded OpenType font",
            extension: "eot",
            mime: "application/octet-stream",
            description: "",
            signature: [
                {8: 0x2, 9: 0x0, 10: 0x1, 34: 0x4c, 35: 0x50},
                {8: 0x1, 9: 0x0, 10: 0x0, 34: 0x4c, 35: 0x50},
                {8: 0x2, 9: 0x0, 10: 0x2, 34: 0x4c, 35: 0x50},
            ],
            extractor: null,
        },
        {
            name: "TrueType Font",
            extension: "ttf",
            mime: "application/font-sfnt",
            description: "",
            signature: {0: 0x0, 1: 0x1, 2: 0x0, 3: 0x0, 4: 0x0},
            extractor: null,
        },
        {
            name: "OpenType Font",
            extension: "otf",
            mime: "application/font-sfnt",
            description: "",
            signature: {0: 0x4f, 1: 0x54, 2: 0x54, 3: 0x4f, 4: 0x0},
            extractor: null,
        },
        {
            name: "SQLite",
            extension: "sqlite",
            mime: "application/x-sqlite3",
            description: "",
            signature: {0: 0x53, 1: 0x51, 2: 0x4c, 3: 0x69},
            extractor: null,
        },
    ],
};


/**
 * Reports whether a JPEG marker code introduces a segment that begins with a
 * two-byte big-endian length field.
 *
 * @param {number} code - Second byte of the marker (the byte after 0xff)
 * @returns {boolean}
 */
function isSizedJPEGMarker(code) {
    return (code >= 0xc0 && code <= 0xcf) || // Start of frame variants, Huffman / arithmetic tables
        (code >= 0xdb && code <= 0xdf) ||    // DQT, DNL, DRI, DHP, EXP
        (code >= 0xe0 && code <= 0xef) ||    // Application-specific
        code === 0xfe;                       // Comment
}


/**
 * Reads the length field of a JPEG segment and moves the stream past the
 * segment payload.
 *
 * @param {Stream} stream - Positioned directly after the two marker bytes
 * @throws {Error} if the length is not a number of at least 2
 */
function skipJPEGSegment(stream) {
    const segmentSize = stream.readInt(2, "be");
    // The length counts its own two bytes, so a smaller (or non-numeric) value
    // cannot describe a real segment and would move the stream backwards.
    if (!(segmentSize >= 2)) throw new Error("Invalid JPEG segment length: " + segmentSize);
    stream.moveForwardsBy(segmentSize - 2);
}


/**
 * JPEG extractor.
 *
 * Walks the marker segments from the Start of Image marker and carves
 * everything up to and including the End of Image marker.
 *
 * @param {Uint8Array} bytes
 * @param {number} offset
 * @returns {Uint8Array}
 * @throws {Error} if a marker or segment length is invalid, or if the data
 *     ends before an End of Image marker is reached
 */
export function extractJPEG(bytes, offset) {
    const stream = new Stream(bytes.slice(offset));

    while (stream.hasMore()) {
        const marker = stream.getBytes(2);
        if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker);

        const code = marker[1];

        // End of Image
        if (code === 0xd9) return stream.carve();

        // Start of Image / temporary use in arithmetic coding: no payload
        if (code === 0xd8 || code === 0x01) continue;

        if (code === 0xda) {
            // Start of scan: skip the scan header, then the entropy-coded data
            skipJPEGSegment(stream);
            stream.continueUntil(0xff);
            continue;
        }

        if (isSizedJPEGMarker(code)) {
            // DNL (0xdc), DRI (0xdd) and EXP (0xdf) are length-prefixed like
            // every other segment here; skipping only their payload size
            // leaves the stream inside the segment and fails on the next read.
            skipJPEGSegment(stream);
            continue;
        }

        // Byte stuffing (0x00), restart markers (0xd0-0xd7) and anything
        // unrecognised: carry on through the encoded data to the next 0xff
        stream.continueUntil(0xff);
    }

    throw new Error("Unable to parse JPEG successfully");
}


/**
 * Portable executable extractor.
 * Assumes that the offset refers to an MZ header.
 *
 * Carves up to the furthest end of raw section data listed in the section
 * table. Anything stored after the last section's raw data is not included.
 *
 * @param {Uint8Array} bytes
 * @param {number} offset
 * @returns {Uint8Array}
 * @throws {Error} if no "PE\0\0" signature is found where the MZ header
 *     points, or if the PE header is truncated
 */
export function extractMZPE(bytes, offset) {
    const stream = new Stream(bytes.slice(offset));

    // Move to PE header pointer
    stream.moveTo(0x3c);
    const peAddress = stream.readInt(4, "le");
    if (!(peAddress >= 0)) throw new Error("Invalid PE header pointer");

    // Move to PE header and check its signature ("PE\0\0"). The MZ signature
    // alone is only two bytes, so this filters out most false positives.
    stream.moveTo(peAddress);
    const peSignature = stream.getBytes(4);
    const isPE = Boolean(peSignature) &&
        peSignature[0] === 0x50 &&
        peSignature[1] === 0x45 &&
        peSignature[2] === 0x0 &&
        peSignature[3] === 0x0;
    if (!isPE) throw new Error("Invalid PE header signature");

    // Get number of sections (follows the 2-byte machine type)
    stream.moveForwardsBy(2);
    const numSections = stream.readInt(2, "le");

    // Get optional header size
    stream.moveForwardsBy(12);
    const optionalHeaderSize = stream.readInt(2, "le");
    if (!(numSections >= 0) || !(optionalHeaderSize >= 0)) {
        throw new Error("Truncated PE header");
    }

    // Move past characteristics and optional header to the section table
    stream.moveForwardsBy(2 + optionalHeaderSize);

    // Each section header is 0x28 bytes; raw data size and address sit at
    // offsets 16 and 20. The last header is not guaranteed to describe the
    // data stored furthest into the file (a section may have no raw data at
    // all), so take the maximum over every section.
    let endOfRawData = 0;
    for (let i = 0; i < numSections; i++) {
        stream.moveForwardsBy(16);
        const rawDataSize = stream.readInt(4, "le");
        const rawDataAddress = stream.readInt(4, "le");
        stream.moveForwardsBy(16);

        if (rawDataSize > 0 && rawDataAddress > 0) {
            endOfRawData = Math.max(endOfRawData, rawDataAddress + rawDataSize);
        }
    }

    // Never carve less than the headers that have just been read
    stream.moveTo(Math.max(stream.position, endOfRawData));

    return stream.carve();
}


/**
 * PDF extractor.
 *
 * Carves up to and including the first end-of-file marker (%%EOF) plus an
 * optional trailing CR and/or LF. If no marker is present, everything up to
 * the end of the input is returned.
 *
 * NOTE(review): a PDF can contain more than one %%EOF marker (for example
 * after an incremental update); only the first one is honoured here.
 *
 * @param {Uint8Array} bytes
 * @param {number} offset
 * @returns {Uint8Array}
 */
export function extractPDF(bytes, offset) {
    const stream = new Stream(bytes.slice(offset));
    const eofMarker = [0x25, 0x25, 0x45, 0x4f, 0x46]; // %%EOF

    // Find end-of-file marker
    stream.continueUntil(eofMarker);

    // Presumably the stream sits at the end of the data when nothing was
    // found (verify against Stream#continueUntil); in that case there is no
    // marker to step over and the whole remainder is carved.
    if (stream.hasMore()) {
        stream.moveForwardsBy(eofMarker.length);
        stream.consumeIf(0x0d);
        stream.consumeIf(0x0a);
    }

    return stream.carve();
}


/**
 * ZIP extractor.
 *
 * Carves up to the end of the End of central directory record, including its
 * trailing archive comment.
 *
 * @param {Uint8Array} bytes
 * @param {number} offset
 * @returns {Uint8Array}
 * @throws {Error} if no End of central directory record is found, or if the
 *     record is cut short before its comment length field
 */
export function extractZIP(bytes, offset) {
    const data = bytes.slice(offset);
    const stream = new Stream(data);
    const eocdSignature = [0x50, 0x4b, 0x05, 0x06];

    // An empty archive starts with the End of central directory record
    // itself. This is checked explicitly in case the stream search only
    // looks beyond the current position.
    const startsWithEOCD = eocdSignature.every((byte, i) => data[i] === byte);

    if (!startsWithEOCD) {
        // Find End of central directory record
        stream.continueUntil(eocdSignature);
        if (!stream.hasMore()) {
            throw new Error("Unable to find ZIP end of central directory record");
        }
    }

    // Get comment length and consume
    stream.moveForwardsBy(20);
    const commentLength = stream.readInt(2, "le");
    if (!(commentLength >= 0)) {
        throw new Error("Truncated ZIP end of central directory record");
    }
    stream.moveForwardsBy(commentLength);

    return stream.carve();
}

// diff --git a/src/core/lib/FileType.mjs b/src/core/lib/FileType.mjs index ef6cfb03..b96ea69e 100644 --- a/src/core/lib/FileType.mjs +++ b/src/core/lib/FileType.mjs @@ -6,256 +6,7 @@ * @license Apache-2.0 * */ -import Stream from "./Stream"; - -/** - * A categorised table of file types, including signatures to identifying them and functions - * to extract them where possible. - */ -const FILE_SIGNATURES = { - "Images": [ - { - name: "JPEG Image", - extension: "jpg", - mime: "image/jpeg", - description: "", - signature: { - 0: 0xff, - 1: 0xd8, - 2: 0xff - }, - extractor: extractJPEG - }, - { - name: "GIF Image", - extension: "gif", - mime: "image/gif", - description: "", - signature: { - 0: 0x47, - 1: 0x49, - 2: 0x46 - }, - extractor: null - }, - { - name: "PNG Image", - extension: "png", - mime: "image/png", - description: "", - signature: { - 0: 0x89, - 1: 0x50, - 2: 0x4e, - 3: 0x47 - }, - extractor: null - }, - { - name: "WEBP Image", - extension: "webp", - mime: "image/webp", - description: "", - signature: { - 8: 0x57, - 9: 0x45, - 10: 0x42, - 11: 0x50 - }, - extractor: null - }, - { - name: "TIFF Image", - extension: "tif", - mime: "image/tiff", - description: "", - signature: [ - { - 0: 0x49, - 1: 0x49, - 2: 0x2a, - 3: 0x0 - }, - { - 0: 0x4d, - 1: 0x4d, - 2: 0x0, - 3: 0x2a - } - ], - extractor: null - }, /* - { - name: " Image", - extension: "", -
mime: "image/", - description: "", - signature: { - 0: 0x, - 1: 0x, - 2: 0x, - 3: 0x - }, - extractor: null - }, - { - name: " Image", - extension: "", - mime: "image/", - description: "", - signature: { - 0: 0x, - 1: 0x, - 2: 0x, - 3: 0x - }, - extractor: null - }, - { - name: " Image", - extension: "", - mime: "image/", - description: "", - signature: { - 0: 0x, - 1: 0x, - 2: 0x, - 3: 0x - }, - extractor: null - }, - { - name: " Image", - extension: "", - mime: "image/", - description: "", - signature: { - 0: 0x, - 1: 0x, - 2: 0x, - 3: 0x - }, - extractor: null - }, - { - name: " Image", - extension: "", - mime: "image/", - description: "", - signature: { - 0: 0x, - 1: 0x, - 2: 0x, - 3: 0x - }, - extractor: null - },*/ - ], - "Video": [ - { - name: "WEBM", - extension: "webm", - mime: "video/webm", - description: "", - signature: { - 0: 0x1a, - 1: 0x45, - 2: 0xdf, - 3: 0xa3 - }, - extractor: null - }, - ], - "Audio": [ - { - name: "WAV", - extension: "wav", - mime: "audio/x-wav", - description: "", - signature: { - 0: 0x52, - 1: 0x49, - 2: 0x46, - 3: 0x46, - 8: 0x57, - 9: 0x41, - 10: 0x56, - 11: 0x45 - }, - extractor: null - }, - { - name: "OGG", - extension: "ogg", - mime: "audio/ogg", - description: "", - signature: { - 0: 0x4f, - 1: 0x67, - 2: 0x67, - 3: 0x53 - }, - extractor: null - }, - ], - "Documents": [ - { - name: "Portable Document Format", - extension: "pdf", - mime: "application/pdf", - description: "", - signature: { - 0: 0x25, - 1: 0x50, - 2: 0x44, - 3: 0x46 - }, - extractor: extractPDF - }, - ], - "Applications": [ - { - name: "Windows Portable Executable", - extension: "exe", - mime: "application/x-msdownload", - description: "", - signature: { - 0: 0x4d, - 1: 0x5a - }, - extractor: extractMZPE - }, - ], - "Archives": [ - { - name: "ZIP", - extension: "zip", - mime: "application/zip", - description: "", - signature: { - 0: 0x50, - 1: 0x4b, - 2: [0x3, 0x5, 0x7], - 3: [0x4, 0x6, 0x8] - }, - extractor: extractZIP - }, - - ], -}; +import 
{FILE_SIGNATURES} from "./FileSignatures"; /** @@ -313,7 +64,8 @@ function bytesMatch(sig, buf) { * extension and mime type. * * @param {Uint8Array} buf - * @returns {Object[]} type + * @returns {Object[]} types + * @returns {string} type.name - Name of file type * @returns {string} type.ext - File extension * @returns {string} type.mime - Mime type * @returns {string} [type.desc] - Description @@ -336,370 +88,6 @@ export function detectFileType(buf) { }); } return matchingFiles; - - // Delete all below this line once implemented in FILE_SIGNATURES above. - - - /* - // needs to be before `tif` check - if (((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) && buf[8] === 0x43 && buf[9] === 0x52) { - return { - ext: "cr2", - mime: "image/x-canon-cr2" - }; - } - - if ((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) { - return { - ext: "tif", - mime: "image/tiff" - }; - } - - if (buf[0] === 0x42 && buf[1] === 0x4D) { - return { - ext: "bmp", - mime: "image/bmp" - }; - } - - if (buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0xBC) { - return { - ext: "jxr", - mime: "image/vnd.ms-photo" - }; - } - - if (buf[0] === 0x38 && buf[1] === 0x42 && buf[2] === 0x50 && buf[3] === 0x53) { - return { - ext: "psd", - mime: "image/vnd.adobe.photoshop" - }; - } - - // needs to be before `zip` check - if (buf[0] === 0x50 && buf[1] === 0x4B && buf[2] === 0x3 && buf[3] === 0x4 && buf[30] === 0x6D && buf[31] === 0x69 && buf[32] === 0x6D && buf[33] === 0x65 && buf[34] === 0x74 && buf[35] === 0x79 && buf[36] === 0x70 && buf[37] === 0x65 && buf[38] === 0x61 && buf[39] === 0x70 && buf[40] === 0x70 && buf[41] === 0x6C && buf[42] === 0x69 && buf[43] === 0x63 && buf[44] === 0x61 && buf[45] === 0x74 && buf[46] === 0x69 && buf[47] === 0x6F && buf[48] === 0x6E && buf[49] === 0x2F && buf[50] === 
0x65 && buf[51] === 0x70 && buf[52] === 0x75 && buf[53] === 0x62 && buf[54] === 0x2B && buf[55] === 0x7A && buf[56] === 0x69 && buf[57] === 0x70) { - return { - ext: "epub", - mime: "application/epub+zip" - }; - } - - if (buf[257] === 0x75 && buf[258] === 0x73 && buf[259] === 0x74 && buf[260] === 0x61 && buf[261] === 0x72) { - return { - ext: "tar", - mime: "application/x-tar" - }; - } - - if (buf[0] === 0x52 && buf[1] === 0x61 && buf[2] === 0x72 && buf[3] === 0x21 && buf[4] === 0x1A && buf[5] === 0x7 && (buf[6] === 0x0 || buf[6] === 0x1)) { - return { - ext: "rar", - mime: "application/x-rar-compressed" - }; - } - - if (buf[0] === 0x1F && buf[1] === 0x8B && buf[2] === 0x8) { - return { - ext: "gz", - mime: "application/gzip" - }; - } - - if (buf[0] === 0x42 && buf[1] === 0x5A && buf[2] === 0x68) { - return { - ext: "bz2", - mime: "application/x-bzip2" - }; - } - - if (buf[0] === 0x37 && buf[1] === 0x7A && buf[2] === 0xBC && buf[3] === 0xAF && buf[4] === 0x27 && buf[5] === 0x1C) { - return { - ext: "7z", - mime: "application/x-7z-compressed" - }; - } - - if (buf[0] === 0x78 && buf[1] === 0x01) { - return { - ext: "dmg, zlib", - mime: "application/x-apple-diskimage, application/x-deflate" - }; - } - - if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && (buf[3] === 0x18 || buf[3] === 0x20) && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) || (buf[0] === 0x33 && buf[1] === 0x67 && buf[2] === 0x70 && buf[3] === 0x35) || (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x6D && buf[9] === 0x70 && buf[10] === 0x34 && buf[11] === 0x32 && buf[16] === 0x6D && buf[17] === 0x70 && buf[18] === 0x34 && buf[19] === 0x31 && buf[20] === 0x6D && buf[21] === 0x70 && buf[22] === 0x34 && buf[23] === 0x32 && buf[24] === 0x69 && buf[25] === 0x73 && buf[26] === 0x6F && buf[27] === 0x6D)) { - return { - ext: "mp4", - mime: "video/mp4" - }; - } - 
- if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x56)) { - return { - ext: "m4v", - mime: "video/x-m4v" - }; - } - - if (buf[0] === 0x4D && buf[1] === 0x54 && buf[2] === 0x68 && buf[3] === 0x64) { - return { - ext: "mid", - mime: "audio/midi" - }; - } - - // needs to be before the `webm` check - if (buf[31] === 0x6D && buf[32] === 0x61 && buf[33] === 0x74 && buf[34] === 0x72 && buf[35] === 0x6f && buf[36] === 0x73 && buf[37] === 0x6B && buf[38] === 0x61) { - return { - ext: "mkv", - mime: "video/x-matroska" - }; - } - - if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x14 && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) { - return { - ext: "mov", - mime: "video/quicktime" - }; - } - - if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x41 && buf[9] === 0x56 && buf[10] === 0x49) { - return { - ext: "avi", - mime: "video/x-msvideo" - }; - } - - if (buf[0] === 0x30 && buf[1] === 0x26 && buf[2] === 0xB2 && buf[3] === 0x75 && buf[4] === 0x8E && buf[5] === 0x66 && buf[6] === 0xCF && buf[7] === 0x11 && buf[8] === 0xA6 && buf[9] === 0xD9) { - return { - ext: "wmv", - mime: "video/x-ms-wmv" - }; - } - - if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x1 && buf[3].toString(16)[0] === "b") { - return { - ext: "mpg", - mime: "video/mpeg" - }; - } - - if ((buf[0] === 0x49 && buf[1] === 0x44 && buf[2] === 0x33) || (buf[0] === 0xFF && buf[1] === 0xfb)) { - return { - ext: "mp3", - mime: "audio/mpeg" - }; - } - - if ((buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x41) || (buf[0] === 0x4D && buf[1] === 0x34 && buf[2] === 0x41 && buf[3] === 0x20)) { - return { - ext: "m4a", - mime: "audio/m4a" - }; - } - - if (buf[0] === 0x66 && buf[1] === 0x4C && buf[2] 
=== 0x61 && buf[3] === 0x43) { - return { - ext: "flac", - mime: "audio/x-flac" - }; - } - - if (buf[0] === 0x23 && buf[1] === 0x21 && buf[2] === 0x41 && buf[3] === 0x4D && buf[4] === 0x52 && buf[5] === 0x0A) { - return { - ext: "amr", - mime: "audio/amr" - }; - } - - if ((buf[0] === 0x43 || buf[0] === 0x46) && buf[1] === 0x57 && buf[2] === 0x53) { - return { - ext: "swf", - mime: "application/x-shockwave-flash" - }; - } - - if (buf[0] === 0x7B && buf[1] === 0x5C && buf[2] === 0x72 && buf[3] === 0x74 && buf[4] === 0x66) { - return { - ext: "rtf", - mime: "application/rtf" - }; - } - - if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x46 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) { - return { - ext: "woff", - mime: "application/font-woff" - }; - } - - if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x32 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) { - return { - ext: "woff2", - mime: "application/font-woff" - }; - } - - if (buf[34] === 0x4C && buf[35] === 0x50 && ((buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x01) || (buf[8] === 0x01 && buf[9] === 0x00 && buf[10] === 0x00) || (buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x02))) { - return { - ext: "eot", - mime: "application/octet-stream" - }; - } - - if (buf[0] === 0x00 && buf[1] === 0x01 && buf[2] === 0x00 && buf[3] === 0x00 && buf[4] === 0x00) { - return { - ext: "ttf", - mime: "application/font-sfnt" - }; - } - - if (buf[0] === 0x4F && buf[1] === 0x54 && buf[2] === 0x54 && buf[3] === 0x4F && buf[4] === 0x00) { - return { - ext: "otf", - mime: "application/font-sfnt" - }; - } - - if (buf[0] === 0x00 && buf[1] === 0x00 && buf[2] === 0x01 && buf[3] === 0x00) { - return { - ext: "ico", - mime: "image/x-icon" - }; - } - - if (buf[0] === 0x46 && buf[1] === 0x4C && buf[2] === 0x56 && buf[3] === 0x01) { - return { - ext: "flv", - mime: "video/x-flv" - }; - } - - if (buf[0] === 0x25 && buf[1] === 
0x21) { - return { - ext: "ps", - mime: "application/postscript" - }; - } - - if (buf[0] === 0xFD && buf[1] === 0x37 && buf[2] === 0x7A && buf[3] === 0x58 && buf[4] === 0x5A && buf[5] === 0x00) { - return { - ext: "xz", - mime: "application/x-xz" - }; - } - - if (buf[0] === 0x53 && buf[1] === 0x51 && buf[2] === 0x4C && buf[3] === 0x69) { - return { - ext: "sqlite", - mime: "application/x-sqlite3" - }; - } - */ - - /** - * - * Added by n1474335 [n1474335@gmail.com] from here on - * - */ - /* - if ((buf[0] === 0x1F && buf[1] === 0x9D) || (buf[0] === 0x1F && buf[1] === 0xA0)) { - return { - ext: "z, tar.z", - mime: "application/x-gtar" - }; - } - - if (buf[0] === 0x7F && buf[1] === 0x45 && buf[2] === 0x4C && buf[3] === 0x46) { - return { - ext: "none, axf, bin, elf, o, prx, puff, so", - mime: "application/x-executable", - desc: "Executable and Linkable Format file. No standard file extension." - }; - } - - if (buf[0] === 0xCA && buf[1] === 0xFE && buf[2] === 0xBA && buf[3] === 0xBE) { - return { - ext: "class", - mime: "application/java-vm" - }; - } - - if (buf[0] === 0xEF && buf[1] === 0xBB && buf[2] === 0xBF) { - return { - ext: "txt", - mime: "text/plain", - desc: "UTF-8 encoded Unicode byte order mark detected, commonly but not exclusively seen in text files." - }; - } - - // Must be before Little-endian UTF-16 BOM - if (buf[0] === 0xFF && buf[1] === 0xFE && buf[2] === 0x00 && buf[3] === 0x00) { - return { - ext: "UTF32LE", - mime: "charset/utf32le", - desc: "Little-endian UTF-32 encoded Unicode byte order mark detected." - }; - } - - if (buf[0] === 0xFF && buf[1] === 0xFE) { - return { - ext: "UTF16LE", - mime: "charset/utf16le", - desc: "Little-endian UTF-16 encoded Unicode byte order mark detected." 
- }; - } - - if ((buf[0x8001] === 0x43 && buf[0x8002] === 0x44 && buf[0x8003] === 0x30 && buf[0x8004] === 0x30 && buf[0x8005] === 0x31) || - (buf[0x8801] === 0x43 && buf[0x8802] === 0x44 && buf[0x8803] === 0x30 && buf[0x8804] === 0x30 && buf[0x8805] === 0x31) || - (buf[0x9001] === 0x43 && buf[0x9002] === 0x44 && buf[0x9003] === 0x30 && buf[0x9004] === 0x30 && buf[0x9005] === 0x31)) { - return { - ext: "iso", - mime: "application/octet-stream", - desc: "ISO 9660 CD/DVD image file" - }; - } - - if (buf[0] === 0xD0 && buf[1] === 0xCF && buf[2] === 0x11 && buf[3] === 0xE0 && buf[4] === 0xA1 && buf[5] === 0xB1 && buf[6] === 0x1A && buf[7] === 0xE1) { - return { - ext: "doc, xls, ppt", - mime: "application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint", - desc: "Microsoft Office documents" - }; - } - - if (buf[0] === 0x64 && buf[1] === 0x65 && buf[2] === 0x78 && buf[3] === 0x0A && buf[4] === 0x30 && buf[5] === 0x33 && buf[6] === 0x35 && buf[7] === 0x00) { - return { - ext: "dex", - mime: "application/octet-stream", - desc: "Dalvik Executable (Android)" - }; - } - - if (buf[0] === 0x4B && buf[1] === 0x44 && buf[2] === 0x4D) { - return { - ext: "vmdk", - mime: "application/vmdk, application/x-virtualbox-vmdk" - }; - } - - if (buf[0] === 0x43 && buf[1] === 0x72 && buf[2] === 0x32 && buf[3] === 0x34) { - return { - ext: "crx", - mime: "application/crx", - desc: "Google Chrome extension or packaged app" - }; - } - - if (buf[0] === 0x78 && (buf[1] === 0x01 || buf[1] === 0x9C || buf[1] === 0xDA || buf[1] === 0x5e)) { - return { - ext: "zlib", - mime: "application/x-deflate" - }; - } - - return null; - */ } @@ -750,198 +138,9 @@ export function isImage(buf) { export function extractFile(bytes, fileDetail, offset) { if (fileDetail.extractor) { const fileData = fileDetail.extractor(bytes, offset); - return new File([fileData], `extracted_at_0x${offset.toString(16)}.${fileDetail.extension}`); + const ext = fileDetail.extension.split(",")[0]; + return new 
File([fileData], `extracted_at_0x${offset.toString(16)}.${ext}`); } throw new Error(`No extraction algorithm available for "${fileDetail.mime}" files`); } - - -/** - * JPEG extractor. - * - * @param {Uint8Array} bytes - * @param {number} offset - * @returns {Uint8Array} - */ -export function extractJPEG(bytes, offset) { - const stream = new Stream(bytes.slice(offset)); - - while (stream.hasMore()) { - const marker = stream.getBytes(2); - if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker); - - let segmentSize = 0; - switch (marker[1]) { - // No length - case 0xd8: // Start of Image - case 0x01: // For temporary use in arithmetic coding - break; - case 0xd9: // End found - return stream.carve(); - - // Variable size segment - case 0xc0: // Start of frame (Baseline DCT) - case 0xc1: // Start of frame (Extended sequential DCT) - case 0xc2: // Start of frame (Progressive DCT) - case 0xc3: // Start of frame (Lossless sequential) - case 0xc4: // Define Huffman Table - case 0xc5: // Start of frame (Differential sequential DCT) - case 0xc6: // Start of frame (Differential progressive DCT) - case 0xc7: // Start of frame (Differential lossless) - case 0xc8: // Reserved for JPEG extensions - case 0xc9: // Start of frame (Extended sequential DCT) - case 0xca: // Start of frame (Progressive DCT) - case 0xcb: // Start of frame (Lossless sequential) - case 0xcc: // Define arithmetic conditioning table - case 0xcd: // Start of frame (Differential sequential DCT) - case 0xce: // Start of frame (Differential progressive DCT) - case 0xcf: // Start of frame (Differential lossless) - case 0xdb: // Define Quantization Table - case 0xde: // Define hierarchical progression - case 0xe0: // Application-specific - case 0xe1: // Application-specific - case 0xe2: // Application-specific - case 0xe3: // Application-specific - case 0xe4: // Application-specific - case 0xe5: // Application-specific - case 0xe6: // Application-specific - case 0xe7: // Application-specific - 
case 0xe8: // Application-specific - case 0xe9: // Application-specific - case 0xea: // Application-specific - case 0xeb: // Application-specific - case 0xec: // Application-specific - case 0xed: // Application-specific - case 0xee: // Application-specific - case 0xef: // Application-specific - case 0xfe: // Comment - segmentSize = stream.readInt(2, "be"); - stream.position += segmentSize - 2; - break; - - // 1 byte - case 0xdf: // Expand reference image - stream.position++; - break; - - // 2 bytes - case 0xdc: // Define number of lines - case 0xdd: // Define restart interval - stream.position += 2; - break; - - // Start scan - case 0xda: // Start of scan - segmentSize = stream.readInt(2, "be"); - stream.position += segmentSize - 2; - stream.continueUntil(0xff); - break; - - // Continue through encoded data - case 0x00: // Byte stuffing - case 0xd0: // Restart - case 0xd1: // Restart - case 0xd2: // Restart - case 0xd3: // Restart - case 0xd4: // Restart - case 0xd5: // Restart - case 0xd6: // Restart - case 0xd7: // Restart - stream.continueUntil(0xff); - break; - - default: - stream.continueUntil(0xff); - break; - } - } - - throw new Error("Unable to parse JPEG successfully"); -} - - -/** - * Portable executable extractor. - * Assumes that the offset refers to an MZ header. 
- * - * @param {Uint8Array} bytes - * @param {number} offset - * @returns {Uint8Array} - */ -export function extractMZPE(bytes, offset) { - const stream = new Stream(bytes.slice(offset)); - - // Move to PE header pointer - stream.moveTo(0x3c); - const peAddress = stream.readInt(4, "le"); - - // Move to PE header - stream.moveTo(peAddress); - - // Get number of sections - stream.moveForwardsBy(6); - const numSections = stream.readInt(2, "le"); - - // Get optional header size - stream.moveForwardsBy(12); - const optionalHeaderSize = stream.readInt(2, "le"); - - // Move past optional header to section header - stream.moveForwardsBy(2 + optionalHeaderSize); - - // Move to final section header - stream.moveForwardsBy((numSections - 1) * 0x28); - - // Get raw data info - stream.moveForwardsBy(16); - const rawDataSize = stream.readInt(4, "le"); - const rawDataAddress = stream.readInt(4, "le"); - - // Move to end of final section - stream.moveTo(rawDataAddress + rawDataSize); - - return stream.carve(); -} - - -/** - * PDF extractor. - * - * @param {Uint8Array} bytes - * @param {number} offset - * @returns {Uint8Array} - */ -export function extractPDF(bytes, offset) { - const stream = new Stream(bytes.slice(offset)); - - // Find end-of-file marker (%%EOF) - stream.continueUntil([0x25, 0x25, 0x45, 0x4f, 0x46]); - stream.moveForwardsBy(5); - stream.consumeIf(0x0d); - stream.consumeIf(0x0a); - - return stream.carve(); -} - - -/** - * ZIP extractor. - * - * @param {Uint8Array} bytes - * @param {number} offset - * @returns {Uint8Array} - */ -export function extractZIP(bytes, offset) { - const stream = new Stream(bytes.slice(offset)); - - // Find End of central directory record - stream.continueUntil([0x50, 0x4b, 0x05, 0x06]); - - // Get comment length and consume - stream.moveForwardsBy(20); - const commentLength = stream.readInt(2, "le"); - stream.moveForwardsBy(commentLength); - - return stream.carve(); -}