1
0
mirror of synced 2024-11-25 08:00:21 +01:00
OpenTaiko/TJAPlayer3/Songs/CDTXCompanionFileFinder.cs
2021-09-21 00:16:38 +02:00

228 lines
11 KiB
C#

using System.Diagnostics;
using System.IO;
using System.Text;
namespace TJAPlayer3
{
internal static class CDTXCompanionFileFinder
{
internal static string FindFileName(
string directory,
string mainFileName,
string expectedCompanionFileName)
{
var expectedCompanionPath = Path.Combine(directory, expectedCompanionFileName);
if (File.Exists(expectedCompanionPath))
{
return expectedCompanionFileName;
}
// If we could not find the file by its exact provided name, in
// the vast majority of cases it has been mangled during zip
// compression by a zip tool which is not properly aware of
// multi-byte encodings, Unicode, etc. When decompressed, such
// zipped files end up a file names which are simply the raw bytes
// of the Shift-JIS encoded form. Some of these bytes will be
// invalid as characters of file names and will have been further
// mangled, usually to a single underscore character.
// To begin finding the right file, we first need to get the raw
// bytes that would comprise the file name if encoded into
// Shift-JIS.
var encodedCompanionFileNameBytes = Encoding.GetEncoding("Shift_JIS").GetBytes(expectedCompanionFileName);
// Here we have a helper method that will be used to try finding
// the file by interpreting the byte representation encoded
// just above, this time in terms of some other encoding which
// might be in use in the user's file system.
bool TryFindViaDecodedFileName(string prefix, Encoding encoding, out string foundCompanionFileName)
{
var decodedCompanionFileName = DecodeToLegalFileName(encodedCompanionFileNameBytes, encoding);
try
{
if (!File.Exists(Path.Combine(directory, decodedCompanionFileName)))
{
foundCompanionFileName = null;
return false;
}
}
catch
{
Trace.TraceWarning(
$"{nameof(CDTXCompanionFileFinder)} could not find expected file '{expectedCompanionPath}' and could not check the existence of a file via {prefix} '{encoding.EncodingName}'. Possible illegal file path when combining directory '{directory}' with encoded file name '{decodedCompanionFileName}'.");
foundCompanionFileName = null;
return false;
}
Trace.TraceInformation(
$"{nameof(CDTXCompanionFileFinder)} could not find expected file '{expectedCompanionPath}' but found '{decodedCompanionFileName}' via {prefix} '{encoding.EncodingName}', Code Page {encoding.CodePage}, Windows Code Page {encoding.WindowsCodePage}.");
foundCompanionFileName = decodedCompanionFileName;
return true;
}
// Attempt to find the file as if the companion file's name was
// mangled into codepage 437 (effectively the legacy DOS codepage,
// and the one used by zip tools that are not unicode aware.)
// This step finds >99% of files with mangled names.
if (TryFindViaDecodedFileName(
"Encoding.GetEncoding(437)",
Encoding.GetEncoding(437),
out var foundCompanionFileNameViaEncoding437))
{
return foundCompanionFileNameViaEncoding437;
}
// Attempt to find the file as if the companion file's name
// was mangled into this computer's default encoding. This case
// has not been observed during testing on US English computers,
// but it is safe to perform and may assist other locales.
if (TryFindViaDecodedFileName(
"Encoding.Default",
Encoding.Default,
out var foundCompanionFileNameViaEncodingDefault))
{
return foundCompanionFileNameViaEncodingDefault;
}
// If the companion file still cannot be found, try to find a file
// with the expected extension but having the same file name as the
// main file with which it is associated (in most use cases: the .tja file.)
if (TryFindViaMainFileName(
directory,
mainFileName,
expectedCompanionPath,
out var foundCompanionFileNameByMainFileName))
{
return foundCompanionFileNameByMainFileName;
}
// If the file still cannot be found, try to find a single file
// with the expected supplementary file extension. (If more than
// one file is found with the same extension, we can't reliably
// choose the right one of them.)
if (TryFindViaCompanionFileExtension(
directory,
expectedCompanionPath,
out var foundCompanionFileNameByExtension))
{
return foundCompanionFileNameByExtension;
}
// If the file still cannot be found, produce a warning
// and return the original file name unchanged.
Trace.TraceWarning(
$"{nameof(CDTXCompanionFileFinder)} could not find expected file '{expectedCompanionPath}' by any available means.");
return expectedCompanionFileName;
}
private static string DecodeToLegalFileName(byte[] encodedBytes, Encoding encoding)
{
// Decode and then replace characters which are illegal in file
// names in all locales, except for the backslash character which
// will be handled immediately after this.
var decodedBeforeDirectoryRemoval = encoding.GetString(encodedBytes)
.Replace('<', '_')
.Replace('>', '_')
.Replace(':', '_')
.Replace('"', '_')
.Replace('/', '_')
.Replace('|', '_')
.Replace('?', '_')
.Replace('*', '_');
// During decompression of incorrectly-generated zip files,
// Shift-JIS characters which encode to a representation that
// includes a backslash result in the decompressor placing the files
// in a subdirectory (or even subdirectories) based on characters
// before and between all backslashes, and then names the file based
// on the characters appearing after the final backslash. In these
// cases, we're already parsing files in one of those generated
// subdirectories and have only to deal with the file names having
// been abbreviated. We can usually find such files in the
// applicable directory via the substring after the final backslash.
var lastIndexOfBackslash = decodedBeforeDirectoryRemoval.LastIndexOf('\\');
return lastIndexOfBackslash == -1
? decodedBeforeDirectoryRemoval
: decodedBeforeDirectoryRemoval.Substring(lastIndexOfBackslash + 1);
}
private static bool TryFindViaMainFileName(
string directory,
string mainFileName,
string expectedCompanionPath,
out string foundCompanionFileName)
{
var mainFilePath = Path.Combine(directory, mainFileName);
var companionFileExtension = Path.GetExtension(expectedCompanionPath);
var mainFilePathWithCompanionFileExtension =
Path.ChangeExtension(mainFilePath, companionFileExtension);
// Whether mangled or not, most companion files have names which
// match the name of the main file, except for the difference in
// the file extension. We can check for these by determining what
// the file might be called when the extension is replaced with the
// appropriate one and then check for the existence of that file.
var mainFileNameWithCompanionFileExtension =
Path.GetFileName(mainFilePathWithCompanionFileExtension);
if (File.Exists(mainFilePathWithCompanionFileExtension))
{
Trace.TraceInformation(
$"{nameof(CDTXCompanionFileFinder)} could not find expected file '{expectedCompanionPath}' but found '{mainFileNameWithCompanionFileExtension}' by matching the '{mainFileName}' file name with the expected file extension.");
foundCompanionFileName = mainFileNameWithCompanionFileExtension;
return true;
}
foundCompanionFileName = null;
return false;
}
private static bool TryFindViaCompanionFileExtension(
string directory,
string expectedCompanionPath,
out string foundCompanionFileName)
{
var companionFileExtension = Path.GetExtension(expectedCompanionPath);
if (string.IsNullOrEmpty(companionFileExtension))
{
Trace.TraceWarning(
$"{nameof(CDTXCompanionFileFinder)} could not find expected file '{expectedCompanionPath}' and could not search for appropriate sibling files because this file has no extension.");
}
else
{
// If no more precise approach can find the right file, we can
// usually safely find it by looking for any file with the
// expected file extension in the same folder as the main file.
// However, if someone extracts a collection of songs into a
// single folder, we will see many files with the expected
// extension. Therefore, we will only treat the file as found
// if there is one and only one file with the expected file
// extension within in the directory in question.
var filesWithTheCompanionFileExtension =
Directory.GetFiles(directory, "*" + companionFileExtension);
if (filesWithTheCompanionFileExtension.Length == 1)
{
var foundCompanionFilePath = filesWithTheCompanionFileExtension[0];
foundCompanionFileName = Path.GetFileName(foundCompanionFilePath);
Trace.TraceInformation(
$"{nameof(CDTXCompanionFileFinder)} could not find expected file '{expectedCompanionPath}' but found '{foundCompanionFileName}' by searching for a single sibling file with the expected extension.");
return true;
}
}
foundCompanionFileName = null;
return false;
}
}
}