mirror of
https://github.com/vichan-devel/vichan.git
synced 2025-02-17 11:28:41 +01:00
post.php: refactor image OCR into function
This commit is contained in:
parent
4439f1736a
commit
9ae988dd6f
48
post.php
48
post.php
@ -143,6 +143,28 @@ function download_file_from_url($file_url, $request_timeout, $allowed_extensions
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Try to extract text from the given image.
 *
 * Pipes the output of the configured preprocess command into tesseract and returns what the pipeline prints.
 *
 * @param array $config Instance configuration. Reads 'tesseract_preprocess_command' (a sprintf format that
 *                      receives the shell-escaped image path) and 'tesseract_params' (extra tesseract arguments).
 * @param string $img_path The file path to the image.
 * @return string Returns the extracted text with surrounding whitespace trimmed. May be an empty string.
 * @throws RuntimeException Thrown when shell_exec_error() returns false for the tesseract pipeline.
 */
function ocr_image(array $config, string $img_path): string {
	// The default preprocess command is an ImageMagick b/w quantization.
	$ret = shell_exec_error(
		sprintf($config['tesseract_preprocess_command'], escapeshellarg($img_path))
		// The trailing space keeps the extra parameters from being glued onto the redirection target.
		. ' | tesseract stdin stdout 2>/dev/null '
		. $config['tesseract_params']
	);
	if ($ret === false) {
		throw new RuntimeException('Unable to run tesseract');
	}

	return trim($ret);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Method handling functions
|
* Method handling functions
|
||||||
*/
|
*/
|
||||||
@ -1086,7 +1108,6 @@ if (isset($_POST['delete'])) {
|
|||||||
$image->destroy();
|
$image->destroy();
|
||||||
} else {
|
} else {
|
||||||
// not an image
|
// not an image
|
||||||
//copy($config['file_thumb'], $post['thumb']);
|
|
||||||
$file['thumb'] = 'file';
|
$file['thumb'] = 'file';
|
||||||
|
|
||||||
$size = @getimagesize(sprintf($config['file_thumb'],
|
$size = @getimagesize(sprintf($config['file_thumb'],
|
||||||
@ -1104,23 +1125,18 @@ if (isset($_POST['delete'])) {
|
|||||||
$fname = $file['thumb'];
|
$fname = $file['thumb'];
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($fname == 'spoiler') { // We don't have that much CPU time, do we?
|
if ($fname !== 'spoiler') { // We don't have that much CPU time, do we?
|
||||||
}
|
try {
|
||||||
else {
|
$txt = ocr_image($config, $fname);
|
||||||
$tmpname = "tmp/tesseract/".rand(0,10000000);
|
if ($txt !== '') {
|
||||||
|
|
||||||
// Preprocess command is an ImageMagick b/w quantization
|
|
||||||
$error = shell_exec_error(sprintf($config['tesseract_preprocess_command'], escapeshellarg($fname)) . " | " .
|
|
||||||
'tesseract stdin '.escapeshellarg($tmpname).' '.$config['tesseract_params']);
|
|
||||||
$tmpname .= ".txt";
|
|
||||||
|
|
||||||
$value = @file_get_contents($tmpname);
|
|
||||||
@unlink($tmpname);
|
|
||||||
|
|
||||||
if ($value && trim($value)) {
|
|
||||||
// This one has an effect, that the body is appended to a post body. So you can write a correct
|
// This one has an effect, that the body is appended to a post body. So you can write a correct
|
||||||
// spamfilter.
|
// spamfilter.
|
||||||
$post['body_nomarkup'] .= "<tinyboard ocr image $key>".htmlspecialchars($value)."</tinyboard>";
|
// $txt holds the text returned by ocr_image(); the old $value variable no longer exists after the refactor.
$post['body_nomarkup'] .= "<tinyboard ocr image $key>" . htmlspecialchars($txt) . "</tinyboard>";
|
||||||
|
}
|
||||||
|
} catch (RuntimeException $e) {
|
||||||
|
if ($config['syslog']) {
|
||||||
|
_syslog(LOG_ERR, "Could not OCR image: {$e->getMessage()}");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user