Merge pull request #20 from Mikescher/feature_xpath
Added operations 'XPath expression' and 'CSS selector'
This commit is contained in:
commit
650fd9a940
@ -130,6 +130,7 @@ module.exports = function(grunt) {
|
|||||||
"src/js/lib/vkbeautify.js",
|
"src/js/lib/vkbeautify.js",
|
||||||
"src/js/lib/Sortable.js",
|
"src/js/lib/Sortable.js",
|
||||||
"src/js/lib/bootstrap-colorpicker.js",
|
"src/js/lib/bootstrap-colorpicker.js",
|
||||||
|
"src/js/lib/xpath.js",
|
||||||
|
|
||||||
// Custom libraries
|
// Custom libraries
|
||||||
"src/js/lib/canvas_components.js",
|
"src/js/lib/canvas_components.js",
|
||||||
|
@ -189,6 +189,8 @@ const Categories = [
|
|||||||
"Extract file paths",
|
"Extract file paths",
|
||||||
"Extract dates",
|
"Extract dates",
|
||||||
"Regular expression",
|
"Regular expression",
|
||||||
|
"XPath expression",
|
||||||
|
"CSS selector",
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1938,6 +1938,42 @@ const OperationConfig = {
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"XPath expression": {
|
||||||
|
description: "Extract information from an xml document with an XPath query",
|
||||||
|
run: Extract.run_xpath,
|
||||||
|
input_type: "string",
|
||||||
|
output_type: "string",
|
||||||
|
args: [
|
||||||
|
{
|
||||||
|
name: "XPath",
|
||||||
|
type: "string",
|
||||||
|
value: Extract.XPATH_INITIAL
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Result delimiter",
|
||||||
|
type: "binary_short_string",
|
||||||
|
value: Extract.XPATH_DELIMITER
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"CSS selector": {
|
||||||
|
description: "Extract information from an HTML document with an CSS selector",
|
||||||
|
run: Extract.run_css_query,
|
||||||
|
input_type: "string",
|
||||||
|
output_type: "string",
|
||||||
|
args: [
|
||||||
|
{
|
||||||
|
name: "CSS selector",
|
||||||
|
type: "string",
|
||||||
|
value: Extract.SELECTOR_INITIAL
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "Delimiter",
|
||||||
|
type: "binary_short_string",
|
||||||
|
value: Extract.CSS_QUERY_DELIMITER
|
||||||
|
},
|
||||||
|
]
|
||||||
|
},
|
||||||
"From UNIX Timestamp": {
|
"From UNIX Timestamp": {
|
||||||
description: "Converts a UNIX timestamp to a datetime string.<br><br>e.g. <code>978346800</code> becomes <code>Mon 1 January 2001 11:00:00 UTC</code>",
|
description: "Converts a UNIX timestamp to a datetime string.<br><br>e.g. <code>978346800</code> becomes <code>Mon 1 January 2001 11:00:00 UTC</code>",
|
||||||
run: DateTime.run_from_unix_timestamp,
|
run: DateTime.run_from_unix_timestamp,
|
||||||
|
8466
src/js/lib/xpath.js
Normal file
8466
src/js/lib/xpath.js
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,5 @@
|
|||||||
|
/* globals xpath */
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Identifier extraction operations.
|
* Identifier extraction operations.
|
||||||
*
|
*
|
||||||
@ -10,7 +12,7 @@
|
|||||||
var Extract = {
|
var Extract = {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runs search operations across the input data using refular expressions.
|
* Runs search operations across the input data using regular expressions.
|
||||||
*
|
*
|
||||||
* @private
|
* @private
|
||||||
* @param {string} input
|
* @param {string} input
|
||||||
@ -294,4 +296,117 @@ var Extract = {
|
|||||||
return output;
|
return output;
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @constant
|
||||||
|
* @default
|
||||||
|
*/
|
||||||
|
// Default value of the "XPath" argument of the "XPath expression" operation: an empty query.
XPATH_INITIAL: "",
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @constant
|
||||||
|
* @default
|
||||||
|
*/
|
||||||
|
// Default value of the "Result delimiter" argument of the "XPath expression" operation.
// The literal is a backslash followed by "n" (two characters), not a newline character;
// presumably the binary_short_string argument type decodes the escape - TODO confirm.
XPATH_DELIMITER: "\\n",
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract information (from an XML document) with an XPath query
|
||||||
|
*
|
||||||
|
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
|
||||||
|
*
|
||||||
|
* @param {string} input
|
||||||
|
* @param {Object[]} args
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
run_xpath:function(input, args) {
|
||||||
|
const query = args[0];
|
||||||
|
const delimiter = args[1];
|
||||||
|
|
||||||
|
var xml;
|
||||||
|
try {
|
||||||
|
xml = $.parseXML(input);
|
||||||
|
} catch (err) {
|
||||||
|
return "Invalid input XML.";
|
||||||
|
}
|
||||||
|
|
||||||
|
var result;
|
||||||
|
try {
|
||||||
|
result = xpath.evaluate(xml, query);
|
||||||
|
} catch (err) {
|
||||||
|
return "Invalid XPath. Details:\n" + err.message;
|
||||||
|
}
|
||||||
|
|
||||||
|
const serializer = new XMLSerializer();
|
||||||
|
const nodeToString = function(node) {
|
||||||
|
switch (node.nodeType) {
|
||||||
|
case Node.ELEMENT_NODE: return serializer.serializeToString(node);
|
||||||
|
case Node.ATTRIBUTE_NODE: return node.value;
|
||||||
|
case Node.COMMENT_NODE: return node.data;
|
||||||
|
case Node.DOCUMENT_NODE: return serializer.serializeToString(node);
|
||||||
|
default: throw new Error("Unknown Node Type: " + node.nodeType);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return Object.values(result).slice(0, -1) // all values except last (length)
|
||||||
|
.map(nodeToString)
|
||||||
|
.join(delimiter);
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @constant
|
||||||
|
* @default
|
||||||
|
*/
|
||||||
|
// Default value of the "CSS selector" argument of the "CSS selector" operation: an empty selector.
SELECTOR_INITIAL: "",
|
||||||
|
/**
|
||||||
|
* @constant
|
||||||
|
* @default
|
||||||
|
*/
|
||||||
|
// Default value of the "Delimiter" argument of the "CSS selector" operation.
// The literal is a backslash followed by "n" (two characters), not a newline character;
// presumably the binary_short_string argument type decodes the escape - TODO confirm.
CSS_QUERY_DELIMITER: "\\n",
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract information (from an HTML document) with a CSS selector
|
||||||
|
*
|
||||||
|
* @author Mikescher (https://github.com/Mikescher | https://mikescher.com)
|
||||||
|
*
|
||||||
|
* @param {string} input
|
||||||
|
* @param {Object[]} args
|
||||||
|
* @returns {string}
|
||||||
|
*/
|
||||||
|
run_css_query: function(input, args) {
|
||||||
|
const query = args[0];
|
||||||
|
const delimiter = args[1];
|
||||||
|
|
||||||
|
var html;
|
||||||
|
try {
|
||||||
|
html = $.parseHTML(input);
|
||||||
|
} catch (err) {
|
||||||
|
return "Invalid input HTML.";
|
||||||
|
}
|
||||||
|
|
||||||
|
var result;
|
||||||
|
try {
|
||||||
|
result = $(html).find(query);
|
||||||
|
} catch (err) {
|
||||||
|
return "Invalid CSS Selector. Details:\n" + err.message;
|
||||||
|
}
|
||||||
|
|
||||||
|
const nodeToString = function(node) {
|
||||||
|
switch (node.nodeType) {
|
||||||
|
case Node.ELEMENT_NODE: return node.outerHTML;
|
||||||
|
case Node.ATTRIBUTE_NODE: return node.value;
|
||||||
|
case Node.COMMENT_NODE: return node.data;
|
||||||
|
case Node.TEXT_NODE: return node.wholeText;
|
||||||
|
case Node.DOCUMENT_NODE: return node.outerHTML;
|
||||||
|
default: throw new Error("Unknown Node Type: " + node.nodeType);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
return Array.apply(null, Array(result.length))
|
||||||
|
.map(function(_, i) {
|
||||||
|
return result[i];
|
||||||
|
})
|
||||||
|
.map(nodeToString)
|
||||||
|
.join(delimiter);
|
||||||
|
},
|
||||||
|
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user