1#ifndef _MBXMLUTILSHELPER_DOM_H_
2#define _MBXMLUTILSHELPER_DOM_H_
4#include <boost/algorithm/string/trim.hpp>
5#include <fmatvec/atom.h>
12#include <boost/filesystem.hpp>
13#include <xercesc/dom/DOMErrorHandler.hpp>
14#include <xercesc/dom/DOMElement.hpp>
15#include <xercesc/dom/DOMText.hpp>
16#include <xercesc/dom/DOMLSParser.hpp>
17#include <xercesc/dom/DOMLocator.hpp>
18#include <xercesc/dom/DOMUserDataHandler.hpp>
19#include <xercesc/dom/DOMDocument.hpp>
20#include <xercesc/util/XMLEntityResolver.hpp>
21#include <xercesc/framework/psvi/PSVIHandler.hpp>
22#include <boost/lexical_cast.hpp>
23#include <boost/container/small_vector.hpp>
24#include <boost/functional/hash.hpp>
25#include <fmatvec/toString.h>
28namespace XERCES_CPP_NAMESPACE {
29 class DOMProcessingInstruction;
30 class DOMImplementation;
31 class AbstractDOMParser;
35 template<> std::vector<double> lexical_cast(
const std::string& str);
36 template<> std::vector<std::vector<double>> lexical_cast(
const std::string& str);
37 template<> std::vector<int> lexical_cast(
const std::string& str);
38 template<> std::vector<std::vector<int>> lexical_cast(
const std::string& str);
39 template<>
bool lexical_cast<bool>(
const std::string& arg);
46 static void check(
const xercesc::DOMElement *me,
const T &value,
int r,
int c);
49struct CheckSize<std::vector<T>> {
50 static void check(
const xercesc::DOMElement *me,
const std::vector<T> &value,
int r,
int c);
53struct CheckSize<std::vector<std::vector<T>>> {
54 static void check(
const xercesc::DOMElement *me,
const std::vector<std::vector<T>> &value,
int r,
int c);
63template<
class T>
using XercesUniquePtr = std::unique_ptr<T, XercesUniquePtrDeleter<T>>;
66template<
typename DOMDocumentType>
68template<
typename DOMDocumentType>
70template<
typename DOMElementType>
72template<
typename DOMElementType>
79 xercesc::XMLPlatformUtils::Initialize();
82 xercesc::XMLPlatformUtils::Terminate();
90 const XMLCh *operator%(
const std::string &in) {
91 auto &out = store.emplace_back(in.size()*2+1);
92 auto outSize = simdutf::convert_valid_utf8_to_utf16(in.data(), in.size(), out.data());
96 const XMLCh *operator()(
const std::string &in) {
return operator%(in); }
97 std::string operator%(
const XMLCh *in) {
100 auto inSize = std::char_traits<char16_t>::length(in);
101 std::string out(inSize*4, 0);
102 auto outSize = simdutf::convert_valid_utf16_to_utf8(in, inSize, out.data());
106 std::string operator()(
const XMLCh *in) {
return operator%(in); }
109 boost::container::small_vector<boost::container::small_vector<char16_t, 100*2+1>, 1> store;
114class FQN :
public std::pair<std::string, std::string> {
119 FQN(
const std::string &name) : std::pair<std::string, std::string>(
"", name) {}
121 FQN(
const char *name) : std::pair<std::string, std::string>(
"", name) {}
123 FQN(
const std::string &ns,
const std::string &name) : std::pair<std::string, std::string>(ns, name) {}
130 EmbedDOMLocator(
const boost::filesystem::path &file_,
int row_,
int embedCount_, std::string xpath_) : DOMLocator(),
131 file(x%file_.string()), row(row_), embedCount(embedCount_), xpath(std::move(xpath_)) {}
133 file(x%(
X()%src.file)), row(src.row), embedCount(src.embedCount), xpath(src.xpath) {}
135 file=x%(
X()%src.file);
137 embedCount=src.embedCount;
144 XMLFileLoc getLineNumber()
const override {
return row; }
145 XMLFileLoc getColumnNumber()
const override {
return 0; }
146 XMLFilePos getByteOffset()
const override {
return ~(XMLFilePos(0)); }
147 XMLFilePos getUtf16Offset()
const override {
return ~(XMLFilePos(0)); }
148 xercesc::DOMNode *getRelatedNode()
const override {
return nullptr; }
149 const XMLCh *getURI()
const override {
return file; }
150 int getEmbedCount()
const {
return embedCount; }
155 static void addNSURIPrefix(std::string nsuri,
const std::vector<std::string> &prefix);
156 static const std::map<std::string, std::string>& getNSURIPrefix() {
return nsURIPrefix(); }
163 static std::map<std::string, std::string>& nsURIPrefix();
169 NamespaceURI(std::string nsuri_,
const std::vector<std::string> &preferredPrefix={}) : nsuri(std::move(nsuri_)) {
170 EmbedDOMLocator::addNSURIPrefix(nsuri, preferredPrefix);
172 FQN operator%(
const std::string &localName)
const {
return {nsuri, localName}; }
173 const std::string& getNamespaceURI()
const {
return nsuri; }
179const NamespaceURI XINCLUDE(
"http://www.w3.org/2001/XInclude", {
"xi",
"xinc",
"xinclude"});
181const NamespaceURI XMLNS(
"http://www.w3.org/2000/xmlns/", {
"xmlns"});
183const NamespaceURI PV(
"http://www.mbsim-env.de/MBXMLUtils", {
"p",
"pv",
"mbxmlutils"});
185const NamespaceURI XMLCATALOG(
"urn:oasis:names:tc:entity:xmlns:xml:catalog", {
"catalog",
"xmlcatalog"});
189#define RETHROW_AS_DOMEVALEXCEPTION(e) \
190 catch(MBXMLUtils::DOMEvalException &ex) { \
193 catch(const std::exception &ex) { \
194 throw DOMEvalException(ex.what(), e); \
205 void appendContext(
const xercesc::DOMNode *n,
int lineNr=0);
206 const std::string& getMessage()
const {
return errorMsg; }
207 void setMessage(
const std::string& errorMsg_) { errorMsg=errorMsg_; }
208 void setSubsequentError(
bool sse) { subsequentError=sse; }
209 const char* what()
const noexcept override;
210 xercesc::DOMNode::NodeType getNodeType()
const {
return nodeType; }
211 static bool isHTMLOutputEnabled();
212 static void htmlEscaping(std::string &msg);
214 DOMEvalException(
const std::string &errorMsg_,
const xercesc::DOMLocator &loc);
252 bool subsequentError{
false};
253 std::string errorMsg;
254 std::vector<EmbedDOMLocator> locationStack;
255 mutable std::string whatStr;
256 xercesc::DOMNode::NodeType nodeType {
static_cast<xercesc::DOMNode::NodeType
>(-1) };
264 bool handleError(
const xercesc::DOMError&)
override;
265 bool hasError() {
return errorSet; }
267 void resetError() { errorSet=
false; }
269 bool errorSet{
false};
274template<
typename DOMElementType>
296 std::string
getEmbedData(
const std::string &name)
const;
299 void addEmbedData(
const std::string &name,
const std::string &data);
315 template<
class T> T
getText(
int r=0,
int c=0)
const {
317 auto textEle=E(me)->getFirstTextChild();
319 if constexpr(std::is_same_v<T, std::string>) {
320 for(
auto *n=me->getFirstChild(); n; n=n->getNextSibling())
321 if(n->getNodeType()==xercesc_3_2::DOMNode::TEXT_NODE || n->getNodeType()==xercesc_3_2::DOMNode::CDATA_SECTION_NODE)
322 if(!boost::trim_copy(
X()%
static_cast<xercesc::DOMText*
>(n)->getData()).empty())
323 throw std::runtime_error(
"There must be no or a single, none empty, text node but the text node is split by a comment or processing-instruction node.");
327 throw std::runtime_error(
"There must be a single, none empty, text node but either, no text node exists at all, or the text node is split by a comment or processing-instruction node.");
329 auto text=
X()%textEle->getData();
330 auto ret=boost::lexical_cast<T>(text);
331 CheckSize<T>::check(me, ret, r, c);
334 catch(
const boost::bad_lexical_cast &ex) {
337 catch(
const std::exception &ex) {
342 template<
class T>
void addElementText(
const FQN &name,
const T &value) {
343 xercesc::DOMElement *ele=D(me->getOwnerDocument())->createElement(name);
344 ele->insertBefore(me->getOwnerDocument()->createTextNode(
MBXMLUtils::X()%fmatvec::toString(value)),
nullptr);
345 me->insertBefore(ele,
nullptr);
356 const xercesc::DOMElement *&found=DOMElementWrapper<DOMElementType>::dummyArg)
const;
364 boost::filesystem::path
convertPath(
const boost::filesystem::path &relPath)
const;
403 me->setAttributeNS(
X()%name.first,
X()%name.second,
X()%fmatvec::toString(value));
412 typename std::conditional<std::is_same<DOMElementType, const xercesc::DOMElement>::value,
416 static const xercesc::DOMElement *dummyArg;
421template<
typename DOMElementType>
422DOMElementWrapper<DOMElementType> E(DOMElementType *me) {
return DOMElementWrapper<DOMElementType>(me); }
424template<
typename DOMElementType>
425DOMElementWrapper<DOMElementType> E(
const std::shared_ptr<DOMElementType> &me) {
return DOMElementWrapper<DOMElementType>(me.get()); }
427template<
typename DOMElementType>
428DOMElementWrapper<DOMElementType> E(
const XercesUniquePtr<DOMElementType> &me) {
return DOMElementWrapper<DOMElementType>(me.get()); }
430template<>
const xercesc::DOMElement *DOMElementWrapper< xercesc::DOMElement>::dummyArg;
431template<>
const xercesc::DOMElement *DOMElementWrapper<const xercesc::DOMElement>::dummyArg;
434template<
typename DOMAttrType>
446 typename std::conditional<std::is_same<DOMAttrType, const xercesc::DOMAttr>::value,
454template<
typename DOMAttrType>
455DOMAttrWrapper<DOMAttrType> A(DOMAttrType *me) {
return DOMAttrWrapper<DOMAttrType>(me); }
457template<
typename DOMAttrType>
458DOMAttrWrapper<DOMAttrType> A(
const std::shared_ptr<DOMAttrType> &me) {
return DOMAttrWrapper<DOMAttrType>(me.get()); }
460template<
typename DOMAttrType>
461DOMAttrWrapper<DOMAttrType> A(
const XercesUniquePtr<DOMAttrType> &me) {
return DOMAttrWrapper<DOMAttrType>(me.get()); }
466template<
typename DOMDocumentType>
475 XercesUniquePtr<xercesc::DOMElement>
validate();
480 std::shared_ptr<DOMParser>
getParser()
const;
491 xercesc::DOMElement*
locateElement(
const std::vector<int> &idx)
const;
493 typename std::conditional<std::is_same<DOMDocumentType, const xercesc::DOMDocument>::value,
501template<
typename DOMDocumentType>
502DOMDocumentWrapper<DOMDocumentType> D(DOMDocumentType *me) {
return DOMDocumentWrapper<DOMDocumentType>(me); }
504template<
typename DOMDocumentType>
505DOMDocumentWrapper<DOMDocumentType> D(
const std::shared_ptr<DOMDocumentType> &me) {
return DOMDocumentWrapper<DOMDocumentType>(me.get()); }
507template<
typename DOMDocumentType>
508DOMDocumentWrapper<DOMDocumentType> D(
const XercesUniquePtr<DOMDocumentType> &me) {
return DOMDocumentWrapper<DOMDocumentType>(me.get()); }
512 void setParser(
DOMParser *parser_) { parser=parser_; }
513 xercesc::DOMLSParserFilter::FilterAction acceptNode(xercesc::DOMNode *n)
override;
514 xercesc::DOMLSParserFilter::FilterAction startElement(xercesc::DOMElement *e)
override;
515 xercesc::DOMNodeFilter::ShowType getWhatToShow()
const override;
516 void setLineNumberOffset(
int offset) { lineNumberOffset=offset; }
519 int lineNumberOffset { 0 };
524 void setParser(
DOMParser *parser_) { parser=parser_; }
525 void handleElementPSVI(
const XMLCh *localName,
const XMLCh *uri, xercesc::PSVIElement *info)
override;
526 void handleAttributesPSVI(
const XMLCh *localName,
const XMLCh *uri, xercesc::PSVIAttributeList *psviAttributes)
override;
533 void handle(DOMOperationType operation,
const XMLCh* key,
void *data,
const xercesc::DOMNode *src, xercesc::DOMNode *dst)
override;
540 void setParser(
DOMParser *parser_) { parser=parser_; }
541 xercesc::InputSource* resolveEntity(xercesc::XMLResourceIdentifier *resourceIdentifier)
override;
547class DOMParser :
public std::enable_shared_from_this<DOMParser> {
559 static std::shared_ptr<DOMParser>
create(const std::variant<boost::filesystem::path, xercesc::DOMElement*> &xmlCatalog=static_cast<xercesc::DOMElement*>(nullptr));
563 std::shared_ptr<xercesc::DOMDocument>
parse(const boost::filesystem::path &inputSource,
564 std::vector<boost::filesystem::path> *dependencies=nullptr,
565 bool doXInclude=true);
569 std::shared_ptr<xercesc::DOMDocument>
parse( std::istream &inputStream,
570 std::vector<boost::filesystem::path> *dependencies=nullptr,
571 bool doXInclude=true);
575 xercesc::DOMElement*
parseWithContext(const std::string &str, xercesc::DOMNode *contextNode, xercesc::DOMLSParser::ActionType action,
576 std::vector<boost::filesystem::path> *dependencies=nullptr,
577 bool doXInclude=true);
580 static void
serialize(xercesc::DOMNode *n, const boost::filesystem::path &outputSource);
589 const std::unordered_map<FQN, xercesc::XSTypeDefinition*, boost::hash<FQN>>& getTypeMap() const {
return typeMap; }
591 xercesc::DOMImplementation *domImpl;
592 DOMParser(
const std::variant<boost::filesystem::path, xercesc::DOMElement*> &xmlCatalog);
593 std::shared_ptr<xercesc::DOMLSParser> parser;
594 std::unordered_map<FQN, xercesc::XSTypeDefinition*, boost::hash<FQN>> typeMap;
599 std::map<std::string, boost::filesystem::path> registeredGrammar;
601 static void handleXInclude(xercesc::DOMElement *&e, std::vector<boost::filesystem::path> *dependencies);
609void CheckSize<T>::check(
const xercesc::DOMElement *me,
const T &value,
int r,
int c) {}
611void CheckSize<std::vector<T>>::check(
const xercesc::DOMElement *me,
const std::vector<T> &value,
int r,
int c) {
612 if(r!=0 && r!=
static_cast<int>(value.size()))
614 " but got vector of size "+fmatvec::toString(value.size())+
".", me);
617void CheckSize<std::vector<std::vector<T>>>::check(
const xercesc::DOMElement *me,
const std::vector<std::vector<T>> &value,
int r,
int c) {
618 if(r!=0 && r!=
static_cast<int>(value.size()))
620 " but got matrix of row-size "+fmatvec::toString(value.size())+
".", me);
621 if(!value.empty() && c!=0 && c!=
static_cast<int>(value[0].size()))
623 " but got matrix of col-size "+fmatvec::toString(value[0].size())+
".", me);
Helper class for extending DOMAttr (use the function A(...)).
Definition: dom.h:435
DOMAttrWrapper(DOMAttrType *me_)
Wrap DOMAttr to my special element.
Definition: dom.h:438
std::string getRootXPathExpression() const
Definition: dom.cc:742
bool isDerivedFrom(const FQN &baseTypeName) const
Definition: dom.cc:736
std::conditional< std::is_same< DOMAttrType, const xercesc::DOMAttr >::value, const DOMAttrWrapper *, DOMAttrWrapper * >::type operator->()
Treat this object as a pointer (like DOMAttr*)
Definition: dom.h:447
Helper class for extending DOMDocument (use the function D(...)).
Definition: dom.h:467
xercesc::DOMElement * locateElement(const std::vector< int > &idx) const
Definition: dom.cc:874
boost::filesystem::path getDocumentFilename() const
Definition: dom.cc:807
xercesc::DOMElement * createElement(const FQN &name)
Definition: dom.cc:793
DOMDocumentWrapper(DOMDocumentType *me_)
Wrap DOMDocument to my special element.
Definition: dom.h:470
std::conditional< std::is_same< DOMDocumentType, const xercesc::DOMDocument >::value, const DOMDocumentWrapper *, DOMDocumentWrapper * >::type operator->()
Treat this object as a pointer (like DOMDocument*)
Definition: dom.h:494
xercesc::DOMNode * evalRootXPathExpression(std::string xpathExpression, xercesc::DOMElement *context=nullptr)
Definition: dom.cc:834
XercesUniquePtr< xercesc::DOMElement > validate()
Definition: dom.cc:758
std::shared_ptr< DOMParser > getParser() const
Get full qualified tag name.
Definition: dom.cc:801
Helper class for extending DOMElement (use the function E(...)).
Definition: dom.h:275
void setOriginalElementLineNumber(int lineNr)
Set the line number of the original element.
Definition: dom.cc:698
std::string getRootXPathExpression() const
Definition: dom.cc:623
void setEmbedCountNumber(int embedCount)
Definition: dom.cc:600
std::conditional< std::is_same< DOMElementType, const xercesc::DOMElement >::value, const DOMElementWrapper *, DOMElementWrapper * >::type operator->()
Treat this object as a pointer (like DOMElement*)
Definition: dom.h:413
std::string getAttribute(const FQN &name) const
Get attribute named name.
Definition: dom.cc:514
const xercesc::DOMComment * getFirstCommentChild() const
Get first child comment.
Definition: dom.cc:412
bool hasAttribute(const FQN &name) const
Check if this element has an attribute named name.
Definition: dom.cc:705
const xercesc::DOMProcessingInstruction * getFirstProcessingInstructionChildNamed(const std::string &target) const
Get first child processing instruction of the specified target.
Definition: dom.cc:361
const xercesc::DOMText * getFirstTextChild() const
Definition: dom.cc:431
boost::filesystem::path getOriginalFilename(bool skipThis=false, const xercesc::DOMElement *&found=DOMElementWrapper< DOMElementType >::dummyArg) const
Definition: dom.cc:472
int getEmbedCountNumber() const
Definition: dom.cc:591
std::vector< int > getElementLocation() const
Definition: dom.cc:671
FQN getAttributeQName(const FQN &name) const
Get attribute named name of type QName.
Definition: dom.cc:520
void addProcessingInstructionChildNamed(const std::string &target, const std::string &data)
Add a processing instruction child of the specified target.
Definition: dom.cc:384
const xercesc::DOMAttr * getAttributeNode(const FQN &name) const
Get attribute node named name.
Definition: dom.cc:531
const xercesc::DOMElement * getNextElementSiblingNamed(const FQN &name) const
Get next sibling element of the specified full qualified name.
Definition: dom.cc:344
void removeAttribute(const FQN &name)
Remove the attribute named name from this element.
Definition: dom.cc:586
std::string getEmbedData(const std::string &name) const
Get the embed data named name from the current element. Returns "" if no such data exists.
Definition: dom.cc:390
boost::filesystem::path convertPath(const boost::filesystem::path &relPath) const
Definition: dom.cc:506
T getText(int r=0, int c=0) const
Get the child text as type T.
Definition: dom.h:315
void setEmbedXPathCount(int xPathCount)
Definition: dom.cc:616
const xercesc::DOMElement * getFirstElementChildNamed(const FQN &name) const
Get first child element of the specified full qualified name.
Definition: dom.cc:327
bool isDerivedFrom(const FQN &baseTypeName) const
Definition: dom.cc:727
void setOriginalFilename(boost::filesystem::path orgFileName=boost::filesystem::path())
Definition: dom.cc:499
void setAttribute(const FQN &name, const T &value)
Set attribute.
Definition: dom.h:402
FQN getTagName() const
Get full qualified tag name.
Definition: dom.h:280
int getOriginalElementLineNumber() const
Get the line number of the original element.
Definition: dom.cc:689
DOMElementWrapper(DOMElementType *me_)
Wrap DOMElement to my special element.
Definition: dom.h:278
void addEmbedData(const std::string &name, const std::string &data)
Definition: dom.cc:402
int getEmbedXPathCount() const
Definition: dom.cc:607
int getLineNumber() const
Definition: dom.cc:577
Print DOM error messages.
Definition: dom.h:261
static std::string convertToString(const EmbedDOMLocator &loc, const std::string &message, bool subsequentError=false)
Definition: dom.cc:1024
A XML DOM parser.
Definition: dom.h:547
std::shared_ptr< xercesc::DOMDocument > createDocument()
Create an empty document.
Definition: dom.cc:1459
xercesc::DOMElement * parseWithContext(const std::string &str, xercesc::DOMNode *contextNode, xercesc::DOMLSParser::ActionType action, std::vector< boost::filesystem::path > *dependencies=nullptr, bool doXInclude=true)
Definition: dom.cc:1390
std::shared_ptr< xercesc::DOMDocument > parse(const boost::filesystem::path &inputSource, std::vector< boost::filesystem::path > *dependencies=nullptr, bool doXInclude=true)
static void serialize(xercesc::DOMNode *n, const boost::filesystem::path &outputSource)
Definition: dom.cc:1422
static void serializeToString(xercesc::DOMNode *n, std::string &outputData)
Definition: dom.cc:1430
void resetCachedGrammarPool()
reset all loaded grammars
Definition: dom.cc:1454
static std::shared_ptr< DOMParser > create(const std::variant< boost::filesystem::path, xercesc::DOMElement * > &xmlCatalog=static_cast< xercesc::DOMElement * >(nullptr))
Definition: dom.cc:1197
static std::string convertToRootHRXPathExpression(const std::string &xpath)
get human readable (simple) XPath expression to the location
Definition: dom.cc:298
const std::string & getRootXPathExpression() const
get a (simple) XPath expression to the location: each element is prefixed with the namespace URI betw...
Definition: dom.h:152
FQN(const char *name)
Anonymous FQN (required for implicit casting of string literals to anonymous FQNs)
Definition: dom.h:121
FQN(const std::string &ns, const std::string &name)
FQN.
Definition: dom.h:123
FQN(const std::string &name)
Anonymous FQN.
Definition: dom.h:119
Initialize Xerces on load and terminate it on unload of the library/program.
Definition: dom.h:76
Helper class to easily construct fully qualified XML names (FQN) using XML namespace prefixes.
Definition: dom.h:167