1#ifndef _MBXMLUTILSHELPER_DOM_H_
2#define _MBXMLUTILSHELPER_DOM_H_
4#include <boost/algorithm/string/trim.hpp>
5#include <fmatvec/atom.h>
12#include <boost/filesystem.hpp>
13#include <xercesc/dom/DOMErrorHandler.hpp>
14#include <xercesc/dom/DOMElement.hpp>
15#include <xercesc/dom/DOMText.hpp>
16#include <xercesc/dom/DOMLSParser.hpp>
17#include <xercesc/dom/DOMLocator.hpp>
18#include <xercesc/dom/DOMUserDataHandler.hpp>
19#include <xercesc/dom/DOMDocument.hpp>
20#include <xercesc/util/TransService.hpp>
21#include <xercesc/util/XMLEntityResolver.hpp>
22#include <xercesc/framework/psvi/PSVIHandler.hpp>
23#include <boost/lexical_cast.hpp>
24#include <boost/container/small_vector.hpp>
25#include <fmatvec/toString.h>
27namespace XERCES_CPP_NAMESPACE {
28 class DOMProcessingInstruction;
29 class DOMImplementation;
30 class AbstractDOMParser;
// Declarations of explicit lexical_cast specializations that take a std::string and yield
// a vector or vector-of-vectors of double/int, or a bool. No definitions are visible here.
34 template<> std::vector<double> lexical_cast(
const std::string& str);
35 template<> std::vector<std::vector<double>> lexical_cast(
const std::string& str);
36 template<> std::vector<int> lexical_cast(
const std::string& str);
37 template<> std::vector<std::vector<int>> lexical_cast(
const std::string& str);
38 template<>
bool lexical_cast<bool>(
const std::string& arg);
45 static void check(
const xercesc::DOMElement *me,
const T &value,
int r,
int c);
48struct CheckSize<std::vector<T>> {
49 static void check(
const xercesc::DOMElement *me,
const std::vector<T> &value,
int r,
int c);
52struct CheckSize<std::vector<std::vector<T>>> {
53 static void check(
const xercesc::DOMElement *me,
const std::vector<std::vector<T>> &value,
int r,
int c);
// Alias for a std::unique_ptr<T> whose deleter type is XercesUniquePtrDeleter<T>.
62template<
class T>
using XercesUniquePtr = std::unique_ptr<T, XercesUniquePtrDeleter<T>>;
65template<
typename DOMDocumentType>
67template<
typename DOMDocumentType>
69template<
typename DOMElementType>
71template<
typename DOMElementType>
78 xercesc::XMLPlatformUtils::Initialize();
81 xercesc::XMLPlatformUtils::Terminate();
89 const XMLCh *operator%(
const std::string &str) {
91 return &xercesc::chNull;
92 const XMLCh *unicode=xercesc::TranscodeFromStr(
reinterpret_cast<const XMLByte*
>(str.c_str()), str.length(),
"UTF8").adopt();
93 store.emplace_back(unicode, &releaseXMLCh);
// Function-call spelling of operator%(const std::string&): forwards str and returns that result.
96 const XMLCh *operator()(
const std::string &str) {
return operator%(str); }
97 std::string operator%(
const XMLCh *unicode) {
98 if(!unicode || unicode[0]==0)
100 return reinterpret_cast<const char*
>(xercesc::TranscodeToStr(unicode,
"UTF8").str());
// Function-call spelling of operator%(const XMLCh*): forwards unicode and returns that result.
102 std::string operator()(
const XMLCh *unicode) {
return operator%(unicode); }
// Deleter passed along with each buffer placed in `store`: casts away const and hands the
// pointer to xercesc::XMLPlatformUtils::fgMemoryManager->deallocate().
103 static void releaseXMLCh(
const XMLCh *s) { xercesc::XMLPlatformUtils::fgMemoryManager->deallocate(
const_cast<XMLCh*
>(s)); }
// Owning list of const XMLCh buffers; every entry carries releaseXMLCh as its deleter.
// The small_vector is declared with an in-place capacity of 1.
105 boost::container::small_vector<std::unique_ptr<
const XMLCh,
decltype(&releaseXMLCh)>, 1> store;
110class FQN :
public std::pair<std::string, std::string> {
// Construct from a name only: the first pair element is set to "" and the second to name.
115 FQN(
const std::string &name) : std::pair<std::string, std::string>(
"", name) {}
// Same as the std::string overload but taking a C string; being non-explicit it lets a
// string literal convert implicitly to an FQN whose first element is "".
117 FQN(
const char *name) : std::pair<std::string, std::string>(
"", name) {}
// Construct from both parts: ns becomes the first pair element, name the second.
119 FQN(
const std::string &ns,
const std::string &name) : std::pair<std::string, std::string>(ns, name) {}
// Build a locator from a file path, a row, an embed count and an xpath string.
// The path is stored as x%file_.string(); xpath_ is taken by value and moved into the member.
126 EmbedDOMLocator(
const boost::filesystem::path &file_,
int row_,
int embedCount_, std::string xpath_) : DOMLocator(),
127 file(x%file_.string()), row(row_), embedCount(embedCount_), xpath(std::move(xpath_)) {}
129 file(x%(
X()%src.file)), row(src.row), embedCount(src.embedCount), xpath(src.xpath) {}
131 file=x%(
X()%src.file);
133 embedCount=src.embedCount;
// Overridden accessors. Only the line number and the URI come from stored members;
// the remaining ones return fixed values.
// Returns the stored row.
140 XMLFileLoc getLineNumber()
const override {
return row; }
// Always returns 0.
141 XMLFileLoc getColumnNumber()
const override {
return 0; }
// Always returns the bitwise complement of XMLFilePos(0).
142 XMLFilePos getByteOffset()
const override {
return ~(XMLFilePos(0)); }
// Always returns the bitwise complement of XMLFilePos(0).
143 XMLFilePos getUtf16Offset()
const override {
return ~(XMLFilePos(0)); }
// Always returns nullptr.
144 xercesc::DOMNode *getRelatedNode()
const override {
return nullptr; }
// Returns the stored file member.
145 const XMLCh *getURI()
const override {
return file; }
// Returns the stored embedCount (not marked override, unlike the accessors before it).
146 int getEmbedCount()
const {
return embedCount; }
// Static declaration only; invoked by the NamespaceURI constructor with its URI and preferred prefixes.
151 static void addNSURIPrefix(std::string nsuri,
const std::vector<std::string> &prefix);
// Read-only view of the map returned by nsURIPrefix().
152 static const std::map<std::string, std::string>& getNSURIPrefix() {
return nsURIPrefix(); }
// Static declaration only: yields a mutable reference to the string-to-string map.
159 static std::map<std::string, std::string>& nsURIPrefix();
165 NamespaceURI(std::string nsuri_,
const std::vector<std::string> &preferredPrefix={}) : nsuri(std::move(nsuri_)) {
166 EmbedDOMLocator::addNSURIPrefix(nsuri, preferredPrefix);
// Pair the stored namespace URI with localName and return the result as an FQN.
168 FQN operator%(
const std::string &localName)
const {
return {nsuri, localName}; }
// Returns a const reference to the stored namespace URI.
169 const std::string& getNamespaceURI()
const {
return nsuri; }
// Namespace-scope NamespaceURI constants. Each one is constructed from a namespace URI
// and a list of preferred prefixes for that URI.
// XInclude namespace.
175const NamespaceURI XINCLUDE(
"http://www.w3.org/2001/XInclude", {
"xi",
"xinc",
"xinclude"});
// xmlns namespace.
177const NamespaceURI XMLNS(
"http://www.w3.org/2000/xmlns/", {
"xmlns"});
// MBXMLUtils namespace.
179const NamespaceURI PV(
"http://www.mbsim-env.de/MBXMLUtils", {
"p",
"pv",
"mbxmlutils"});
// XML catalog namespace.
181const NamespaceURI XMLCATALOG(
"urn:oasis:names:tc:entity:xmlns:xml:catalog", {
"catalog",
"xmlcatalog"});
185#define RETHROW_AS_DOMEVALEXCEPTION(e) \
186 catch(MBXMLUtils::DOMEvalException &ex) { \
189 catch(const std::exception &ex) { \
190 throw DOMEvalException(ex.what(), e); \
// Declaration only: takes a DOM node and a line number that defaults to 0.
201 void appendContext(
const xercesc::DOMNode *n,
int lineNr=0);
// Returns a const reference to the stored error message.
202 const std::string& getMessage()
const {
return errorMsg; }
// Replaces the stored error message with errorMsg_.
203 void setMessage(
const std::string& errorMsg_) { errorMsg=errorMsg_; }
// Sets the subsequentError flag to sse.
204 void setSubsequentError(
bool sse) { subsequentError=sse; }
// Declaration only: noexcept override of what(); the definition is not visible here.
205 const char* what()
const noexcept override;
// Returns the stored nodeType.
206 xercesc::DOMNode::NodeType getNodeType()
const {
return nodeType; }
// Static declarations only; definitions are not visible here.
// Takes no arguments and yields a bool.
207 static bool isHTMLOutputEnabled();
// Receives msg by non-const reference, so the callee is able to modify it in place.
208 static void htmlEscaping(std::string &msg);
// Constructor declaration: takes an error message and a xercesc::DOMLocator.
210 DOMEvalException(
const std::string &errorMsg_,
const xercesc::DOMLocator &loc);
// Flag that starts as false and is changed through setSubsequentError().
248 bool subsequentError{
false};
// Message returned by getMessage() and replaced by setMessage().
249 std::string errorMsg;
250 std::vector<EmbedDOMLocator> locationStack;
// mutable, so const member functions may modify it; presumably a cache for what() — TODO confirm.
251 mutable std::string whatStr;
// Starts as -1 cast to NodeType; presumably a "not set" marker — TODO confirm.
252 xercesc::DOMNode::NodeType nodeType {
static_cast<xercesc::DOMNode::NodeType
>(-1) };
// Declaration only: override receiving a xercesc::DOMError and yielding a bool.
260 bool handleError(
const xercesc::DOMError&)
override;
// Returns the current value of the errorSet flag.
261 bool hasError() {
return errorSet; }
// Clears the errorSet flag (sets it back to false).
263 void resetError() { errorSet=
false; }
// Flag read by hasError() and cleared by resetError(); starts as false.
265 bool errorSet{
false};
270template<
typename DOMElementType>
// Declaration only. Per the API description of this member: gets the embed data named
// `name` from the current element and yields "" when no such data exists.
292 std::string
getEmbedData(
const std::string &name)
const;
// Declaration only: takes a name and a data string; the definition is not visible here.
295 void addEmbedData(
const std::string &name,
const std::string &data);
311 template<
class T> T
getText(
int r=0,
int c=0)
const {
313 auto textEle=E(me)->getFirstTextChild();
315 if constexpr(std::is_same_v<T, std::string>) {
316 for(
auto *n=me->getFirstChild(); n; n=n->getNextSibling())
317 if(n->getNodeType()==xercesc_3_2::DOMNode::TEXT_NODE || n->getNodeType()==xercesc_3_2::DOMNode::CDATA_SECTION_NODE)
318 if(!boost::trim_copy(
X()%
static_cast<xercesc::DOMText*
>(n)->getData()).empty())
319 throw std::runtime_error(
"There must be no or a single, none empty, text node but the text node is split by a comment or processing-instruction node.");
323 throw std::runtime_error(
"There must be a single, none empty, text node but either, no text node exists at all, or the text node is split by a comment or processing-instruction node.");
325 auto text=
X()%textEle->getData();
326 auto ret=boost::lexical_cast<T>(text);
327 CheckSize<T>::check(me, ret, r, c);
330 catch(
const boost::bad_lexical_cast &ex) {
333 catch(
const std::exception &ex) {
338 template<
class T>
void addElementText(
const FQN &name,
const T &value) {
339 xercesc::DOMElement *ele=D(me->getOwnerDocument())->createElement(name);
340 ele->insertBefore(me->getOwnerDocument()->createTextNode(
MBXMLUtils::X()%fmatvec::toString(value)),
nullptr);
341 me->insertBefore(ele,
nullptr);
352 const xercesc::DOMElement *&found=DOMElementWrapper<DOMElementType>::dummyArg)
const;
// Declaration only: takes relPath and yields a boost::filesystem::path; the conversion
// rule itself is not visible here.
360 boost::filesystem::path
convertPath(
const boost::filesystem::path &relPath)
const;
399 me->setAttributeNS(
X()%name.first,
X()%name.second,
X()%fmatvec::toString(value));
408 typename std::conditional<std::is_same<DOMElementType, const xercesc::DOMElement>::value,
// Static pointer used as the default target of a `found` reference parameter.
412 static const xercesc::DOMElement *dummyArg;
// Wrap a raw element pointer in a DOMElementWrapper, returned by value.
417template<
typename DOMElementType>
418DOMElementWrapper<DOMElementType> E(DOMElementType *me) {
return DOMElementWrapper<DOMElementType>(me); }
// std::shared_ptr overload: wraps the raw pointer obtained from me.get(); the wrapper is
// built from that raw pointer only, not from the shared_ptr.
420template<
typename DOMElementType>
421DOMElementWrapper<DOMElementType> E(
const std::shared_ptr<DOMElementType> &me) {
return DOMElementWrapper<DOMElementType>(me.get()); }
// XercesUniquePtr overload: wraps the raw pointer obtained from me.get(); ownership stays with me.
423template<
typename DOMElementType>
424DOMElementWrapper<DOMElementType> E(
const XercesUniquePtr<DOMElementType> &me) {
return DOMElementWrapper<DOMElementType>(me.get()); }
// Declarations of the explicit specializations of the static member dummyArg, one for the
// non-const and one for the const xercesc::DOMElement wrapper.
426template<>
const xercesc::DOMElement *DOMElementWrapper< xercesc::DOMElement>::dummyArg;
427template<>
const xercesc::DOMElement *DOMElementWrapper<const xercesc::DOMElement>::dummyArg;
430template<
typename DOMAttrType>
442 typename std::conditional<std::is_same<DOMAttrType, const xercesc::DOMAttr>::value,
// Wrap a raw attribute pointer in a DOMAttrWrapper, returned by value.
450template<
typename DOMAttrType>
451DOMAttrWrapper<DOMAttrType> A(DOMAttrType *me) {
return DOMAttrWrapper<DOMAttrType>(me); }
// std::shared_ptr overload: wraps the raw pointer obtained from me.get().
453template<
typename DOMAttrType>
454DOMAttrWrapper<DOMAttrType> A(
const std::shared_ptr<DOMAttrType> &me) {
return DOMAttrWrapper<DOMAttrType>(me.get()); }
// XercesUniquePtr overload: wraps the raw pointer obtained from me.get(); ownership stays with me.
456template<
typename DOMAttrType>
457DOMAttrWrapper<DOMAttrType> A(
const XercesUniquePtr<DOMAttrType> &me) {
return DOMAttrWrapper<DOMAttrType>(me.get()); }
462template<
typename DOMDocumentType>
// Member declarations only; the definitions are not visible here.
// Yields a XercesUniquePtr to a xercesc::DOMElement.
471 XercesUniquePtr<xercesc::DOMElement>
validate();
// Yields a std::shared_ptr<DOMParser>.
476 std::shared_ptr<DOMParser>
getParser()
const;
// Takes a vector of integer indices and yields an element pointer.
487 xercesc::DOMElement*
locateElement(
const std::vector<int> &idx)
const;
489 typename std::conditional<std::is_same<DOMDocumentType, const xercesc::DOMDocument>::value,
// Wrap a raw document pointer in a DOMDocumentWrapper, returned by value.
497template<
typename DOMDocumentType>
498DOMDocumentWrapper<DOMDocumentType> D(DOMDocumentType *me) {
return DOMDocumentWrapper<DOMDocumentType>(me); }
// std::shared_ptr overload: wraps the raw pointer obtained from me.get().
500template<
typename DOMDocumentType>
501DOMDocumentWrapper<DOMDocumentType> D(
const std::shared_ptr<DOMDocumentType> &me) {
return DOMDocumentWrapper<DOMDocumentType>(me.get()); }
// XercesUniquePtr overload: wraps the raw pointer obtained from me.get(); ownership stays with me.
503template<
typename DOMDocumentType>
504DOMDocumentWrapper<DOMDocumentType> D(
const XercesUniquePtr<DOMDocumentType> &me) {
return DOMDocumentWrapper<DOMDocumentType>(me.get()); }
// Stores the given raw DOMParser pointer in the parser member.
508 void setParser(
DOMParser *parser_) { parser=parser_; }
// Override declarations only; the definitions are not visible here.
// Receives a node and yields a DOMLSParserFilter::FilterAction.
509 xercesc::DOMLSParserFilter::FilterAction acceptNode(xercesc::DOMNode *n)
override;
// Receives an element and yields a DOMLSParserFilter::FilterAction.
510 xercesc::DOMLSParserFilter::FilterAction startElement(xercesc::DOMElement *e)
override;
// Yields a DOMNodeFilter::ShowType.
511 xercesc::DOMNodeFilter::ShowType getWhatToShow()
const override;
// Stores offset in the lineNumberOffset member.
512 void setLineNumberOffset(
int offset) { lineNumberOffset=offset; }
// Value written by setLineNumberOffset(); starts as 0.
515 int lineNumberOffset { 0 };
// Stores the given raw DOMParser pointer in the parser member.
520 void setParser(
DOMParser *parser_) { parser=parser_; }
// Override declarations only; the definitions are not visible here.
// Receives a local name, a URI and a xercesc::PSVIElement.
521 void handleElementPSVI(
const XMLCh *localName,
const XMLCh *uri, xercesc::PSVIElement *info)
override;
// Receives a local name, a URI and a xercesc::PSVIAttributeList.
522 void handleAttributesPSVI(
const XMLCh *localName,
const XMLCh *uri, xercesc::PSVIAttributeList *psviAttributes)
override;
// Override declaration only: receives the operation type, a key, an untyped data pointer,
// and a source and destination node.
529 void handle(DOMOperationType operation,
const XMLCh* key,
void *data,
const xercesc::DOMNode *src, xercesc::DOMNode *dst)
override;
// Stores the given raw DOMParser pointer in the parser member.
536 void setParser(
DOMParser *parser_) { parser=parser_; }
// Override declaration only: receives a xercesc::XMLResourceIdentifier and yields an
// InputSource pointer; the definition is not visible here.
537 xercesc::InputSource* resolveEntity(xercesc::XMLResourceIdentifier *resourceIdentifier)
override;
543class DOMParser :
public std::enable_shared_from_this<DOMParser> {
// Static factory declaration yielding a std::shared_ptr<DOMParser>. xmlCatalog is a variant
// holding either a filesystem path or a DOMElement pointer; it defaults to a null DOMElement pointer.
555 static std::shared_ptr<DOMParser>
create(const std::variant<boost::filesystem::path, xercesc::DOMElement*> &xmlCatalog=static_cast<xercesc::DOMElement*>(nullptr));
// Declaration of the overload that takes a filesystem path and yields a shared document.
// dependencies defaults to nullptr and doXInclude to true.
559 std::shared_ptr<xercesc::DOMDocument>
parse(const boost::filesystem::path &inputSource,
560 std::vector<boost::filesystem::path> *dependencies=nullptr,
561 bool doXInclude=true);
// Declaration of the overload that takes a std::istream and yields a shared document.
// dependencies defaults to nullptr and doXInclude to true.
565 std::shared_ptr<xercesc::DOMDocument>
parse( std::istream &inputStream,
566 std::vector<boost::filesystem::path> *dependencies=nullptr,
567 bool doXInclude=true);
// Declaration only: takes the text to parse as a string plus a context node and a
// DOMLSParser action type, and yields an element pointer.
// dependencies defaults to nullptr and doXInclude to true.
571 xercesc::DOMElement*
parseWithContext(const std::string &str, xercesc::DOMNode *contextNode, xercesc::DOMLSParser::ActionType action,
572 std::vector<boost::filesystem::path> *dependencies=nullptr,
573 bool doXInclude=true);
// Static declaration only: takes a node and a filesystem path named outputSource.
576 static void
serialize(xercesc::DOMNode *n, const boost::filesystem::path &outputSource);
// Read-only access to the typeMap member (FQN -> xercesc::XSTypeDefinition pointer).
585 const std::map<FQN, xercesc::XSTypeDefinition*>& getTypeMap() const {
return typeMap; }
// Raw pointer to the xercesc DOM implementation object.
587 xercesc::DOMImplementation *domImpl;
// Constructor declaration; takes the same xmlCatalog variant type as create().
588 DOMParser(
const std::variant<boost::filesystem::path, xercesc::DOMElement*> &xmlCatalog);
// Shared pointer to the underlying xercesc::DOMLSParser.
589 std::shared_ptr<xercesc::DOMLSParser> parser;
// Map exposed read-only through getTypeMap().
590 std::map<FQN, xercesc::XSTypeDefinition*> typeMap;
// Map from a string key to a filesystem path; presumably namespace -> schema file — TODO confirm.
595 std::map<std::string, boost::filesystem::path> registeredGrammar;
// Static declaration only; e is passed as a reference to a pointer, so the callee is able to repoint it.
597 static void handleXInclude(xercesc::DOMElement *&e, std::vector<boost::filesystem::path> *dependencies);
// Generic case: the body is empty, so no size check is performed for this T.
605void CheckSize<T>::check(
const xercesc::DOMElement *me,
const T &value,
int r,
int c) {}
607void CheckSize<std::vector<T>>::check(
const xercesc::DOMElement *me,
const std::vector<T> &value,
int r,
int c) {
608 if(r!=0 && r!=
static_cast<int>(value.size()))
610 " but got vector of size "+fmatvec::toString(value.size())+
".", me);
613void CheckSize<std::vector<std::vector<T>>>::check(
const xercesc::DOMElement *me,
const std::vector<std::vector<T>> &value,
int r,
int c) {
614 if(r!=0 && r!=
static_cast<int>(value.size()))
616 " but got matrix of row-size "+fmatvec::toString(value.size())+
".", me);
617 if(!value.empty() && c!=0 && c!=
static_cast<int>(value[0].size()))
619 " but got matrix of col-size "+fmatvec::toString(value[0].size())+
".", me);
Helper class for extending DOMAttr (use the function A(...)).
Definition: dom.h:431
DOMAttrWrapper(DOMAttrType *me_)
Wrap DOMAttr to my special element.
Definition: dom.h:434
std::string getRootXPathExpression() const
Definition: dom.cc:743
bool isDerivedFrom(const FQN &baseTypeName) const
Definition: dom.cc:737
std::conditional< std::is_same< DOMAttrType, const xercesc::DOMAttr >::value, const DOMAttrWrapper *, DOMAttrWrapper * >::type operator->()
Treat this object as a pointer (like DOMAttr*)
Definition: dom.h:443
Helper class for extending DOMDocument (use the function D(...)).
Definition: dom.h:463
xercesc::DOMElement * locateElement(const std::vector< int > &idx) const
Definition: dom.cc:868
boost::filesystem::path getDocumentFilename() const
Definition: dom.cc:801
xercesc::DOMElement * createElement(const FQN &name)
Definition: dom.cc:787
DOMDocumentWrapper(DOMDocumentType *me_)
Wrap DOMDocument to my special element.
Definition: dom.h:466
std::conditional< std::is_same< DOMDocumentType, const xercesc::DOMDocument >::value, const DOMDocumentWrapper *, DOMDocumentWrapper * >::type operator->()
Treat this object as a pointer (like DOMDocument*)
Definition: dom.h:490
xercesc::DOMNode * evalRootXPathExpression(std::string xpathExpression, xercesc::DOMElement *context=nullptr)
Definition: dom.cc:828
XercesUniquePtr< xercesc::DOMElement > validate()
Definition: dom.cc:752
std::shared_ptr< DOMParser > getParser() const
Get the DOMParser of this document.
Definition: dom.cc:795
Helper class for extending DOMElement (use the function E(...)).
Definition: dom.h:271
void setOriginalElementLineNumber(int lineNr)
Set the line number of the original element.
Definition: dom.cc:699
std::string getRootXPathExpression() const
Definition: dom.cc:624
void setEmbedCountNumber(int embedCount)
Definition: dom.cc:601
std::conditional< std::is_same< DOMElementType, const xercesc::DOMElement >::value, const DOMElementWrapper *, DOMElementWrapper * >::type operator->()
Treat this object as a pointer (like DOMElement*)
Definition: dom.h:409
std::string getAttribute(const FQN &name) const
Get attribute named name.
Definition: dom.cc:515
const xercesc::DOMComment * getFirstCommentChild() const
Get first child comment.
Definition: dom.cc:413
bool hasAttribute(const FQN &name) const
Check if this element has an attribute named name.
Definition: dom.cc:706
const xercesc::DOMProcessingInstruction * getFirstProcessingInstructionChildNamed(const std::string &target) const
Get first child processing instruction of the specified target.
Definition: dom.cc:362
const xercesc::DOMText * getFirstTextChild() const
Definition: dom.cc:432
boost::filesystem::path getOriginalFilename(bool skipThis=false, const xercesc::DOMElement *&found=DOMElementWrapper< DOMElementType >::dummyArg) const
Definition: dom.cc:473
int getEmbedCountNumber() const
Definition: dom.cc:592
std::vector< int > getElementLocation() const
Definition: dom.cc:672
FQN getAttributeQName(const FQN &name) const
Get attribute named name of type QName.
Definition: dom.cc:521
void addProcessingInstructionChildNamed(const std::string &target, const std::string &data)
Add a processing instruction child of the specified target.
Definition: dom.cc:385
const xercesc::DOMAttr * getAttributeNode(const FQN &name) const
Get attribute node named name.
Definition: dom.cc:532
const xercesc::DOMElement * getNextElementSiblingNamed(const FQN &name) const
Get next sibling element of the specified full qualified name.
Definition: dom.cc:345
void removeAttribute(const FQN &name)
Remove the attribute named name from this element.
Definition: dom.cc:587
std::string getEmbedData(const std::string &name) const
Get the embed data named name from the current element. Returns "" if no such data exists.
Definition: dom.cc:391
boost::filesystem::path convertPath(const boost::filesystem::path &relPath) const
Definition: dom.cc:507
T getText(int r=0, int c=0) const
Get the child text as type T.
Definition: dom.h:311
void setEmbedXPathCount(int xPathCount)
Definition: dom.cc:617
const xercesc::DOMElement * getFirstElementChildNamed(const FQN &name) const
Get first child element of the specified full qualified name.
Definition: dom.cc:328
bool isDerivedFrom(const FQN &baseTypeName) const
Definition: dom.cc:728
void setOriginalFilename(boost::filesystem::path orgFileName=boost::filesystem::path())
Definition: dom.cc:500
void setAttribute(const FQN &name, const T &value)
Set attribute.
Definition: dom.h:398
FQN getTagName() const
Get full qualified tag name.
Definition: dom.h:276
int getOriginalElementLineNumber() const
Get the line number of the original element.
Definition: dom.cc:690
DOMElementWrapper(DOMElementType *me_)
Wrap DOMElement to my special element.
Definition: dom.h:274
void addEmbedData(const std::string &name, const std::string &data)
Definition: dom.cc:403
int getEmbedXPathCount() const
Definition: dom.cc:608
int getLineNumber() const
Definition: dom.cc:578
Print DOM error messages.
Definition: dom.h:257
static std::string convertToString(const EmbedDOMLocator &loc, const std::string &message, bool subsequentError=false)
Definition: dom.cc:1018
A XML DOM parser.
Definition: dom.h:543
std::shared_ptr< xercesc::DOMDocument > createDocument()
Create an empty document.
Definition: dom.cc:1446
xercesc::DOMElement * parseWithContext(const std::string &str, xercesc::DOMNode *contextNode, xercesc::DOMLSParser::ActionType action, std::vector< boost::filesystem::path > *dependencies=nullptr, bool doXInclude=true)
Definition: dom.cc:1377
std::shared_ptr< xercesc::DOMDocument > parse(const boost::filesystem::path &inputSource, std::vector< boost::filesystem::path > *dependencies=nullptr, bool doXInclude=true)
static void serialize(xercesc::DOMNode *n, const boost::filesystem::path &outputSource)
Definition: dom.cc:1409
static void serializeToString(xercesc::DOMNode *n, std::string &outputData)
Definition: dom.cc:1417
void resetCachedGrammarPool()
reset all loaded grammars
Definition: dom.cc:1441
static std::shared_ptr< DOMParser > create(const std::variant< boost::filesystem::path, xercesc::DOMElement * > &xmlCatalog=static_cast< xercesc::DOMElement * >(nullptr))
Definition: dom.cc:1184
static std::string convertToRootHRXPathExpression(const std::string &xpath)
get human readable (simple) XPath expression to the location
Definition: dom.cc:299
const std::string & getRootXPathExpression() const
get a (simple) XPath expression to the location: each element is prefixed with the namespace URI betw...
Definition: dom.h:148
FQN(const char *name)
Anonymous FQN (required for implicit casting of string literals to anonymous FQNs)
Definition: dom.h:117
FQN(const std::string &ns, const std::string &name)
FQN.
Definition: dom.h:119
FQN(const std::string &name)
Anonymous FQN.
Definition: dom.h:115
Initialize Xerces on load and terminate it on unload of the library/program.
Definition: dom.h:75
Helper class to easily construct full qualified XML names (FQN) using XML namespace prefixes.
Definition: dom.h:163