module xmlstorage;

 //
 // XML storage D classes version 1.2
 //
 // Copyright (c) 2007 Martin Fuchs
 //

/// \file xmlstorage.d
/// XMLStorage implementation file

/*

  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
  * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  THE POSSIBILITY OF SUCH DAMAGE.

*/

import std.stdio;
import std.ctype;
import std.string;
import std.stream;
import std.cstream;    // dout
import std.conv;       // toDouble()


const BUFFER_LEN = 2048;

const char[] XS_TRUE = "true";
const char[] XS_FALSE = "false";
const char[] XS_INTFMT = "%d";
const char[] XS_FLOATFMT = "%f";    // was "%d", a copy of XS_INTFMT, although it is only used to format doubles
const char[] XS_EMPTY = "";


/// returns non-zero for characters accepted in tag and attribute names
/// (alphanumeric characters, '_' and '-')
int isxmlsym(byte c)
{
    return isalnum(c) || c=='_' || c=='-';
}


/// input file stream with ANSI/UNICODE file names
class tifstream : public BufferedFile
{
public:
    this(in char[] path)
    {
        super(path, FileMode.In);
    }
};

/// output file stream with ANSI/UNICODE file names
class tofstream : public BufferedFile
{
public:
    this(in char[] path)
    {
        super(path, FileMode.Out);
    }

    ~this()
    {
        close();
    }
};


/// simple growable sequence container; clear() deletes the stored entries
class list(T)
{
public:
    /// append an entry at the end
    void push_back(T entry)
    {
        _entries ~= entry;
    }

    /// first entry; the list must not be empty
    T front()
    {
        assert(_entries.length>0);
        return _entries[0];
    }

    /// last entry; the list must not be empty
    T back()
    {
        assert(_entries.length>0);
        return _entries[_entries.length-1];
    }

    bool empty()
    {
        return !_entries.length;
    }

    int length()
    {
        return _entries.length;
    }

    /// delete all entries and empty the list
    void clear()
    {
        foreach(entry; _entries)
            delete entry;

        _entries = null;
    }

    /// foreach support; a non-zero delegate result stops the iteration
    int opApply(int delegate(inout T) dg)
    {
        int result = 0;

        foreach(e; _entries) {
            result = dg(e);

            if (result)
                break;
        }

        return result;
    }

protected:
    T[] _entries;
};


/// simple LIFO container; clear() deletes the stored entries
class stack(T)
{
public:
    void push(T entry)
    {
        _entries ~= entry;
    }

    /// remove and return the top entry; the stack must not be empty
    T pop()
    {
        assert(_entries.length>0);

        T ret = _entries[_entries.length-1];
        _entries.length = _entries.length - 1;

        return ret;
    }

    /// return the top entry without removing it; the stack must not be empty
    T top()
    {
        assert(_entries.length>0);
        return _entries[_entries.length-1];
    }

    bool empty()
    {
        return !_entries.length;
    }

    int length()
    {
        return _entries.length;
    }

    /// delete all entries and empty the stack
    void clear()
    {
        foreach(entry; _entries)
            delete entry;

        _entries = null;
    }

protected:
    T[] _entries;
};


// write XML files with 2 spaces indenting
// (the literal had been reduced to a single space, contradicting the comment above)
const char[] XML_INDENT_SPACE = "  ";


/// XML Error with message and location
class XMLError
{
public:
    this()
    {
        _line = 0;
        _column = 0;
        _error_code = 0;
    }

    /// return formated error message
    char[] str()
    {
        return format("%s(%d) [column %d] : %s", _systemId, _line, _column, _message);
    }

    /// write the formated error message to a stream; returns the stream
    Stream output(Stream o)
    {
        o.writeString(str());
        return o;
    }

    char[] _message;
    char[] _systemId;
    int _line;
    int _column;
    int _error_code;
};

/// list of XMLError entries
class XMLErrorList : public list!(XMLError)
{
public:
    /// return merged error strings
    char[] str()
    {
        Stream o = new MemoryStream;

        foreach(e; _entries)
            o.writeString(e.str() ~ "\n");

        return o.toString();
    }
};


version(XMLNODE_LOCATION) {

/// location of XML Node including XML file name
class XMLLocation
{
    this()
    {
        _display_path = null;
        _line = 0;
        _column = 0;
    }

    this(in char[] display_path, int line, int column)
    {
        _display_path = display_path;
        _line = line;
        _column = column;
    }

    char[] str()
    {
        return "";    //TODO
    }

protected:
    char[] _display_path;    // character pointer for fast reference
    int _line;
    int _column;
};

} // XMLNODE_LOCATION


enum PRETTY_FLAGS {
    PRETTY_PLAIN = 0,
    PRETTY_LINEFEED = 1,
    PRETTY_INDENT = 2
};


/// XML Stylesheet entry
class StyleSheet
{
public:
    char[] _href;       // CDATA #REQUIRED
    char[] _type;       // CDATA #REQUIRED
    char[] _title;      // CDATA #IMPLIED
    char[] _media;      // CDATA #IMPLIED
    char[] _charset;    // CDATA #IMPLIED
    bool _alternate;    // (yes|no) "no"

    this()
    {
        _alternate = false;
    }

    this(in char[] href, in char[] type="text/xsl", bool alternate=false)
    {
        _href = href;
        _type = type;
        _alternate = alternate;
    }

    bool empty()
    {
        return !_href.length;
    }

    /// write the entry as an "xml-stylesheet" processing instruction
    // NOTE(review): the original statement text was lost in this copy of the file
    // (only writeString("") survived). The output below is rebuilt from the fields
    // of this class and from the attributes XMLReaderBase.parse() reads back --
    // confirm against the upstream source.
    void print(Stream o)
    {
        o.writeString("<?xml-stylesheet href=\"" ~ EncodeXMLString(_href) ~ "\" type=\"" ~ EncodeXMLString(_type) ~ "\"");

        if (_title.length)
            o.writeString(" title=\"" ~ EncodeXMLString(_title) ~ "\"");

        if (_media.length)
            o.writeString(" media=\"" ~ EncodeXMLString(_media) ~ "\"");

        if (_charset.length)
            o.writeString(" charset=\"" ~ EncodeXMLString(_charset) ~ "\"");

        if (_alternate)
            o.writeString(" alternate=\"yes\"");

        o.writeString("?>");
    }
};

/// list of StyleSheet entries
class StyleSheetList : public list!(StyleSheet)
{
public:
    /// replace the whole list by a single entry
    void set(StyleSheet stylesheet)
    {
        clear();
        push_back(stylesheet);
    }
};


/// XML document type description
class DocType
{
    char[] _name;

    // External Document Types are noted, but not parsed.
    char[] _public;
    char[] _system;

    // Internal DTDs are not supported.

    /// read name and external identifiers from the text following "<!DOCTYPE "
    // NOTE(review): everything after "while(i" was lost in this copy of the file.
    // The body is a reconstruction based on the three fields above and on the
    // caller in XMLReaderBase.parse() -- confirm against the upstream source.
    void parse(in char[] str)
    {
        int i = 0;

        while(i<str.length && isspace(cast(dchar)str[i]))
            ++i;

        int start = i;
        while(i<str.length && isxmlsym(str[i]))
            ++i;

        // str is a slice of the reader's line buffer, so keep a private copy
        _name = str[start .. i].dup;

        while(i<str.length && isspace(cast(dchar)str[i]))
            ++i;

        start = i;
        while(i<str.length && isxmlsym(str[i]))
            ++i;

        char[] keyword = str[start .. i];    // "PUBLIC" or "SYSTEM"

        if (keyword == "PUBLIC") {
            _public = read_literal(str, i);
            _system = read_literal(str, i);
        } else if (keyword == "SYSTEM")
            _system = read_literal(str, i);
    }

    /// true if no document type has been set
    // NOTE(review): reconstructed; only the call "_doctype.empty()" is visible.
    bool empty()
    {
        return !_name.length;
    }

private:
    /// read a quoted literal starting at or after position i; returns null if there is none
    char[] read_literal(in char[] str, inout int i)
    {
        while(i<str.length && isspace(cast(dchar)str[i]))
            ++i;

        if (i>=str.length || (str[i]!='"' && str[i]!='\''))
            return null;

        char delim = str[i];
        int start = ++i;

        while(i<str.length && str[i]!=delim)
            ++i;

        char[] literal = str[start .. i].dup;

        if (i < str.length)
            ++i;    // skip closing delimiter

        return literal;
    }
};


/// XML file header description: declaration, document type and stylesheets
// NOTE(review): the class head, the constructor and the beginning of print_header()
// were lost in this copy of the file and are reconstructed from the surviving
// members and call sites -- confirm against the upstream source.
class XMLFormat
{
public:
    this()
    {
        // assumed defaults -- TODO confirm
        _pretty = PRETTY_FLAGS.PRETTY_INDENT;
        _endl = "\n";
        _version = "1.0";
        _encoding = "utf-8";

        // the reader dereferences both members without allocating them
        _doctype = new DocType;
        _stylesheets = new StyleSheetList;

        _standalone = -1;    // -1: not specified (same convention as in XMLReaderBase.parse())
    }

    /// write XML declaration, document type and stylesheet entries;
    /// lf selects whether each header line is terminated with _endl
    void print_header(Stream o, bool lf=true)
    {
        o.writeString("<?xml version=\"" ~ _version ~ "\" encoding=\"" ~ _encoding ~ "\"");

        if (_standalone != -1)
            o.writeString(" standalone=\"" ~ (_standalone? "yes": "no") ~ "\"");

        o.writeString("?>");

        if (lf)
            o.writeString(_endl);

        if (!_doctype.empty()) {
            // NOTE(review): reconstructed, only writeString("") survived
            o.writeString("<!DOCTYPE " ~ _doctype._name);

            if (_doctype._public.length)
                o.writeString(" PUBLIC \"" ~ _doctype._public ~ "\" \"" ~ _doctype._system ~ "\"");
            else if (_doctype._system.length)
                o.writeString(" SYSTEM \"" ~ _doctype._system ~ "\"");

            o.writeString(">");

            if (lf)
                o.writeString(_endl);
        }

        foreach(it; _stylesheets) {
            it.print(o);

            if (lf)
                o.writeString(_endl);
        }

/*      if (!_additional.empty()) {
            o.writeString(_additional);

            if (lf)
                o.writeString(_endl;
        } */
    }

    PRETTY_FLAGS _pretty;
    char[] _endl;        // line ending string: "\n" or "\r\n"

    char[] _version;
    char[] _encoding;

    DocType _doctype;

    StyleSheetList _stylesheets;

//  char[] _additional;

    int _standalone;
};


enum WRITE_MODE {
    FORMAT_PLAIN,       /// write XML without any white space
    FORMAT_SMART,       /// preserve original white space and comments if present; pretty print otherwise
    FORMAT_ORIGINAL,    /// write XML stream preserving original white space and comments
    FORMAT_PRETTY       /// pretty print node to stream without preserving original white space
};


/// in memory representation of an XML node
class XMLNode
{
public:
    char[] _name;    // instead of deriving from string class

    /// map of XML node attributes
    public typedef char[][char[]] AttributeMap;

    /// internal children node list
    class Children : public list!(XMLNode)
    {
    public:
        /// replace the content by deep copies of the nodes in other
        void assign(Children other)
        {
            clear();

            foreach(node; other)
                push_back(new XMLNode(node));
        }
    };

    this(in char[] name)
    {
        _name = name;
        _children = new Children;
    }

    this(in char[] name, char[] leading)
    {
        this(name);

        _leading = leading;
    }

    /// deep copy of another node including its children
    this(XMLNode other)
    {
        this(other._name);

        copy_attributes(other);

        _leading = other._leading;
        _content = other._content;
        _end_leading = other._end_leading;
        _trailing = other._trailing;

version(XMLNODE_LOCATION) {
        _location = other._location;
}

        foreach(node; other._children)
            _children.push_back(new XMLNode(node));
    }

/* leads to access violation
    ~this()
    {
        _children.clear();
    }
*/

    /// reset the node to an empty, unnamed state; the children are deleted
    void clear()
    {
        _leading.length = 0;
        _content.length = 0;
        _end_leading.length = 0;
        _trailing.length = 0;

        _attributes = null;
        _children.clear();

        _name.length = 0;
    }

    /// copy children, attributes and text of another node; the name is left unchanged
    XMLNode assign(XMLNode other)
    {
        _children.assign(other._children);

        copy_attributes(other);

        _leading = other._leading;
        _content = other._content;
        _end_leading = other._end_leading;
        _trailing = other._trailing;

        return this;
    }

    /// add a new child node
    void add_child(XMLNode child)
    {
        _children.push_back(child);
    }

    /// write access to an attribute
    void put(in char[] attr_name, in char[] value)
    {
        _attributes[attr_name] = value;
    }

    /// index operator read access to an attribute
    char[] opIndex(in char[] attr_name)
    {
        return _attributes[attr_name];
    }

    /// index operator write access to an attribute
    // D rewrites "node[attr_name] = value" as "node.opIndexAssign(value, attr_name)":
    // the value comes first. The parameters were declared the other way round,
    // which stored the value as key.
    void opIndexAssign(in char[] value, in char[] attr_name)
    {
        _attributes[attr_name] = value;
    }

    /// read only access to an attribute
    char[] get(in char[] attr_name, in char[] def="")
    {
        char[]* found = attr_name in _attributes;

        if (found)
            return *found;
        else
            return def;
    }

    /// write access to an attribute; a missing attribute is created with value def
    char[]* get_ref(in char[] attr_name, in char[] def="")
    {
        char[]* found = attr_name in _attributes;

        if (found)
            return found;
        else {
            _attributes[attr_name] = def;
            return &_attributes[attr_name];
        }
    }

    /// convenient value access in children node
    char[] subvalue(in char[] name, char[] attr_name, int n=0)
    {
        XMLNode node = find(name, n);

        if (node)
            return node.get(attr_name);
        else
            return "";
    }

    /// convenient value access in children node
    char[]* subvalue_ref(in char[] name, char[] attr_name, int n=0)
    {
        XMLNode node = find(name, n);

        // create the missing child instead of storing the attribute in this
        // (the parent) node, which is what the previous implementation did
        if (!node) {
            node = new XMLNode(name);
            add_child(node);
        }

        return node.get_ref(attr_name);
    }

    /// convenient storage of distinct values in children node
    // NOTE(review): creates the child node if needed, but there is no value
    // parameter, so nothing is stored -- confirm the intended signature.
    char[] put_subvalue(in char[] name, char[] attr_name, int n=0)
    {
        XMLNode node = find(name, n);

        if (!node) {
            node = new XMLNode(name);
            add_child(node);
        }

        return node.get(attr_name);
    }

    Children get_children()
    {
        return _children;
    }

    AttributeMap get_attributes()
    {
        return _attributes;
    }

    /// decoded node content
    char[] get_content()
    {
        return DecodeXMLString(_content);
    }

    /// store node content in encoded form
    void set_content(in char[] s, bool cdata=false)
    {
        _content = EncodeXMLString(s, cdata);
    }

version(XMLNODE_LOCATION) {
    XMLLocation get_location() {return _location;}
}

    /// write node with children tree to output stream
    void write(Stream o, XMLFormat format, WRITE_MODE mode=WRITE_MODE.FORMAT_SMART, int indent=0)
    {
        switch(mode) {
          case WRITE_MODE.FORMAT_PLAIN:
            plain_write_worker(o);
            break;

          case WRITE_MODE.FORMAT_PRETTY:
            pretty_write_worker(o, format, indent);
            break;

          case WRITE_MODE.FORMAT_ORIGINAL:
            write_worker(o, indent);
            break;

          default:    // WRITE_MODE.FORMAT_SMART
            smart_write_worker(o, format, indent);
        }
    }

protected:
    Children _children;
    AttributeMap _attributes;

    char[] _leading;        // UTF-8 encoded
    char[] _content;        // UTF-8 and entity encoded, may contain CDATA sections; decode with DecodeXMLString()
    char[] _end_leading;    // UTF-8 encoded
    char[] _trailing;       // UTF-8 encoded

version(XMLNODE_LOCATION) {
    XMLLocation _location;
}

    /// replace the attributes by a copy of other's attributes
    // (plain assignment of an associative array would share one map between both nodes)
    void copy_attributes(XMLNode other)
    {
        _attributes = null;

        foreach(key,value; other._attributes)
            _attributes[key] = value;
    }

    XMLNode get_first_child()
    {
        if (!_children.empty())
            return _children.front();
        else
            return null;
    }

    /// find the n.th (zero based) child with the given name
    XMLNode find(in char[] name, int n=0)
    {
        foreach(node; _children)
            if (node._name == name)
                if (!n--)
                    return node;

        return null;
    }

    /// find the n.th (zero based) child with the given name and attribute value
    XMLNode find(in char[] name, char[] attr_name, char[] attr_value, int n=0)
    {
        foreach(node; _children)
            if (node._name==name && node.get(attr_name)==attr_value)
                if (!n--)
                    return node;

        return null;
    }

    /// XPath find function
    XMLNode find_relative(in char[] path)
    {
        XMLNode node = this;

        // parse relative path
        while(path.length) {
            node = node.get_child_relative(path, false);

            if (!node)
                return null;

            // path may be empty now, so test the length before looking at the first character
            if (path.length && path[0]=='/')
                path = path[1..length];
        }

        return node;
    }

    /// relative XPath create function; returns null for an invalid path
    XMLNode create_relative(in char[] path)
    {
        XMLNode node = this;

        // parse relative path
        while(path.length) {
            node = node.get_child_relative(path, true);

            if (!node)
                return null;    // invalid path component (e.g. "//")

            if (path.length && path[0]=='/')
                path = path[1..length];
        }

        return node;
    }

    /// write the given number of indentation steps
    void write_indent(Stream o, int indent)
    {
        for(int i=indent; i--; )
            o.writeString(XML_INDENT_SPACE);
    }

    /// write "<name" followed by all attributes; the tag is left open
    void write_open_tag(Stream o)
    {
        o.writeString('<' ~ EncodeXMLString(_name));

        foreach(key,value; _attributes)
            o.writeString(' ' ~ EncodeXMLString(key) ~ "=\"" ~ EncodeXMLString(value) ~ "\"");
    }

    /// write node with children tree to output stream using original white space
    void write_worker(Stream o, int indent)
    {
        o.writeString(_leading);
        write_open_tag(o);

        if (!_children.empty() || _content.length) {
            o.writeString('>' ~ _content);

            foreach(child; _children)
                child.write_worker(o, indent+1);

            // closing tag restored; the text between '<' and '>' had been stripped from the file
            o.writeString(_end_leading ~ "</" ~ EncodeXMLString(_name) ~ '>');
        } else
            o.writeString("/>");

        o.writeString(_trailing);
    }

    /// print node without any white space
    void plain_write_worker(Stream o)
    {
        write_open_tag(o);

        if (!_children.empty() || _content.length) {
            o.writeString(">");
            o.writeString(_content);

            foreach(child; _children)
                child.plain_write_worker(o);

            // closing tag restored, see write_worker()
            o.writeString("</" ~ EncodeXMLString(_name) ~ ">");
        } else
            o.writeString("/>");
    }

    /// pretty print node with children tree to output stream
    void pretty_write_worker(Stream o, XMLFormat format, int indent)
    {
        write_indent(o, indent);
        write_open_tag(o);

        if (!_children.empty() || _content.length) {
            o.writeString('>' ~ _content);

            if (!_children.empty()) {
                o.writeString(format._endl);

                foreach(child; _children)
                    child.pretty_write_worker(o, format, indent+1);

                // indent the closing tag only when it starts a new line; for a
                // node with text content only this would append spaces to the text
                write_indent(o, indent);
            }

            // closing tag restored, see write_worker()
            o.writeString("</" ~ EncodeXMLString(_name) ~ '>' ~ format._endl);
        } else
            o.writeString("/>" ~ format._endl);
    }

    /// write node with children tree to output stream using smart formating
    void smart_write_worker(Stream o, XMLFormat format, int indent)
    {
        if (!_leading.length)
            write_indent(o, indent);
        else
            o.writeString(_leading);

        write_open_tag(o);

        if (_children.empty() && !_content.length)
            o.writeString("/>");
        else {
            o.writeString(">");

            if (!_content.length)
                o.writeString(format._endl);
            else
                o.writeString(_content);

            if (!_children.empty()) {
                foreach(child; _children)
                    child.smart_write_worker(o, format, indent+1);

                if (!_end_leading.length)
                    write_indent(o, indent);
                else
                    o.writeString(_end_leading);
            } else
                o.writeString(_end_leading);

            // closing tag restored, see write_worker()
            o.writeString("</" ~ EncodeXMLString(_name) ~ '>');
        }

        if (!_trailing.length)
            o.writeString(format._endl);
        else
            o.writeString(_trailing);
    }

protected:
    /// resolve (and optionally create) the child addressed by the first component
    /// of path; the consumed component is removed from path
    XMLNode get_child_relative(inout char[] path, bool create)
    {
        int slash_pos = .find(path, '/');
        if (slash_pos == 0)
            return null;

        size_t l = slash_pos!=-1? slash_pos: path.length;
        char[] comp = path[0 .. l];
        path = path[l .. length];

        // look for [n] and [@attr_name="attr_value"] expressions in path components
        int bracket_pos = .find(comp, '[');
        l = bracket_pos!=-1? bracket_pos: comp.length;
        char[] child_name = comp[0 .. l];
        char[] attr_name, attr_value;

        int n = 0;
        if (bracket_pos != -1) {
            char[] expr = unescape(comp[bracket_pos..length], '[', ']');

            n = cast(int)atoi(expr);    // read index number

            if (n)
                n = n - 1;    // convert into zero based index

            int at_pos = .find(expr, '@');

            if (at_pos != -1) {
                expr = expr[at_pos+1 .. length];
                int equal_pos = .find(expr, '=');

                // read attribute name and value
                if (equal_pos != -1) {
                    attr_name = unescape(expr[0 .. equal_pos]);
                    attr_value = unescape(expr[equal_pos+1 .. length]);
                }
            }
        }

        XMLNode child;

        if (!attr_name.length)
            // search n.th child node with specified name
            child = find(child_name, n);
        else
            // search n.th child node with specified name and matching attribute value
            child = find(child_name, attr_name, attr_value, n);

        if (!child && create) {
            child = new XMLNode(child_name);
            add_child(child);

            // the attribute belongs to the new child (it was set on the parent
            // before, so a later lookup of the same path never found the child)
            if (attr_name.length)
                child.put(attr_name, attr_value);
        }

        return child;
    }
};


/// iterator access to children nodes with name filtering
class XMLChildrenFilter
{
public:
    this(XMLNode.Children children, char[] name)
    {
        _children = children;
        _filter_name = name;
    }

    this(XMLNode node, char[] name)
    {
        this(node.get_children(), name);
    }

    /// foreach support: visits only the children named _filter_name
    int opApply(int delegate(inout XMLNode) dg)
    {
        int result = 0;

        foreach(child; _children)
            if (child._name == _filter_name) {
                result = dg(child);

                if (result)
                    break;
            }

        return result;
    }

protected:
    XMLNode.Children _children;
    char[] _filter_name;
};


/// iterator for XML trees
class XMLPos
{
public:
    this(XMLNode root)
    {
        _root = root;
        _cur = root;
        _stack = new stack!(XMLNode);
    }

    this(XMLPos other)
    {
        _root = other._root;
        _cur = other._cur;

        // don't copy _stack
        _stack = new stack!(XMLNode);
    }

    this(XMLNode node, char[] name)
    {
        this(node);
        smart_create(name);
    }

    this(XMLNode node, char[] name, char[] attr_name, char[] attr_value)
    {
        this(node);
        smart_create(name, attr_name, attr_value);
    }

    this(XMLPos other, char[] name)
    {
        this(other);
        smart_create(name);
    }

    this(XMLPos other, char[] name, char[] attr_name, char[] attr_value)
    {
        this(other);
        smart_create(name, attr_name, attr_value);
    }

    /// access to current node
    XMLNode cur()
    {
        return _cur;
    }

    /// automatic access to current node
    XMLNode opCast() {return _cur;}

    /// call operator access to current node (instead of the C++ operator-> and operator*)
    XMLNode opCall()
    {
        return _cur;
    }

    /// attribute access
    char[] get(in char[] attr_name, in char[] def="")
    {
        return _cur.get(attr_name, def);
    }

    /// attribute setting
    void put(in char[] attr_name, in char[] value)
    {
        _cur.put(attr_name, value);
    }

    /// index operator read access to an attribute of the current node
    char[] opIndex(in char[] attr_name)
    {
        return _cur[attr_name];
    }

    /// index operator write access to an attribute of the current node
    // D passes the assigned value first, then the index
    void opIndexAssign(in char[] value, in char[] attr_name)
    {
        _cur.put(attr_name, value);
    }

    /// insert children when building tree
    void add_down(XMLNode child)
    {
        _cur.add_child(child);
        go_to(child);
    }

    /// go back to previous position
    bool back()
    {
        if (!_stack.empty()) {
            _cur = _stack.pop();
            return true;
        } else
            return false;
    }

    /// go down to first child
    bool go_down()
    {
        XMLNode node = _cur.get_first_child();

        if (node) {
            go_to(node);
            return true;
        } else
            return false;
    }

    /// search for child and go down
    bool go_down(in char[] name, int n=0)
    {
        XMLNode node = _cur.find(name, n);

        if (node) {
            go_to(node);
            return true;
        } else
            return false;
    }

    /// move XPath like to position in XML tree
    bool go(in char[] path)
    {
        XMLNode node = _cur;

        // Is this an absolute path?
        if (path.length>0 && path[0]=='/') {
            node = _root;
            path = path[1..length];
        }

        node = node.find_relative(path);

        if (node) {
            go_to(node);
            return true;
        } else
            return false;
    }

    /// create child nodes using XPath notation and move to the deepest child
    bool create_relative(in char[] path)
    {
        XMLNode node = _cur.create_relative(path);
        if (!node)
            return false;    // invalid path specified

        go_to(node);
        return true;
    }

    /// create node and move to it
    void create(in char[] name)
    {
        add_down(new XMLNode(name));
    }

    /// create node if not already existing and move to it
    void smart_create(in char[] name)
    {
        XMLNode node = _cur.find(name);

        if (node)
            go_to(node);
        else
            add_down(new XMLNode(name));
    }

    /// search matching child node identified by key name and an attribute value
    void smart_create(in char[] name, char[] attr_name, char[] attr_value)
    {
        XMLNode node = _cur.find(name, attr_name, attr_value);

        if (node)
            go_to(node);
        else {
            node = new XMLNode(name);
            add_down(node);
            node.put(attr_name, attr_value);
        }
    }

    char[] str() {return _cur._name;}

protected:
    XMLNode _root;
    XMLNode _cur;
    stack!(XMLNode) _stack;

    /// go to specified node
    void go_to(XMLNode child)
    {
        _stack.push(_cur);
        _cur = child;
    }
};


/// type converter for boolean data
class XMLBool
{
public:
    this(bool value=false)
    {
        _value = value;
    }

    this(in char[] value, bool def=false)
    {
        // value.length replaces the C idiom "value && *value", which reads
        // past the end of an empty, non-null slice
        if (value.length)
            _value = !icmp(value, XS_TRUE);
        else
            _value = def;
    }

    this(XMLNode node, char[] attr_name, bool def=false)
    {
        char[] value = node.get(attr_name);

        if (value.length)
            _value = !icmp(value, XS_TRUE);
        else
            _value = def;
    }

    bool opCast()
    {
        return _value;
    }

    bool opNot()
    {
        return !_value;
    }

    char[] str()
    {
        return _value? XS_TRUE: XS_FALSE;
    }

protected:
    bool _value;

private:
    void opAssign(XMLBool);    // disallow assignment operations
};

/// type converter for boolean data with write access
class XMLBoolRef
{
public:
    this(XMLNode node, char[] attr_name, bool def=false)
    {
        _pref = node.get_ref(attr_name);

        if (!(*_pref).length)
            assign(def);
    }

    bool opCast()
    {
        return !icmp(*_pref, XS_TRUE);
    }

    bool opNot()
    {
        return icmp(*_pref, XS_TRUE)? true: false;
    }

    /// invert the stored value
    void toggle()
    {
        // opNot() already is the inverted value; "!opNot()" stored the current value again
        assign(opNot());
    }

    XMLBoolRef opAssign(bool value)
    {
        assign(value);

        return this;
    }

    void assign(bool value)
    {
        *_pref = value? XS_TRUE: XS_FALSE;
    }

protected:
    char[]* _pref;
};


/// type converter for integer data
class XMLInt
{
public:
    this(int value)
    {
        _value = value;
    }

    this(in char[] value, int def=0)
    {
        if (value.length)
            _value = cast(int)atoi(value);
        else
            _value = def;
    }

    this(XMLNode node, char[] attr_name, int def=0)
    {
        char[] value = node.get(attr_name);

        if (value.length)
            _value = cast(int)atoi(value);
        else
            _value = def;
    }

    int opCast()
    {
        return _value;
    }

    char[] str()
    {
        return format(XS_INTFMT, _value);
    }

protected:
    int _value;

private:
    void opAssign(XMLInt);    // disallow assignment operations
};

/// type converter for integer data with write access
class XMLIntRef
{
public:
    this(XMLNode node, char[] attr_name, int def=0)
    {
        _pref = node.get_ref(attr_name);

        if (!(*_pref).length)
            assign(def);
    }

    XMLIntRef opAssign(int value)
    {
        assign(value);

        return this;
    }

    int opCast()
    {
        return cast(int)atoi(*_pref);
    }

    void assign(int value)
    {
        *_pref = format(XS_INTFMT, value);
    }

protected:
    char[]* _pref;
};


/// type converter for numeric data
class XMLDouble
{
public:
    this(double value)
    {
        _value = value;
    }

    this(in char[] value, double def=0.)
    {
        if (value.length)
            _value = toDouble(value);
        else
            _value = def;
    }

    this(XMLNode node, char[] attr_name, double def=0.)
    {
        char[] value = node.get(attr_name);

        if (value.length)
            _value = toDouble(value);
        else
            _value = def;
    }

    double opCast()
    {
        return _value;
    }

    char[] str()
    {
        return format(XS_FLOATFMT, _value);
    }

protected:
    double _value;

private:
    void opAssign(XMLDouble);    // disallow assignment operations
};

/// type converter for numeric data with write access
class XMLDoubleRef
{
public:
    this(XMLNode node, char[] attr_name, double def=0.)
    {
        _pref = node.get_ref(attr_name);

        if (!(*_pref).length)
            assign(def);
    }

    XMLDoubleRef opAssign(double value)
    {
        assign(value);

        return this;
    }

    double opCast()
    {
        return toDouble(*_pref);
    }

    void assign(double value)
    {
        *_pref = format(XS_FLOATFMT, value);
    }

protected:
    char[]* _pref;
};


/// type converter for string data
class XMLString
{
public:
    this(in char[] value)
    {
        _value = value;
    }

    // no default for def: with a default both constructors matched a
    // one-argument call, which made that call ambiguous
    this(in char[] value, in char[] def)
    {
        if (value.length)
            _value = value;
        else
            _value = def;
    }

    this(XMLNode node, char[] attr_name, char[] def=XS_EMPTY)
    {
        char[] value = node.get(attr_name);

        if (value.length)
            _value = value;
        else
            _value = def;
    }

    char[] str()
    {
        return _value;
    }

protected:
    char[] _value;

private:
    void opAssign(XMLString);    // disallow assignment operations
};

/// type converter for string data with write access
class XMLStringRef
{
public:
    this(XMLNode node, char[] attr_name, char[] def=XS_EMPTY)
    {
        _pref = node.get_ref(attr_name);

        if (!(*_pref).length)
            assign(def);
    }

    this(in char[] node_name, XMLNode node, char[] attr_name, char[] def=XS_EMPTY)
    {
        _pref = node.subvalue_ref(node_name, attr_name);

        if (!(*_pref).length)
            assign(def);
    }

    XMLStringRef opAssign(in char[] value)
    {
        assign(value);

        return this;
    }

    char[] str()
    {
        return *_pref;
    }

    void assign(in char[] value)
    {
        *_pref = value;
    }

protected:
    char[]* _pref;
};


/// read the attribute "key" of the current node into var if it is present and
/// not empty; var keeps its previous value otherwise. int variables are
/// converted with atoi(), all other types are assigned the string directly.
// The two former templates had identical names and parameter lists, used "out"
// (which resets var on entry and so destroyed the caller's default) and called
// .empty() on a char[].
void read_option(T)(inout T var, XMLPos cfg, char[] key)
{
    char[] val = cfg.get(key);

    if (val.length) {
        static if (is(T == int))
            var = cast(int)atoi(val);
        else
            var = val;
    }
}


/// line buffer for XS-native parser
class Buffer
{
    this()
    {
        _buffer.length = BUFFER_LEN;

        reset();
    }

    void reset()
    {
        _idx_write = 0;
        _buffer_str.length = 0;
    }

    /// append a character; the buffer is doubled when it is full
    void append(char c)
    {
        if (_idx_write >= _buffer.length)
            _buffer.length = _buffer.length << 1;

        _buffer[_idx_write++] = c;
    }

    /// returns UTF-8 encoded buffer content; the result is a slice of the
    /// internal buffer and has to be called before get_tag()/get_attributes()
    char[] str()
    {
        _buffer_str = _buffer[0 .. _idx_write];

        return _buffer_str;
    }

    size_t len()
    {
        return _idx_write;
    }

    /// true if the buffered text ends with the CDATA terminator "]]>"
    bool has_CDEnd()
    {
        // compare the last three characters (the previous code compared the
        // two-character slice [0..2] with a three-character string)
        return _idx_write>=3 && _buffer[_idx_write-3 .. _idx_write]=="]]>";
    }

    /// tag name of the buffered tag, including a leading '?' for processing instructions
    char[] get_tag()
    {
        int i;

        // skip opening tag characters
        if (_buffer_str.length>=1 && _buffer_str[0]=='<') {
            if (_buffer_str.length>=2 && _buffer_str[1]=='/')
                i = 2;
            else
                i = 1;
        } else
            i = 0;

        int j = i;

        if (j<_buffer_str.length && _buffer_str[j]=='?')
            ++j;

        // find end of tag name
        while(j<_buffer_str.length && isxmlsym(_buffer_str[j]))
            ++j;

        return _buffer_str[i..j];
    }

    /// read attributes and values
    void get_attributes(out XMLNode.AttributeMap attributes)
    {
        int i;

        // skip opening tag characters
        if (_buffer_str.length>=1 && _buffer_str[0]=='<') {
            // test the second character ([1]); the '?' test used the still unset index i
            if (_buffer_str.length>=2 && (_buffer_str[1]=='/' || _buffer_str[1]=='?'))
                i = 2;
            else
                i = 1;
        } else
            i = 0;

        // find end of tag name
        while(i<_buffer_str.length && isxmlsym(_buffer_str[i]))
            ++i;

        // read attributes from buffer
        while(i<_buffer_str.length && _buffer_str[i]!='>' && _buffer_str[i]!='/') {
            // both scans are bounded, a truncated tag must not run past the buffer end
            while(i<_buffer_str.length && isspace(cast(dchar)_buffer_str[i]))
                ++i;

            int attr_idx = i;

            while(i<_buffer_str.length && isxmlsym(_buffer_str[i]))
                ++i;

            if (i>=_buffer_str.length || _buffer_str[i] != '=')
                break;    //@TODO error handling

            int attr_end = i;

            if (++i>=_buffer_str.length || _buffer_str[i]!='"' && _buffer_str[i]!='\'')
                break;    //@TODO error handling

            char delim = _buffer_str[i];
            int value_idx = ++i;

            while(i<_buffer_str.length && _buffer_str[i]!=delim)
                ++i;

            int value_end = i;

            if (i < _buffer_str.length)
                ++i;    // '"'

            char[] name_str = _buffer_str[attr_idx .. attr_end];
            char[] value_str = _buffer_str[value_idx .. value_end];

            attributes[name_str.dup] = DecodeXMLString(value_str).dup;
        }
    }

protected:
    char[] _buffer;        // UTF-8 encoded
    int _idx_write;
    char[] _buffer_str;    // UTF-8 encoded
};


/// XML reader base class
class XMLReaderBase
{
public:
    this(XMLNode node)
    {
        _pos = new XMLPos(node);
        _endl_defined = false;
        _utf8 = false;
        _last_tag = TAG.TAG_NONE;
        _format = new XMLFormat;
        _errors = new XMLErrorList;
        _warnings = new XMLErrorList;
    }

    /// tokenize the input into tags and text and feed the handler functions
    bool parse()
    {
        Buffer buffer = new Buffer;
        int c = get();
        bool in_comment = false;

        while(c != EOF) {
            if (in_comment || c=='<') {
                buffer.append(cast(char)c);

                // read start or end tag
                for(;;) {
                    c = get();

                    if (c == EOF)
                        break;

                    buffer.append(cast(char)c);

                    if (c == '>')
                        break;
                }

                char[] b = buffer.str();

                if (in_comment || (b.length>=4 && b[1..4]=="!--")) {
                    // XML comment
                    DefaultHandler(b);

                    // a comment may contain '>' characters: it continues until a
                    // chunk ends with "-->" (this test was inverted before)
                    if (b.length>=3 && b[length-3..length]=="-->")
                        in_comment = false;
                    else
                        in_comment = true;

                    c = get();
                } else if (b.length >= 2) {
                    if (b[1] == '/') {
                        // end tag

                        /*@TODO error handling
                        char[]& tag = buffer.get_tag();

                        if (tag != last_opened_tag) {
                            ERROR
                        }
                        */

                        EndElementHandler();

                        c = get();
                    } else if (b[1] == '?') {
                        // XML declaration
                        char[] tag = buffer.get_tag();

                        if (tag == "?xml") {
                            XMLNode.AttributeMap attributes;
                            buffer.get_attributes(attributes);

                            char[]* ver = "version" in attributes;
                            char[]* encoding = "encoding" in attributes;

                            int standalone;
                            char[]* found = "standalone" in attributes;
                            if (found)
                                standalone = !icmp(*found, "yes");
                            else
                                standalone = -1;

                            XmlDeclHandler(ver, encoding, standalone);

                            if (encoding && !icmp(*encoding, "utf-8"))
                                _utf8 = true;

                            c = eat_endl();
                        } else if (tag == "?xml-stylesheet") {
                            XMLNode.AttributeMap attributes;
                            buffer.get_attributes(attributes);

                            auto phref = "href" in attributes;
                            auto ptype = "type" in attributes;
                            auto palternate = "alternate" in attributes;

                            StyleSheet stylesheet = new StyleSheet(phref?*phref:"", ptype?*ptype:"", palternate?!icmp(*palternate,"yes"):false);

                            auto ptitle = "title" in attributes;
                            if (ptitle)
                                stylesheet._title = *ptitle;

                            auto pmedia = "media" in attributes;
                            if (pmedia)
                                stylesheet._media = *pmedia;

                            auto pcharset = "charset" in attributes;
                            if (pcharset)
                                stylesheet._charset = *pcharset;

                            _format._stylesheets.push_back(stylesheet);

                            c = eat_endl();
                        } else {
                            DefaultHandler(b);
                            c = get();
                        }
                    } else if (b[1] == '!') {
                        if (b.length>=10 && b[2..10]=="DOCTYPE ") {
                            _format._doctype.parse(b[10..length]);

                            c = eat_endl();
                        } else if (b.length>=9 && b[2..9]=="[CDATA[") {
                            // parse strings
                            while(!buffer.has_CDEnd()) {
                                c = get();

                                if (c == EOF)
                                    break;

                                buffer.append(cast(char)c);
                            }

                            DefaultHandler(buffer.str());

                            c = get();
                        } else {
                            // any other "<!...>" declaration: keep it as content and go on.
                            // Without this branch c stayed '>' and was stored as stray text.
                            DefaultHandler(b);

                            c = get();
                        }
                    } else {
                        // start tag
                        char[] tag = buffer.get_tag();

                        if (tag.length) {
                            XMLNode.AttributeMap attributes;
                            buffer.get_attributes(attributes);

                            StartElementHandler(tag.dup, attributes);

                            if (b[length-2] == '/')
                                EndElementHandler();
                        }

                        c = get();
                    }
                } else    // b.length < 2
                    c = get();
            } else {
                buffer.append(cast(char)c);

                // read white space
                for(;;) {
                    // check for the encoding of the first line end
                    if (!_endl_defined) {
                        if (c == '\n') {
                            _format._endl = "\n";
                            _endl_defined = true;
                        } else if (c == '\r') {
                            _format._endl = "\r\n";
                            _endl_defined = true;
                        }
                    }

                    c = get();

                    if (c == EOF)
                        break;

                    if (c == '<')
                        break;

                    buffer.append(cast(char)c);
                }

                DefaultHandler(buffer.str());
            }

            buffer.reset();
        }

        return true;    //TODO return false on invalid XML
    }

public:
    /// read XML stream into XML tree below _pos
    void read()
    {
        if (!parse()) {
            // the error object has to be allocated (it was a null reference before)
            XMLError error = new XMLError;

            error._message = "XML parsing error";
            //error._line = ;
            //error._column = ;

            _errors.push_back(error);
        }

        finish_read();
    }

    /// return current parser position as string
    char[] get_position()
    {
        /*@TODO display parser position in case of errors
        return format("%s(%d) [column %d] : %s", _systemId, _line, _column, _message);
        */
        return "";
    }

    XMLFormat get_format() {return _format;}

    /// line ending found in the input, "\n" if none has been seen
    char[] get_endl() {return _endl_defined? _format._endl: "\n";}

    XMLErrorList get_errors() {return _errors;}
    XMLErrorList get_warnings() {return _warnings;}

    void clear_errors()
    {
        _errors.clear();
        _warnings.clear();
    }

version(XMLNODE_LOCATION) {
    char[] _display_path;    // character pointer for fast reference in XMLLocation

    XMLLocation get_location()
    {
        return new XMLLocation;    //@TODO XMLLocation for XS-native
    }
}

protected:
    XMLPos _pos;

    char[] _content;    // UTF-8 encoded
    enum TAG {TAG_NONE, TAG_START, TAG_END};
    TAG _last_tag;

    XMLErrorList _errors;
    XMLErrorList _warnings;

    XMLFormat _format;
    bool _endl_defined;

    /// read one character from the input; returns EOF at the end
    abstract int get();

    /// read the next character, skipping one "\r", "\n" or "\r\n" line end
    int eat_endl()
    {
        int c = get();

        if (c == '\r')
            c = get();

        if (c == '\n')
            c = get();

        return c;
    }

    bool _utf8;

    /// attach text left over after the last tag as trailing text
    void finish_read()
    {
        if (_pos()._children.empty())
            _pos()._trailing ~= _content;
        else
            _pos()._children.back()._trailing ~= _content;

        _content.length = 0;
    }

    /// store XML version and encoding into XML reader
    void XmlDeclHandler(in char[]* ver, char[]* encoding, int standalone)
    {
        if (ver)
            _format._version = *ver;

        if (encoding)
            _format._encoding = *encoding;

        _format._standalone = standalone;
    }

    /// notifications about XML start tag
    void StartElementHandler(in char[] name, XMLNode.AttributeMap attributes)
    {
        int i = 0;

        // search for content end leaving only white space for leading
        for(i=_content.length; i>0; --i)
            if (!isspace(cast(dchar)_content[i-1]))
                break;

        if (i > 0) {
            if (_pos()._children.empty()) {    // no children in last node?
                if (_last_tag == TAG.TAG_START)
                    _pos()._content ~= _content[0 .. i];
                else if (_last_tag == TAG.TAG_END)
                    _pos()._trailing ~= _content[0 .. i];
                else    // TAG_NONE at root node
                    i = 0;
            } else
                _pos()._children.back()._trailing ~= _content[0 .. i];
        }

        char[] leading;

        // copy the slice: _content is emptied below and appended to again
        // (presumably reusing the same storage -- the copy is the safe choice)
        if (i < _content.length)
            leading = _content[i .. length].dup;

        XMLNode node = new XMLNode(name, leading);

        _pos.add_down(node);

version(XMLNODE_LOCATION) {
        node._location = get_location();
}

        node._attributes = attributes;

        _last_tag = TAG.TAG_START;
        _content.length = 0;
    }

    /// notifications about XML end tag
    void EndElementHandler()
    {
        int s = 0;
        int i = 0;
        int e = _content.length;

        // condition restored: the text between '<' and '>' had been stripped from the file
        if (_content.length>=9+3 && _content[0..9]=="<![CDATA[" && _content[e-3..e]=="]]>") {
            s += 9;
            i = (e-=3);
        } else {
            // search for content end leaving only white space for _end_leading
            for(i=e; i>0; --i)
                if (!isspace(cast(dchar)_content[i-1]))
                    break;
        }

        if (i > s) {
            if (_pos()._children.empty())    // no children in current node?
                _pos()._content ~= _content[s .. i];
            else if (_last_tag == TAG.TAG_START)
                _pos()._content ~= _content[s .. i];
            else
                _pos()._children.back()._trailing ~= _content[s .. i];
        }

        // copy the slice, see StartElementHandler()
        if (i != e)
            _pos()._end_leading = _content[i .. e].dup;

        _pos.back();

        _last_tag = TAG.TAG_END;
        _content.length = 0;
    }

    /// store content, white space and comments
    void DefaultHandler(in char[] s)
    {
        _content ~= s;
    }
};


/// XML reader class
class XMLReader : public XMLReaderBase
{
public:
    this(XMLNode node, Stream i)
    {
        super(node);

        _in = i;
    }

    /// read one character from XML stream
    int get()
    {
        // Seekable streams report eof() as soon as the last character has been
        // read, so testing eof() only after getc() dropped that character.
        if (_in.eof())
            return EOF;

        int c = _in.getc();

        // NOTE(review): unseekable streams are expected to report eof() only
        // after a failed read -- confirm against the std.stream documentation
        if (!_in.seekable && _in.eof())
            return EOF;

        return c;
    }

protected:
    Stream _in;
};


/// XML document holder
class XMLDoc : public XMLNode
{
public:
    this()
    {
        super("");

        _format = new XMLFormat;
        _errors = new XMLErrorList;
    }

    this(in char[] path)
    {
        this();

        read_file(path);
    }

    /// read the document from a file; returns false if errors were recorded
    bool read_file(in char[] path)
    {
        Stream i = new BufferedFile(path, FileMode.In);
        scope(exit) i.close();

        XMLReader reader = new XMLReader(this, i);

        return read(reader, path);
    }

    /// read the document from a memory buffer
    bool read_buffer(in char[] buffer, in char[] system_id="")
    {
        return read(new MemoryStream(buffer), system_id);
    }

    /// read the document from a stream
    bool read(in Stream i, in char[] system_id="")
    {
        XMLReader reader = new XMLReader(this, i);

        return read(reader, system_id);
    }

    /// read the document using the given reader; takes over format and errors from it
    bool read(XMLReaderBase reader, char[] display_path)
    {
version(XMLNODE_LOCATION) {
        // make a string copy to handle temporary string objects
        _display_path = display_path.dup;
        reader._display_path = _display_path;
}

        reader.clear_errors();
        reader.read();

        _format = reader.get_format();
        _format._endl = reader.get_endl();

        if (!reader.get_errors().empty()) {
            _errors = reader.get_errors();
            return false;
        }

        return true;
    }

    /// write XML stream preserving previous white space and comments
    void write(Stream o, WRITE_MODE mode=WRITE_MODE.FORMAT_SMART)
    {
        _format.print_header(o, mode!=WRITE_MODE.FORMAT_PLAIN);

        if (_children.length() == 1)
            _children.front().write(o, _format, mode);
        else if (!_children.empty()) {
            //throw Exception("more than one XML root!");
            // only the header is written in this case
            // (a void function cannot "return false" as the previous code did)
            return;
        }
    }

    /// write XML stream with formating
    void write_formating(Stream o)
    {
        write(o, WRITE_MODE.FORMAT_PRETTY);
    }

    /// write the document into a file
    void write_file(in char[] path, WRITE_MODE mode=WRITE_MODE.FORMAT_SMART)
    {
        Stream o = new BufferedFile(path, FileMode.Out);
        scope(exit) o.close();

        write(o, mode);
    }

    /// write the document pretty printed into a file
    void write_formating(in char[] path)
    {
        Stream o = new BufferedFile(path, FileMode.Out);
        scope(exit) o.close();

        write_formating(o);
    }

    XMLFormat _format;
    XMLErrorList _errors;

version(XMLNODE_LOCATION) {
    char[] _display_path;
}
};


/// XML message wrapper
class XMLMessage : public XMLDoc
{
public:
    this(in char[] name)
    {
        _pos = new XMLPos(this);
        _pos.create(name);
    }

    /// serialized document
    char[] toString()
    {
        Stream m = new MemoryStream();

        write(m);

        return m.toString();
    }

    XMLPos _pos;

protected:
    this()
    {
        _pos = new XMLPos(this);
    }
};


/// helper structure to read XML messages from strings
class XMLMessageFromString : public XMLMessage
{
public:
    this(in char[] xml_str, in char[] system_id="")
    {
        read_buffer(xml_str, system_id);
    }
};


/// Reader for XML Messages
class XMLMessageReader : public XMLPos
{
public:
    this(in char[] xml_str, in char[] system_id="")
    {
        // the document has to exist before it is handed to XMLPos
        // (it was never allocated before, so super() and read_buffer() saw null)
        _msg = new XMLDoc;

        super(_msg);

        _msg.read_buffer(xml_str, system_id);
    }

    XMLDoc get_document()
    {
        return _msg;
    }

protected:
    XMLDoc _msg;
};


/// on the fly XML writer
class XMLWriter
{
private:
    this(XMLFormat format, Stream o)
    {
        _out = o;
        _format = format;
        _stack = new stack!(StackEntry);

        format.print_header(_out, false);    // _format._endl is printed in write_pre()
    }

public:
    this(Stream o, in XMLFormat format=new XMLFormat)
    {
        _own_stream = null;

        this(format, o);
    }

    this(in char[] path, in XMLFormat format=new XMLFormat)
    {
        _format = format;
        _own_stream = new BufferedFile(new File(path, FileMode.Out));

        this(format, _own_stream);
    }

    ~this()
    {
        close();
    }

    /// write the final line end and close the stream if it is owned by the writer
    void close()
    {
        if (_out) {
            _out.writeString(_format._endl);

            if (_own_stream) {
                _own_stream.close();
                delete _own_stream;
            }

            _out = null;
        }
    }

    /// create node and move to it
    void create(in char[] name)
    {
        if (!_stack.empty()) {
            StackEntry last = _stack.top();

            // the parent's start tag has to be completed before a child is written
            if (last._state < WRITESTATE.PRE_CLOSED) {
                write_attributes(last);
                close_pre(last);
            }

            last._children = true;
        }

        auto entry = new StackEntry;
        entry._node_name = name;
        _stack.push(entry);

        write_pre(entry);
    }

    /// go back to previous position
    bool back()
    {
        if (!_stack.empty()) {
            write_post(_stack.top());

            _stack.pop();
            return true;
        } else
            return false;
    }

    /// attribute setting
    void put(in char[] attr_name, in char[] value)
    {
        if (!_stack.empty())
            _stack.top()._attributes[attr_name] = value;
    }

    /// index operator write access to an attribute
    // D passes the assigned value first, then the index
    void opIndexAssign(in char[] value, in char[] attr_name)
    {
        if (!_stack.empty())
            _stack.top()._attributes[attr_name] = value;
    }

    /// set the content of the current node
    void set_content(in char[] s, bool cdata=false)
    {
        if (!_stack.empty())
            _stack.top()._content = EncodeXMLString(s, cdata);
    }

    // public for access in StackEntry
    enum WRITESTATE {
        NOTHING, /*PRE,*/ ATTRIBUTES, PRE_CLOSED, /*CONTENT,*/ POST
    };

protected:
    Stream _own_stream;
    Stream _out;
    XMLFormat _format;

    typedef XMLNode.AttributeMap AttrMap;

    /// container for XMLWriter state information
    class StackEntry
    {
    public:
        char[] _node_name;
        AttrMap _attributes;
        char[] _content;
        WRITESTATE _state;
        bool _children;

        this()
        {
            _state = WRITESTATE.NOTHING;
            _children = false;
        }
    };

    stack!(StackEntry) _stack;

    /// terminate the start tag
    void close_pre(StackEntry entry)
    {
        _out.writeString(">");

        entry._state = WRITESTATE.PRE_CLOSED;
    }

    /// write line end, indentation and the beginning of the start tag
    void write_pre(StackEntry entry)
    {
        if (_format._pretty >= PRETTY_FLAGS.PRETTY_LINEFEED)
            _out.writeString(_format._endl);

        // entry already is on the stack, so the loop writes one step less than the stack depth
        if (_format._pretty == PRETTY_FLAGS.PRETTY_INDENT)
            for(size_t i=_stack.length(); --i>0; )
                _out.writeString(XML_INDENT_SPACE);

        _out.writeString('<' ~ EncodeXMLString(entry._node_name));
        //entry._state = PRE;
    }

    /// write the attributes collected for the entry
    void write_attributes(StackEntry entry)
    {
        foreach(key,value; entry._attributes)
            _out.writeString(' ' ~ EncodeXMLString(key) ~ "=\"" ~ EncodeXMLString(value) ~ "\"");

        entry._state = WRITESTATE.ATTRIBUTES;
    }

    /// finish the entry: write pending attributes, content and the end tag
    void write_post(StackEntry entry)
    {
        if (entry._state < WRITESTATE.ATTRIBUTES)
            write_attributes(entry);

        if (entry._children || entry._content.length) {
            if (entry._state < WRITESTATE.PRE_CLOSED)
                close_pre(entry);

            _out.writeString(entry._content);
            //entry._state = CONTENT;

            if (_format._pretty>=PRETTY_FLAGS.PRETTY_LINEFEED && !entry._content.length)
                _out.writeString(_format._endl);

            if (_format._pretty==PRETTY_FLAGS.PRETTY_INDENT && !entry._content.length)
                for(size_t i=_stack.length(); --i>0; )
                    _out.writeString(XML_INDENT_SPACE);

            // closing tag restored; the text between '<' and '>' had been stripped from the file
            _out.writeString("</" ~ EncodeXMLString(entry._node_name) ~ ">");
        } else {
            _out.writeString("/>");
        }

        entry._state = WRITESTATE.POST;
    }
};


/// remove escape characters from string
char[] unescape(in char[]
str, char b, char e) { if (str.length>0 && str[0]==b && str[length-1]==e) return str[1 .. length-1]; else return str; } char[] unescape(in char[] s) { return unescape(s, '"', '"'); } /// encode XML string literals char[] EncodeXMLString(in char[] str, bool cdata=false) { char[] s = str; size_t l = str.length; if (cdata) { // encode the whole string in a CDATA section char[] ret = ""; return ret; } else if (l <= BUFFER_LEN) { char[] buffer; buffer.length = 6*str.length; // worst case """ / "'" int w = 0; foreach(char c; s) switch(c) { case '&': buffer[w++] = '&'; buffer[w++] = 'a'; buffer[w++] = 'm'; buffer[w++] = 'p'; buffer[w++] = ';'; // "&" break; case '<': buffer[w++] = '&'; buffer[w++] = 'l'; buffer[w++] = 't'; buffer[w++] = ';'; // "<" break; case '>': buffer[w++] = '&'; buffer[w++] = 'g'; buffer[w++] = 't'; buffer[w++] = ';'; // ">" break; case '"': buffer[w++] = '&'; buffer[w++] = 'q'; buffer[w++] = 'u'; buffer[w++] = 'o'; buffer[w++] = 't'; buffer[w++] = ';'; // """ break; case '\'': buffer[w++] = '&'; buffer[w++] = 'a'; buffer[w++] = 'p'; buffer[w++] = 'o'; buffer[w++] = 's'; buffer[w++] = ';'; // "'" break; default: if (cast(dchar)c<20 && c!='\t' && c!='\r' && c!='\n') { char[] b = format("&%d;", cast(int)c); foreach(cc; b) buffer[w++] = cc; } else buffer[w++] = c; } buffer.length = w; return buffer; } else { // l > BUFFER_LEN // alternative code for larger strings using a memory stream Stream o = new MemoryStream; foreach(c; str) switch(c) { case '&': o.writeString("&"); break; case '<': o.writeString("<"); break; case '>': o.writeString(">"); break; case '"': o.writeString("""); break; case '\'': o.writeString("'"); break; default: if (cast(dchar)c<20 && c!='\t' && c!='\r' && c!='\n') o.writeString(format("&%d;", cast(int)c)); else o.write(c); } return o.toString(); } } bool icmpLiteral(in char[] str, in char[] lit_str, inout int i) { if (str.length>=lit_str.length && !icmp(str[0..lit_str.length], lit_str)) { i += lit_str.length; return true; } else 
return false; } /// decode XML string literals char[] DecodeXMLString(in char[] str) { char[] buffer; buffer.length = str.length; size_t o = 0; bool modified = false; for(int i=0; i" for(int e=i+9; ; ++e) { if (e+3 > str.length) { buffer[o++] = str[i]; break; } if (str[e..e+3] == "]]>") { i += 9; size_t l = e - i; buffer[o..o+l] = str[i..i+l]; o += l; i += l + 2; modified = true; break; } } } else buffer[o++] = str[i]; } return modified? buffer[0..o]: str; } unittest { debug(string) printf("xmlstorage unittest\n"); // void print_content(XMLDoc doc, FILE* f=stdout) // { // Stream o = new CFile(f, FileMode.Out); // doc.write(o, WRITE_MODE.FORMAT_PLAIN); // } void print_content(XMLDoc doc, Stream o=dout) { doc.write(o, WRITE_MODE.FORMAT_PLAIN); } void assert_content(XMLDoc doc, char[] str) { Stream m = new MemoryStream(); doc.write(m, WRITE_MODE.FORMAT_PLAIN); // writefln(m.toString()); assert(m.toString()==str); } void testEncodeDecode(in char[] a, in char[] b) { assert(DecodeXMLString(b)==a); assert(EncodeXMLString(a)==b); } void testEncodeDecodeCDATA(in char[] a, in char[] b) { assert(DecodeXMLString(b)==a); assert(EncodeXMLString(a, true)==b); } // test string encoding/decoding testEncodeDecode("abc", "abc"); testEncodeDecode("x<>y", "x<>y"); testEncodeDecode("\"&'", ""&'"); testEncodeDecodeCDATA("ABC", ""); // create a XML document and fill with test data auto doc = new XMLDoc(); auto pos = new XMLPos(doc); pos.create("node1"); pos.put("id", "1"); pos["attr"] = "a"; pos.create_relative("a/b/c/d"); pos.back(); pos.smart_create("node2"); (new XMLIntRef(pos(), "id")) = 2; assert(pos.back()); // pos.create("node2"); pos().set_content("hello"); assert(pos.back()); // *pos().subvalue_ref("node2", "attr2", 1) = "xyz"; assert(pos.back()); // (new XMLStringRef("node1", pos(), "attr1")).assign("abc"); print_content(doc); assert_content(doc, "" "" "" "" "" "" "" "" "" "" "hello" "" ); // test read functions and compare content pos = new XMLPos(doc); bool ok = 
pos.go_down("node1"); assert(ok); { assert(pos.get("id")=="1"); ok = pos.go("node2"); assert(ok); { assert(pos.get("id")=="2"); assert(pos.back()); // } ok = pos.go_down("node2", 1); assert(ok); { assert(pos["attr2"]=="xyz"); assert(pos.back()); // } assert(pos.back()); // } assert(!pos.back()); // test foreach() iterations foreach(XMLNode e; new XMLChildrenFilter(doc, "node1")) { writefln(e._name); foreach(XMLNode e; new XMLChildrenFilter(e, "node2")) { writefln(e._name); } } // test XMLWriter and file I/O { auto w = new XMLWriter("test1.xml"); scope(exit) w.close(); w.create("root"); w.create("level 1"); w.create("level 2"); w.back(); // w.back(); // w.back(); } auto d = new XMLDoc(); assert(d.read_file("test1.xml")); d.write_file("test2.xml"); }