function outStruct = xml2struct(input)
%XML2STRUCT Convert XML content into a nested MATLAB structure.
%
%   outStruct = xml2struct(input)
%
%   INPUT is either a Java XML DOM object, the name of an XML file, or a
%   character vector containing XML text. The result is a structure whose
%   fields mirror the element tree:
%     - each child element becomes a field named after the element;
%       elements that occur more than once under the same parent are
%       collected in a cell array, in document order
%     - element text is stored in the field 'Text', CDATA content in
%       'CDATA', comments in 'Comment'
%     - attributes are stored in the sub-structure 'Attributes'
%
%   Name substitutions (needed to obtain usable field names):
%     Element names:   '-' -> '_dash_', ':' -> '_colon_', '.' -> '_dot_',
%                      and afterwards EVERY '_' (including the ones just
%                      inserted) -> 'u_'. So <a-b> is stored as
%                      'au_dashu_b' and <a_b> as 'au_b'.
%     Attribute names: '-' -> '_dash_', ':' -> '_colon_', '.' -> '_dot_'
%                      only.
%
%   An error is raised when INPUT is none of the supported kinds.
%
% Originally written by W. Falkena, ASTI, TUDelft, 21-08-2010
% Attribute parsing speed increase by 40% by A. Wanner, 14-6-2011
% Added CDATA support by I. Smirnov, 20-3-2012
% Modified by X. Mo, University of Wisconsin, 12-5-2012
% Modified by Chao-Yuan Yeh, August 2016

errorMsg = ['%s is not in a supported format.\n\nInput has to be',...
    ' a java xml object, an xml file, or a string in xml format.'];

% Check if input is already a java xml object. The two Xerces classes are
% what the original check accepted; the org.w3c.dom.Node test additionally
% lets through DOM nodes of other implementation classes.
if isa(input, 'org.apache.xerces.dom.DeferredDocumentImpl') || ...
        isa(input, 'org.apache.xerces.dom.DeferredElementImpl') || ...
        isa(input, 'org.w3c.dom.Node')
    xDoc = input;
else
    try
        if exist(input, 'file') == 2
            % input names an existing file
            xDoc = xmlread(input);
        else
            % otherwise treat input as XML text
            try
                xDoc = xmlFromString(input);
            catch
                error(errorMsg, inputname(1));
            end
        end
    catch ME
        % An input type that none of the calls above is defined for is
        % reported with the friendly message; anything else is passed on.
        if strcmp(ME.identifier, 'MATLAB:UndefinedFunction')
            error(errorMsg, inputname(1));
        else
            rethrow(ME)
        end
    end
end

% parse xDoc into a MATLAB structure
outStruct = parseChildNodes(xDoc);

end

% ----- Local function parseChildNodes -----
function [children, ptext, textflag] = parseChildNodes(theNode)
% Recurse over the children of theNode.
%   children - struct with one field per child element name
%   ptext    - struct with the text-like content found directly under
%              theNode (fields 'Text', 'CDATA', 'Comment' as applicable)
%   textflag - name of the field that holds a leaf node's own content
%              (initialised to 'Text')
children = struct;
ptext = struct;
textflag = 'Text';

if ~hasChildNodes(theNode)
    return
end

childNodes = getChildNodes(theNode);
numChildNodes = getLength(childNodes);

for count = 1:numChildNodes
    theChild = item(childNodes, count-1);   % DOM lists are zero-based
    [text, name, attr, childs, textflag] = getNodeData(theChild);

    isTextLike = any(strcmp(name, ...
        {'#text', '#comment', '#cdata_dash_section'}));

    if ~isTextLike
        % Regular element: combine its children, text and attributes.
        node = mergeNodeData(childs, text, attr);

        if isfield(children, name)
            % XML allows the same element to be defined multiple times,
            % put each occurrence in a different cell.
            if ~iscell(children.(name))
                % put existing element into cell format
                children.(name) = {children.(name)};
            end
            children.(name){end+1} = node;
        else
            % previously unknown (new) element
            children.(name) = node;
        end
    else
        % Text-like node: its content belongs to the parent (theNode).
        ptextflag = 'Text';
        if strcmp(name, '#cdata_dash_section')
            ptextflag = 'CDATA';
        elseif strcmp(name, '#comment')
            ptextflag = 'Comment';
        end

        % Ignore content that consists of whitespace only.
        if ~isempty(regexprep(text.(textflag), '[\s]*', ''))
            if ~isfield(ptext, ptextflag) || isempty(ptext.(ptextflag))
                ptext.(ptextflag) = text.(textflag);
            else
                % This is what happens when the document is like this:
                %   <element>Text <!--Comment--> More text</element>
                % The new text is appended to the existing ptext.
                ptext.(ptextflag) = [ptext.(ptextflag) text.(textflag)];
            end
        end
    end
end

end

% ----- Local function mergeNodeData -----
function node = mergeNodeData(childs, text, attr)
% Build the structure stored for one element: its child elements, plus
% its text fields, plus an 'Attributes' field when attr is non-empty.
node = childs;
textfields = fieldnames(text);
for ii = 1:numel(textfields)
    node.(textfields{ii}) = text.(textfields{ii});
end
if ~isempty(attr)
    node.('Attributes') = attr;
end
end

% ----- Local function getNodeData -----
function [text, name, attr, childs, textflag] = getNodeData(theNode)
% Create structure of node info.
%   text     - struct with the node's text-like content
%   name     - node name, mangled so it can be used as a field name
%   attr     - struct of attributes, or [] when the node has none
%   childs   - struct of child elements
%   textflag - field name under which leaf content is stored in text

% make sure name is allowed as structure name
name = toCharArray(getNodeName(theNode))';
name = strrep(name, '-', '_dash_');
name = strrep(name, ':', '_colon_');
name = strrep(name, '.', '_dot_');
if ~strncmp(name, '#', 1)
    % Only element names get the underscore rewrite. Names starting with
    % '#' (e.g. '#cdata-section') are never used as field names, and
    % rewriting them would stop parseChildNodes from recognising
    % '#cdata_dash_section'.
    name = strrep(name, '_', 'u_');
end

attr = parseAttributes(theNode);
if isempty(fieldnames(attr))
    attr = [];
end

% parse child nodes
[childs, text, textflag] = parseChildNodes(theNode);

% Get data from any childless nodes. This version is faster than below.
if isempty(fieldnames(childs)) && isempty(fieldnames(text))
    text.(textflag) = toCharArray(getTextContent(theNode))';
end

% This alternative to the above 'if' block will also work but very slowly.
% if any(strcmp(methods(theNode),'getData'))
%     text.(textflag) = char(getData(theNode));
% end

end

% ----- Local function parseAttributes -----
function attributes = parseAttributes(theNode)
% Create attributes structure: one field per attribute, holding its value
% as text. Returns an empty struct when the node has no attributes.
attributes = struct;
if hasAttributes(theNode)
    theAttributes = getAttributes(theNode);
    numAttributes = getLength(theAttributes);

    for count = 1:numAttributes
        % Suggestion of Adrian Wanner: parse the string form of the
        % attribute instead of calling separate name/value accessors.
        % Assumes the string form is name="value" -- the indexing below
        % skips '="' after the name and drops the final character.
        str = toCharArray(toString(item(theAttributes, count-1)))';
        k = strfind(str, '=');
        attr_name = str(1:(k(1)-1));
        attr_name = strrep(attr_name, '-', '_dash_');
        attr_name = strrep(attr_name, ':', '_colon_');
        attr_name = strrep(attr_name, '.', '_dot_');
        attributes.(attr_name) = str((k(1)+2):(end-1));
    end
end
end

% ----- Local function xmlFromString -----
function xmlroot = xmlFromString(iString)
% Parse XML text held in iString and return what xmlread produces for it.
import org.xml.sax.InputSource
import java.io.*

iSource = InputSource();
iSource.setCharacterStream(StringReader(iString));
xmlroot = xmlread(iSource);
end