using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Xml;

namespace Rokojori
{
  /// <summary>
  /// Builds an <c>XMLDocument</c> tree from XML source text: the text is lexed
  /// with an <c>XMLLexer</c> and the resulting lexer events are folded into nodes.
  /// </summary>
  /// <remarks>
  /// The reader keeps per-read state in fields (events, current node, collected
  /// start-tag tokens), which are overwritten by every call to <see cref="Read"/>.
  /// </remarks>
  public class XMLReader
  {
    XMLLexer _lexer = new XMLLexer();

    /// <summary>The lexer instance used by this reader.</summary>
    public XMLLexer lexer => _lexer;

    TextLinesMapper linesMapper = new TextLinesMapper();

    // NOTE(review): `List` is declared without a type argument here and for
    // `insideTagElements` below, although the code reads lexer-event members
    // (isError, offset, mode, match, isDone) from the elements. The generic
    // argument may have been lost somewhere - confirm against the original file.
    List events;

    XMLDocument document;

    // The node that newly created children are appended to.
    XMLNode node;

    string text;

    // Lexer events collected between a start tag and its closing token.
    List insideTagElements = new List();

    /// <summary>
    /// Lexes <paramref name="text"/> and builds a document from the events.
    /// </summary>
    /// <param name="text">The XML source text.</param>
    /// <returns>
    /// The created document, or <c>null</c> when the lexer reported an error
    /// (the error is logged through <c>RJLog.Error</c>).
    /// </returns>
    public XMLDocument Read( string text )
    {
      this.text = text;
      events = _lexer.LexToList( text );

      if ( _lexer.hasError )
      {
        ReportLexerError();
        return null;
      }

      CreateDocument();

      return document;
    }

    /// <summary>
    /// Logs the source line of the first error event, followed by a second line
    /// holding a "~" marker below the error position.
    /// </summary>
    void ReportLexerError()
    {
      // FindIndex instead of Find: a missing error event is reported
      // instead of being dereferenced.
      var errorIndex = events.FindIndex( e => e.isError );

      if ( errorIndex == -1 )
      {
        RJLog.Error( "XML lexer reported an error, but no error event was found" );
        return;
      }

      linesMapper.Map( text );

      var error = events[ errorIndex ];
      var line = linesMapper.GetLine( error.offset );
      var lineCharacterIndex = error.offset - line.contentOffset;

      // Clamped at zero so that a negative index yields an empty padding.
      var padding = new string( ' ', lineCharacterIndex > 0 ? lineCharacterIndex : 0 );

      RJLog.Error( line.GetContent( text ) + "\n" + padding + "~" );
    }

    /// <summary>
    /// Creates a fresh document and walks all lexer events, appending
    /// instruction, comment, text and element nodes to the current node.
    /// </summary>
    void CreateDocument()
    {
      document = new XMLDocument();
      node = document;

      // Drop tokens that a previous read may have left behind.
      insideTagElements.Clear();

      _lexer.GrabMatches( events, text );

      foreach ( var e in events )
      {
        if ( e.isDone )
        {
          continue;
        }

        if ( XMLLexer.Inside_Start_Tag == e.mode )
        {
          // Tokens inside a start tag are collected until the tag closes,
          // then turned into attributes of the current element.
          // NOTE(review): if XMLStartTagClosing also matches a self-closing
          // "/>", the element is never popped here - confirm against XMLLexer.
          if ( XMLLexer.XMLStartTagClosing.Matches( e ) )
          {
            SetElementAttributes();
          }
          else
          {
            insideTagElements.Add( e );
          }
        }
        else if ( XMLLexer.XMLProcessingInstruction.Matches( e ) ||
                  XMLLexer.XMLInstruction.Matches( e ) )
        {
          Add( new XMLProcessingInstructionNode( document, e.match ) );
        }
        else if ( XMLLexer.XMLComment.Matches( e ) )
        {
          Add( new XMLCommentNode( document, e.match ) );
        }
        else if ( XMLLexer.XMLText.Matches( e ) ||
                  LexerMatcherLibrary.WhiteSpaceMatcher.Matches( e ) ||
                  LexerMatcherLibrary.BreakMatcher.Matches( e ) )
        {
          var unescapedMatch = XMLSerializer.Unescape( e.match );
          Add( new XMLTextNode( document, unescapedMatch ) );
        }
        else if ( XMLLexer.XMLStartTag.Matches( e ) )
        {
          insideTagElements.Clear();

          var regexMatch = XMLLexer.XMLStartTag.GetRegexMatches( e );
          var ns = regexMatch.Group( 1 );
          var elementName = regexMatch.Group( 2 );

          var element = new XMLElementNode( document, elementName, ns );
          Add( element );

          // Following nodes become children of the new element.
          node = element;
        }
        else if ( XMLLexer.XMLEndTag.Matches( e ) )
        {
          var closedElement = node as XMLElementNode;

          if ( closedElement == null )
          {
            // An end tag without an open element: report it and keep the
            // current node instead of failing on the cast.
            RJLog.Error( "Unexpected end tag: " + e.match );
            continue;
          }

          node = closedElement.parentNode;
        }
      }
    }

    /// <summary>
    /// Converts the tokens collected in <c>insideTagElements</c> into attributes
    /// of the current element and clears the collection afterwards.
    /// </summary>
    /// <remarks>
    /// An attribute name without a quoted value is set with an empty string.
    /// </remarks>
    void SetElementAttributes()
    {
      var element = node as XMLElementNode;

      if ( element == null )
      {
        RJLog.Error( "Attributes found, but the current node is not an element" );
        insideTagElements.Clear();
        return;
      }

      var attributeNameIndices = Lists.CollectIndices( insideTagElements, le => XMLLexer.XMLAttributeName.Matches( le ) );

      foreach ( var attIndex in attributeNameIndices )
      {
        var valueIndex = FindAttributeValue( attIndex + 1 );
        var value = "";

        if ( valueIndex != -1 )
        {
          // Strip the surrounding quote characters before unescaping.
          var stringMatch = insideTagElements[ valueIndex ].match;
          value = stringMatch.Substring( 1, stringMatch.Length - 2 );
          value = XMLSerializer.Unescape( value );
        }

        var regexMatch = XMLLexer.XMLAttributeName.GetRegexMatches( insideTagElements[ attIndex ] );
        var nameSpace = regexMatch.Group( 1 );
        var attributeName = regexMatch.Group( 2 );

        element.SetAttribute( attributeName, value, nameSpace );
      }

      insideTagElements.Clear();
    }

    /// <summary>
    /// Searches <c>insideTagElements</c> from <paramref name="index"/> on for the
    /// quoted string that follows an attribute assignment.
    /// </summary>
    /// <param name="index">The first token index to inspect.</param>
    /// <returns>
    /// The index of the quoted string token, or -1 when another attribute name
    /// or a second assignment is hit first, or when no value is found.
    /// </returns>
    int FindAttributeValue( int index )
    {
      var hasAssignment = false;

      for ( int i = index; i < insideTagElements.Count; i++ )
      {
        var token = insideTagElements[ i ];

        if ( XMLLexer.XMLAttributeName.Matches( token ) )
        {
          // The next attribute starts: this one has no value.
          return -1;
        }

        if ( XMLLexer.XMLAttributeAssignment.Matches( token ) )
        {
          if ( hasAssignment )
          {
            return -1;
          }

          hasAssignment = true;
        }

        if ( ! hasAssignment )
        {
          continue;
        }

        if ( XMLLexer.InsideStartTag_SingleQuotedStringMatcher.Matches( token ) ||
             XMLLexer.InsideStartTag_DoubleQuotedStringMatcher.Matches( token ) )
        {
          return i;
        }
      }

      return -1;
    }

    /// <summary>
    /// Appends <paramref name="childNode"/> to the current node, which has to be
    /// the document or an element; any other parent is only logged.
    /// </summary>
    /// <param name="childNode">The node to append.</param>
    void Add( XMLNode childNode )
    {
      if ( node == document )
      {
        document.AppendChild( childNode );
      }
      else if ( XMLNode.NodeType.Element == node.nodeType )
      {
        var elementNode = (XMLElementNode) node;
        elementNode.AppendChild( childNode );
      }
      else
      {
        RJLog.Log( "Unexpected parent" );
      }
    }
  }
}