2024-10-25 06:28:58 +00:00
using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Xml;
namespace Rokojori
public class XMLReader
2024-11-12 08:03:36 +00:00
// Lexer used by this reader: Read() calls LexToList() on it to produce the event list
// and checks its hasError flag; CreateDocument() calls GrabMatches() on it before
// reading the events' match strings.
XMLLexer _lexer = new XMLLexer();
// Read-only access to the lexer instance used by this reader.
public XMLLexer lexer => _lexer;
2024-10-25 06:28:58 +00:00
// Maps character offsets to lines; used by Read() to locate and print the line of a lexer error.
TextLinesMapper linesMapper = new TextLinesMapper();
// Lexer events returned by _lexer.LexToList() for the text passed to Read().
List<LexerEvent> events;
// Document created in CreateDocument() and returned by Read().
XMLDocument document;
// Current parent for newly created nodes: the document itself or the element that is currently open.
XMLNode node;
// Source text passed to Read(); kept so CreateDocument() can hand it to the lexer.
string text;
2025-02-12 16:48:15 +00:00
2024-10-25 06:28:58 +00:00
/// <summary>
/// Lexes <paramref name="text"/> and builds an XML document from the resulting lexer events.
/// </summary>
/// <param name="text">The XML source to read.</param>
/// <returns>
/// The created document, or <c>null</c> when the lexer reports an error. In the error case
/// the offending line is logged, followed by a second line with a <c>~</c> marker under the
/// error position.
/// </returns>
public XMLDocument Read( string text )
{
  this.text = text;

  events = _lexer.LexToList( text );

  if ( _lexer.hasError )
  {
    var error = events.Find( e => e.isError );

    if ( error == null )
    {
      // The lexer flagged an error but no event carries it; report that instead of
      // failing with a NullReferenceException below.
      RJLog.Error( "XML lexer reported an error, but no error event was found" );
      return null;
    }

    linesMapper.Map( text );

    var line = linesMapper.GetLine( error.offset );
    var lineCharacterIndex = error.offset - line.contentOffset;

    // Pad with spaces so that the marker ends up below the offending character.
    var padding = lineCharacterIndex > 0 ? new string( ' ', lineCharacterIndex ) : "";
    var errorLine = padding + "~";

    RJLog.Error( line.GetContent( text ) + "\n" + errorLine );

    return null;
  }

  CreateDocument();

  return document;
}
// Lexer events collected by CreateDocument() while the lexer is inside a start tag;
// SetElementAttributes() and FindAttributeValue() read attribute names and values from it.
List<LexerEvent> insideTagElements = new List<LexerEvent>();
/// <summary>
/// Creates a new document and fills it by walking the lexer events in order.
/// The field <c>node</c> tracks the current parent: it starts at the document, moves to
/// each newly opened element and moves back to the parent when an element is closed.
/// </summary>
void CreateDocument()
{
  document = new XMLDocument();
  node = document;

  // Drop events that may be left over from an earlier run that ended inside a start tag.
  insideTagElements.Clear();

  _lexer.GrabMatches( events, text );

  foreach ( var e in events )
  {
    if ( e.isDone )
    {
      continue;
    }

    if ( XMLLexer.Inside_Start_Tag == e.mode )
    {
      var closesElement = XMLLexer.XMLStartTagClosingElement.Matches( e );

      if ( closesElement || XMLLexer.XMLStartTagClosingTag.Matches( e ) )
      {
        // The start tag is complete: turn the collected events into attributes
        // of the current element and reset the collection for the next tag.
        SetElementAttributes();
        insideTagElements.Clear();

        if ( closesElement )
        {
          // The element is closed by its own start tag, so continue in its parent.
          node = node.parentNode;
        }
      }
      else
      {
        insideTagElements.Add( e );
      }
    }
    else if ( XMLLexer.XMLProcessingInstruction.Matches( e ) || XMLLexer.XMLInstruction.Matches( e ) )
    {
      // Both token types are stored as processing instruction nodes.
      var instruction = new XMLProcessingInstructionNode( document, e.match );
      Add( instruction );
    }
    else if ( XMLLexer.XMLComment.Matches( e ) )
    {
      var comment = new XMLCommentNode( document, e.match );
      Add( comment );
    }
    else if ( XMLLexer.XMLText.Matches( e ) ||
              LexerMatcherLibrary.WhiteSpaceMatcher.Matches( e ) ||
              LexerMatcherLibrary.BreakMatcher.Matches( e ) )
    {
      // Text, whitespace and line breaks all become text nodes with unescaped content.
      var unescapedMatch = XMLSerializer.Unescape( e.match );
      var textNode = new XMLTextNode( document, unescapedMatch );
      Add( textNode );
    }
    else if ( XMLLexer.XMLStartTag.Matches( e ) )
    {
      var regexMatch = XMLLexer.XMLStartTag.GetRegexMatches( e );
      var ns = regexMatch.Group( 1 );
      var elementName = regexMatch.Group( 2 );

      var element = new XMLElementNode( document, elementName, ns );
      Add( element );

      // Following nodes are children of the new element until it is closed.
      node = element;
    }
    else if ( XMLLexer.XMLEndTag.Matches( e ) && node != document )
    {
      // NOTE(review): the end tag's name is not compared with the open element's name.
      var el = (XMLElementNode) node;
      node = el.parentNode;
    }
  }
}
/// <summary>
/// Sets the attributes of the current element (the field <c>node</c>, cast to
/// <c>XMLElementNode</c>) from the events collected in <c>insideTagElements</c>.
/// An attribute name without a quoted value gets the empty string as value.
/// </summary>
void SetElementAttributes()
{
  var element = (XMLElementNode) node;

  var attributeNameIndices = Lists.CollectIndices( insideTagElements, le => XMLLexer.XMLAttributeName.Matches( le ) );

  foreach ( var attIndex in attributeNameIndices )
  {
    // Search for the value behind the name; -1 means the attribute has no value.
    var valueIndex = FindAttributeValue( attIndex + 1 );
    var value = "";

    if ( valueIndex != -1 )
    {
      var stringMatch = insideTagElements[ valueIndex ].match;

      // Strip the first and last character of the match (the surrounding quotes
      // of the single or double quoted string) before unescaping.
      value = stringMatch.Substring( 1, stringMatch.Length - 2 );
      value = XMLSerializer.Unescape( value );
    }

    var regexMatch = XMLLexer.XMLAttributeName.GetRegexMatches( insideTagElements[ attIndex ] );
    var nameSpace = regexMatch.Group( 1 );
    var attributeName = regexMatch.Group( 2 );

    element.SetAttribute( attributeName, value, nameSpace );
  }
}
/// <summary>
/// Searches <c>insideTagElements</c>, starting at <paramref name="index"/>, for the quoted
/// string that is the value of the preceding attribute name.
/// </summary>
/// <param name="index">Index of the first event after the attribute name.</param>
/// <returns>
/// The index of the single or double quoted string event that follows an assignment,
/// or -1 when the next attribute name is reached first, when a second assignment is
/// found, or when the list ends without a value.
/// </returns>
int FindAttributeValue( int index )
{
  var hasAssignment = false;

  for ( int i = index; i < insideTagElements.Count; i++ )
  {
    var tagEvent = insideTagElements[ i ];

    // The next attribute starts: the current one has no value.
    if ( XMLLexer.XMLAttributeName.Matches( tagEvent ) )
    {
      return -1;
    }

    if ( XMLLexer.XMLAttributeAssignment.Matches( tagEvent ) )
    {
      // A second assignment before any value is malformed.
      if ( hasAssignment )
      {
        return -1;
      }

      hasAssignment = true;
    }

    // NOTE(review): events before the assignment are skipped here — confirm that
    // skipping (rather than aborting) is the intended behavior.
    if ( ! hasAssignment )
    {
      continue;
    }

    if (
      XMLLexer.InsideStartTag_SingleQuotedStringMatcher.Matches( tagEvent ) ||
      XMLLexer.InsideStartTag_DoubleQuotedStringMatcher.Matches( tagEvent )
    )
    {
      return i;
    }
  }

  return -1;
}
/// <summary>
/// Appends <paramref name="childNode"/> to the current parent (the field <c>node</c>).
/// </summary>
/// <param name="childNode">The node to append.</param>
void Add( XMLNode childNode )
{
  if ( node == document )
  {
    document.AppendChild( childNode );
  }
  else if ( XMLNode.NodeType.Element == node.nodeType )
  {
    var elementNode = (XMLElementNode) node;
    elementNode.AppendChild( childNode );
  }
  else
  {
    // Only the document and element nodes are handled as parents; the child is not appended.
    RJLog.Log( "Unexpected parent" );
  }
}