// Source: rj-action-library/Runtime/XML/XMLReader.cs (C#; listed as 240 lines, 6.7 KiB)

using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Xml;
namespace Rokojori
{
public class XMLReader
{
// Lexer that turns the raw XML text into a flat list of lexer events.
XMLLexer _lexer = new XMLLexer();
// Read-only access to the lexer used by this reader.
public XMLLexer lexer => _lexer;
// Maps character offsets to lines; in this class it is only used to report the line of a lexer error.
TextLinesMapper linesMapper = new TextLinesMapper();
// Events produced by the lexer for the most recent Read call.
List<LexerEvent> events;
// Document built by the most recent successful Read call.
XMLDocument document;
// Node that currently receives new children while the document is being built.
XMLNode node;
// Source text handed to the most recent Read call.
string text;
/// <summary>
/// Lexes <paramref name="text"/> and builds an <see cref="XMLDocument"/> from the resulting events.
/// </summary>
/// <param name="text">The XML source to read.</param>
/// <returns>
/// The created document, or null when the lexer reported an error. In the error case the
/// offending line is logged, followed by a second line with a "~" placed under the error position.
/// </returns>
public XMLDocument Read( string text )
{
  this.text = text;
  events = _lexer.LexToList( text );

  if ( ! _lexer.hasError )
  {
    CreateDocument();
    return document;
  }

  linesMapper.Map( text );

  var error = events.Find( e => e.isError );
  var line = linesMapper.GetLine( error.offset );

  // Pad with spaces so that the "~" marker ends up under the character at the error offset.
  // The count is clamped at zero so an offset before the line content still yields a bare "~".
  var lineCharacterIndex = error.offset - line.contentOffset;
  var errorLine = new string( ' ', System.Math.Max( 0, lineCharacterIndex ) ) + "~";

  RJLog.Error( line.GetContent( text ) + "\n" + errorLine );

  return null;
}
// Tokens collected while the lexer is inside a start tag; they carry the attributes of that tag.
List<LexerEvent> insideTagElements = new List<LexerEvent>();

/// <summary>
/// Creates a fresh document and fills it by walking the lexer events in order:
/// instructions, comments and text are appended to the current parent node, a start tag
/// opens a new element that becomes the current parent, and an end tag moves the
/// current parent back up to the element's parent node.
/// </summary>
void CreateDocument()
{
  document = new XMLDocument();
  node = document;

  // Drop tokens that may be left over from an earlier run of this reader.
  insideTagElements.Clear();

  _lexer.GrabMatches( events, text );

  foreach ( var e in events )
  {
    if ( e.isDone )
    {
      continue;
    }

    if ( XMLLexer.Inside_Start_Tag == e.mode )
    {
      if ( XMLLexer.XMLStartTagClosing.Matches( e ) )
      {
        // NOTE(review): closing a start tag only applies the attributes. If the lexer reports
        // self-closing tags through this matcher too, the element stays open - confirm in XMLLexer.
        SetElementAttributes();
      }
      else
      {
        insideTagElements.Add( e );
      }
    }
    else if ( XMLLexer.XMLProcessingInstruction.Matches( e ) ||
              XMLLexer.XMLInstruction.Matches( e ) )
    {
      // Both instruction kinds are represented by the same node type.
      Add( new XMLProcessingInstructionNode( document, e.match ) );
    }
    else if ( XMLLexer.XMLComment.Matches( e ) )
    {
      Add( new XMLCommentNode( document, e.match ) );
    }
    else if ( XMLLexer.XMLText.Matches( e ) ||
              LexerMatcherLibrary.WhiteSpaceMatcher.Matches( e ) ||
              LexerMatcherLibrary.BreakMatcher.Matches( e ) )
    {
      // Text, white space and line breaks all become text nodes with unescaped content.
      var unescapedMatch = XMLSerializer.Unescape( e.match );
      Add( new XMLTextNode( document, unescapedMatch ) );
    }
    else if ( XMLLexer.XMLStartTag.Matches( e ) )
    {
      insideTagElements.Clear();

      var regexMatch = XMLLexer.XMLStartTag.GetRegexMatches( e );
      var ns = regexMatch.Group( 1 );
      var elementName = regexMatch.Group( 2 );

      var element = new XMLElementNode( document, elementName, ns );
      Add( element );

      // Everything that follows is a child of the new element until its end tag.
      node = element;
    }
    else if ( XMLLexer.XMLEndTag.Matches( e ) )
    {
      // An end tag without an open element (for example a stray closing tag at document
      // level) must not be cast blindly: that would throw an InvalidCastException.
      var closedElement = node as XMLElementNode;

      if ( closedElement == null )
      {
        RJLog.Error( "Unexpected closing tag: " + e.match );
        continue;
      }

      node = closedElement.parentNode;
    }
  }
}
/// <summary>
/// Applies the attributes found in insideTagElements to the current node, which is cast to
/// an element node, and clears the collected tokens afterwards.
/// An attribute name without a quoted value is set with an empty string.
/// </summary>
void SetElementAttributes()
{
  var element = (XMLElementNode) node;

  var attributeNameIndices = Lists.CollectIndices( insideTagElements, le => XMLLexer.XMLAttributeName.Matches( le ) );

  foreach ( var attIndex in attributeNameIndices )
  {
    var value = "";
    var valueIndex = FindAttributeValue( attIndex + 1 );

    if ( valueIndex != -1 )
    {
      // Strip the surrounding quote characters before unescaping the content.
      var stringMatch = insideTagElements[ valueIndex ].match;
      value = XMLSerializer.Unescape( stringMatch.Substring( 1, stringMatch.Length - 2 ) );
    }

    // Group 1 is used as the namespace, group 2 as the attribute name.
    var regexMatch = XMLLexer.XMLAttributeName.GetRegexMatches( insideTagElements[ attIndex ] );
    var nameSpace = regexMatch.Group( 1 );
    var attributeName = regexMatch.Group( 2 );

    element.SetAttribute( attributeName, value, nameSpace );
  }

  insideTagElements.Clear();
}
/// <summary>
/// Searches insideTagElements, starting at <paramref name="index"/>, for the quoted string
/// that follows an assignment token.
/// </summary>
/// <param name="index">First token index to inspect.</param>
/// <returns>
/// The index of the single- or double-quoted string token, or -1 when another attribute
/// name or a second assignment is met first, or when the tokens run out.
/// </returns>
int FindAttributeValue( int index )
{
  var hasAssignment = false;

  for ( int i = index; i < insideTagElements.Count; i++ )
  {
    var token = insideTagElements[ i ];

    // Reaching another attribute name means no value was found for the searched one.
    if ( XMLLexer.XMLAttributeName.Matches( token ) )
    {
      return -1;
    }

    if ( XMLLexer.XMLAttributeAssignment.Matches( token ) )
    {
      // A second assignment before any value is malformed input.
      if ( hasAssignment )
      {
        return -1;
      }

      hasAssignment = true;
    }

    // Quoted strings only count as the value once the assignment has been seen.
    if ( hasAssignment &&
         ( XMLLexer.InsideStartTag_SingleQuotedStringMatcher.Matches( token ) ||
           XMLLexer.InsideStartTag_DoubleQuotedStringMatcher.Matches( token ) ) )
    {
      return i;
    }
  }

  return -1;
}
/// <summary>
/// Appends <paramref name="childNode"/> to the current parent node: directly to the document
/// when the document is the current node, otherwise to the current element node.
/// Any other kind of parent is only logged and the child is not appended.
/// </summary>
/// <param name="childNode">The node to append.</param>
void Add( XMLNode childNode )
{
  if ( node == document )
  {
    document.AppendChild( childNode );
    return;
  }

  if ( XMLNode.NodeType.Element != node.nodeType )
  {
    RJLog.Log( "Unexpected parent" );
    return;
  }

  ( (XMLElementNode) node ).AppendChild( childNode );
}
}
}