240 lines
6.7 KiB
C#
240 lines
6.7 KiB
C#
using System.Collections;
|
|
using System.Collections.Generic;
|
|
using System.Text.RegularExpressions;
|
|
using System.Xml;
|
|
|
|
namespace Rokojori
|
|
{
|
|
public class XMLReader
|
|
{
|
|
// Lexer that turns the XML text into the event list consumed by this reader.
private XMLLexer _lexer = new XMLLexer();

// Exposes the lexer instance used by this reader.
public XMLLexer lexer
{
  get { return _lexer; }
}

// Used by Read to find the source line of a lexer error.
private TextLinesMapper linesMapper = new TextLinesMapper();

// Events returned by the lexer for the text passed to Read.
private List<LexerEvent> events;

// Document that CreateDocument is building.
private XMLDocument document;

// Node that receives newly created children: the document or the currently open element.
private XMLNode node;

// Source text passed to Read.
private string text;
|
|
|
|
/// <summary>
/// Lexes the given XML text and builds a document tree from the lexer events.
/// </summary>
/// <param name="text">XML source text; it is stored on the reader and used while building the document.</param>
/// <returns>
/// The created document, or null when the lexer reports an error. In the error case the
/// offending line and a "~" marker below the error offset are logged via RJLog.Error.
/// </returns>
public XMLDocument Read( string text )
{
  this.text = text;
  events = _lexer.LexToList( text );

  if ( _lexer.hasError )
  {
    linesMapper.Map( text );

    var error = events.Find( e => e.isError );

    // List.Find yields the default value when nothing matches; guard so an error flag
    // without a matching error event does not end in a NullReferenceException.
    if ( error == null )
    {
      RJLog.Error( "XML lexer reported an error, but no error event was found" );
      return null;
    }

    var line = linesMapper.GetLine( error.offset );
    var lineCharacterIndex = error.offset - line.contentOffset;

    // Pad with spaces so the "~" marker sits below the error column.
    // A non-positive index yields no padding, as the original counting loop did.
    var padding = lineCharacterIndex > 0 ? new string( ' ', lineCharacterIndex ) : "";

    RJLog.Error( line.GetContent( text ) + "\n" + padding + "~" );

    return null;
  }

  CreateDocument();

  return document;
}
|
|
|
|
// Lexer events collected while inside a start tag; read and cleared by SetElementAttributes.
private List<LexerEvent> insideTagElements = new List<LexerEvent>();
|
|
|
|
/// <summary>
/// Creates a new document and fills it by walking the lexer events in order.
/// Start tags open a new current node, end tags return to the parent node, and
/// instructions, comments and text are appended to the current node.
/// </summary>
void CreateDocument()
{
  document = new XMLDocument();
  node = document;

  _lexer.GrabMatches( events, text );

  foreach ( var e in events )
  {
    if ( e.isDone )
    {
      continue;
    }

    if ( XMLLexer.Inside_Start_Tag == e.mode )
    {
      // Tokens inside a start tag are buffered until the tag is closed,
      // then turned into attributes of the current element.
      if ( XMLLexer.XMLStartTagClosing.Matches( e ) )
      {
        SetElementAttributes();
      }
      else
      {
        insideTagElements.Add( e );
      }
    }
    else if ( XMLLexer.XMLProcessingInstruction.Matches( e ) ||
              XMLLexer.XMLInstruction.Matches( e ) )
    {
      // Both instruction kinds are stored as processing instruction nodes.
      Add( new XMLProcessingInstructionNode( document, e.match ) );
    }
    else if ( XMLLexer.XMLComment.Matches( e ) )
    {
      Add( new XMLCommentNode( document, e.match ) );
    }
    else if ( XMLLexer.XMLText.Matches( e ) ||
              LexerMatcherLibrary.WhiteSpaceMatcher.Matches( e ) ||
              LexerMatcherLibrary.BreakMatcher.Matches( e ) )
    {
      // Text, whitespace and line breaks all become text nodes with unescaped content.
      Add( new XMLTextNode( document, XMLSerializer.Unescape( e.match ) ) );
    }
    else if ( XMLLexer.XMLStartTag.Matches( e ) )
    {
      insideTagElements.Clear();

      var regexMatch = XMLLexer.XMLStartTag.GetRegexMatches( e );

      var ns = regexMatch.Group( 1 );
      var elementName = regexMatch.Group( 2 );

      var element = new XMLElementNode( document, elementName, ns );
      Add( element );

      // The new element becomes the parent for everything until its end tag.
      // NOTE(review): it stays current until an XMLEndTag event arrives - confirm how
      // the lexer reports self-closing tags.
      node = element;
    }
    else if ( XMLLexer.XMLEndTag.Matches( e ) )
    {
      var openElement = node as XMLElementNode;

      // An end tag without an open element (e.g. unbalanced input) previously failed
      // on the cast; report it and keep the current node instead.
      if ( openElement == null )
      {
        RJLog.Error( "Unexpected end tag without a matching start tag: " + e.match );
        continue;
      }

      node = openElement.parentNode;
    }
  }
}
|
|
|
|
/// <summary>
/// Sets the attributes collected in insideTagElements on the current element node
/// and clears the collected tokens afterwards.
/// An attribute name without a quoted value gets an empty string as value.
/// </summary>
void SetElementAttributes()
{
  var element = (XMLElementNode) node;

  var attributeNameIndices = Lists.CollectIndices( insideTagElements, le => XMLLexer.XMLAttributeName.Matches( le ) );

  attributeNameIndices.ForEach(
    ( attIndex )=>
    {
      var valueIndex = FindAttributeValue( attIndex + 1 );
      var value = "";

      if ( valueIndex != -1 )
      {
        var stringMatch = insideTagElements[ valueIndex ].match;

        // Strip the first and last character (the quotes) before unescaping.
        // Matches too short to hold both quotes keep the empty default instead of throwing.
        if ( stringMatch != null && stringMatch.Length >= 2 )
        {
          value = XMLSerializer.Unescape( stringMatch.Substring( 1, stringMatch.Length - 2 ) );
        }
      }

      var regexMatch = XMLLexer.XMLAttributeName.GetRegexMatches( insideTagElements[ attIndex ] );

      var nameSpace = regexMatch.Group( 1 );
      var attributeName = regexMatch.Group( 2 );

      element.SetAttribute( attributeName, value, nameSpace );
    }
  );

  insideTagElements.Clear();
}
|
|
|
|
/// <summary>
/// Searches insideTagElements, starting at the given index, for the quoted string
/// that is assigned to the preceding attribute name.
/// </summary>
/// <param name="index">Index of the first token to inspect (the token after the attribute name).</param>
/// <returns>
/// The index of the single- or double-quoted string token that follows an assignment token,
/// or -1 when the next attribute name, a second assignment, or the end of the list is reached first.
/// </returns>
int FindAttributeValue( int index )
{
  var hasAssignment = false;

  for ( int i = index; i < insideTagElements.Count; i++ )
  {
    var token = insideTagElements[ i ];

    // Reaching the next attribute name means the current one has no value.
    if ( XMLLexer.XMLAttributeName.Matches( token ) )
    {
      return -1;
    }

    if ( XMLLexer.XMLAttributeAssignment.Matches( token ) )
    {
      // A second assignment before any value is treated as "no value".
      if ( hasAssignment )
      {
        return -1;
      }

      hasAssignment = true;
    }

    // Quoted strings only count as value once an assignment has been seen.
    if ( ! hasAssignment )
    {
      continue;
    }

    if (
      XMLLexer.InsideStartTag_SingleQuotedStringMatcher.Matches( token ) ||
      XMLLexer.InsideStartTag_DoubleQuotedStringMatcher.Matches( token )
    )
    {
      return i;
    }
  }

  return -1;
}
|
|
|
|
/// <summary>
/// Appends the given node to the current node, which is either the document
/// or an element. Any other kind of current node is only reported via RJLog.Log.
/// </summary>
/// <param name="childNode">The node to append.</param>
void Add( XMLNode childNode )
{
  // Top level: children go directly into the document.
  if ( node == document )
  {
    document.AppendChild( childNode );
    return;
  }

  // Only element nodes can take children besides the document.
  if ( XMLNode.NodeType.Element != node.nodeType )
  {
    RJLog.Log( "Unexpected parent" );
    return;
  }

  ( (XMLElementNode) node ).AppendChild( childNode );
}
|
|
|
|
}
|
|
} |