rj-action-library/Runtime/XML/XMLReader.cs

  using System.Collections;
  using System.Collections.Generic;
  using System.Text.RegularExpressions;
  using System.Xml;

  namespace Rokojori
  {
    /// <summary>
    /// Builds an <see cref="XMLDocument"/> node tree from XML text, driven by the
    /// token events produced by an <see cref="XMLLexer"/>.
    /// </summary>
    /// <remarks>
    /// Parsing state (events, current document, current parent node, pending
    /// start-tag tokens) lives in instance fields that every call to
    /// <see cref="Read"/> overwrites.
    /// </remarks>
    public class XMLReader
    {
      readonly XMLLexer _lexer = new XMLLexer();

      /// <summary>The lexer instance used by this reader to tokenize input.</summary>
      public XMLLexer lexer => _lexer;


      // Only used on the error path, to translate an error offset into a line.
      readonly TextLinesMapper linesMapper = new TextLinesMapper();

      // Token events of the text that is currently being read.
      List<LexerEvent> events;

      // Document that is currently being built.
      XMLDocument document;

      // Node that receives new children: the document itself or the innermost open element.
      XMLNode node;

      // Source text of the current Read() call.
      string text;

      /// <summary>
      /// Lexes <paramref name="text"/> and converts the resulting events into a document.
      /// </summary>
      /// <param name="text">The XML source text.</param>
      /// <returns>
      /// The created document, or <c>null</c> when the lexer reports an error. In the
      /// error case the line containing the first error event is logged via
      /// <c>RJLog.Error</c>, followed by a line with a <c>~</c> placed under the error offset.
      /// </returns>
      public XMLDocument Read( string text )
      {
        this.text = text;
        events = _lexer.LexToList( text );

        if ( _lexer.hasError )
        {
          linesMapper.Map( text );

          var error = events.Find( e => e.isError );
          var line = linesMapper.GetLine( error.offset );

          // Column of the error inside its line; the marker is indented by that many spaces.
          var lineCharacterIndex = error.offset - line.contentOffset;

          // A non-positive column produces no padding (new string() would throw on a negative count).
          var padding = lineCharacterIndex > 0 ? new string( ' ', lineCharacterIndex ) : "";
          var errorLine = padding + "~";

          RJLog.Error( line.GetContent( text ) + "\n" + errorLine );

          return null;
        }

        CreateDocument();

        // RJLog.Log( document.Serialize() );

        return document;
      }

      // Tokens collected between a start tag and its closing token; they carry the attributes.
      readonly List<LexerEvent> insideTagElements = new List<LexerEvent>();

      /// <summary>
      /// Creates a fresh document and walks all lexer events in order, appending a node
      /// for each instruction, comment, text run and element to the current parent.
      /// </summary>
      /// <remarks>
      /// NOTE(review): nothing in this method moves <c>node</c> back to the parent for a
      /// self-closing tag; confirm how XMLLexer reports such tags.
      /// </remarks>
      void CreateDocument()
      {
        document = new XMLDocument();

        node = document;

        _lexer.GrabMatches( events, text );

        foreach ( var e in events )
        {
          if ( e.isDone )
          {
            continue;
          }

          //RJLog.Log( "Token ", "m:", e.mode, "t:", e.type, ">>", e.match );

          if ( XMLLexer.Inside_Start_Tag == e.mode )
          {
            // Inside a start tag: gather tokens until the closing token arrives,
            // then turn the gathered tokens into attributes of the open element.
            if ( XMLLexer.XMLStartTagClosing.Matches( e ) )
            {
              SetElementAttributes();
            }
            else
            {
              insideTagElements.Add( e );
            }
          }
          else if ( XMLLexer.XMLProcessingInstruction.Matches( e ) ||
                    XMLLexer.XMLInstruction.Matches( e ) )
          {
            // Both instruction token kinds are stored as processing instruction nodes.
            var instruction = new XMLProcessingInstructionNode( document, e.match );
            Add( instruction );
          }
          else if ( XMLLexer.XMLComment.Matches( e ) )
          {
            var comment = new XMLCommentNode( document, e.match );
            Add( comment );
          }
          else if ( XMLLexer.XMLText.Matches( e ) ||
                    LexerMatcherLibrary.WhiteSpaceMatcher.Matches( e ) ||
                    LexerMatcherLibrary.BreakMatcher.Matches( e ) )
          {
            // Text, white space and line breaks all become text nodes with unescaped content.
            var unescapedMatch = XMLSerializer.Unescape( e.match );
            var textNode = new XMLTextNode( document, unescapedMatch );
            Add( textNode );
          }
          else if ( XMLLexer.XMLStartTag.Matches( e ) )
          {
            insideTagElements.Clear();

            var regexMatch  = XMLLexer.XMLStartTag.GetRegexMatches( e );

            var ns          = regexMatch.Group( 1 );
            var elementName = regexMatch.Group( 2 );

            var element = new XMLElementNode( document, elementName, ns );
            Add( element );

            // RJLog.Log( "Adding Element", element.fullNodeName );

            // The new element becomes the parent for everything up to its end tag.
            node = element;
          }
          else if ( XMLLexer.XMLEndTag.Matches( e ) )
          {
            var closedElement = node as XMLElementNode;

            if ( closedElement == null )
            {
              // End tag while no element is open: report it instead of failing on a cast.
              RJLog.Error( "Unexpected end tag without an open element: " + e.match );
              continue;
            }

            node = closedElement.parentNode;
          }
        }
      }

      /// <summary>
      /// Converts the tokens gathered in <c>insideTagElements</c> into attributes of the
      /// current element and clears the gathered tokens afterwards.
      /// </summary>
      /// <remarks>
      /// An attribute name without a value (see <see cref="FindAttributeValue"/>) is set
      /// with an empty string. Values have their surrounding quote characters removed
      /// and are unescaped.
      /// </remarks>
      void SetElementAttributes()
      {
        var element = (XMLElementNode) node;

        // RJLog.Log( "Set Attributes", element.fullNodeName, insideTagElements.Count );

        var attributeNameIndices = Lists.CollectIndices( insideTagElements, le => XMLLexer.XMLAttributeName.Matches( le ) );

        foreach ( var attIndex in attributeNameIndices )
        {
          var valueIndex = FindAttributeValue( attIndex + 1 );
          var value = "";

          if ( valueIndex != -1 )
          {
            var stringMatch = insideTagElements[ valueIndex ].match;

            // Strip the first and last character (the quotes) before unescaping.
            value = stringMatch.Substring( 1, stringMatch.Length - 2 );
            value = XMLSerializer.Unescape( value );
          }

          var regexMatch  = XMLLexer.XMLAttributeName.GetRegexMatches( insideTagElements[ attIndex ] );

          var nameSpace     = regexMatch.Group( 1 );
          var attributeName = regexMatch.Group( 2 );

          // RJLog.Log( "Attribute", insideTagElements[ attIndex ].match, regexMatch.regexMatch, nameSpace, attributeName );

          element.SetAttribute( attributeName, value, nameSpace );
        }

        insideTagElements.Clear();
      }

      /// <summary>
      /// Searches <c>insideTagElements</c>, starting at <paramref name="index"/>, for the
      /// quoted string that follows an assignment token.
      /// </summary>
      /// <param name="index">Index of the first token to inspect.</param>
      /// <returns>
      /// The index of the quoted string token, or <c>-1</c> when another attribute name
      /// or a second assignment token is reached first, or when the tokens run out.
      /// </returns>
      int FindAttributeValue( int index )
      {
        var hasAssignment = false;

        for ( int i = index; i < insideTagElements.Count; i++ )
        {
          var token = insideTagElements[ i ];

          // The next attribute starts here, so the current one has no value.
          if ( XMLLexer.XMLAttributeName.Matches( token ) )
          {
            return -1;
          }

          if ( XMLLexer.XMLAttributeAssignment.Matches( token ) )
          {
            // A second assignment before any value is treated as "no value".
            if ( hasAssignment )
            {
              return -1;
            }

            hasAssignment = true;
          }

          // Quoted strings only count once an assignment has been seen.
          if ( ! hasAssignment )
          {
            continue;
          }

          if (
            XMLLexer.InsideStartTag_SingleQuotedStringMatcher.Matches( token ) ||
            XMLLexer.InsideStartTag_DoubleQuotedStringMatcher.Matches( token )
          )
          {
            return i;
          }
        }

        return -1;
      }

      /// <summary>
      /// Appends <paramref name="childNode"/> to the current parent, which is either the
      /// document or an element. Any other parent type is only logged and the node is
      /// not appended.
      /// </summary>
      /// <param name="childNode">The node to append.</param>
      void Add( XMLNode childNode )
      {
        if ( node == document )
        {
          document.AppendChild( childNode );

          // RJLog.Log( "Adding to Doc", childNode.GetInfo(), "is doc element:", document.documentElement == childNode );
        }
        else if ( XMLNode.NodeType.Element == node.nodeType )
        {
          var elementNode = (XMLElementNode) node;
          elementNode.AppendChild( childNode );
        }
        else
        {
          RJLog.Log( "Unexpected parent" );
        }
      }

    }
  }