rokojori_action_library/Tools/docs/ClassDocEditing/ClassDocParser.cs

652 lines
16 KiB
C#

using Godot;
using Rokojori;
using System.Collections.Generic;
using System;
using System.Reflection;
using System.Text.RegularExpressions;
using System.Linq;
namespace Rokojori.DocGenerator
{
public class ClassDocParser
{
// Keyword modifiers that may precede a type or member declaration.
// Used to scan backwards from a declaration's type token to the true
// start of the declaration ( see ParseObjectDefinition / ParseInnerObject ).
// NOTE(review): multi-word entries like "protected internal" assume the
// matcher compares against combined tokens — confirm against LexerList.MatchIsAny.
public static readonly string[] ModifierTypes =
[
// Access modifiers
"public",
"protected",
"internal",
"protected internal",
"private",
"private protected",
// Type behavior modifiers
"abstract",
"sealed",
"static",
"partial",
// Type-specific modifiers
"readonly",
"ref",
"unsafe",
"new",
"override",
"virtual",
// Modern / contextual
"record",
"record struct"
];
// Token stream of the whole source file ( "orginal" is a historical typo kept for compatibility ).
public LexerList orginalTokens;
// Tokens inside the parsed object's body; nested blocks/objects are collapsed to placeholders.
public LexerList innerTokens;
// Maps character offsets to line info ( used for diagnostics/logging ).
public TextLinesMapper linesMapper;
/// <summary>Returns the members collected by <see cref="Parse"/>.</summary>
public List<ClassDocMember> GetMembers() => members;
// The object definition currently being parsed.
public CSharpObjectDefinition definition;
// Index in orginalTokens where the definition ( including attributes and modifiers ) starts; -1 if not resolved.
public int definitionStart = -1;
/// <summary>
/// Parses the given object definition: resolves the declaration start,
/// extracts the inner token range, splits it into members and finally
/// attaches preceding doc comments to those members.
/// </summary>
public void Parse( CSharpObjectDefinition definition )
{
this.definition = definition;
// RJLog.Log( "\n\n PARSING START \n\n" );
ParseObjectDefinition();
ParseInnerObject();
ParseMembers();
ParseComments();
// RJLog.Log( "\n\n PARSING DONE \n\n" );
}
// Resolves definitionStart: walks backwards from the definition's type token
// over modifier keywords and any preceding attribute lists ( "[ ... ]" ).
protected void ParseObjectDefinition()
{
    var typePosition = orginalTokens.Index( definition.type );
    var startIndex   = orginalTokens.MoveIndexBackwards( typePosition, ModifierTypes );

    var beforeIndex = orginalTokens.GetIndexReverse( startIndex );

    // Each attribute list ends in "]"; jump to its opening "[" and keep looking.
    while ( beforeIndex != -1 && orginalTokens[ beforeIndex ].MatchIs( "]" ) )
    {
        var opening = LexerEvent.ReverseFindOpeningBracket( orginalTokens.events, beforeIndex );
        startIndex  = opening.index;
        beforeIndex = orginalTokens.GetIndexReverse( startIndex );
    }

    definitionStart = startIndex;
}
// Extracts the token range inside the object's { } body into innerTokens and
// collapses nested blocks and nested object definitions into single placeholder
// tokens, so the member parser only ever sees top-level tokens.
protected void ParseInnerObject()
{
    var index = orginalTokens.Index( definition.name );
    var block = orginalTokens.NextBlock( index );
    innerTokens = orginalTokens.RangeInside( block );

    innerTokens.ReplaceBlocks(
        ( r )=>
        {
            var opener = orginalTokens[ r.min ];
            // FIX: the closing token sits at r.max — it was incorrectly read from r.min,
            // which gave every collapsed block a wrong ( zero-length ) end offset.
            var closer = orginalTokens[ r.max ];
            // Renamed from "block" to avoid shadowing the enclosing local ( CS0136 ).
            var innerBlock = new ClassBlock( opener.offset, closer.end, r.min, r.max );
            innerBlock.SetMatch( "{ /* - inner block - */ }" );
            return innerBlock;
        }
    );

    var innerObjects = CSharpLexer.GetAllObjectDefinitions( innerTokens.events );

    var objectRanges = innerObjects.Map(
        ( io )=>
        {
            var startIndex = innerTokens.Index( io.type );
            var endIndex = innerTokens.Index( io.name, startIndex );
            var end = innerTokens.FindIndexOfType( endIndex, ClassBlock.LexerType );

            // Walk backwards over modifier keywords ( skipping ignorable tokens )
            // so they are included in the replaced range.
            var modifierStart = startIndex;
            var searching = true;

            while ( searching )
            {
                var result = LexerEvent.ReverseFind( innerTokens.events, modifierStart - 1,
                    ( le ) =>
                    {
                        if ( le.MatchIsAny( ModifierTypes ) )
                        {
                            return LexerEvent.FindResultType.Found;
                        }

                        if ( le.IsAnyOf( LexerMatcherLibrary.Ignore ) )
                        {
                            return LexerEvent.FindResultType.KeepSearching;
                        }

                        return LexerEvent.FindResultType.NotFound;
                    }
                );

                if ( result.type == LexerEvent.FindResultType.Found )
                {
                    modifierStart = result.index;
                }
                else
                {
                    searching = false;
                }
            }

            return new RangeI( modifierStart, end );
        }
    );

    innerTokens.ReplaceRanges( objectRanges,
        ( r ) =>
        {
            var start = innerTokens.events[ r.min ];
            // Replace the whole nested object definition with a single comment token.
            return LexerEvent.WithMatch( LexerMatcherLibrary.MultiLineCommentMatcher.type, start.offset, "/* - inner object - */" );
        }
    );
}
// Phases of the ParseMembers state machine, in the order they occur for each member:
// optional attribute lists, optional modifiers, the type, the name, then the member end.
enum ParsePhase
{
Attributes,
Modifiers,
Type,
Name,
MemberEnd
}
List<ClassDocMember> members = new List<ClassDocMember>();
// State machine that walks innerTokens and splits them into ClassDocMember
// entries ( attributes → modifiers → type → name → end ). Aborts with an
// error log when a member's end cannot be determined.
void ParseMembers()
{
    var phase = ParsePhase.Attributes;
    var tokenIndex = 0;
    var memberStart = -1;

    RangeI attributes = null;
    RangeI modifiers = null;
    RangeI type = null;
    var name = -1;

    // Progress guard: tokenIndex must never move backwards ( checked after the loop ).
    var checkIndex = -1;

    while ( tokenIndex < innerTokens.size && checkIndex <= tokenIndex )
    {
        checkIndex = Mathf.Max( checkIndex, tokenIndex );

        var token = innerTokens[ tokenIndex ];

        // Whitespace/comments never advance the state machine.
        if ( token.IsAnyOf( LexerMatcherLibrary.Ignore ) )
        {
            tokenIndex ++;
            continue;
        }

        if ( memberStart == -1 )
        {
            memberStart = tokenIndex;
        }

        if ( ParsePhase.Attributes == phase )
        {
            attributes = null;

            var attributesEndIndex = GetAttributesEndIndex( tokenIndex );
            phase = ParsePhase.Modifiers;

            if ( attributesEndIndex != -1 )
            {
                attributes = new RangeI( tokenIndex, attributesEndIndex );
                tokenIndex = attributesEndIndex + 1;
            }

            continue;
        }

        if ( ParsePhase.Modifiers == phase )
        {
            modifiers = null;

            var modifiersEndIndex = GetModifiersEndIndex( tokenIndex );
            phase = ParsePhase.Type;

            if ( modifiersEndIndex != -1 )
            {
                // FIX: store the modifier range — it was previously left null even
                // when modifiers were found ( mirrors the Attributes phase above ).
                modifiers = new RangeI( tokenIndex, modifiersEndIndex );
                tokenIndex = modifiersEndIndex + 1;
            }

            continue;
        }

        if ( ParsePhase.Type == phase )
        {
            if ( token.Is( LexerMatcherLibrary.CFunctionMatcher ) )
            {
                // Constructor-like member: type and name share the same token.
                type = new RangeI( tokenIndex, tokenIndex );
                name = tokenIndex;
                phase = ParsePhase.MemberEnd;
                tokenIndex++;
            }
            else
            {
                var typeEndIndex = GetTypeEndIndex( tokenIndex );
                type = new RangeI( tokenIndex, typeEndIndex );
                tokenIndex = typeEndIndex + 1;
                phase = ParsePhase.Name;
            }

            continue;
        }

        if ( ParsePhase.Name == phase )
        {
            if ( token.MatchIs( "(") )
            {
                // No separate name token: the name is the last token of the type range.
                name = type.max;
            }
            else
            {
                name = tokenIndex;
                tokenIndex++;
            }

            phase = ParsePhase.MemberEnd;
            continue;
        }

        if ( ParsePhase.MemberEnd == phase )
        {
            var memberEndIndex = GetMemberEndIndex( tokenIndex );

            if ( memberEndIndex == -1 )
            {
                if ( tokenIndex != -1 && memberStart != -1 )
                {
                    var memberEnd = tokenIndex < memberStart ? innerTokens.size - 1 : tokenIndex;
                    RJLog.Error( tokenIndex, memberStart );
                    RJLog.Error(
                        "Could not parse", innerTokens.Range( new RangeI( memberStart, memberEnd ) ).match
                    );
                }

                return;
            }

            phase = ParsePhase.Attributes;

            var cdMember = new ClassDocMember();
            cdMember.start = memberStart;
            cdMember.attributes = attributes;
            cdMember.modifiers = modifiers;
            cdMember.type = type;
            cdMember.name = name;
            cdMember.end = memberEndIndex;

            members.Add( cdMember );

            tokenIndex = memberEndIndex + 1;
            memberStart = -1;
            continue;
        }
    }

    if ( checkIndex > tokenIndex )
    {
        RJLog.Log( "CheckIndex > TokenIndex", checkIndex, tokenIndex );
    }
}
// Scans one or more consecutive attribute lists ( "[ ... ]" ) starting at
// tokenIndex and returns the index of the last closing "]", or -1 when the
// token at tokenIndex does not open an attribute list.
int GetAttributesEndIndex( int tokenIndex )
{
    if ( tokenIndex >= innerTokens.size )
    {
        return -1;
    }

    var lastClosing = -1;
    var current = innerTokens[ tokenIndex ];

    while ( current != null && current.MatchIs( "[" ) )
    {
        var closing = innerTokens.FindClosingBracket( tokenIndex );

        if ( closing.type != LexerEvent.FindResultType.Found )
        {
            // Unbalanced "[": report what was completed so far.
            return lastClosing;
        }

        lastClosing = closing.index;

        // Step past the "]" and any whitespace/comment tokens that follow.
        tokenIndex = closing.index + 1;
        current = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;

        while ( current != null && current.IsAnyOf( LexerMatcherLibrary.Ignore ) )
        {
            tokenIndex ++;
            current = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
        }
    }

    return lastClosing;
}
// Returns the index of the last keyword in a run of modifier tokens starting
// at tokenIndex, or -1 when the token there is not a modifier.
int GetModifiersEndIndex( int tokenIndex )
{
    if ( tokenIndex >= innerTokens.size )
    {
        return -1;
    }

    var lastModifier = -1;
    var current = innerTokens[ tokenIndex ];

    while ( current != null && current.MatchIsAny( ModifierTypes ) )
    {
        lastModifier = tokenIndex;

        // Advance past the modifier and any ignorable tokens after it.
        do
        {
            tokenIndex ++;
            current = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
        }
        while ( current != null && current.IsAnyOf( LexerMatcherLibrary.Ignore ) );
    }

    return lastModifier;
}
// Resolves the full extent of a type expression by chaining the individual
// scanners: dotted name → generic arguments → pointer/array suffixes → "?".
int GetTypeEndIndex( int tokenIndex )
{
    if ( tokenIndex >= innerTokens.size )
    {
        return -1;
    }

    return GetNullableEndIndex(
        GetPointerArraysEndIndex(
            GetGenericsEndIndex(
                GetNameEndIndex( tokenIndex )
            )
        )
    );
}
// Scans a ( possibly dotted ) identifier like "Foo.Bar.Baz" starting at
// tokenIndex and returns the index of its last word token, or -1 when the
// token there is not a word.
int GetNameEndIndex( int tokenIndex )
{
    if ( tokenIndex >= innerTokens.size )
    {
        return -1;
    }

    var token = innerTokens[ tokenIndex ];
    var end = -1;

    while ( token != null && token.Is( LexerMatcherLibrary.CwordMatcher ) )
    {
        end = tokenIndex;
        tokenIndex ++;
        token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;

        while ( token != null && token.IsAnyOf( LexerMatcherLibrary.Ignore ) )
        {
            tokenIndex ++;
            token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
        }

        // FIX: guard against running off the end of the token list —
        // token.Is was previously called on a possibly null token.
        if ( token == null || ! token.Is( LexerMatcherLibrary.OperatorMatcher, "." ) )
        {
            return end;
        }

        // Step past the "." and any ignorable tokens to the next name part.
        tokenIndex ++;
        token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;

        while ( token != null && token.IsAnyOf( LexerMatcherLibrary.Ignore ) )
        {
            tokenIndex ++;
            token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
        }
    }

    return end;
}
// Given the index of the last name token of a type, returns the index of the
// closing ">" of a generic argument list, or tokenIndex unchanged when no
// generics follow.
int GetGenericsEndIndex( int tokenIndex )
{
    // FIX: also reject negative indices — GetNameEndIndex can pass -1 through,
    // which previously read innerTokens[ 0 ] by accident ( siblings already guard < 0 ).
    if ( tokenIndex < 0 || tokenIndex >= innerTokens.size )
    {
        return -1;
    }

    var originalIndex = tokenIndex;

    // FIX: bounds-checked fetch — the previous version indexed past the end of the list.
    tokenIndex ++;
    var token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;

    while ( token != null && token.IsAnyOf( LexerMatcherLibrary.Ignore ) )
    {
        tokenIndex ++;
        token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
    }

    // FIX: token can be null at the end of the token list ( previously a NullReferenceException ).
    if ( token != null && token.MatchIs( "<" ) )
    {
        var endResult = innerTokens.FindClosingBracket( tokenIndex );

        // Only accept a properly closed "< ... >"; otherwise fall through.
        if ( endResult.type == LexerEvent.FindResultType.Found )
        {
            return endResult.index;
        }
    }

    return originalIndex;
}
// Extends a type range over trailing pointer ( "*" ) and array ( "[ ... ]" )
// tokens. Returns tokenIndex unchanged when none follow.
int GetPointerArraysEndIndex( int tokenIndex )
{
    if ( tokenIndex < 0 || tokenIndex >= innerTokens.size )
    {
        return -1;
    }

    var lastTokenIndex = tokenIndex;

    // FIX: advance to the NEXT token before scanning — the previous version
    // tested the type token itself, so "*" / "[]" suffixes were never detected.
    tokenIndex ++;
    var token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;

    // FIX: parenthesized the condition — "a && b || c" previously dereferenced
    // a null token once the list was exhausted.
    while ( token != null && ( token.IsAnyOf( LexerMatcherLibrary.Ignore ) || token.MatchIs( "*" ) || token.MatchIs( "[" ) ) )
    {
        if ( token.MatchIs( "*" ) )
        {
            lastTokenIndex = tokenIndex;
        }
        else if ( token.MatchIs( "[" ) )
        {
            var closing = innerTokens.FindClosingBracket( tokenIndex );

            // Unbalanced "[": stop and report what was matched so far.
            if ( closing.type != LexerEvent.FindResultType.Found )
            {
                return lastTokenIndex;
            }

            tokenIndex = closing.index;
            lastTokenIndex = tokenIndex;
        }

        // Ignorable tokens are skipped without extending the range.
        tokenIndex ++;
        token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
    }

    return lastTokenIndex;
}
// If a nullable marker "?" follows the token at tokenIndex ( skipping
// ignorable tokens ), returns the index of the "?"; otherwise returns
// tokenIndex unchanged.
int GetNullableEndIndex( int tokenIndex )
{
    if ( tokenIndex < 0 || tokenIndex >= innerTokens.size )
    {
        return -1;
    }

    var originalIndex = tokenIndex;

    // FIX: step to the NEXT token before checking — the previous version tested
    // the type token itself for "?", so a trailing nullable marker was never found
    // ( mirrors the advance pattern in GetGenericsEndIndex ).
    tokenIndex ++;
    var token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;

    while ( token != null && token.IsAnyOf( LexerMatcherLibrary.Ignore ) )
    {
        tokenIndex ++;
        token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
    }

    if ( token != null && token.MatchIs( "?" ) )
    {
        return tokenIndex;
    }

    return originalIndex;
}
// Finds the index of the token that terminates a member ( scanning from just
// after its name ): ";" for plain fields, the terminating ";" for assignments
// and expression bodies, or the { } body block for functions/properties.
// Returns -1 when no member end can be determined.
int GetMemberEndIndex( int tokenIndex )
{
    if ( tokenIndex < 0 || tokenIndex >= innerTokens.size )
    {
        return -1;
    }

    var token = innerTokens[ tokenIndex ];

    while ( token != null && token.IsAnyOf( LexerMatcherLibrary.Ignore ) )
    {
        tokenIndex ++;
        token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
    }

    // FIX: token is null when the list ends in ignorable tokens ( previously a NullReferenceException ).
    if ( token == null )
    {
        return -1;
    }

    if ( token.MatchIs( ";" ) )
    {
        // Member without assignment.
        return tokenIndex;
    }

    if ( token.MatchIs( "=" ) || token.MatchIs( "=>" ) )
    {
        // Member with assignment / expression body: runs until the next ";".
        return innerTokens.FindIndexOfMatch( tokenIndex, ";" );
    }

    if ( token.MatchIs( "(" ) )
    {
        // Function member: skip past the parameter list.
        var closing = innerTokens.FindClosingBracket( tokenIndex );

        // FIX: bail out on an unbalanced "(" instead of jumping to index 0 ( -1 + 1 ).
        if ( closing.type != LexerEvent.FindResultType.Found )
        {
            return -1;
        }

        tokenIndex = closing.index + 1;
        token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
    }

    while ( token != null && token.IsAnyOf( LexerMatcherLibrary.Ignore ) )
    {
        tokenIndex ++;
        token = tokenIndex < innerTokens.size ? innerTokens[ tokenIndex ] : null;
    }

    // FIX: guard the final checks against a null token as well.
    if ( token == null )
    {
        return -1;
    }

    if ( token.Is( ClassBlock.LexerType ) )
    {
        // Function / property with a { } body.
        return tokenIndex;
    }

    if ( token.MatchIs( "=>" ) )
    {
        // Expression-bodied function: runs until the next ";".
        return innerTokens.FindIndexOfMatch( tokenIndex, ";" );
    }

    RJLog.Log( "Member not found, token", token.match );
    return -1;
}
// Assigns each member the doc comment token that precedes it, searching
// backwards only as far as the previous member's end.
void ParseComments()
{
    var previousEnd = 0;

    foreach ( var member in members )
    {
        member.comment = GetComment( member.start, previousEnd );
        previousEnd = member.end;
    }
}
// Searches backwards from the token before 'start' down to 'end' ( inclusive )
// for a documentation comment of the form "/** <summary ...> ... </summary> */".
// Returns the token index of that comment, or -1 when there is none or the
// nearest comment is not a well-formed summary comment.
int GetComment( int start, int end )
{
    var it = start - 1;

    // FIX: use >= so a comment at the very first token can be found ( end is 0
    // for the first member ); for later members the token at 'end' is a member
    // terminator ( ";" or a block ), never a comment, so this stays safe.
    while ( it >= end )
    {
        var token = innerTokens[ it ];

        if ( token.Is( LexerMatcherLibrary.MultiLineCommentMatcher ) )
        {
            var comment = token.match;

            var startRegex = @"^\/\*\*(\s|\n|\r)*<summary ";
            var endRegex = @"<\/summary>(\s|\n|\r)*\*\/$";

            // Only the nearest comment counts; anything malformed means "no doc comment".
            if ( RegexUtility.Matches( comment, startRegex ) && RegexUtility.Matches( comment, endRegex ) )
            {
                return it;
            }

            return -1;
        }

        it --;
    }

    return -1;
}
}
}