// rj-action-library/Runtime/Text/Lexing/Lexer.cs
//
// 170 lines
// 4.3 KiB
// C#

using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace Rokojori
{
/// <summary>
/// Mode-based tokenizer. Matchers are registered per mode; while lexing, the
/// matchers of the current mode are tried in registration order and the first
/// one reporting a positive match length produces an event.
/// </summary>
public class Lexer
{
  /// <summary>Name of the mode that is used when no mode is given.</summary>
  public static readonly string Default_Mode = "";

  // Registered matchers per mode name, in registration order.
  Dictionary<string,List<LexerMatcher>> _modes = new Dictionary<string, List<LexerMatcher>>();

  bool _hasError = false;

  /// <summary>
  /// True once a <see cref="Lex"/> run has reported an error. The flag is only
  /// ever set by this class, never cleared.
  /// </summary>
  public bool hasError => _hasError;

  // Appends the matcher to the list of its mode, creating that list on first use.
  void AddMatcher( LexerMatcher matcher )
  {
    List<LexerMatcher> list;

    if ( ! _modes.TryGetValue( matcher.mode, out list ) || list == null )
    {
      list = new List<LexerMatcher>();
      _modes[ matcher.mode ] = list;
    }

    list.Add( matcher );
  }

  /// <summary>Registers all given matchers, in the order they are passed.</summary>
  /// <param name="matchers">Matchers to register; each is filed under its own mode.</param>
  public void AddAllMatchers( params LexerMatcher[] matchers )
  {
    foreach ( var matcher in matchers )
    {
      AddMatcher( matcher );
    }
  }

  /// <summary>Creates a matcher from the given values and registers it.</summary>
  /// <param name="type">Type passed to the new matcher.</param>
  /// <param name="regex">Regular expression passed to the new matcher.</param>
  /// <param name="mode">Mode the matcher is registered for.</param>
  /// <param name="nextMode">Mode to switch to after a match, or null to stay in the current mode.</param>
  public void AddMatcher( string type, Regex regex, string mode = "", string nextMode = null )
  {
    AddMatcher( new LexerMatcher( type, regex, mode, nextMode ) );
  }

  /// <summary>Same as the <see cref="Regex"/> overload, compiling the pattern string first.</summary>
  /// <param name="type">Type passed to the new matcher.</param>
  /// <param name="regex">Pattern used to construct the <see cref="Regex"/>.</param>
  /// <param name="mode">Mode the matcher is registered for.</param>
  /// <param name="nextMode">Mode to switch to after a match, or null to stay in the current mode.</param>
  public void AddMatcher( string type, string regex, string mode = "", string nextMode = null )
  {
    AddMatcher( type, new Regex( regex ), mode, nextMode );
  }

  /// <summary>
  /// Returns the first matcher of the given mode that reports a positive match
  /// length at the start (offset 0) of the source.
  /// </summary>
  /// <param name="source">Text to test the matchers against.</param>
  /// <param name="mode">Mode whose matchers are tried.</param>
  /// <returns>The first matching matcher, or null if none matches or the mode has no matchers registered.</returns>
  public LexerMatcher GetMatcher( string source, string mode ="" )
  {
    List<LexerMatcher> matchers;

    // An unregistered mode simply has nothing that can match.
    if ( ! _modes.TryGetValue( mode, out matchers ) )
    {
      return null;
    }

    var offset = 0;

    for ( var i = 0; i < matchers.Count; i++ )
    {
      var matcher = matchers[ i ];

      if ( matcher.MatchLength( source, offset ) > 0 )
      {
        return matcher;
      }
    }

    return null;
  }

  /// <summary>Calls <c>GrabMatch( source )</c> on every event of the list.</summary>
  /// <param name="events">Events to process.</param>
  /// <param name="source">Source text handed to each event.</param>
  public void GrabMatches( List<LexerEvent> events, string source )
  {
    foreach ( var lexerEvent in events )
    {
      lexerEvent.GrabMatch( source );
    }
  }

  /// <summary>
  /// Lexes the source from the given offset and reports every match through the callback.
  /// </summary>
  /// <remarks>
  /// A single <see cref="LexerEvent"/> instance is reused for all callbacks, so
  /// callers that keep events must copy them (see <see cref="LexToList"/>).
  /// On success the last event is set with the current mode, the end offset and -2.
  /// On failure (unknown mode or no matcher matching) an event is set with the
  /// error message, the failing offset and -1, <see cref="hasError"/> becomes
  /// true and lexing stops without the final -2 event.
  /// </remarks>
  /// <param name="source">Text to lex.</param>
  /// <param name="callback">Receives the shared event after each match, on error and on completion.</param>
  /// <param name="offset">Position in the source to start at.</param>
  /// <param name="mode">Mode to start in; matchers may switch it via their next mode.</param>
  public void Lex( string source, System.Action<LexerEvent> callback, int offset = 0, string mode = "" )
  {
    const int ErrorFlag = -1;
    const int DoneFlag = -2;

    var lexerEvent = new LexerEvent( "", 0, DoneFlag );

    // No iteration cap is needed: every pass either consumes at least one
    // character (match length > 0) or returns with an error.
    while ( offset < source.Length )
    {
      List<LexerMatcher> matchers;

      if ( ! _modes.TryGetValue( mode, out matchers ) )
      {
        var errorMessage = "@Lexer-Error. Mode not found: '" + mode + "'";
        RJLog.Log( errorMessage, "@", offset );
        lexerEvent.Set( errorMessage, offset, ErrorFlag );
        _hasError = true;
        callback( lexerEvent );
        return;
      }

      // First matcher wins; registration order defines priority.
      LexerMatcher matched = null;
      var matchedLength = 0;

      for ( var i = 0; i < matchers.Count; i++ )
      {
        var length = matchers[ i ].MatchLength( source, offset );

        if ( length > 0 )
        {
          matched = matchers[ i ];
          matchedLength = length;
          break;
        }
      }

      if ( matched == null )
      {
        var modeInfo = mode == "" ? "default-mode" : " mode: '" + mode + "'";
        var errorMessage = "@Lexer-Error. No match for " + modeInfo;
        RJLog.Log( errorMessage, "@", offset );
        lexerEvent.Set( errorMessage, offset, ErrorFlag );
        _hasError = true;
        callback( lexerEvent );
        return;
      }

      lexerEvent.Set( matched.fullType, offset, matchedLength );
      callback( lexerEvent );

      if ( matched.nextMode != null )
      {
        mode = matched.nextMode;
      }

      offset += matchedLength;
    }

    lexerEvent.Set( mode, offset, DoneFlag );
    callback( lexerEvent );
  }

  /// <summary>
  /// Runs <see cref="Lex"/> and collects a copy of every reported event.
  /// </summary>
  /// <param name="source">Text to lex.</param>
  /// <param name="offset">Position in the source to start at.</param>
  /// <param name="mode">Mode to start in.</param>
  /// <returns>Copies of all events in the order they were reported, including the error or final event.</returns>
  public List<LexerEvent> LexToList( string source, int offset = 0, string mode = "" )
  {
    var list = new List<LexerEvent>();

    // The lexer reuses one event instance, so each one is copied before storing.
    Lex( source, token => list.Add( token.Copy() ), offset, mode );

    return list;
  }
}
}