171 lines
4.3 KiB
C#
171 lines
4.3 KiB
C#
using System.Collections;
|
|
using System.Collections.Generic;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace Rokojori
|
|
{
|
|
public class Lexer
|
|
{
|
|
/// <summary>
/// The empty string. The <c>mode</c> parameters of the public methods of this
/// class default to the same value.
/// </summary>
public static readonly string Default_Mode = "";

/// <summary>
/// Registered matchers, grouped by the mode they were added for. Within one
/// mode the list keeps the order in which the matchers were added.
/// </summary>
private Dictionary<string, List<LexerMatcher>> _modes = new Dictionary<string, List<LexerMatcher>>();

/// <summary>Backing field of <see cref="hasError"/>.</summary>
private bool _hasError = false;

/// <summary>
/// True once <c>Lex</c> has flagged an error. Nothing in this class resets it.
/// </summary>
public bool hasError => _hasError;
|
|
|
|
|
|
/// <summary>
/// Appends a matcher to the list of its mode, creating that list the first
/// time the mode is seen.
/// </summary>
/// <param name="matcher">
/// Matcher to register. Its <c>mode</c> selects the list it is appended to.
/// </param>
void AddMatcher( LexerMatcher matcher )
{
  List<LexerMatcher> list;

  // One dictionary lookup instead of ContainsKey followed by the indexer.
  // A stored null list is treated like a missing one, as before.
  if ( ! _modes.TryGetValue( matcher.mode, out list ) || list == null )
  {
    list = new List<LexerMatcher>();
    _modes[ matcher.mode ] = list;
  }

  list.Add( matcher );
}
|
|
|
|
/// <summary>
/// Registers each of the given matchers, in the order they are passed.
/// </summary>
/// <param name="matchers">Matchers to register.</param>
public void AddAllMatchers( params LexerMatcher[] matchers )
{
  foreach ( var matcher in matchers )
  {
    AddMatcher( matcher );
  }
}
|
|
|
|
/// <summary>
/// Builds a <c>LexerMatcher</c> from the given arguments and registers it.
/// </summary>
/// <param name="type">Forwarded to the <c>LexerMatcher</c> constructor.</param>
/// <param name="regex">Regular expression forwarded to the <c>LexerMatcher</c> constructor.</param>
/// <param name="mode">Forwarded to the <c>LexerMatcher</c> constructor; defaults to the empty string.</param>
/// <param name="nextMode">Forwarded to the <c>LexerMatcher</c> constructor; defaults to null.</param>
public void AddMatcher( string type, Regex regex, string mode = "", string nextMode = null )
{
  var matcher = new LexerMatcher( type, regex, mode, nextMode );

  AddMatcher( matcher );
}
|
|
|
|
/// <summary>
/// Convenience overload: turns the pattern string into a <see cref="Regex"/>
/// and delegates to the overload that takes a <see cref="Regex"/>.
/// </summary>
/// <param name="type">Passed through unchanged.</param>
/// <param name="regex">Pattern handed to the <see cref="Regex"/> constructor (no options).</param>
/// <param name="mode">Passed through unchanged; defaults to the empty string.</param>
/// <param name="nextMode">Passed through unchanged; defaults to null.</param>
public void AddMatcher( string type, string regex, string mode = "", string nextMode = null )
{
  var pattern = new Regex( regex );

  AddMatcher( type, pattern, mode, nextMode );
}
|
|
|
|
/// <summary>
/// Finds the first matcher of a mode that matches at the very start of
/// <paramref name="source"/>.
/// </summary>
/// <param name="source">Text to test; matching is always attempted at offset 0.</param>
/// <param name="mode">Mode whose matchers are tried, in registration order.</param>
/// <returns>
/// The first matcher reporting a match length greater than zero, or null when
/// none does or when no matcher was ever registered for <paramref name="mode"/>.
/// </returns>
public LexerMatcher GetMatcher( string source, string mode ="" )
{
  List<LexerMatcher> matchers;

  // An unregistered mode has no matchers: answer "nothing found" instead of
  // letting the dictionary indexer throw KeyNotFoundException.
  if ( ! _modes.TryGetValue( mode, out matchers ) )
  {
    return null;
  }

  for ( var i = 0; i < matchers.Count; i++ )
  {
    var matcher = matchers[ i ];

    if ( matcher.MatchLength( source, 0 ) > 0 )
    {
      return matcher;
    }
  }

  return null;
}
|
|
|
|
/// <summary>
/// Calls <c>GrabMatch( source )</c> on every event of the list, in list order.
/// </summary>
/// <param name="events">Events to process.</param>
/// <param name="source">Text handed to each event's <c>GrabMatch</c>.</param>
public void GrabMatches( List<LexerEvent> events, string source )
{
  foreach ( var lexerEvent in events )
  {
    lexerEvent.GrabMatch( source );
  }
}
|
|
|
|
|
|
/// <summary>
/// Walks <paramref name="source"/> from <paramref name="offset"/> and reports
/// one event per match through <paramref name="callback"/>.
/// </summary>
/// <remarks>
/// At each position the matchers of the current mode are tried in registration
/// order and the first one with a match length greater than zero wins. If that
/// matcher has a non-null <c>nextMode</c>, lexing continues in that mode.
///
/// The run ends with exactly one terminal event whose third value is either
/// <c>-2</c> (done, input consumed) or <c>-1</c> (error). Errors are: the
/// current mode has no registered matchers, no matcher matches at the current
/// position, or the limit of 1000000 matches was reached with input left over.
/// Every error sets <c>hasError</c>.
///
/// A single <c>LexerEvent</c> instance is reused for all callbacks, so a
/// callback that wants to keep an event has to copy it.
/// </remarks>
/// <param name="source">Text to lex.</param>
/// <param name="callback">Receives every match event and the terminal event.</param>
/// <param name="offset">Index in <paramref name="source"/> at which lexing starts.</param>
/// <param name="mode">Mode lexing starts in.</param>
public void Lex( string source, System.Action<LexerEvent> callback, int offset = 0, string mode = "" )
{
  const int ERROR_FLAG = -1;
  const int DONE_FLAG = -2;

  // Each iteration consumes at least one character or returns, so this only
  // caps the number of matches a single run may produce.
  const int maxTries = 1000000;

  var lexerEvent = new LexerEvent( "", 0, DONE_FLAG );
  var numTries = 0;

  while ( offset < source.Length && numTries < maxTries )
  {
    numTries ++;

    List<LexerMatcher> matchers;

    if ( ! _modes.TryGetValue( mode, out matchers ) )
    {
      var errorMessage = "@Lexer-Error. Mode not found: '" + mode + "'";
      RJLog.Log( errorMessage, "@", offset );
      lexerEvent.Set( errorMessage, offset, ERROR_FLAG );
      _hasError = true;
      callback( lexerEvent );

      return;
    }

    var foundSomething = false;

    for ( var i = 0; i < matchers.Count; i++ )
    {
      var matcher = matchers[ i ];
      var matchLength = matcher.MatchLength( source, offset );

      if ( matchLength <= 0 )
      {
        continue;
      }

      lexerEvent.Set( matcher.fullType, offset, matchLength );
      callback( lexerEvent );

      foundSomething = true;

      if ( matcher.nextMode != null )
      {
        mode = matcher.nextMode;
      }

      offset += matchLength;

      // First match wins; the remaining matchers are not tried.
      break;
    }

    if ( ! foundSomething )
    {
      var modeInfo = mode == "" ? "default-mode" : " mode: '" + mode + "'";

      var errorMessage = "@Lexer-Error. No match for " + modeInfo;
      RJLog.Log( errorMessage, "@", offset );
      lexerEvent.Set( errorMessage, offset, ERROR_FLAG );
      _hasError = true;
      callback( lexerEvent );

      return;
    }
  }

  // The loop can only end with input left over when the try limit was hit.
  // Checking the offset (rather than the counter) avoids a false error when
  // the input was fully consumed exactly on the last allowed try.
  if ( offset < source.Length )
  {
    lexerEvent.Set( mode, offset, ERROR_FLAG );
    _hasError = true;
    callback( lexerEvent );

    // Like the other error paths: do not report DONE after an error.
    return;
  }

  lexerEvent.Set( mode, offset, DONE_FLAG );
  callback( lexerEvent );
}
|
|
|
|
/// <summary>
/// Runs <c>Lex</c> and collects a copy of every event it reports.
/// </summary>
/// <param name="source">Text to lex.</param>
/// <param name="offset">Index in <paramref name="source"/> at which lexing starts.</param>
/// <param name="mode">Mode lexing starts in.</param>
/// <returns>
/// Copies of all reported events in the order they were reported, including
/// whatever terminal event <c>Lex</c> emits.
/// </returns>
public List<LexerEvent> LexToList( string source, int offset = 0, string mode = "" )
{
  var events = new List<LexerEvent>();

  // Lex hands out one reused event instance, hence the Copy().
  System.Action<LexerEvent> collect = token => events.Add( token.Copy() );

  Lex( source, collect, offset, mode );

  return events;
}
|
|
}
|
|
} |