using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace Rokojori
{
    /// <summary>
    /// Regex-driven tokenizer organised in named modes.
    /// Every mode owns an ordered list of <see cref="LexerMatcher"/>s; while lexing, the
    /// first matcher of the active mode that reports a positive match length at the
    /// current offset produces an event, and may switch the active mode through its
    /// <c>nextMode</c>.
    /// </summary>
    public class Lexer
    {
        /// <summary>Name of the mode used when no mode is given (the empty string).</summary>
        public static readonly string Default_Mode = "";

        // Matchers grouped by mode name, in registration order. Order matters: the
        // first matcher with a positive match length wins.
        Dictionary<string, List<LexerMatcher>> _modes = new Dictionary<string, List<LexerMatcher>>();

        bool _hasError = false;

        /// <summary>
        /// True once any <see cref="Lex"/> call on this instance has reported an error.
        /// The flag is never reset by this class.
        /// </summary>
        public bool hasError => _hasError;

        /// <summary>Appends a matcher to the list of its own mode, creating that list on first use.</summary>
        void AddMatcher( LexerMatcher matcher )
        {
            List<LexerMatcher> list;

            // Single lookup instead of ContainsKey + indexer.
            if ( ! _modes.TryGetValue( matcher.mode, out list ) || list == null )
            {
                list = new List<LexerMatcher>();
                _modes[ matcher.mode ] = list;
            }

            list.Add( matcher );
        }

        /// <summary>Registers all given matchers, in the order they are passed.</summary>
        /// <param name="matchers">Matchers to register.</param>
        public void AddAllMatchers( params LexerMatcher[] matchers )
        {
            for ( int i = 0; i < matchers.Length; i++ )
            {
                AddMatcher( matchers[ i ] );
            }
        }

        /// <summary>Creates a matcher from a compiled regex and registers it.</summary>
        /// <param name="type">Type passed to the <see cref="LexerMatcher"/> constructor.</param>
        /// <param name="regex">Regex passed to the <see cref="LexerMatcher"/> constructor.</param>
        /// <param name="mode">Mode the matcher is registered under.</param>
        /// <param name="nextMode">Mode to switch to after a match, or null to stay in the current mode.</param>
        public void AddMatcher( string type, Regex regex, string mode = "", string nextMode = null )
        {
            AddMatcher( new LexerMatcher( type, regex, mode, nextMode ) );
        }

        /// <summary>Creates a matcher from a regex pattern string and registers it.</summary>
        /// <param name="type">Type passed to the <see cref="LexerMatcher"/> constructor.</param>
        /// <param name="regex">Pattern that is compiled with <c>new Regex( regex )</c>.</param>
        /// <param name="mode">Mode the matcher is registered under.</param>
        /// <param name="nextMode">Mode to switch to after a match, or null to stay in the current mode.</param>
        public void AddMatcher( string type, string regex, string mode = "", string nextMode = null )
        {
            AddMatcher( type, new Regex( regex ), mode, nextMode );
        }

        /// <summary>
        /// Returns the first matcher of <paramref name="mode"/> that reports a positive
        /// match length at offset 0 of <paramref name="source"/>.
        /// </summary>
        /// <param name="source">Text to test.</param>
        /// <param name="mode">Mode whose matchers are tried.</param>
        /// <returns>
        /// The first matching matcher, or null when none matches or when no matcher has
        /// been registered for <paramref name="mode"/>.
        /// </returns>
        public LexerMatcher GetMatcher( string source, string mode = "" )
        {
            List<LexerMatcher> matchers;

            // An unregistered mode simply has no matcher; report that as "no match"
            // instead of letting the dictionary indexer throw KeyNotFoundException.
            if ( ! _modes.TryGetValue( mode, out matchers ) )
            {
                return null;
            }

            var offset = 0;

            for ( var i = 0; i < matchers.Count; i++ )
            {
                var matcher = matchers[ i ];

                if ( matcher.MatchLength( source, offset ) > 0 )
                {
                    return matcher;
                }
            }

            return null;
        }

        /// <summary>Calls <c>GrabMatch( source )</c> on every event in <paramref name="events"/>.</summary>
        /// <param name="events">Events to process.</param>
        /// <param name="source">Text handed to each event's <c>GrabMatch</c>.</param>
        public void GrabMatches( List<LexerEvent> events, string source )
        {
            events.ForEach( ev => { ev.GrabMatch( source ); } );
        }

        /// <summary>
        /// Tokenizes <paramref name="source"/> and reports every token through
        /// <paramref name="callback"/>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// A single <see cref="LexerEvent"/> instance is reused for all callbacks; a
        /// receiver that wants to keep an event has to copy it (as
        /// <see cref="LexToList"/> does).
        /// </para>
        /// <para>
        /// Besides regular token events, whose third <c>Set</c> argument is the match
        /// length, two marker values are used in that position: -1 for an error and
        /// -2 for the final "done" event. After a "mode not found" or "no match" error
        /// the method returns without a done event. When the iteration limit is hit,
        /// an error event is followed by the done event.
        /// </para>
        /// </remarks>
        /// <param name="source">Text to tokenize.</param>
        /// <param name="callback">Receiver invoked once per event.</param>
        /// <param name="offset">Index in <paramref name="source"/> at which lexing starts.</param>
        /// <param name="mode">Mode that is active at the start.</param>
        public void Lex( string source, System.Action<LexerEvent> callback, int offset = 0, string mode = "" )
        {
            const int ErrorFlag = -1;
            const int DoneFlag = -2;

            // Upper bound on loop iterations.
            const int MaxTries = 1000000;

            var lexerEvent = new LexerEvent( "", 0, DoneFlag );
            var numTries = 0;

            while ( offset < source.Length && numTries < MaxTries )
            {
                numTries++;

                List<LexerMatcher> matchers;

                if ( ! _modes.TryGetValue( mode, out matchers ) )
                {
                    var errorMessage = "@Lexer-Error. Mode not found: '" + mode + "'";
                    RJLog.Log( errorMessage, "@", offset );
                    lexerEvent.Set( errorMessage, offset, ErrorFlag );
                    _hasError = true;
                    callback( lexerEvent );
                    return;
                }

                var foundSomething = false;

                // RJLog.Log( "--- MATCHING ----> ", offset, "CHARACTER:", source[ offset ] );

                for ( var i = 0; i < matchers.Count; i++ )
                {
                    var matcher = matchers[ i ];
                    var matchLength = matcher.MatchLength( source, offset );

                    // RJLog.Log( matcher.type, ">>", matchLength, matcher.matcher );

                    if ( matchLength <= 0 )
                    {
                        continue;
                    }

                    lexerEvent.Set( matcher.fullType, offset, matchLength );

                    // Logs.Log(matcher.type, ">>", "'"+source.Substring( offset, matchLength )+"'", "@", offset, matchLength );

                    callback( lexerEvent );
                    foundSomething = true;

                    if ( matcher.nextMode != null )
                    {
                        mode = matcher.nextMode;
                    }

                    offset += matchLength;

                    // First match wins; the remaining matchers are not consulted.
                    break;
                }

                if ( ! foundSomething )
                {
                    var modeInfo = mode == "" ? "default-mode" : " mode: '" + mode + "'";
                    var errorMessage = "@Lexer-Error. No match for " + modeInfo;
                    RJLog.Log( errorMessage, "@", offset );
                    lexerEvent.Set( errorMessage, offset, ErrorFlag );
                    _hasError = true;
                    callback( lexerEvent );
                    return;
                }
            }

            // Only an error if input is actually left over: a source that is fully
            // consumed on exactly the last permitted iteration finished normally.
            if ( numTries >= MaxTries && offset < source.Length )
            {
                lexerEvent.Set( mode, offset, ErrorFlag );
                _hasError = true;
                callback( lexerEvent );
            }

            lexerEvent.Set( mode, offset, DoneFlag );
            callback( lexerEvent );
        }

        /// <summary>
        /// Runs <see cref="Lex"/> and collects a copy of every reported event,
        /// including error and done events.
        /// </summary>
        /// <param name="source">Text to tokenize.</param>
        /// <param name="offset">Index in <paramref name="source"/> at which lexing starts.</param>
        /// <param name="mode">Mode that is active at the start.</param>
        /// <returns>The copied events in the order they were reported.</returns>
        public List<LexerEvent> LexToList( string source, int offset = 0, string mode = "" )
        {
            var list = new List<LexerEvent>();

            // Lex reuses one event instance, so each event must be copied before storing.
            Lex( source, ( LexerEvent token ) => { list.Add( token.Copy() ); }, offset, mode );

            return list;
        }
    }
}