rj-action-library/Runtime/Text/JSON/JSONLexer.cs

340 lines
8.4 KiB
C#

using System.Collections;
using System.Collections.Generic;
using System.Text;
namespace Rokojori
{
// Event kinds reported by JSONLexer through its OnParse callback.
// The order of the members defines their numeric values; do not reorder.
public enum JSONLexerEventType
{
// Final event of a Lex run: the end of the source was reached.
DONE_SUCCESS,
// Final event of a Lex run: lexing was aborted after an error.
DONE_ERROR,
// '{'
OBJECT_START,
// '}'
OBJECT_END,
// '['
ARRAY_START,
// ']'
ARRAY_END,
// A quoted string whose closing quote is followed (after optional whitespace) by ':'.
// The reported span includes both quotes.
IDENTIFIER,
// Any other quoted string. The reported span includes both quotes.
STRING,
// A numeric literal.
NUMBER,
// The literal "true".
TRUE,
// The literal "false".
FALSE,
// The literal "null".
NULL,
// ','. The lexer emits this for every comma; it does not distinguish
// array elements from object members.
ARRAY_SEPERATOR,
// ':'
IDENTIFIER_SEPERATOR,
// NOTE(review): presumably reports a character that cannot start any token -
// confirm against the emitting code.
ERROR_UNEXPECTED_SYMBOL,
// A string was opened with '"' but the source ended before it was closed.
ERROR_UNCLOSED_STRING,
// NOTE(review): no emitter found in the lexer code reviewed here; presumably
// raised by a consumer that tracks nesting - confirm.
ERROR_UNCLOSED_OBJECT,
// NOTE(review): no emitter found in the lexer code reviewed here; presumably
// raised by a consumer that tracks nesting - confirm.
ERROR_UNCLOSED_ARRAY,
// An 'f' was found that is not followed by "alse".
ERROR_EXPECTED_FALSE,
// A 't' was found that is not followed by "rue".
ERROR_EXPECTED_TRUE,
// An 'n' was found that is not followed by "ull".
ERROR_EXPECTED_NULL,
// The main lexing loop did not advance past its start offset in one iteration.
ERROR_INFINITY_PREVENTION
}
public class JSONLexer
{
/// <summary>
/// Tells whether an event type denotes a failure: any of the ERROR_* kinds
/// or the terminating DONE_ERROR.
/// </summary>
/// <param name="type">The event type to classify.</param>
/// <returns>True for error kinds, false for everything else.</returns>
public static bool IsErrorType( JSONLexerEventType type )
{
  var isSpecificError =
    type == JSONLexerEventType.ERROR_UNEXPECTED_SYMBOL ||
    type == JSONLexerEventType.ERROR_UNCLOSED_STRING ||
    type == JSONLexerEventType.ERROR_UNCLOSED_OBJECT ||
    type == JSONLexerEventType.ERROR_UNCLOSED_ARRAY ||
    type == JSONLexerEventType.ERROR_EXPECTED_FALSE ||
    type == JSONLexerEventType.ERROR_EXPECTED_TRUE ||
    type == JSONLexerEventType.ERROR_EXPECTED_NULL ||
    type == JSONLexerEventType.ERROR_INFINITY_PREVENTION;

  return isSpecificError || type == JSONLexerEventType.DONE_ERROR;
}
/// <summary>
/// Receives one lexer event. <c>offset</c> is an index into the lexed source;
/// <c>length</c> is the number of characters covered, or -1 when the lexer
/// did not supply a length for the event.
/// </summary>
public delegate void OnParse( JSONLexerEventType type, int offset, int length );
// Text being lexed; assigned at the start of Lex.
string source;
// Receiver of lexer events; when null, events are dropped.
OnParse onParseCallback;
/// <summary>
/// Lexes <paramref name="source"/> and reports every token, followed by a
/// final DONE_SUCCESS or DONE_ERROR event at offset 0, to the callback.
/// </summary>
/// <param name="source">The JSON text to lex.</param>
/// <param name="onParseCallback">Event receiver; may be null to discard events.</param>
public void Lex( string source, OnParse onParseCallback = null )
{
  this.onParseCallback = onParseCallback;
  this.source = source;

  var result = LexJSON();
  Call( result, 0 );
}
/// <summary>
/// Forwards an event to the registered callback, if there is one.
/// </summary>
/// <param name="type">Kind of event.</param>
/// <param name="offset">Index into the source the event refers to.</param>
/// <param name="length">Number of characters covered; -1 when not specified.</param>
void Call( JSONLexerEventType type, int offset, int length = -1 )
{
  if ( onParseCallback == null )
  {
    return;
  }

  onParseCallback( type, offset, length );
}
/// <summary>
/// Walks the whole source and reports one event per token through <c>Call</c>.
/// Whitespace between tokens is skipped. A character that cannot start any
/// token is reported as ERROR_UNEXPECTED_SYMBOL and aborts lexing instead of
/// being silently ignored.
/// </summary>
/// <returns>
/// DONE_SUCCESS when the end of the source was reached, DONE_ERROR as soon as
/// a token lexer fails, an unexpected character is found, or the loop fails
/// to advance.
/// </returns>
JSONLexerEventType LexJSON()
{
  var offset = SkipWhiteSpace( 0 );

  while ( offset < source.Length )
  {
    var offsetStart = offset;
    var character = source[ offset ];

    switch ( character )
    {
      case '{':
        Call( JSONLexerEventType.OBJECT_START, offset, 1 );
        break;

      case '}':
        Call( JSONLexerEventType.OBJECT_END, offset, 1 );
        break;

      case '[':
        Call( JSONLexerEventType.ARRAY_START, offset, 1 );
        break;

      case ']':
        Call( JSONLexerEventType.ARRAY_END, offset, 1 );
        break;

      case ',':
        Call( JSONLexerEventType.ARRAY_SEPERATOR, offset, 1 );
        break;

      case ':':
        Call( JSONLexerEventType.IDENTIFIER_SEPERATOR, offset, 1 );
        break;

      case '\"':
        offset = LexString( offset );
        break;

      case 't':
        offset = LexTrue( offset );
        break;

      case 'f':
        offset = LexFalse( offset );
        break;

      case 'n':
        offset = LexNull( offset );
        break;

      default:
        if ( System.Char.IsDigit( character ) || character == '-' )
        {
          offset = LexNumber( offset );
        }
        else
        {
          // Nothing in JSON starts with this character: report it rather
          // than skipping it and pretending the input was valid.
          Call( JSONLexerEventType.ERROR_UNEXPECTED_SYMBOL, offset, 1 );
          return JSONLexerEventType.DONE_ERROR;
        }
        break;
    }

    // Token lexers return -1 after they have reported their own error event.
    if ( offset == -1 )
    {
      return JSONLexerEventType.DONE_ERROR;
    }

    // Token lexers return the index of the last consumed character,
    // so step past it before skipping whitespace.
    offset ++;
    offset = SkipWhiteSpace( offset );

    // Safety net: every iteration must move forward.
    if ( offsetStart >= offset )
    {
      Call( JSONLexerEventType.ERROR_INFINITY_PREVENTION, offsetStart );
      return JSONLexerEventType.DONE_ERROR;
    }
  }

  return JSONLexerEventType.DONE_SUCCESS;
}
/// <summary>
/// Advances from <paramref name="offset"/> over consecutive whitespace characters.
/// </summary>
/// <param name="offset">Index to start from.</param>
/// <returns>
/// The index of the first non-whitespace character at or after
/// <paramref name="offset"/>, or an index at/after the end of the source
/// when only whitespace remains.
/// </returns>
int SkipWhiteSpace( int offset )
{
  var cursor = offset;

  for ( ; cursor < source.Length; cursor ++ )
  {
    if ( ! System.Char.IsWhiteSpace( source[ cursor ] ) )
    {
      break;
    }
  }

  return cursor;
}
/// <summary>
/// Checks whether the next non-whitespace character at or after
/// <paramref name="offset"/> is a ':'.
/// </summary>
/// <param name="offset">Index just behind the token being classified.</param>
/// <returns>True when a ':' follows, false otherwise or at the end of the source.</returns>
bool IsIdentifier( int offset )
{
  var next = SkipWhiteSpace( offset );

  if ( next >= source.Length )
  {
    return false;
  }

  return source[ next ] == ':';
}
/// <summary>
/// Lexes a quoted string that starts at <paramref name="offset"/> and reports
/// it as IDENTIFIER (when a ':' follows) or STRING. The reported span
/// includes the opening and the closing quote.
/// </summary>
/// <remarks>
/// A backslash always escapes the character that follows it, so a string
/// ending in an escaped backslash (<c>"a\\"</c>) is closed correctly and an
/// escaped quote (<c>\"</c>) does not end the string. The scan is bounded by
/// the source length; strings of any size are accepted.
/// </remarks>
/// <param name="offset">Index of the opening quote.</param>
/// <returns>
/// The index of the closing quote, or -1 after reporting
/// ERROR_UNCLOSED_STRING when the source ends first.
/// </returns>
int LexString( int offset )
{
  var offsetStart = offset;
  var cursor = offset + 1;

  while ( cursor < source.Length )
  {
    var character = source[ cursor ];

    if ( character == '\\' )
    {
      // Skip the backslash together with the character it escapes.
      cursor += 2;
      continue;
    }

    if ( character == '\"' )
    {
      var length = ( cursor - offsetStart ) + 1;
      var isIdentifier = IsIdentifier( cursor + 1 );
      var type = isIdentifier ? JSONLexerEventType.IDENTIFIER :
                                JSONLexerEventType.STRING;

      Call( type, offsetStart, length );

      return cursor;
    }

    cursor ++;
  }

  Call( JSONLexerEventType.ERROR_UNCLOSED_STRING, offsetStart );
  return -1;
}
/// <summary>
/// Lexes the literal "true" whose 't' is at <paramref name="offset"/>.
/// </summary>
/// <param name="offset">Index of the leading 't'.</param>
/// <returns>
/// The index of the final 'e' after reporting TRUE, or -1 after reporting
/// ERROR_EXPECTED_TRUE.
/// </returns>
int LexTrue( int offset )
{
  var matchesLiteral = MatchNextThree( offset, 'r', 'u', 'e' );

  if ( ! matchesLiteral )
  {
    Call( JSONLexerEventType.ERROR_EXPECTED_TRUE, offset );
    return -1;
  }

  Call( JSONLexerEventType.TRUE, offset, 4 );
  return offset + 3;
}
/// <summary>
/// Lexes the literal "false" whose 'f' is at <paramref name="offset"/>.
/// </summary>
/// <param name="offset">Index of the leading 'f'.</param>
/// <returns>
/// The index of the final 'e' after reporting FALSE, or -1 after reporting
/// ERROR_EXPECTED_FALSE.
/// </returns>
int LexFalse( int offset )
{
  var matchesLiteral = MatchNextFour( offset, 'a', 'l', 's', 'e' );

  if ( ! matchesLiteral )
  {
    Call( JSONLexerEventType.ERROR_EXPECTED_FALSE, offset );
    return -1;
  }

  Call( JSONLexerEventType.FALSE, offset, 5 );
  return offset + 4;
}
/// <summary>
/// Lexes the literal "null" whose 'n' is at <paramref name="offset"/>.
/// </summary>
/// <param name="offset">Index of the leading 'n'.</param>
/// <returns>
/// The index of the final 'l' after reporting NULL, or -1 after reporting
/// ERROR_EXPECTED_NULL.
/// </returns>
int LexNull( int offset )
{
  var matchesLiteral = MatchNextThree( offset, 'u', 'l', 'l' );

  if ( ! matchesLiteral )
  {
    Call( JSONLexerEventType.ERROR_EXPECTED_NULL, offset );
    return -1;
  }

  Call( JSONLexerEventType.NULL, offset, 4 );
  return offset + 3;
}
/// <summary>
/// Lexes a numeric literal starting at <paramref name="offset"/> and reports
/// it as NUMBER. Accepted shape: optional leading '-', digits, optional
/// fraction ('.' plus digits), optional exponent ('e' or 'E', optional
/// '+' or '-', at least one digit).
/// </summary>
/// <remarks>
/// An exponent marker is only consumed when digits actually follow it, so
/// both <c>1e5</c> and <c>1e-5</c> become one token while a dangling
/// <c>1e</c> leaves the 'e' to the caller. A '-' is only part of the number
/// at its very start or directly behind the exponent marker. The scan is
/// bounded by the source length; literals of any size are accepted.
/// </remarks>
/// <param name="offset">Index of the first character (a digit or '-').</param>
/// <returns>The index of the last character that belongs to the number.</returns>
int LexNumber( int offset )
{
  var offsetStart = offset;
  var cursor = offset;

  if ( cursor < source.Length && source[ cursor ] == '-' )
  {
    cursor ++;
  }

  cursor = SkipDigits( cursor );

  if ( cursor < source.Length && source[ cursor ] == '.' )
  {
    cursor = SkipDigits( cursor + 1 );
  }

  if ( cursor < source.Length && ( source[ cursor ] == 'e' || source[ cursor ] == 'E' ) )
  {
    var exponentDigitsStart = cursor + 1;

    if ( exponentDigitsStart < source.Length &&
         ( source[ exponentDigitsStart ] == '+' || source[ exponentDigitsStart ] == '-' ) )
    {
      exponentDigitsStart ++;
    }

    var exponentEnd = SkipDigits( exponentDigitsStart );

    // Only accept the exponent when it carries at least one digit.
    if ( exponentEnd > exponentDigitsStart )
    {
      cursor = exponentEnd;
    }
  }

  var length = cursor - offsetStart;
  Call( JSONLexerEventType.NUMBER, offsetStart, length );

  // Report the last consumed index; the caller steps past it.
  return ( offsetStart + length ) - 1;
}

/// <summary>
/// Advances from <paramref name="offset"/> over consecutive digit characters.
/// </summary>
/// <param name="offset">Index to start from.</param>
/// <returns>The index of the first non-digit character, or the source length.</returns>
int SkipDigits( int offset )
{
  while ( offset < source.Length && System.Char.IsDigit( source[ offset ] ) )
  {
    offset ++;
  }

  return offset;
}
/// <summary>
/// Checks whether the three characters following <paramref name="offset"/>
/// equal the given characters, in order.
/// </summary>
/// <param name="offset">Index of the character before the three to compare.</param>
/// <param name="next">Expected character at offset + 1.</param>
/// <param name="second">Expected character at offset + 2.</param>
/// <param name="third">Expected character at offset + 3.</param>
/// <returns>True on a full match; false on a mismatch or when the source is too short.</returns>
bool MatchNextThree( int offset, char next, char second, char third )
{
  var lastIndex = offset + 3;

  if ( lastIndex >= source.Length )
  {
    return false;
  }

  return source[ offset + 1 ] == next &&
         source[ offset + 2 ] == second &&
         source[ lastIndex ] == third;
}
/// <summary>
/// Checks whether the four characters following <paramref name="offset"/>
/// equal the given characters, in order.
/// </summary>
/// <param name="offset">Index of the character before the four to compare.</param>
/// <param name="next">Expected character at offset + 1.</param>
/// <param name="second">Expected character at offset + 2.</param>
/// <param name="third">Expected character at offset + 3.</param>
/// <param name="fourth">Expected character at offset + 4.</param>
/// <returns>True on a full match; false on a mismatch or when the source is too short.</returns>
bool MatchNextFour( int offset, char next, char second, char third, char fourth )
{
  var lastIndex = offset + 4;

  if ( lastIndex >= source.Length )
  {
    return false;
  }

  return source[ offset + 1 ] == next &&
         source[ offset + 2 ] == second &&
         source[ offset + 3 ] == third &&
         source[ lastIndex ] == fourth;
}
}
}