340 lines
8.4 KiB
C#
340 lines
8.4 KiB
C#
|
using System.Collections;
|
||
|
using System.Collections.Generic;
|
||
|
|
||
|
using System.Text;
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
namespace Rokojori
|
||
|
{
|
||
|
|
||
|
/// <summary>
/// Kinds of events a JSON lexer run can report: final results, tokens and errors.
/// Member order is significant because the numeric values are assigned implicitly.
/// </summary>
public enum JSONLexerEventType
{
  // Final result of a lexing run.
  DONE_SUCCESS,
  DONE_ERROR,

  // Structural tokens and values.
  OBJECT_START,
  OBJECT_END,
  ARRAY_START,
  ARRAY_END,
  IDENTIFIER,
  STRING,
  NUMBER,
  TRUE,
  FALSE,
  NULL,
  ARRAY_SEPERATOR,
  IDENTIFIER_SEPERATOR,

  // Error kinds.
  ERROR_UNEXPECTED_SYMBOL,
  ERROR_UNCLOSED_STRING,
  ERROR_UNCLOSED_OBJECT,
  ERROR_UNCLOSED_ARRAY,
  ERROR_EXPECTED_FALSE,
  ERROR_EXPECTED_TRUE,
  ERROR_EXPECTED_NULL,
  ERROR_INFINITY_PREVENTION
}
|
||
|
|
||
|
public class JSONLexer
|
||
|
{
|
||
|
/// <summary>
/// Tells whether the given event type reports a failure.
/// </summary>
/// <param name="type">Event type to classify.</param>
/// <returns>
/// true for DONE_ERROR and every ERROR_* member, false for all other members.
/// </returns>
public static bool IsErrorType( JSONLexerEventType type )
{
  return type == JSONLexerEventType.DONE_ERROR ||
         type == JSONLexerEventType.ERROR_UNEXPECTED_SYMBOL ||
         type == JSONLexerEventType.ERROR_UNCLOSED_STRING ||
         type == JSONLexerEventType.ERROR_UNCLOSED_OBJECT ||
         type == JSONLexerEventType.ERROR_UNCLOSED_ARRAY ||
         type == JSONLexerEventType.ERROR_EXPECTED_FALSE ||
         type == JSONLexerEventType.ERROR_EXPECTED_TRUE ||
         type == JSONLexerEventType.ERROR_EXPECTED_NULL ||
         type == JSONLexerEventType.ERROR_INFINITY_PREVENTION;
}
|
||
|
|
||
|
/// <summary>
/// Receiver for lexer events.
/// </summary>
/// <param name="type">Kind of event being reported.</param>
/// <param name="offset">Index into the lexed text the event refers to.</param>
/// <param name="length">
/// Number of characters covered by the event; -1 when the reporting code passes no length.
/// </param>
public delegate void OnParse( JSONLexerEventType type, int offset, int length );

// Text of the current lexing run; assigned by Lex.
private string source;

// Receiver for the current run; may be null, in which case events are dropped.
private OnParse onParseCallback;
|
||
|
|
||
|
|
||
|
|
||
|
/// <summary>
/// Lexes the given text and reports every event to the callback. After the
/// text has been processed, one final DONE_SUCCESS or DONE_ERROR event is
/// reported at offset 0 with length -1.
/// </summary>
/// <param name="source">Text to lex. Must not be null.</param>
/// <param name="onParseCallback">Receiver for the events; when null, events are dropped.</param>
/// <exception cref="System.ArgumentNullException">Thrown when source is null.</exception>
public void Lex( string source, OnParse onParseCallback = null )
{
  // Fail fast with a descriptive exception instead of a NullReferenceException
  // raised later while reading source.Length.
  if ( source == null )
  {
    throw new System.ArgumentNullException( "source" );
  }

  this.source = source;
  this.onParseCallback = onParseCallback;

  var result = LexJSON();
  Call( result, 0 );
}
|
||
|
|
||
|
// Forwards one event to the registered callback; does nothing when no
// callback was registered. length defaults to -1 when the caller passes none.
void Call( JSONLexerEventType type, int offset, int length = -1 )
{
  if ( onParseCallback == null )
  {
    return;
  }

  onParseCallback( type, offset, length );
}
|
||
|
|
||
|
|
||
|
|
||
|
// Walks over the source text and reports one event per token through Call.
// Single-character tokens are reported directly, all other tokens are
// delegated to LexString / LexTrue / LexFalse / LexNull / LexNumber, which
// return the offset of the last character they consumed or -1 on failure.
//
// Returns DONE_SUCCESS when the end of the text was reached, DONE_ERROR when
// a delegated lexer failed, an unexpected character was found or no progress
// was made.
JSONLexerEventType LexJSON()
{
  var offset = SkipWhiteSpace( 0 );

  while ( offset < source.Length )
  {
    var offsetStart = offset;
    var character = source[ offset ];

    switch ( character )
    {
      case '{':
        Call( JSONLexerEventType.OBJECT_START, offset, 1 );
        break;

      case '}':
        Call( JSONLexerEventType.OBJECT_END, offset, 1 );
        break;

      case '[':
        Call( JSONLexerEventType.ARRAY_START, offset, 1 );
        break;

      case ']':
        Call( JSONLexerEventType.ARRAY_END, offset, 1 );
        break;

      case ',':
        Call( JSONLexerEventType.ARRAY_SEPERATOR, offset, 1 );
        break;

      case ':':
        Call( JSONLexerEventType.IDENTIFIER_SEPERATOR, offset, 1 );
        break;

      case '\"':
        offset = LexString( offset );
        break;

      case 't':
        offset = LexTrue( offset );
        break;

      case 'f':
        offset = LexFalse( offset );
        break;

      case 'n':
        offset = LexNull( offset );
        break;

      default:
        if ( System.Char.IsDigit( character ) || character == '-' )
        {
          offset = LexNumber( offset );
        }
        else
        {
          // A character that starts no known token used to be skipped
          // silently; report it so malformed input is not accepted.
          Call( JSONLexerEventType.ERROR_UNEXPECTED_SYMBOL, offset );
          return JSONLexerEventType.DONE_ERROR;
        }
        break;
    }

    // The delegated lexers already reported their specific error event.
    if ( offset == -1 )
    {
      return JSONLexerEventType.DONE_ERROR;
    }

    // Step past the last consumed character and any following white space.
    offset = SkipWhiteSpace( offset + 1 );

    // Safety net: every iteration has to move forward.
    if ( offsetStart >= offset )
    {
      Call( JSONLexerEventType.ERROR_INFINITY_PREVENTION, offsetStart );
      return JSONLexerEventType.DONE_ERROR;
    }
  }

  return JSONLexerEventType.DONE_SUCCESS;
}
|
||
|
|
||
|
// Returns the index of the first character at or after offset that is not
// white space (per System.Char.IsWhiteSpace), or the first index at or
// beyond source.Length when only white space remains.
int SkipWhiteSpace( int offset )
{
  var position = offset;

  while ( true )
  {
    if ( position >= source.Length )
    {
      return position;
    }

    if ( ! System.Char.IsWhiteSpace( source[ position ] ) )
    {
      return position;
    }

    position ++;
  }
}
|
||
|
|
||
|
// Returns true when the first non-white-space character at or after offset
// is a ':'. LexString uses this to classify a string as IDENTIFIER.
bool IsIdentifier( int offset )
{
  var next = SkipWhiteSpace( offset );

  if ( next >= source.Length )
  {
    return false;
  }

  return source[ next ] == ':';
}
|
||
|
|
||
|
// Lexes a double-quoted string whose opening quote sits at offset.
//
// On success reports IDENTIFIER (when the string is followed by ':') or
// STRING, covering both quotes, and returns the offset of the closing quote.
// When the text ends before a closing quote, reports ERROR_UNCLOSED_STRING
// at the opening quote and returns -1.
int LexString( int offset )
{
  var offsetStart = offset;

  // Step over the opening quote.
  offset ++;

  // The loop is bounded by source.Length and advances on every iteration,
  // so no iteration limit is needed; long strings must not throw.
  while ( offset < source.Length )
  {
    var character = source[ offset ];

    if ( character == '\\' )
    {
      // A backslash escapes the following character, whatever it is. Skipping
      // both keeps an escaped backslash directly before the closing quote
      // ("...\\") from being mistaken for an escaped quote.
      offset += 2;
      continue;
    }

    if ( character == '\"' )
    {
      var length = ( offset - offsetStart ) + 1;
      var type = IsIdentifier( offset + 1 ) ?
                 JSONLexerEventType.IDENTIFIER :
                 JSONLexerEventType.STRING;

      Call( type, offsetStart, length );

      return offset;
    }

    offset ++;
  }

  Call( JSONLexerEventType.ERROR_UNCLOSED_STRING, offsetStart );
  return -1;
}
|
||
|
|
||
|
// Lexes the literal "true" whose 't' sits at offset. Reports TRUE with
// length 4 and returns the offset of the final 'e'; otherwise reports
// ERROR_EXPECTED_TRUE and returns -1.
int LexTrue( int offset )
{
  var matches = MatchNextThree( offset, 'r', 'u', 'e' );

  if ( ! matches )
  {
    Call( JSONLexerEventType.ERROR_EXPECTED_TRUE, offset );
    return -1;
  }

  Call( JSONLexerEventType.TRUE, offset, 4 );
  return offset + 3;
}
|
||
|
|
||
|
// Lexes the literal "false" whose 'f' sits at offset. Reports FALSE with
// length 5 and returns the offset of the final 'e'; otherwise reports
// ERROR_EXPECTED_FALSE and returns -1.
int LexFalse( int offset )
{
  var matches = MatchNextFour( offset, 'a', 'l', 's', 'e' );

  if ( ! matches )
  {
    Call( JSONLexerEventType.ERROR_EXPECTED_FALSE, offset );
    return -1;
  }

  Call( JSONLexerEventType.FALSE, offset, 5 );
  return offset + 4;
}
|
||
|
|
||
|
// Lexes the literal "null" whose 'n' sits at offset. Reports NULL with
// length 4 and returns the offset of the final 'l'; otherwise reports
// ERROR_EXPECTED_NULL and returns -1.
int LexNull( int offset )
{
  var matches = MatchNextThree( offset, 'u', 'l', 'l' );

  if ( ! matches )
  {
    Call( JSONLexerEventType.ERROR_EXPECTED_NULL, offset );
    return -1;
  }

  Call( JSONLexerEventType.NULL, offset, 4 );
  return offset + 3;
}
|
||
|
|
||
|
// Lexes a number starting at offset (a digit or '-').
//
// Consumes digits, at most one '-', at most one '.' and at most one exponent
// ('e' / 'E' followed by a sign or directly by a digit). The lexer is lenient:
// it does not verify that the consumed characters form a valid JSON number.
//
// Reports NUMBER for the consumed range and returns the offset of the last
// consumed character.
int LexNumber( int offset )
{
  var offsetStart = offset;

  var hasDot = false;
  var hasE = false;
  var hasMinus = false;

  // Every iteration either consumes at least one character or leaves the
  // loop, so no iteration limit is needed; long numbers must not throw.
  while ( offset < source.Length )
  {
    var character = source[ offset ];

    if ( System.Char.IsDigit( character ) )
    {
      offset ++;
    }
    else if ( character == '-' && ! hasMinus )
    {
      hasMinus = true;
      offset ++;
    }
    else if ( character == '.' && ! hasDot )
    {
      hasDot = true;
      offset ++;
    }
    else if ( ( character == 'e' || character == 'E' ) && ! hasE )
    {
      var hasFollower = offset + 1 < source.Length;
      var follower = hasFollower ? source[ offset + 1 ] : '\0';

      // A signed exponent is only accepted when something follows the sign.
      var hasSign = offset + 2 < source.Length &&
                    ( follower == '+' || follower == '-' );

      if ( hasSign )
      {
        // Consume the exponent marker together with its sign.
        hasE = true;
        offset += 2;
      }
      else if ( hasFollower && System.Char.IsDigit( follower ) )
      {
        // Unsigned exponent such as "1e5"; the digit itself is consumed by
        // the next iteration.
        hasE = true;
        offset ++;
      }
      else
      {
        break;
      }
    }
    else
    {
      break;
    }
  }

  var length = offset - offsetStart;
  Call( JSONLexerEventType.NUMBER, offsetStart, length );

  return ( offsetStart + length ) - 1;
}
|
||
|
|
||
|
// Returns true when the three characters directly after offset equal next,
// second and third, in that order. Returns false when fewer than three
// characters follow offset.
bool MatchNextThree( int offset, char next, char second, char third )
{
  var lastIndex = offset + 3;

  if ( lastIndex >= source.Length )
  {
    return false;
  }

  return source[ offset + 1 ] == next &&
         source[ offset + 2 ] == second &&
         source[ lastIndex ] == third;
}
|
||
|
|
||
|
// Returns true when the four characters directly after offset equal next,
// second, third and fourth, in that order. Returns false when fewer than
// four characters follow offset.
bool MatchNextFour( int offset, char next, char second, char third, char fourth )
{
  var lastIndex = offset + 4;

  if ( lastIndex >= source.Length )
  {
    return false;
  }

  return source[ offset + 1 ] == next &&
         source[ offset + 2 ] == second &&
         source[ offset + 3 ] == third &&
         source[ lastIndex ] == fourth;
}
|
||
|
|
||
|
|
||
|
}
|
||
|
}
|