using System.Collections;
using System.Collections.Generic;
using System.Text;

namespace Rokojori
{
  /// <summary>
  /// Event kinds passed to a <see cref="JSONLexer.OnParse"/> callback.
  /// The member order is part of the public contract and must not change.
  /// </summary>
  public enum JSONLexerEventType
  {
    /// <summary>Final event of a run in which the lexer reported no error.</summary>
    DONE_SUCCESS,
    /// <summary>Final event of a run that was aborted by an error.</summary>
    DONE_ERROR,

    /// <summary>A '{' character.</summary>
    OBJECT_START,
    /// <summary>A '}' character.</summary>
    OBJECT_END,
    /// <summary>A '[' character.</summary>
    ARRAY_START,
    /// <summary>A ']' character.</summary>
    ARRAY_END,

    /// <summary>A quoted string that is followed (after optional white space) by ':'.</summary>
    IDENTIFIER,
    /// <summary>A quoted string that is not followed by ':'.</summary>
    STRING,
    /// <summary>A number literal.</summary>
    NUMBER,
    /// <summary>The literal <c>true</c>.</summary>
    TRUE,
    /// <summary>The literal <c>false</c>.</summary>
    FALSE,
    /// <summary>The literal <c>null</c>.</summary>
    NULL,

    /// <summary>A ',' character (the lexer emits this for every comma it meets).</summary>
    ARRAY_SEPERATOR,
    /// <summary>A ':' character.</summary>
    IDENTIFIER_SEPERATOR,

    // NOTE(review): the three members below are never emitted by JSONLexer in this file;
    // presumably they are used by other consumers of this enum - verify before removing.
    ERROR_UNEXPECTED_SYMBOL,
    /// <summary>A string was opened with '"' but the source ended before it was closed.</summary>
    ERROR_UNCLOSED_STRING,
    ERROR_UNCLOSED_OBJECT,
    ERROR_UNCLOSED_ARRAY,

    /// <summary>An 'f' was found that does not start the literal <c>false</c>.</summary>
    ERROR_EXPECTED_FALSE,
    /// <summary>A 't' was found that does not start the literal <c>true</c>.</summary>
    ERROR_EXPECTED_TRUE,
    /// <summary>An 'n' was found that does not start the literal <c>null</c>.</summary>
    ERROR_EXPECTED_NULL,

    /// <summary>The main loop failed to advance; lexing was aborted to avoid an endless loop.</summary>
    ERROR_INFINITY_PREVENTION
  }

  /// <summary>
  /// Event based JSON lexer: walks over a source string and reports every token
  /// as a (type, offset, length) triple to a callback. It does not validate nesting
  /// or token order.
  /// </summary>
  public class JSONLexer
  {
    /// <summary>
    /// Tells whether an event type signals an error (any ERROR_* member or DONE_ERROR).
    /// </summary>
    /// <param name="type">The event type to classify.</param>
    /// <returns>True for error events, false otherwise.</returns>
    public static bool IsErrorType( JSONLexerEventType type )
    {
      switch ( type )
      {
        case JSONLexerEventType.ERROR_UNEXPECTED_SYMBOL:
        case JSONLexerEventType.ERROR_UNCLOSED_STRING:
        case JSONLexerEventType.ERROR_UNCLOSED_OBJECT:
        case JSONLexerEventType.ERROR_UNCLOSED_ARRAY:
        case JSONLexerEventType.ERROR_EXPECTED_FALSE:
        case JSONLexerEventType.ERROR_EXPECTED_TRUE:
        case JSONLexerEventType.ERROR_EXPECTED_NULL:
        case JSONLexerEventType.ERROR_INFINITY_PREVENTION:
        case JSONLexerEventType.DONE_ERROR:
          return true;

        default:
          return false;
      }
    }

    /// <summary>
    /// Callback signature for lexer events.
    /// </summary>
    /// <param name="type">Kind of the event.</param>
    /// <param name="offset">Index into the source where the token or error starts.</param>
    /// <param name="length">Token length in characters, or -1 when no length is reported.</param>
    public delegate void OnParse( JSONLexerEventType type, int offset, int length );

    string source;
    OnParse onParseCallback;

    /// <summary>
    /// Lexes <paramref name="source"/> and reports all events to <paramref name="onParseCallback"/>.
    /// The last event is always DONE_SUCCESS or DONE_ERROR, reported with offset 0 and length -1.
    /// </summary>
    /// <param name="source">
    /// The JSON text. Must not be null; a null source fails with a NullReferenceException
    /// as soon as its length is read.
    /// </param>
    /// <param name="onParseCallback">Receiver of the events; may be null, then nothing is reported.</param>
    public void Lex( string source, OnParse onParseCallback = null )
    {
      this.source = source;
      this.onParseCallback = onParseCallback;

      var result = LexJSON();
      Call( result, 0 );
    }

    /// <summary>
    /// Forwards an event to the callback, if one is registered.
    /// </summary>
    void Call( JSONLexerEventType type, int offset, int length = -1 )
    {
      if ( onParseCallback == null )
      {
        return;
      }

      onParseCallback( type, offset, length );
    }

    /// <summary>
    /// Main loop: dispatches on the first character of every token.
    /// Characters that start no known token are skipped without emitting an event.
    /// </summary>
    /// <returns>DONE_SUCCESS when the end of the source was reached, DONE_ERROR otherwise.</returns>
    JSONLexerEventType LexJSON()
    {
      var offset = SkipWhiteSpace( 0 );

      while ( offset < source.Length )
      {
        var offsetStart = offset;
        var character = source[ offset ];

        // Single character tokens leave the offset untouched; the Lex* helpers
        // return the offset of the LAST character they consumed, or -1 on error.
        switch ( character )
        {
          case '{': Call( JSONLexerEventType.OBJECT_START, offset, 1 ); break;
          case '}': Call( JSONLexerEventType.OBJECT_END, offset, 1 ); break;
          case '[': Call( JSONLexerEventType.ARRAY_START, offset, 1 ); break;
          case ']': Call( JSONLexerEventType.ARRAY_END, offset, 1 ); break;
          case ',': Call( JSONLexerEventType.ARRAY_SEPERATOR, offset, 1 ); break;
          case ':': Call( JSONLexerEventType.IDENTIFIER_SEPERATOR, offset, 1 ); break;
          case '\"': offset = LexString( offset ); break;
          case 't': offset = LexTrue( offset ); break;
          case 'f': offset = LexFalse( offset ); break;
          case 'n': offset = LexNull( offset ); break;

          default:
          {
            if ( System.Char.IsDigit( character ) || character == '-' )
            {
              offset = LexNumber( offset );
            }

            break;
          }
        }

        if ( offset == -1 )
        {
          // The helper already reported the specific error event.
          return JSONLexerEventType.DONE_ERROR;
        }

        offset ++;
        offset = SkipWhiteSpace( offset );

        // Safety net: every iteration has to move forward.
        if ( offsetStart >= offset )
        {
          Call( JSONLexerEventType.ERROR_INFINITY_PREVENTION, offsetStart );
          return JSONLexerEventType.DONE_ERROR;
        }
      }

      return JSONLexerEventType.DONE_SUCCESS;
    }

    /// <summary>
    /// Returns the first offset at or after <paramref name="offset"/> that is not white space
    /// (or the source length when only white space remains).
    /// </summary>
    int SkipWhiteSpace( int offset )
    {
      while ( offset < source.Length && System.Char.IsWhiteSpace( source[ offset ] ) )
      {
        offset ++;
      }

      return offset;
    }

    /// <summary>
    /// True when the next non white space character at or after <paramref name="offset"/> is ':'.
    /// </summary>
    bool IsIdentifier( int offset )
    {
      var next = SkipWhiteSpace( offset );

      return next < source.Length && source[ next ] == ':';
    }

    /// <summary>
    /// Lexes a quoted string starting at the opening quote at <paramref name="offset"/>.
    /// Reports IDENTIFIER or STRING (quotes included in the length), or ERROR_UNCLOSED_STRING.
    /// </summary>
    /// <returns>The offset of the closing quote, or -1 when the string is not closed.</returns>
    int LexString( int offset )
    {
      var offsetStart = offset;
      offset ++;

      // The loop advances on every iteration and is bounded by the source length,
      // so no artificial iteration cap is needed (the former cap made long strings throw).
      while ( offset < source.Length )
      {
        var character = source[ offset ];

        if ( character == '\\' )
        {
          // A backslash escapes exactly the next character. Skipping both keeps
          // "\\" followed by a quote from being mistaken for an escaped quote.
          offset += 2;
          continue;
        }

        if ( character == '\"' )
        {
          var length = ( offset - offsetStart ) + 1;
          var type = IsIdentifier( offset + 1 ) ? JSONLexerEventType.IDENTIFIER : JSONLexerEventType.STRING;

          Call( type, offsetStart, length );

          return offset;
        }

        offset ++;
      }

      Call( JSONLexerEventType.ERROR_UNCLOSED_STRING, offsetStart );
      return -1;
    }

    /// <summary>
    /// Lexes the literal <c>true</c> starting at the 't' at <paramref name="offset"/>.
    /// </summary>
    /// <returns>The offset of the final 'e', or -1 after reporting ERROR_EXPECTED_TRUE.</returns>
    int LexTrue( int offset )
    {
      if ( ! MatchNextThree( offset, 'r', 'u', 'e' ) )
      {
        Call( JSONLexerEventType.ERROR_EXPECTED_TRUE, offset );
        return -1;
      }

      Call( JSONLexerEventType.TRUE, offset, 4 );
      return offset + 3;
    }

    /// <summary>
    /// Lexes the literal <c>false</c> starting at the 'f' at <paramref name="offset"/>.
    /// </summary>
    /// <returns>The offset of the final 'e', or -1 after reporting ERROR_EXPECTED_FALSE.</returns>
    int LexFalse( int offset )
    {
      if ( ! MatchNextFour( offset, 'a', 'l', 's', 'e' ) )
      {
        Call( JSONLexerEventType.ERROR_EXPECTED_FALSE, offset );
        return -1;
      }

      Call( JSONLexerEventType.FALSE, offset, 5 );
      return offset + 4;
    }

    /// <summary>
    /// Lexes the literal <c>null</c> starting at the 'n' at <paramref name="offset"/>.
    /// </summary>
    /// <returns>The offset of the final 'l', or -1 after reporting ERROR_EXPECTED_NULL.</returns>
    int LexNull( int offset )
    {
      if ( ! MatchNextThree( offset, 'u', 'l', 'l' ) )
      {
        Call( JSONLexerEventType.ERROR_EXPECTED_NULL, offset );
        return -1;
      }

      Call( JSONLexerEventType.NULL, offset, 4 );
      return offset + 3;
    }

    /// <summary>
    /// Lexes a number starting at <paramref name="offset"/> (a digit or '-') and reports NUMBER.
    /// Accepts digits, a leading '-', one '.', and one exponent ('e'/'E', optional sign,
    /// at least one digit). Lexing stops at the first character that does not fit.
    /// </summary>
    /// <returns>The offset of the last character that belongs to the number.</returns>
    int LexNumber( int offset )
    {
      var offsetStart = offset;
      var hasDot = false;
      var hasE = false;

      // Bounded by the source length and advancing on every iteration,
      // so no artificial iteration cap is needed (the former cap made long numbers throw).
      while ( offset < source.Length )
      {
        var character = source[ offset ];

        if ( System.Char.IsDigit( character ) )
        {
          offset ++;
        }
        else if ( character == '-' && offset == offsetStart )
        {
          // A minus is only a sign of the number at its very first position;
          // the exponent sign is handled together with 'e' below.
          offset ++;
        }
        else if ( character == '.' && ! hasDot )
        {
          hasDot = true;
          offset ++;
        }
        else if ( ( character == 'e' || character == 'E' ) && ! hasE )
        {
          // The exponent sign is optional in JSON ("1e5" is valid), the digits are not.
          var digitsStart = offset + 1;

          if ( digitsStart < source.Length && ( source[ digitsStart ] == '+' || source[ digitsStart ] == '-' ) )
          {
            digitsStart ++;
          }

          if ( digitsStart >= source.Length || ! System.Char.IsDigit( source[ digitsStart ] ) )
          {
            // Not an exponent: the number ends before the 'e'.
            break;
          }

          hasE = true;
          offset = digitsStart;
        }
        else
        {
          break;
        }
      }

      var length = offset - offsetStart;
      Call( JSONLexerEventType.NUMBER, offsetStart, length );

      return offset - 1;
    }

    /// <summary>
    /// True when the three characters after <paramref name="offset"/> exist and equal the given ones.
    /// </summary>
    bool MatchNextThree( int offset, char next, char second, char third )
    {
      if ( offset + 3 >= source.Length )
      {
        return false;
      }

      return source[ offset + 1 ] == next &&
             source[ offset + 2 ] == second &&
             source[ offset + 3 ] == third;
    }

    /// <summary>
    /// True when the four characters after <paramref name="offset"/> exist and equal the given ones.
    /// </summary>
    bool MatchNextFour( int offset, char next, char second, char third, char fourth )
    {
      if ( offset + 4 >= source.Length )
      {
        return false;
      }

      return source[ offset + 1 ] == next &&
             source[ offset + 2 ] == second &&
             source[ offset + 3 ] == third &&
             source[ offset + 4 ] == fourth;
    }
  }
}