using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace Rokojori
{
  /// <summary>
  /// Lexer that splits an XML source into events (comments, CDATA sections,
  /// instructions, tags, entities, text and the parts of a start tag).
  /// It uses two lexer modes: <see cref="Lexer.Default_Mode"/> for content
  /// between tags and <see cref="Inside_Start_Tag"/> for everything between
  /// the opening of a start tag and its closing bracket.
  /// </summary>
  /// <remarks>
  /// The four-argument <c>LexerMatcher</c> constructor is used here as
  /// ( type, pattern, mode, nextMode ) - presumably "mode" is the mode the matcher
  /// is active in and "nextMode" the mode the lexer switches to after a match;
  /// verify against <c>LexerMatcher</c>.
  /// </remarks>
  public class XMLLexer:Lexer
  {
    /// <summary>Name of the lexer mode that is active inside a start tag.</summary>
    public static readonly string Inside_Start_Tag = "Inside_Start_Tag";


    // ---------------------------------------------------------------------
    // Default mode matchers
    // ---------------------------------------------------------------------

    /// <summary>Matches an XML comment, from the comment opener up to the first comment closer.</summary>
    public static readonly LexerMatcher XMLComment =
      new LexerMatcher( "XMLComment", @"<!--((?:.|\r|\n)*?)-->" );

    /// <summary>Matches a CDATA section, up to the first CDATA terminator.</summary>
    public static readonly LexerMatcher XMLCData =
      new LexerMatcher( "XMLCData", @"<!\[CDATA\[((?:.|\r|\n)*?)\]\]>" );

    /// <summary>
    /// Matches any other markup declaration starting with an opening bracket and
    /// an exclamation mark (for example a document type declaration), up to the
    /// first closing bracket.
    /// </summary>
    // Must be registered after XMLComment and XMLCData, which start with the
    // same two characters but are more specific.
    public static readonly LexerMatcher XMLInstruction =
      new LexerMatcher( "XMLInstruction", @"<!((?:.|\r|\n)*?)>" );

    /// <summary>
    /// Matches a processing instruction / XML declaration, up to the first
    /// closing bracket.
    /// </summary>
    // NOTE(review): the emitted type string is "XMLDeclaration" although the field
    // is called XMLProcessingInstruction. Kept as is, since callers may compare
    // against the type string.
    public static readonly LexerMatcher XMLProcessingInstruction =
      new LexerMatcher( "XMLDeclaration", @"<\?((?:.|\r|\n)*?)>" );

    /// <summary>
    /// Matches the opening bracket and the (optionally prefixed) name of a start
    /// tag and switches the lexer to <see cref="Inside_Start_Tag"/>.
    /// </summary>
    // XMLRegexExtensions.Extend presumably expands the "\xml-name" placeholder
    // into a real regex for XML names - confirm in XMLRegexExtensions.
    public static readonly LexerMatcher XMLStartTag =
      new LexerMatcher(
        "XMLStartTag",
        XMLRegexExtensions.Extend( @"<(?:\xml-name:)?\xml-name" ),
        Lexer.Default_Mode,
        XMLLexer.Inside_Start_Tag
      );

    /// <summary>Matches a complete end tag with an (optionally prefixed) name.</summary>
    public static readonly LexerMatcher XMLEndTag =
      new LexerMatcher(
        "XMLEndTag",
        XMLRegexExtensions.Extend( @"<\/(?:\xml-name:)?\xml-name>" )
      );

    /// <summary>Matches a named or numeric entity reference such as an ampersand entity.</summary>
    public static readonly LexerMatcher XMLEscapedEntity =
      new LexerMatcher( "XMLEscapedEntity", @"&#?\w+;" );

    /// <summary>Matches text content between tags.</summary>
    // NOTE(review): as written, this pattern matches exactly two characters:
    // one character that is not an opening bracket, followed by any character
    // matched by "." (which may itself be an opening bracket). This looks
    // unintended, but the pattern is left untouched - confirm the intended
    // expression before changing it.
    public static readonly LexerMatcher XMLText =
      new LexerMatcher( "XMLText", @"([^<].)" );


    // ---------------------------------------------------------------------
    // Inside_Start_Tag mode matchers
    // ---------------------------------------------------------------------

    /// <summary>Matches an (optionally prefixed) attribute name inside a start tag.</summary>
    public static readonly LexerMatcher XMLAttributeName =
      new LexerMatcher(
        "XMLAttributeName",
        XMLRegexExtensions.Extend( @"(?:\xml-name:)?\xml-name" ),
        XMLLexer.Inside_Start_Tag,
        XMLLexer.Inside_Start_Tag
      );

    /// <summary>
    /// Matches the end of a start tag (with or without a self-closing slash)
    /// and switches the lexer back to <see cref="Lexer.Default_Mode"/>.
    /// </summary>
    public static readonly LexerMatcher XMLStartTagClosing =
      new LexerMatcher(
        "XMLStartTagClosing",
        @"\/?>",
        XMLLexer.Inside_Start_Tag,
        Lexer.Default_Mode
      );

    /// <summary>Matches the equals sign between an attribute name and its value.</summary>
    public static readonly LexerMatcher XMLAttributeAssignment =
      new LexerMatcher(
        "XMLAttributeAssignment",
        @"=",
        XMLLexer.Inside_Start_Tag,
        XMLLexer.Inside_Start_Tag
      );

    /// <summary>Library single-quoted string matcher, bound to <see cref="Inside_Start_Tag"/>.</summary>
    public static readonly LexerMatcher InsideStartTag_SingleQuotedStringMatcher =
      LexerMatcherLibrary.SingleQuotedStringMatcher.WithModes( XMLLexer.Inside_Start_Tag );

    /// <summary>Library double-quoted string matcher, bound to <see cref="Inside_Start_Tag"/>.</summary>
    public static readonly LexerMatcher InsideStartTag_DoubleQuotedStringMatcher =
      LexerMatcherLibrary.DoubleQuotedStringMatcher.WithModes( XMLLexer.Inside_Start_Tag );

    /// <summary>Library white space matcher, bound to <see cref="Inside_Start_Tag"/>.</summary>
    public static readonly LexerMatcher InsideStartTag_WhiteSpaceMatcher =
      LexerMatcherLibrary.WhiteSpaceMatcher.WithModes( XMLLexer.Inside_Start_Tag );

    /// <summary>Library line break matcher, bound to <see cref="Inside_Start_Tag"/>.</summary>
    public static readonly LexerMatcher InsideStartTag_BreakMatcher =
      LexerMatcherLibrary.BreakMatcher.WithModes( XMLLexer.Inside_Start_Tag );


    /// <summary>
    /// Lexes <paramref name="source"/> with a fresh <see cref="XMLLexer"/> and
    /// calls <c>GrabMatch( source )</c> on every resulting event.
    /// </summary>
    /// <param name="source">The XML text to lex.</param>
    /// <returns>
    /// The list of lexer events, or <c>null</c> when the lexer reported an error.
    /// </returns>
    public static List<LexerEvent> Lex( string source )
    {
      var lexer = new XMLLexer();
      var events = lexer.LexToList( source );

      // Callers rely on null to signal a lexing error.
      if ( lexer.hasError )
      {
        return null;
      }

      // Presumably GrabMatch stores the matched substring of the source on the
      // event - confirm in LexerEvent.
      foreach ( var lexerEvent in events )
      {
        lexerEvent.GrabMatch( source );
      }

      return events;
    }

    /// <summary>
    /// Creates the lexer and registers all matchers for both modes.
    /// </summary>
    public XMLLexer()
    {
      // Matcher overview ( pattern sketch, type [, next mode] ):
      //
      // default:
      //   { <!-- * -->,       COMMENT }
      //   { <![CDATA[ * ]]>,  CDATA }
      //   { <! * >,           INSTRUCTION }
      //   { <? * >,           XML DEC }
      //   { <NS:name,         START_TAG, "insideStartTag" }
      //   { </NS:name>,       CLOSE_TAG }
      //   { &*;,              ESCAPED_TEXT }
      //   { white space / line breaks }
      //   { *,                TEXT }
      //
      // insideStartTag:
      //   { > or />,          START_TAG_CLOSER, "default" }
      //   { '*',              STRING }
      //   { "*",              STRING }
      //   { NS:name,          ATTRIBUTE_NAME }
      //   { =,                ASSIGNMENT }
      //   { white space / line breaks }
      //
      // The registration order below is unchanged; presumably earlier matchers
      // take precedence over later ones - verify against Lexer.
      AddAllMatchers(
        // Default mode
        XMLLexer.XMLComment,
        XMLLexer.XMLCData,
        XMLLexer.XMLInstruction,
        XMLLexer.XMLProcessingInstruction,
        XMLLexer.XMLStartTag,
        XMLLexer.XMLEndTag,
        XMLLexer.XMLEscapedEntity,
        LexerMatcherLibrary.WhiteSpaceMatcher,
        LexerMatcherLibrary.BreakMatcher,
        XMLLexer.XMLText,

        // Inside_Start_Tag mode
        XMLLexer.XMLStartTagClosing,
        XMLLexer.InsideStartTag_SingleQuotedStringMatcher,
        XMLLexer.InsideStartTag_DoubleQuotedStringMatcher,
        XMLLexer.XMLAttributeName,
        XMLLexer.XMLAttributeAssignment,
        XMLLexer.InsideStartTag_WhiteSpaceMatcher,
        XMLLexer.InsideStartTag_BreakMatcher
      );
    }
  }
}