129 lines
4.0 KiB
C#
129 lines
4.0 KiB
C#
using System.Collections;
|
|
using System.Collections.Generic;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace Rokojori
|
|
{
|
|
/// <summary>
/// Lexer set up with matchers for XML sources. Matchers are registered for two modes:
/// the default mode (markup and text between tags) and <see cref="Inside_Start_Tag"/>
/// (attributes between a start tag's name and its closing bracket).
/// </summary>
public class XMLLexer:Lexer
{
  /// <summary>
  /// Mode name passed to the matchers that handle the inside of a start tag.
  /// Declared before the matchers below because static field initializers run in
  /// textual order and several of them read this value.
  /// </summary>
  public static readonly string Inside_Start_Tag = "Inside_Start_Tag";

  /// <summary>Comment: "&lt;!--" up to the first "--&gt;", may span lines.</summary>
  public static readonly LexerMatcher XMLComment =
    new LexerMatcher( "XMLComment", @"<!--((?:.|\r|\n)*?)-->" );

  /// <summary>
  /// CDATA section: "&lt;![CDATA[" up to the first "]]&gt;", may span lines.
  /// The square brackets are escaped; unescaped they form a regex character class
  /// and the pattern no longer matches a CDATA section.
  /// </summary>
  public static readonly LexerMatcher XMLCData =
    new LexerMatcher( "XMLCData", @"<!\[CDATA\[((?:.|\r|\n)*?)\]\]>" );

  /// <summary>Any other "&lt;!" construct, up to the first "&gt;".</summary>
  public static readonly LexerMatcher XMLInstruction =
    new LexerMatcher( "XMLInstruction", @"<!((?:.|\r|\n)*?)>" );

  /// <summary>Construct starting with "&lt;?", up to the first "&gt;".</summary>
  // NOTE(review): the pattern stops at the first ">" and not at "?>", so an instruction
  // containing ">" is cut short. The matcher is also named "XMLDeclaration" although the
  // field is called XMLProcessingInstruction. Both are left as they are because callers
  // may depend on the type name and on the content of the capture group - confirm.
  public static readonly LexerMatcher XMLProcessingInstruction =
    new LexerMatcher( "XMLDeclaration", @"<\?((?:.|\r|\n)*?)>" );

  /// <summary>
  /// Opening of a start tag: "&lt;" followed by an optionally prefixed name.
  /// Registered with Lexer.Default_Mode and Inside_Start_Tag; presumably the first is
  /// the mode the matcher is active in and the second the mode it switches to.
  /// </summary>
  public static readonly LexerMatcher XMLStartTag =
    new LexerMatcher( "XMLStartTag",
      // "\xml-name" is not regex syntax; presumably a placeholder that
      // XMLRegexExtensions.Extend replaces with the XML name pattern - TODO confirm.
      XMLRegexExtensions.Extend( @"<(?:\xml-name:)?\xml-name" ),
      Lexer.Default_Mode, XMLLexer.Inside_Start_Tag
    );

  /// <summary>Complete end tag: "&lt;/", an optionally prefixed name and "&gt;".</summary>
  public static readonly LexerMatcher XMLEndTag =
    new LexerMatcher( "XMLEndTag",
      XMLRegexExtensions.Extend( @"<\/(?:\xml-name:)?\xml-name>" )
    );

  /// <summary>Entity or character reference such as "&amp;amp;" or "&amp;#160;".</summary>
  public static readonly LexerMatcher XMLEscapedEntity =
    new LexerMatcher( "XMLEscapedEntity", @"&#?\w+;" );

  /// <summary>
  /// Character data: a run of characters up to the next "&lt;" or "&amp;", so that tags
  /// and entity references are left for their own matchers. The second alternative
  /// accepts a single "&amp;" that did not form an entity reference.
  /// </summary>
  // The earlier pattern "([^<].)" always consumed exactly two characters and could take
  // a following "<" with it, which hid the start of the next tag.
  public static readonly LexerMatcher XMLText =
    new LexerMatcher( "XMLText", @"([^<&]+|&)" );

  /// <summary>Attribute name (optionally prefixed), registered for Inside_Start_Tag.</summary>
  public static readonly LexerMatcher XMLAttributeName =
    new LexerMatcher( "XMLAttributeName",
      XMLRegexExtensions.Extend( @"(?:\xml-name:)?\xml-name" ),
      XMLLexer.Inside_Start_Tag, XMLLexer.Inside_Start_Tag
    );

  /// <summary>
  /// End of a start tag, "&gt;" or "/&gt;". Registered with Inside_Start_Tag and
  /// Lexer.Default_Mode, i.e. the counterpart of <see cref="XMLStartTag"/>.
  /// </summary>
  public static readonly LexerMatcher XMLStartTagClosing =
    new LexerMatcher( "XMLStartTagClosing", @"\/?>", XMLLexer.Inside_Start_Tag, Lexer.Default_Mode );

  /// <summary>The "=" between an attribute name and its value.</summary>
  public static readonly LexerMatcher XMLAttributeAssignment =
    new LexerMatcher( "XMLAttributeAssignment", @"=", XMLLexer.Inside_Start_Tag, XMLLexer.Inside_Start_Tag );

  /// <summary>Library single-quoted string matcher, derived via WithModes for Inside_Start_Tag.</summary>
  public static readonly LexerMatcher InsideStartTag_SingleQuotedStringMatcher =
    LexerMatcherLibrary.SingleQuotedStringMatcher.WithModes( XMLLexer.Inside_Start_Tag );

  /// <summary>Library double-quoted string matcher, derived via WithModes for Inside_Start_Tag.</summary>
  public static readonly LexerMatcher InsideStartTag_DoubleQuotedStringMatcher =
    LexerMatcherLibrary.DoubleQuotedStringMatcher.WithModes( XMLLexer.Inside_Start_Tag );

  /// <summary>Library white space matcher, derived via WithModes for Inside_Start_Tag.</summary>
  public static readonly LexerMatcher InsideStartTag_WhiteSpaceMatcher =
    LexerMatcherLibrary.WhiteSpaceMatcher.WithModes( XMLLexer.Inside_Start_Tag );

  /// <summary>Library break matcher, derived via WithModes for Inside_Start_Tag.</summary>
  public static readonly LexerMatcher InsideStartTag_BreakMatcher =
    LexerMatcherLibrary.BreakMatcher.WithModes( XMLLexer.Inside_Start_Tag );

  /// <summary>
  /// Lexes <paramref name="source"/> with a new <see cref="XMLLexer"/>.
  /// </summary>
  /// <param name="source">The XML text to lex.</param>
  /// <returns>
  /// The list produced by LexToList after GrabMatch( source ) has been called on every
  /// event, or null when the lexer reports an error. Callers have to check for null.
  /// </returns>
  public static List<LexerEvent> Lex( string source )
  {
    var lexer = new XMLLexer();
    var events = lexer.LexToList( source );

    if ( lexer.hasError )
    {
      return null;
    }

    // Presumably copies the matched text from source into each event - TODO confirm.
    events.ForEach( ev => { ev.GrabMatch( source ); } );

    return events;
  }

  /// <summary>
  /// Registers all XML matchers. The default-mode matchers are passed first, the
  /// Inside_Start_Tag matchers after them.
  /// </summary>
  public XMLLexer()
  {
    /*
      Sketch of the token grammar:

      default:
        { <!--*-->,          COMMENT }
        { <![CDATA[*]]>,     CDATA }
        { <!WORD>,           INSTRUCTION }
        { <?*?>,             XML DEC }
        { <NS:START_TAG,     START_TAG, insideTag }
        { </NS:CLOSE_TAG>,   CLOSE_TAG }
        { &*;                ESCAPED_TEXT }
        { *,                 TEXT }

      insideStartTag:
        { >,                 START_TAG_CLOSER, "default" }
        { "\*",              STRING }
        { '\*',              STRING }
        { NS:\w(-\w),        WORD_WITH_HYPHEN }
        { =,                 ASSIGNMENT }
    */

    // NOTE(review): the order looks significant - the specific "<!--" and "<![CDATA["
    // matchers come before the general "<!" one, and XMLText comes last among the
    // default-mode matchers. Presumably earlier matchers win - confirm in Lexer.
    AddAllMatchers(
      XMLLexer.XMLComment,
      XMLLexer.XMLCData,
      XMLLexer.XMLInstruction,
      XMLLexer.XMLProcessingInstruction,

      XMLLexer.XMLStartTag,
      XMLLexer.XMLEndTag,

      XMLLexer.XMLEscapedEntity,
      LexerMatcherLibrary.WhiteSpaceMatcher,
      LexerMatcherLibrary.BreakMatcher,
      XMLLexer.XMLText,

      XMLLexer.XMLStartTagClosing,

      XMLLexer.InsideStartTag_SingleQuotedStringMatcher,
      XMLLexer.InsideStartTag_DoubleQuotedStringMatcher,
      XMLLexer.XMLAttributeName,
      XMLLexer.XMLAttributeAssignment,
      XMLLexer.InsideStartTag_WhiteSpaceMatcher,
      XMLLexer.InsideStartTag_BreakMatcher
    );
  }
}
|
|
} |