rj-action-library/Runtime/Text/Lexing/LexerLibrary/XMLLexer.cs

129 lines
4.0 KiB
C#

using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;
namespace Rokojori
{
/// <summary>
/// Lexer configuration for XML sources.
/// Two lexer modes are used: <see cref="Lexer.Default_Mode"/> for content between tags
/// and <see cref="Inside_Start_Tag"/> for the attribute list of a start tag.
/// </summary>
/// <remarks>
/// NOTE(review): the comments below assume that the base Lexer tries matchers in the
/// order they are registered and that the two trailing LexerMatcher constructor
/// arguments are (mode, next mode) - confirm against Lexer/LexerMatcher.
/// </remarks>
public class XMLLexer:Lexer
{
  /// <summary>Name of the mode that is active between a start tag name and its closing ">" or "/>".</summary>
  public static readonly string Inside_Start_Tag = "Inside_Start_Tag";

  /// <summary>Comment: <c>&lt;!-- ... --&gt;</c>, content may span multiple lines.</summary>
  public static readonly LexerMatcher XMLComment =
    new LexerMatcher( "XMLComment", @"<!--((?:.|\r|\n)*?)-->" );

  /// <summary>CDATA section: <c>&lt;![CDATA[ ... ]]&gt;</c>, content may span multiple lines.</summary>
  /// <remarks>
  /// The square brackets must be escaped; unescaped, "[CDATA[...]" is parsed by the
  /// regex engine as a single character class and real CDATA sections never match.
  /// </remarks>
  public static readonly LexerMatcher XMLCData =
    new LexerMatcher( "XMLCData", @"<!\[CDATA\[((?:.|\r|\n)*?)\]\]>" );

  /// <summary>Generic <c>&lt;! ... &gt;</c> markup (e.g. DOCTYPE); ends at the first ">".</summary>
  public static readonly LexerMatcher XMLInstruction =
    new LexerMatcher( "XMLInstruction", @"<!((?:.|\r|\n)*?)>" );

  /// <summary>Processing instruction / XML declaration: <c>&lt;? ... ?&gt;</c>.</summary>
  /// <remarks>
  /// The matcher type name stays "XMLDeclaration" so code that compares type names keeps working.
  /// The pattern ends at "?>" (not at the first ">"), so a ">" inside the instruction
  /// does not terminate it early.
  /// </remarks>
  public static readonly LexerMatcher XMLProcessingInstruction =
    new LexerMatcher( "XMLDeclaration", @"<\?((?:.|\r|\n)*?)\?>" );

  /// <summary>Start of a start tag: "&lt;" plus an optionally prefixed name; switches to <see cref="Inside_Start_Tag"/>.</summary>
  public static readonly LexerMatcher XMLStartTag =
    new LexerMatcher( "XMLStartTag",
      XMLRegexExtensions.Extend( @"<(?:\xml-name:)?\xml-name" ),
      Lexer.Default_Mode, XMLLexer.Inside_Start_Tag
    );

  /// <summary>Complete end tag: "&lt;/" plus an optionally prefixed name plus "&gt;".</summary>
  public static readonly LexerMatcher XMLEndTag =
    new LexerMatcher( "XMLEndTag",
      XMLRegexExtensions.Extend( @"<\/(?:\xml-name:)?\xml-name>" )
    );

  /// <summary>Entity or character reference such as <c>&amp;amp;</c> or <c>&amp;#160;</c>.</summary>
  public static readonly LexerMatcher XMLEscapedEntity =
    new LexerMatcher( "XMLEscapedEntity", @"&#?\w+;" );

  /// <summary>Character data: a run of characters up to the next "&lt;" or "&amp;".</summary>
  /// <remarks>
  /// The run stops before "&lt;" and "&amp;" so that tags and entity references are left
  /// for their own matchers. The "|&amp;" alternative accepts a single stray ampersand
  /// that does not start a well-formed reference, so such input still produces a token.
  /// </remarks>
  public static readonly LexerMatcher XMLText =
    new LexerMatcher( "XMLText", @"([^<&]+|&)" );

  /// <summary>Attribute name (optionally prefixed) inside a start tag.</summary>
  public static readonly LexerMatcher XMLAttributeName =
    new LexerMatcher( "XMLAttributeName",
      XMLRegexExtensions.Extend( @"(?:\xml-name:)?\xml-name" ),
      XMLLexer.Inside_Start_Tag, XMLLexer.Inside_Start_Tag
    );

  /// <summary>End of a start tag, "&gt;" or "/&gt;"; switches back to <see cref="Lexer.Default_Mode"/>.</summary>
  public static readonly LexerMatcher XMLStartTagClosing =
    new LexerMatcher( "XMLStartTagClosing", @"\/?>", XMLLexer.Inside_Start_Tag, Lexer.Default_Mode );

  /// <summary>The "=" between an attribute name and its value.</summary>
  public static readonly LexerMatcher XMLAttributeAssignment =
    new LexerMatcher( "XMLAttributeAssignment", @"=", XMLLexer.Inside_Start_Tag, XMLLexer.Inside_Start_Tag );

  /// <summary>Library single-quoted string matcher, bound to <see cref="Inside_Start_Tag"/>.</summary>
  public static readonly LexerMatcher InsideStartTag_SingleQuotedStringMatcher =
    LexerMatcherLibrary.SingleQuotedStringMatcher.WithModes( XMLLexer.Inside_Start_Tag );

  /// <summary>Library double-quoted string matcher, bound to <see cref="Inside_Start_Tag"/>.</summary>
  public static readonly LexerMatcher InsideStartTag_DoubleQuotedStringMatcher =
    LexerMatcherLibrary.DoubleQuotedStringMatcher.WithModes( XMLLexer.Inside_Start_Tag );

  /// <summary>Library white space matcher, bound to <see cref="Inside_Start_Tag"/>.</summary>
  public static readonly LexerMatcher InsideStartTag_WhiteSpaceMatcher =
    LexerMatcherLibrary.WhiteSpaceMatcher.WithModes( XMLLexer.Inside_Start_Tag );

  /// <summary>Library line break matcher, bound to <see cref="Inside_Start_Tag"/>.</summary>
  public static readonly LexerMatcher InsideStartTag_BreakMatcher =
    LexerMatcherLibrary.BreakMatcher.WithModes( XMLLexer.Inside_Start_Tag );

  /// <summary>
  /// Lexes <paramref name="source"/> with a fresh <see cref="XMLLexer"/>.
  /// </summary>
  /// <param name="source">The XML text to tokenize.</param>
  /// <returns>
  /// The lexer events in source order, each after <c>GrabMatch( source )</c> has been
  /// called on it, or <c>null</c> when the lexer reports an error.
  /// </returns>
  public static List<LexerEvent> Lex( string source )
  {
    var xmlLexer = new XMLLexer();
    var lexedEvents = xmlLexer.LexToList( source );

    if ( xmlLexer.hasError )
    {
      return null;
    }

    // Presumably resolves the matched text of each event from the source - confirm in LexerEvent.
    foreach ( var lexedEvent in lexedEvents )
    {
      lexedEvent.GrabMatch( source );
    }

    return lexedEvents;
  }

  /// <summary>
  /// Registers all matchers for both modes.
  /// </summary>
  public XMLLexer()
  {
    /*
      Registered matchers (pattern sketch, token type, optional next mode):

      default mode:
        { <!-- * -->,      COMMENT }
        { <![CDATA[ * ]]>, CDATA }
        { <! * >,          INSTRUCTION }
        { <? * ?>,         XML DECLARATION / PROCESSING INSTRUCTION }
        { <NS:NAME,        START_TAG, next mode: Inside_Start_Tag }
        { </NS:NAME>,      END_TAG }
        { &*;,             ESCAPED_ENTITY }
        { white space,     WHITE_SPACE }
        { line break,      BREAK }
        { *,               TEXT }

      Inside_Start_Tag mode:
        { > or />,         START_TAG_CLOSING, next mode: default }
        { '*',             STRING }
        { "*",             STRING }
        { NS:NAME,         ATTRIBUTE_NAME }
        { =,               ASSIGNMENT }
        { white space,     WHITE_SPACE }
        { line break,      BREAK }

      The more specific "<!" patterns (comment, CDATA) are listed before the generic
      instruction pattern, and the catch-all text pattern is listed last in its mode.
    */
    AddAllMatchers(
      // default mode
      XMLLexer.XMLComment,
      XMLLexer.XMLCData,
      XMLLexer.XMLInstruction,
      XMLLexer.XMLProcessingInstruction,
      XMLLexer.XMLStartTag,
      XMLLexer.XMLEndTag,
      XMLLexer.XMLEscapedEntity,
      LexerMatcherLibrary.WhiteSpaceMatcher,
      LexerMatcherLibrary.BreakMatcher,
      XMLLexer.XMLText,

      // Inside_Start_Tag mode
      XMLLexer.XMLStartTagClosing,
      XMLLexer.InsideStartTag_SingleQuotedStringMatcher,
      XMLLexer.InsideStartTag_DoubleQuotedStringMatcher,
      XMLLexer.XMLAttributeName,
      XMLLexer.XMLAttributeAssignment,
      XMLLexer.InsideStartTag_WhiteSpaceMatcher,
      XMLLexer.InsideStartTag_BreakMatcher
    );
  }
}
}