174 lines
3.9 KiB
C#
174 lines
3.9 KiB
C#
|
using System.Collections;
|
||
|
using System.Collections.Generic;
|
||
|
using System.Text.RegularExpressions;
|
||
|
using System.Text;
|
||
|
|
||
|
namespace Rokojori
|
||
|
{
|
||
|
/// <summary>
/// Serializes an HTML node tree into indented HTML markup text.
/// </summary>
/// <remarks>
/// The output buffer, the open-element stack and the depth cache are instance
/// fields that <see cref="Serialize"/> mutates, so do not run two serializations
/// on the same instance at the same time.
/// </remarks>
public class HtmlSerializer
{
  // Output buffer of the serialization that is currently running.
  StringBuilder sb = new StringBuilder();

  HtmlWalker walker = new HtmlWalker();

  // Text added once per depth level in front of element tags.
  // NOTE(review): a single space; the original literal may have been wider
  // before whitespace was collapsed - confirm against the upstream file.
  string indent = " ";

  // Depth cache handed to the walker.
  Dictionary<HtmlNode, int> depthMap = new Dictionary<HtmlNode, int>();

  // Cache of already built indentation strings, keyed by depth.
  Dictionary<int, string> indentMap = new Dictionary<int, string>();

  // Elements whose start tag has been written but whose end tag has not.
  List<HtmlElementNode> stack = new List<HtmlElementNode>();

  /// <summary>
  /// Replaces the characters that have a special meaning in HTML markup
  /// (<c>&amp;</c>, <c>&lt;</c>, <c>&gt;</c>, <c>'</c> and <c>"</c>) with character references.
  /// </summary>
  /// <param name="rawText">Text to escape; may be null or empty.</param>
  /// <returns>
  /// The escaped text. A null or empty input is returned unchanged.
  /// </returns>
  public static string Escape( string rawText )
  {
    if ( string.IsNullOrEmpty( rawText ) )
    {
      return rawText;
    }

    var escaped = new StringBuilder( rawText.Length + 16 );

    // Single pass: every source character is looked at exactly once, so the
    // ampersands of the emitted references are never escaped a second time.
    foreach ( char c in rawText )
    {
      switch ( c )
      {
        case '&':
          escaped.Append( "&amp;" );
          break;

        case '<':
          escaped.Append( "&lt;" );
          break;

        case '>':
          escaped.Append( "&gt;" );
          break;

        case '\'':
          // Numeric reference: understood by every HTML version.
          escaped.Append( "&#39;" );
          break;

        case '"':
          escaped.Append( "&quot;" );
          break;

        default:
          escaped.Append( c );
          break;
      }
    }

    return escaped.ToString();
  }

  /// <summary>
  /// Writes <c>&lt;!DOCTYPE html&gt;</c> followed by the markup of
  /// <paramref name="node"/> and everything the walker visits below it.
  /// </summary>
  /// <param name="node">Start node of the walk.</param>
  /// <returns>The generated markup.</returns>
  public string Serialize( HtmlNode node )
  {
    // Start from a clean state so that a reused serializer does not prepend
    // the output (or stale depth information) of an earlier call.
    sb.Clear();
    stack.Clear();
    depthMap.Clear();

    sb.Append( "<!DOCTYPE html>" );

    walker.DepthIterate( node,

      ( n, d ) =>
      {
        var depth = GetDepth( n );

        // Everything on the stack that is not an ancestor of n is finished.
        ClosePreviousElements( depth );

        var element = n as HtmlElementNode;

        if ( element != null )
        {
          AppendStartTag( element, depth );
          stack.Add( element );
          return;
        }

        var isRawText =
          HtmlElementNodeName.style.Selects( n.parentElement ) ||
          HtmlElementNodeName.script.Selects( n.parentElement );

        // Content of <style> and <script> is written verbatim, all other
        // text is escaped.
        sb.Append( isRawText ? n.nodeValue : Escape( n.nodeValue ) );
      },

      false,
      depthMap
    );

    // -1 is lower than any stack depth: closes every element still open.
    ClosePreviousElements( -1 );

    return sb.ToString();
  }

  // Writes a line break, the indentation for depth and the start tag of
  // element including all of its attributes (values are escaped).
  void AppendStartTag( HtmlElementNode element, int depth )
  {
    sb.Append( "\n" );
    sb.Append( GetIndent( depth ) );

    sb.Append( "<" + element.nodeName );

    for ( int i = 0; i < element.numAttributes; i++ )
    {
      var attribute = element.GetAttributeAt( i );

      sb.Append( " " );
      sb.Append( attribute.name );
      sb.Append( "=\"" );
      sb.Append( Escape( attribute.value ) );
      sb.Append( "\"" );
    }

    sb.Append( ">" );
  }

  // Returns indent repeated depth times; results are cached in indentMap.
  // Depths of zero or below give an empty string.
  string GetIndent( int depth )
  {
    string cached;

    if ( indentMap.TryGetValue( depth, out cached ) )
    {
      return cached;
    }

    string result;
    string smallerIndent;

    if ( depth <= 0 )
    {
      result = "";
    }
    else if ( indentMap.TryGetValue( depth - 1, out smallerIndent ) )
    {
      // Extend the indentation of the level above by one step.
      result = smallerIndent + indent;
    }
    else
    {
      var builder = new StringBuilder();

      for ( int i = 0; i < depth; i++ )
      {
        builder.Append( indent );
      }

      result = builder.ToString();
    }

    indentMap[ depth ] = result;

    return result;
  }

  // Depth of n as reported by the walker, using the shared depth cache.
  int GetDepth( HtmlNode n )
  {
    return walker.GetDepth( n, depthMap );
  }

  // Pops and closes, innermost first, every open element whose depth is
  // greater than or equal to currentDepth.
  // NOTE(review): an end tag is written for every element, including void
  // elements such as br or img - confirm whether that is intended.
  void ClosePreviousElements( int currentDepth )
  {
    for ( int i = stack.Count - 1; i >= 0; i-- )
    {
      var element = stack[ i ];
      var stackDepth = GetDepth( element );

      if ( stackDepth < currentDepth )
      {
        // The remaining entries are ancestors of the current node.
        break;
      }

      stack.RemoveAt( i );

      if ( NeedsLineBreak( element ) )
      {
        sb.Append( "\n" );
        sb.Append( GetIndent( stackDepth ) );
      }

      sb.Append( "</" + element.nodeName + ">" );
    }
  }

  // Decides whether the end tag of elementNode goes on its own indented line.
  // It stays on the same line for elements without children and for elements
  // that contain only text nodes, unless the element is a script or style.
  bool NeedsLineBreak( HtmlElementNode elementNode )
  {
    if ( elementNode.numChildren == 0 )
    {
      return false;
    }

    if ( ! elementNode.HasOnlyTextNodes() )
    {
      return true;
    }

    return HtmlElementNodeName.script.Selects( elementNode ) ||
           HtmlElementNodeName.style.Selects( elementNode );
  }
}
|
||
|
}
|