using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Text;

namespace Rokojori
{
  /// <summary>
  /// Writes an <c>HtmlNode</c> tree as indented HTML markup.
  /// Working state lives in instance fields and is reset at the start of every
  /// <see cref="Serialize"/> call.
  /// </summary>
  public class HtmlSerializer
  {
    // Receives the markup produced by the current Serialize call.
    readonly StringBuilder sb = new StringBuilder();

    readonly HtmlWalker walker = new HtmlWalker();

    // Whitespace added per nesting level.
    string indent = " ";

    // Depth cache shared with the walker.
    // NOTE(review): the key type is inferred from GetDepth( HtmlNode ) returning an int — confirm against HtmlWalker.
    readonly Dictionary<HtmlNode,int> depthMap = new Dictionary<HtmlNode,int>();

    // Indentation strings that were already built, keyed by depth.
    readonly Dictionary<int,string> indentMap = new Dictionary<int,string>();

    // Elements whose opening tag was written but whose closing tag is still pending.
    readonly List<HtmlElementNode> stack = new List<HtmlElementNode>();

    /// <summary>
    /// Replaces the characters <c>&amp;</c>, <c>&lt;</c>, <c>&gt;</c>, <c>'</c> and <c>"</c>
    /// with character references so the text can be placed in element content or inside a
    /// double-quoted attribute value.
    /// </summary>
    /// <param name="rawText">Unescaped text; null or empty input is returned unchanged.</param>
    /// <returns>The escaped text.</returns>
    public static string Escape( string rawText )
    {
      if ( string.IsNullOrEmpty( rawText ) )
      {
        return rawText;
      }

      // '&' has to be replaced first, otherwise the ampersands introduced by the
      // following replacements would be escaped a second time.
      return rawText
        .Replace( "&", "&amp;" )
        .Replace( "<", "&lt;" )
        .Replace( ">", "&gt;" )
        // Numeric reference: "&apos;" is not defined in HTML 4.
        .Replace( "'", "&#39;" )
        .Replace( "\"", "&quot;" );
    }

    /// <summary>
    /// Serializes <paramref name="node"/> and everything the walker visits below it.
    /// Each element starts on its own line, indented by its depth.
    /// </summary>
    /// <param name="node">Node the walker starts iterating from.</param>
    /// <returns>The generated markup.</returns>
    public string Serialize( HtmlNode node )
    {
      // Start from a clean slate so a reused serializer does not return the
      // output of an earlier call in front of the new one.
      sb.Clear();
      stack.Clear();
      depthMap.Clear();

      // NOTE(review): an empty-string append used to sit here; a prologue such as a
      // doctype may have been intended — confirm.

      walker.DepthIterate( node,
        ( n, d ) =>
        {
          var depth = GetDepth( n );

          // Everything still open at this depth or deeper is finished before n is written.
          ClosePreviousElements( depth );

          var element = n as HtmlElementNode;

          if ( element == null )
          {
            AppendNodeValue( n );
            return;
          }

          AppendOpeningTag( element, depth );
          stack.Add( element );
        },
        false, depthMap
      );

      // -1 is below every depth, so all remaining open elements get closed.
      ClosePreviousElements( -1 );

      return sb.ToString();
    }

    // Writes a line break, the indentation and the opening tag with all attributes of the element.
    void AppendOpeningTag( HtmlElementNode element, int depth )
    {
      sb.Append( "\n" );
      sb.Append( GetIndent( depth ) );
      sb.Append( "<" + element.nodeName );

      for ( int i = 0; i < element.numAttributes; i++ )
      {
        var attribute = element.GetAttributeAt( i );

        sb.Append( " " );
        sb.Append( attribute.name );
        sb.Append( "=\"" );
        sb.Append( Escape( attribute.value ) );
        sb.Append( "\"" );
      }

      sb.Append( ">" );
    }

    // Writes the value of a non-element node; children of style/script are written verbatim.
    void AppendNodeValue( HtmlNode n )
    {
      if ( HtmlElementNodeName.style.Selects( n.parentElement ) ||
           HtmlElementNodeName.script.Selects( n.parentElement ) )
      {
        sb.Append( n.nodeValue );
      }
      else
      {
        sb.Append( Escape( n.nodeValue ) );
      }
    }

    // Returns the indentation for the given depth and caches it in indentMap.
    string GetIndent( int depth )
    {
      string cached;

      if ( indentMap.TryGetValue( depth, out cached ) )
      {
        return cached;
      }

      string result;
      string smallerIndent;

      if ( depth <= 0 )
      {
        result = "";
      }
      else if ( indentMap.TryGetValue( depth - 1, out smallerIndent ) )
      {
        // Extend the cached indentation of the parent level by one step.
        result = smallerIndent + indent;
      }
      else
      {
        var builder = new StringBuilder();

        for ( int i = 0; i < depth; i++ )
        {
          builder.Append( indent );
        }

        result = builder.ToString();
      }

      indentMap[ depth ] = result;

      return result;
    }

    // Depth of the node as reported by the walker, using the shared depth cache.
    int GetDepth( HtmlNode n )
    {
      return walker.GetDepth( n, depthMap );
    }

    // Pops every open element whose depth is at least currentDepth, innermost first,
    // and writes its closing tag.
    void ClosePreviousElements( int currentDepth )
    {
      for ( int i = stack.Count - 1; i >= 0; i-- )
      {
        var element = stack[ i ];
        var stackDepth = GetDepth( element );

        if ( stackDepth < currentDepth )
        {
          // The remaining entries were pushed earlier and are still open ancestors.
          break;
        }

        stack.RemoveAt( i );

        if ( NeedsLineBreak( element ) )
        {
          sb.Append( "\n" );
          sb.Append( GetIndent( stackDepth ) );
        }

        sb.Append( "</" + element.nodeName + ">" );
      }
    }

    // True when the closing tag goes on its own line: the element has children and
    // either not all of them are text nodes or the element is a script/style element.
    bool NeedsLineBreak( HtmlElementNode elementNode )
    {
      if ( elementNode.numChildren == 0 )
      {
        return false;
      }

      if ( elementNode.HasOnlyTextNodes() &&
           ! ( HtmlElementNodeName.script.Selects( elementNode ) ||
               HtmlElementNodeName.style.Selects( elementNode ) ) )
      {
        return false;
      }

      return true;
    }
  }
}