This project has moved and is read-only. For the latest updates, please go here.
1
Vote

OuterXml bug?

description

Consider the following sample application:
using System;
using System.Xml;
using System.Xml.XPath;
using HtmlAgilityPack;

/// <summary>
/// Minimal repro program: loads the same markup into an <see cref="XmlDocument"/> and
/// into an HtmlAgilityPack <c>HtmlDocument</c>, then prints the <c>OuterXml</c> of every
/// element reached through each document's XPath navigator so the two can be compared.
/// </summary>
class Program
{
    /// <summary>
    /// Builds the sample markup and dumps it through both document implementations,
    /// the <see cref="XmlDocument"/> run first and the HtmlAgilityPack run second.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // Nested divs with a bare text node ("Test2") between two child elements.
        string input = @"<div id='a1'>" +
                       "<div id='a2'>Test1</div>" +
                       "Test2" +
                       "<div id='a3'>Test3</div>" +
                       "</div>";

        Console.WriteLine("Correct output:\r\n");
        XmlDocumentOuterXml(input);

        Console.WriteLine("\r\nIncorrect output:\r\n");
        HtmlAgilityPackOuterXml(input);
    }

    /// <summary>
    /// Writes the <c>OuterXml</c> of each element-type node selected from the
    /// navigator that <paramref name="doc"/> creates, one per console line.
    /// </summary>
    /// <param name="doc">Any document that can hand out an XPath navigator.</param>
    private static void Parse(IXPathNavigable doc)
    {
        XPathNavigator root = doc.CreateNavigator();

        // matchSelf is true, so the starting node is included when it is itself an element.
        XPathNodeIterator elements =
            root.SelectDescendants(System.Xml.XPath.XPathNodeType.Element, true);

        while (elements.MoveNext())
        {
            XPathNavigator element = elements.Current;
            Console.WriteLine(element.OuterXml);
        }
    }

    /// <summary>
    /// Loads <paramref name="input"/> with <see cref="XmlDocument.LoadXml(string)"/>
    /// and prints its elements via <see cref="Parse"/>.
    /// </summary>
    /// <param name="input">Markup to load.</param>
    private static void XmlDocumentOuterXml(string input)
    {
        var xmlDocument = new XmlDocument();
        xmlDocument.LoadXml(input);
        Parse(xmlDocument);
    }

    /// <summary>
    /// Loads <paramref name="input"/> with HtmlAgilityPack's <c>HtmlDocument.LoadHtml</c>
    /// and prints its elements via <see cref="Parse"/>.
    /// </summary>
    /// <param name="input">Markup to load.</param>
    private static void HtmlAgilityPackOuterXml(string input)
    {
        var htmlDocument = new HtmlDocument();
        htmlDocument.LoadHtml(input);
        Parse(htmlDocument);
    }
}
The output is:
Correct output:

<div id="a1">
  <div id="a2">Test1</div>Test2<div id="a3">Test3</div></div>
<div id="a2">Test1</div>
<div id="a3">Test3</div>

Incorrect output:

<div id="a1" />
<div id="a2">Test2</div>
<div id="a3">Test2</div>
Why does HtmlAgilityPack produce the incorrect output (an empty root element and the wrong text inside the child elements)?

comments