Trying to squeeze some more performance out of an XML parser, I ran a few tests to compare XmlReader and XElement (LINQ to XML) in C#. The results are below:
Method | EntryCount | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated |
---|---|---|---|---|---|---|---|---|
XmlReader | 10 | 22.71 μs | 13.99 μs | 0.767 μs | 3.8757 | 0.1221 | - | 15.94 KB |
Linq | 10 | 31.69 μs | 17.07 μs | 0.936 μs | 4.9438 | - | - | 20.38 KB |
XmlReader | 100 | 201.16 μs | 91.83 μs | 5.033 μs | 13.1836 | 1.7090 | - | 53.92 KB |
Linq | 100 | 262.43 μs | 117.71 μs | 6.452 μs | 22.4609 | 5.3711 | - | 92.08 KB |
XmlReader | 1000 | 1,957.26 μs | 487.94 μs | 26.745 μs | 70.3125 | 35.1563 | - | 427.13 KB |
Linq | 1000 | 2,760.14 μs | 1,338.66 μs | 73.376 μs | 128.9063 | 62.5000 | - | 802.53 KB |
XmlReader | 10000 | 26,045.06 μs | 7,497.77 μs | 410.978 μs | 750.0000 | 312.5000 | 93.7500 | 4261.65 KB |
Linq | 10000 | 37,641.87 μs | 22,644.42 μs | 1,241.217 μs | 1375.0000 | 562.5000 | 187.5000 | 8008.76 KB |
XmlReader | 100000 | 267,986.78 μs | 226,860.08 μs | 12,434.967 μs | 7000.0000 | 2500.0000 | 500.0000 | 41974.41 KB |
Linq | 100000 | 338,254.50 μs | 84,972.74 μs | 4,657.643 μs | 13000.0000 | 5000.0000 | 1000.0000 | 79473.88 KB |
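Summary: in these tests XmlReader is always more efficient, both in terms of memory consumption and raw CPU time, regardless of whether the document is big or small. Even with only 10 elements the gain is substantial: about 28% less time (22.71 μs vs. 31.69 μs) and about 22% less allocated memory (15.94 KB vs. 20.38 KB); for larger documents XmlReader allocates roughly half as much as LINQ to XML.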
Full testing code here:
#LINQPad optimize+
// Skeleton of the ListBucketResult document used as benchmark input.
// The "@c" line is a placeholder: ParseBenchmark.Setup replaces it with the
// generated run of <Contents> entries. Note that <KeyCount> is a fixed value
// and does not track the number of generated entries.
static string xml = @"<?xml version=""1.0"" encoding=""UTF-8""?>
<ListBucketResult>
<Name>algresearch</Name>
<Prefix/>
<KeyCount>5</KeyCount>
<MaxKeys>1000</MaxKeys>
<IsTruncated>false</IsTruncated>
@c
</ListBucketResult>
";
// One <Contents> entry; ParseBenchmark.Setup repeats it EntryCount times to
// build the benchmark document.
// The ETag value is itself wrapped in double quotes. Inside a verbatim (@"")
// string a literal quote must be written as "" - a single " would terminate
// the literal and break compilation.
static string contents = @"<Contents>
<Key>11d247ec-ca73-4ab9-a8ff-a5ffc446f8a4</Key>
<LastModified>2021-02-25T10:29:26.000Z</LastModified>
<ETag>""5cc7bdc9132074539612aacde94d39ae""</ETag>
<Size>15</Size>
<StorageClass>STANDARD</StorageClass>
</Contents>";
// LINQPad entry point: switches on Util.AutoScrollResults and then runs every
// [Benchmark] method of ParseBenchmark through BenchmarkDotNet.
void Main()
{
    Util.AutoScrollResults = true;

    // The value returned by Run is not used here, so it is discarded explicitly.
    _ = BenchmarkRunner.Run<ParseBenchmark>();
}
/// <summary>
/// Plain data holder for one listed entry. The parsers in this file create
/// one instance per Contents element that carries a Key.
/// </summary>
public class IOEntry
{
    /// <summary>Creates an entry identified by <paramref name="key"/>.</summary>
    /// <param name="key">Value stored in <see cref="Key"/>.</param>
    public IOEntry(string key) => Key = key;

    /// <summary>Entry key; assigned by the constructor and not settable from outside.</summary>
    public string Key { get; private set; }

    /// <summary>ETag text as supplied by the caller.</summary>
    public string ETag { get; set; }

    /// <summary>Storage class text as supplied by the caller.</summary>
    public string StorageClass { get; set; }

    /// <summary>Size value as supplied by the caller.</summary>
    public int Size { get; set; }

    /// <summary>Last modification timestamp, or <c>null</c> when not set.</summary>
    public DateTimeOffset? LastModificationTime { get; set; }
}
/// <summary>
/// BenchmarkDotNet benchmark that parses the same ListBucketResult document
/// with a forward-only <c>System.Xml.XmlReader</c> and with LINQ to XML
/// (<c>XElement</c>), for several document sizes.
/// </summary>
[ShortRunJob]
[MarkdownExporter]
//[SimpleJob]
//[RPlotExporter]
[MemoryDiagnoser]
//[DisassemblyDiagnoser]
public class ParseBenchmark
{
    /// <summary>
    /// Parses <paramref name="xml"/> with a streaming reader.
    /// </summary>
    /// <param name="xml">The document text.</param>
    /// <param name="continuationToken">
    /// Text of the NextContinuationToken element, an empty string when that
    /// element is present but empty, or <c>null</c> when it is absent.
    /// </param>
    /// <returns>One entry per Contents element that has a non-empty Key.</returns>
    /// <exception cref="FormatException">LastModified or Size text is malformed.</exception>
    /// <exception cref="System.Xml.XmlException">The document is not well-formed.</exception>
    public IReadOnlyCollection<IOEntry> ParseWithXmlReader(string xml, out string continuationToken)
    {
        continuationToken = null;
        var result = new List<IOEntry>();

        // System.Xml.XmlReader is fully qualified throughout this class because the
        // simple name XmlReader also denotes the benchmark method declared below.
        using (var sr = new StringReader(xml))
        using (var xr = System.Xml.XmlReader.Create(sr))
        {
            while (xr.Read())
            {
                if (xr.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                switch (xr.Name)
                {
                    case "NextContinuationToken":
                        // Previously ignored here, which made this path do less work
                        // (and return less) than ParseWithXElement.
                        continuationToken = ReadLeafText(xr);
                        break;

                    case "Contents":
                        IOEntry entry = ReadContents(xr);
                        if (entry != null)
                        {
                            result.Add(entry);
                        }
                        break;
                }
            }
        }

        return result;
    }

    /// <summary>
    /// Parses <paramref name="xml"/> by loading it into an <c>XElement</c> tree.
    /// </summary>
    /// <param name="xml">The document text.</param>
    /// <param name="continuationToken">
    /// Value of the root's NextContinuationToken child, or <c>null</c> when absent.
    /// </param>
    /// <returns>One entry per Contents child of the root that has a Key child.</returns>
    /// <exception cref="FormatException">LastModified or Size text is malformed.</exception>
    /// <exception cref="System.Xml.XmlException">The document is not well-formed.</exception>
    public IReadOnlyCollection<IOEntry> ParseWithXElement(string xml, out string continuationToken)
    {
        var result = new List<IOEntry>();
        XElement x = XElement.Parse(xml);
        continuationToken = x.Element("NextContinuationToken")?.Value;

        foreach (XElement c in x.Elements("Contents"))
        {
            // Skip entries without a Key instead of dereferencing null, matching
            // what the reader-based parser does.
            string key = c.Element("Key")?.Value;
            if (key == null)
            {
                continue;
            }

            result.Add(CreateEntry(
                key,
                c.Element("LastModified")?.Value,
                c.Element("Size")?.Value,
                c.Element("ETag")?.Value,
                c.Element("StorageClass")?.Value));
        }

        return result;
    }

    // Returns the text of the element the reader is positioned on: the first child
    // node's value when it is a Text node, otherwise an empty string.
    private static string ReadLeafText(System.Xml.XmlReader xr)
    {
        // A self-closing element has no content and no end tag to read.
        if (xr.IsEmptyElement)
        {
            return string.Empty;
        }

        if (xr.Read() && xr.NodeType == XmlNodeType.Text)
        {
            return xr.Value;
        }

        return string.Empty;
    }

    // Reads one Contents element (the reader is positioned on its start tag) and
    // returns the entry, or null when the element carries no Key text.
    private static IOEntry ReadContents(System.Xml.XmlReader xr)
    {
        // <Contents/> has no EndElement node; scanning for one would run on
        // through the following sibling elements.
        if (xr.IsEmptyElement)
        {
            return null;
        }

        string key = null;
        string lastMod = null;
        string eTag = null;
        string size = null;
        string storageClass = null;

        // Name of the child element whose text is expected next.
        string en = null;
        while (xr.Read() && !(xr.NodeType == XmlNodeType.EndElement && xr.Name == "Contents"))
        {
            if (xr.NodeType == XmlNodeType.Element)
            {
                en = xr.Name;
            }
            else if (xr.NodeType == XmlNodeType.EndElement)
            {
                // Forget the name so stray text after an end tag is not
                // attributed to the element that just closed.
                en = null;
            }
            else if (xr.NodeType == XmlNodeType.Text)
            {
                switch (en)
                {
                    case "Key":
                        key = xr.Value;
                        break;
                    case "LastModified":
                        lastMod = xr.Value;
                        break;
                    case "ETag":
                        eTag = xr.Value;
                        break;
                    case "Size":
                        size = xr.Value;
                        break;
                    case "StorageClass":
                        storageClass = xr.Value;
                        break;
                }
            }
        }

        return key == null ? null : CreateEntry(key, lastMod, size, eTag, storageClass);
    }

    // Builds an entry from raw element text; shared by both parsers so they convert
    // values identically. A missing (null) LastModified leaves LastModificationTime
    // null and a missing Size leaves Size at 0 instead of throwing on null input.
    // Values are parsed with the invariant culture because they are machine-readable.
    private static IOEntry CreateEntry(string key, string lastMod, string size, string eTag, string storageClass)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        return new IOEntry(key)
        {
            LastModificationTime = lastMod == null
                ? (DateTimeOffset?)null
                : DateTimeOffset.Parse(lastMod, invariant),
            Size = size == null ? 0 : int.Parse(size, invariant),
            ETag = eTag,
            StorageClass = storageClass
        };
    }

    // Document built by Setup for the current EntryCount.
    private string _xml;

    // Number of Contents entries in the generated document. Kept as a public
    // field, exactly as originally declared for the [Params] attribute.
    [Params(10, 100, 1000, 10000, 100000)]
    public int EntryCount;

    /// <summary>
    /// Builds the input document by substituting the "@c" placeholder in the
    /// <c>xml</c> template with <c>EntryCount</c> copies of <c>contents</c>.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        var sb = new StringBuilder();
        for (int i = 0; i < EntryCount; i++)
        {
            sb.Append(contents);
        }
        _xml = xml.Replace("@c", sb.ToString());
    }

    /// <summary>Benchmark: streaming parse of the generated document.</summary>
    [Benchmark]
    public IReadOnlyCollection<IOEntry> XmlReader()
    {
        return ParseWithXmlReader(_xml, out _);
    }

    /// <summary>Benchmark: LINQ to XML parse of the generated document.</summary>
    [Benchmark]
    public IReadOnlyCollection<IOEntry> Linq()
    {
        return ParseWithXElement(_xml, out _);
    }
}
P.S. Originally published on my blog.
Top comments (0)