Perfect-XML

Get Involved with Perfect!

Star Perfect on GitHub | Stack Overflow | Follow Perfect on Twitter | Join the Perfect Slack

Swift 4.0 · Platforms: OS X | Linux · License: Apache · PerfectlySoft Twitter · Slack Status

Perfect 的 XML 和 HTML 解析支持

它目前包含大部分 DOM Core level 2 只读 API,并包含 XPath 支持。

构建

将此项目作为依赖项添加到你的 Package.swift 文件中。

.Package(url:"https://github.com/PerfectlySoft/Perfect-XML.git", majorVersion: 3)

macOS 构建说明

如果你收到以下编译错误,则需要安装并链接 libxml2

note: you may be able to install libxml-2.0 using your system-packager:

    brew install libxml2

Compile Swift Module 'PerfectXML' (2 sources)
<module-includes>:1:9: note: in file included from <module-includes>:1:
#import "libxml2.h"

要使用 Homebrew 安装并链接 libxml2,请使用以下两个命令

brew install libxml2
brew link --force libxml2

Linux 构建说明

确保你已安装 libxml2-dev 和 pkg-config。

sudo apt-get install libxml2-dev pkg-config

用法

使用你的 XML 字符串实例化一个 XDocument 对象

import PerfectXML
let document = XDocument(fromSource: xmlString)

使用你的 HTML 字符串实例化一个 HTMLDocument 对象

import PerfectXML
let document = HTMLDocument(fromSource: htmlString)

现在你可以通过使用 documentElement 属性获取文档结构的根节点

print(document.documentElement?.string(pretty: true))

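每个节点都有几个重要的属性,例如 nodeName、nodeValue、nodeType、childNodes、firstChild、lastChild、nextSibling 和 previousSibling,下文的示例中会用到它们。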

每个节点还有一个 getElementsByTagName: 方法,它可以递归地搜索自身及其子节点,以返回一个包含所有具有该名称的节点的数组。此方法使在 XML 文件中查找单个值变得容易。

import PerfectXML

let serverResponseXML = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
"<Autodiscover xmlns=\"http://schemas.microsoft.com/exchange/autodiscover/responseschema/2006\">" + 
"  <Response>" + 
"    <Error Time=\"14:52:25.4524532\" Id=\"3280124998\">" + 
"      <ErrorCode>600</ErrorCode>" + 
"      <Message>Invalid Request</Message>" +
"      <DebugData />" +
"    </Error>" + 
"  </Response>" +
"</Autodiscover>"

let serverResponseDocument = XDocument(fromSource: serverResponseXML)
let serverMessage = serverResponseDocument?.documentElement?.getElementsByTagName("Message").first?.nodeValue
print(serverMessage) // prints Optional("Invalid Request")

使用 getElementsByTagName 从 XML feed 构建模型类型很容易

import PerfectXML

let rssXML = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" +
"<rss version=\"2.0\">" +
"  <channel>" +
"    <title>W3Schools Home Page</title>" +
"    <link>https://w3schools.org.cn</link>" +
"    <description>Free web building tutorials</description>" +
"    <item>" +
"      <title>RSS Tutorial</title>" +
"      <link>https://w3schools.org.cn/xml/xml_rss.asp</link>" +
"      <description>New RSS tutorial on W3Schools</description>" +
"    </item>" +
"    <item>" +
"      <title>XML Tutorial</title>" +
"      <link>https://w3schools.org.cn/xml</link>" +
"      <description>New XML tutorial on W3Schools</description>" +
"    </item>" +
"  </channel>" +
"</rss>"

let rssDocument = XDocument(fromSource: rssXML)
let feedItems = rssDocument?.documentElement?.getElementsByTagName("item")
print(feedItems?.count) // prints Optional(2)
let items = feedItems?.map({ MyCustomStruct(xmlNode: $0) })

有时标签名称过于通用,以至于无法有意义地搜索它们,例如上面示例中的 “title”。如果我们想要获取 channel 的标题、链接和描述,可以更有针对性地逐个访问它的子节点。

import PerfectXML

let rssXML = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>" +
"<rss version=\"2.0\">" +
"  <channel>" +
"    <title>W3Schools Home Page</title>" +
"    <link>https://w3schools.org.cn</link>" +
"    <description>Free web building tutorials</description>" +
"    <item>" +
"      <title>RSS Tutorial</title>" +
"      <link>https://w3schools.org.cn/xml/xml_rss.asp</link>" +
"      <description>New RSS tutorial on W3Schools</description>" +
"    </item>" +
"    <item>" +
"      <title>XML Tutorial</title>" +
"      <link>https://w3schools.org.cn/xml</link>" +
"      <description>New XML tutorial on W3Schools</description>" +
"    </item>" +
"  </channel>" +
"</rss>"

let rssDocument = XDocument(fromSource: rssXML)
let channelNode = rssDocument?.documentElement?.getElementsByTagName("channel").first
let channelTitle = channelNode?.childNodes.filter({ $0.nodeName == "title" }).first?.nodeValue
let channelLink = channelNode?.childNodes.filter({ $0.nodeName == "link" }).first?.nodeValue
let channelDescription = channelNode?.childNodes.filter({ $0.nodeName == "description" }).first?.nodeValue

print(channelTitle) // Optional("W3Schools Home Page")
print(channelLink) // Optional("https://w3schools.org.cn")
print(channelDescription) // Optional("Free web building tutorials")

解析 XML 源字符串

此代码片段将解析 XML 源字符串,然后将其转换回字符串。

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b><c a=\"attr1\">HI</c><d/></b></a>\n"
let doc = XDocument(fromSource: docSrc)
let str = doc?.string(pretty: false)
XCTAssert(str == docSrc, "\(str)")

解析 HTML 源字符串

此代码片段将解析 HTML 源字符串。

let docSrc = "<html>\n<head>\n<title>title</title></head>\n<body>\n<div>hi</div>\n</body>\n</html>\n"
let doc = HTMLDocument(fromSource: docSrc)
let nodeName = doc?.documentElement?.nodeName
XCTAssert(nodeName == "html")

检查节点名称

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b/><c/><d/></a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
guard let children = doc?.documentElement else {
	return XCTAssert(false, "No children")
}
XCTAssert(children.nodeName == "a")
let names = ["b", "c", "d"]
for (n, v) in zip(children.childNodes, names) {
	guard let _ = n as? XElement else {
		return XCTAssert(false)
	}
	XCTAssert(n.nodeName == v, "\(n.nodeName) != \(v)")
}

检查文本节点

let value = "ABCD"
let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a>\(value)</a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
guard let children = doc?.documentElement else {
	return XCTAssert(false, "No children")
}
XCTAssert(children.nodeName == "a")
do {
	let children = children.childNodes
	XCTAssert(children.count == 1)
	guard let textChild = children.first as? XText else {
		return XCTAssert(false)
	}
	XCTAssert(textChild.nodeValue == value)
}

检查节点类型

let value = "ABCD"
let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a>\(value)</a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
guard let children = doc?.documentElement else {
	return XCTAssert(false, "No children")
}
XCTAssert(children.nodeName == "a")
let nodeType = children.nodeType
if case .elementNode = nodeType {
	XCTAssert(true)
} else {
	XCTAssert(false, "\(nodeType)")
}

第一个和最后一个子节点

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b/><c/><d/></a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
guard let children = doc?.documentElement else {
	return XCTAssert(false, "No children")
}
XCTAssert(children.nodeName == "a")
guard let firstChild = children.firstChild else {
	return XCTAssert(false)
}
guard let lastChild = children.lastChild else {
	return XCTAssert(false)
}
XCTAssert(firstChild.nodeName == "b")
XCTAssert(lastChild.nodeName == "d")

下一个和上一个兄弟节点

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b/><c/><d/></a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
guard let children = doc?.documentElement else {
	return XCTAssert(false, "No children")
}
XCTAssert(children.nodeName == "a")
guard let firstChild = children.firstChild else {
	return XCTAssert(false)
}
XCTAssert(firstChild.nodeName == "b")
guard let nextSib = firstChild.nextSibling else {
	return XCTAssert(false)
}
guard let prevSib = nextSib.previousSibling else {
	return XCTAssert(false)
}
XCTAssert(nextSib.nodeName == "c")
XCTAssert(prevSib.nodeName == "b")

元素属性

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b atr1=\"the value\" atr2=\"the other value\"></b></a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
guard let children = doc?.documentElement else {
	return XCTAssert(false, "No children")
}
XCTAssert(children.nodeName == "a")
guard let firstChild = children.firstChild as? XElement else {
	return XCTAssert(false)
}
XCTAssert(firstChild.nodeName == "b")
guard let atr1 = firstChild.getAttribute(name: "atr1") else {
	return XCTAssert(false)
}
XCTAssert(atr1 == "the value")
guard let atr2 = firstChild.getAttributeNode(name: "atr2") else {
	return XCTAssert(false)
}
XCTAssert(atr2.value == "the other value")

元素属性(使用命名空间)

let names = ["atr1", "atr2"]
let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a xmlns:foo=\"foo:bar\"><b foo:atr1=\"the value\" foo:atr2=\"the other value\"></b></a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
guard let children = doc?.documentElement else {
	return XCTAssert(false, "No children")
}
XCTAssert(children.nodeName == "a")
guard let firstChild = children.firstChild else {
	return XCTAssert(false)
}
XCTAssert(firstChild.nodeName == "b")
guard let attrs = firstChild.attributes else {
	return XCTAssert(false, "nil attributes")
}
XCTAssert(attrs.length == 2)
for name in names {
	guard let item = attrs.getNamedItemNS(namespaceURI: "foo:bar", localName: name) else {
		return XCTAssert(false)
	}
	XCTAssert(item.nodeName == name)
}

按名称获取元素

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b/><a><b><b/></b></a></a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
guard let elements = doc?.documentElement?.getElementsByTagName("b") else {
	return XCTAssert(false)
}
XCTAssert(elements.count == 3)
for node in elements {
	XCTAssert(node.nodeName == "b")
}

按名称获取元素(使用命名空间)

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a xmlns:foo=\"foo:bar\"><b/><foo:a><b>FOO<b/></b></foo:a></a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
do {
	guard let elements = doc?.getElementsByTagNameNS(namespaceURI: "foo:bar", localName: "a") else {
		return XCTAssert(false)
	}
	XCTAssert(elements.count == 1)
	for node in elements {
		XCTAssert(node.nodeName == "a")
		XCTAssert(node.localName == "a")
		XCTAssert(node.prefix == "foo")
		XCTAssert(node.namespaceURI == "foo:bar")
	}
}
do {
	guard let elements = doc?.documentElement?.getElementsByTagNameNS(namespaceURI: "foo:bar", localName: "a") else {
		return XCTAssert(false)
	}
	XCTAssert(elements.count == 1)
	for node in elements {
		XCTAssert(node.nodeName == "a")
		XCTAssert(node.localName == "a")
		XCTAssert(node.prefix == "foo")
		XCTAssert(node.namespaceURI == "foo:bar")
	}
}
do {
	guard let elements = doc?.getElementsByTagNameNS(namespaceURI: "foo:barz", localName: "a") else {
		return XCTAssert(false)
	}
	XCTAssert(elements.count == 0)
}
do {
	guard let elements = doc?.documentElement?.getElementsByTagNameNS(namespaceURI: "foo:barz", localName: "a") else {
		return XCTAssert(false)
	}
	XCTAssert(elements.count == 0)
}

按 ID 获取元素

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b id=\"foo\"/><a><b>FOO<b/></b></a></a>\n"
let doc = XDocument(fromSource: docSrc)
XCTAssert(doc?.nodeName == "#document")
guard let element = doc?.getElementById("foo") else {
	return XCTAssert(false)
}
XCTAssert(element.tagName == "b")

XPath

元素

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b id=\"foo\"/><a><b>FOO<b/></b></a></a>\n"
guard let doc = XDocument(fromSource: docSrc) else {
	return XCTAssert(false)
}
XCTAssert(doc.nodeName == "#document")
let pathRes = doc.extract(path: "/a/b")
guard case .nodeSet(let set) = pathRes else {
	return XCTAssert(false, "\(pathRes)")
}
for node in set {
	guard let b = node as? XElement else {
		return XCTAssert(false, "\(node)")
	}
	XCTAssert(b.tagName == "b")
}

属性

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b id=\"foo\"/><a><b>FOO<b/></b></a></a>\n"
guard let doc = XDocument(fromSource: docSrc) else {
	return XCTAssert(false)
}
XCTAssert(doc.nodeName == "#document")
let pathRes = doc.extract(path: "/a/b/@id")
guard case .nodeSet(let set) = pathRes else {
	return XCTAssert(false, "\(pathRes)")
}
for node in set {
	guard let b = node as? XAttr else {
		return XCTAssert(false, "\(node)")
	}
	XCTAssert(b.name == "id")
	XCTAssert(b.value == "foo")
}

文本

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a><b id=\"foo\"/><a><b>FOO<b/></b></a></a>\n"
guard let doc = XDocument(fromSource: docSrc) else {
	return XCTAssert(false)
}
XCTAssert(doc.nodeName == "#document")
let pathRes = doc.extract(path: "/a/a/b/text()")
guard case .nodeSet(let set) = pathRes else {
	return XCTAssert(false, "\(pathRes)")
}
for node in set {
	guard let b = node as? XText else {
		return XCTAssert(false, "\(node)")
	}
	guard let nodeValue = b.nodeValue else {
		return XCTAssert(false, "\(b)")
	}
	XCTAssert(nodeValue == "FOO")
}

命名空间

let docSrc = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<a xmlns:foo=\"foo:bar\"><b/><foo:a><b>FOO<b/></b></foo:a></a>\n"
guard let doc = XDocument(fromSource: docSrc) else {
	return XCTAssert(false)
}
let namespaces = [("f", "foo:bar")]
let pathRes = doc.extract(path: "/a/f:a", namespaces: namespaces)
guard case .nodeSet(let set) = pathRes else {
	return XCTAssert(false, "\(pathRes)")
}
for node in set {
	guard let e = node as? XElement else {
		return XCTAssert(false, "\(node)")
	}
	XCTAssert(e.tagName == "a")
	XCTAssert(e.namespaceURI == "foo:bar")
	XCTAssert(e.prefix == "foo")
}