about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Antoine Martin <dev@ayakael.net> 2024-04-25 23:09:24 -0400
committer J0WI <1715-J0WI@users.gitlab.alpinelinux.org> 2024-04-26 14:01:20 +0000
commit 0b7be63bfa8c893385949d42fa4c546ca304edad (patch)
tree e64b62c9e298d3c249d6e6167077fae62f98a1a3
parent 0981eb9bbf1be10cd34932caebd1624d22a05879 (diff)
community/py3-extruct: fix lxml 5.1.0 support
-rw-r--r-- community/py3-extruct/214_fix-syntaxwarning.patch 46
-rw-r--r-- community/py3-extruct/217_add-support-lxml-5.1.0.patch 333
-rw-r--r-- community/py3-extruct/APKBUILD 15
3 files changed, 388 insertions, 6 deletions
diff --git a/community/py3-extruct/214_fix-syntaxwarning.patch b/community/py3-extruct/214_fix-syntaxwarning.patch
new file mode 100644
index 00000000000..98e6d6046da
--- /dev/null
+++ b/community/py3-extruct/214_fix-syntaxwarning.patch
@@ -0,0 +1,46 @@
+From 22fa7887383187260db8a8506c3b7f4c8b21e5b2 Mon Sep 17 00:00:00 2001
+From: dmo <marillat@users.noreply.github.com>
+Date: Mon, 12 Feb 2024 09:40:31 +0100
+Subject: [PATCH 1/2] Fix SyntaxWarning in #213
+
+---
+ extruct/rdfa.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extruct/rdfa.py b/extruct/rdfa.py
+index 273b0b5..58a0dd9 100644
+--- a/extruct/rdfa.py
++++ b/extruct/rdfa.py
+@@ -85,7 +85,7 @@ def _replaceNS(self, prop, html_element, head_element):
+
+ match = None
+ if head_element.get("prefix"):
+- match = re.search(prefix + ": [^\s]+", head_element.get("prefix"))
++ match = re.search(prefix + ": [^\\s]+", head_element.get("prefix"))
+
+ # if namespace taken from prefix attribute in head tag
+ if match:
+
+From dc8ef20b99d02ce720284e731e842e1cd7fd6da1 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= <adrian@chaves.io>
+Date: Fri, 12 Apr 2024 13:31:48 +0200
+Subject: [PATCH 2/2] Update extruct/rdfa.py
+
+Co-authored-by: Andrey Rakhmatullin <wrar@wrar.name>
+---
+ extruct/rdfa.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extruct/rdfa.py b/extruct/rdfa.py
+index 58a0dd9..c313e2b 100644
+--- a/extruct/rdfa.py
++++ b/extruct/rdfa.py
+@@ -85,7 +85,7 @@ def _replaceNS(self, prop, html_element, head_element):
+
+ match = None
+ if head_element.get("prefix"):
+- match = re.search(prefix + ": [^\\s]+", head_element.get("prefix"))
++ match = re.search(prefix + r": [^\s]+", head_element.get("prefix"))
+
+ # if namespace taken from prefix attribute in head tag
+ if match:
diff --git a/community/py3-extruct/217_add-support-lxml-5.1.0.patch b/community/py3-extruct/217_add-support-lxml-5.1.0.patch
new file mode 100644
index 00000000000..340b9d9f524
--- /dev/null
+++ b/community/py3-extruct/217_add-support-lxml-5.1.0.patch
@@ -0,0 +1,333 @@
+From 046b85e31b1ff40d6855578500cc1c8e94156adb Mon Sep 17 00:00:00 2001
+From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
+Date: Mon, 1 Apr 2024 22:18:32 +0000
+Subject: [PATCH 1/8] add support for lxml >= 5.1
+
+---
+ extruct/xmldom.py | 21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/extruct/xmldom.py b/extruct/xmldom.py
+index fe0a03b..c4beee0 100644
+--- a/extruct/xmldom.py
++++ b/extruct/xmldom.py
+@@ -6,15 +6,22 @@
+ from xml.dom import Node
+ from xml.dom.minidom import Attr, NamedNodeMap
+
+-from lxml.etree import (
+- ElementBase,
+- XPath,
+- _ElementStringResult,
+- _ElementUnicodeResult,
+- tostring,
+-)
++from lxml.etree import ElementBase, XPath, _ElementUnicodeResult, tostring
+ from lxml.html import HtmlElementClassLookup, HTMLParser
+
++try:
++ from lxml.etree import _ElementStringResult
++except ImportError:
++
++ class _ElementStringResult(bytes):
++ """
++ _ElementStringResult is removed in lxml >= 5.1.0,
++ so we define it here for compatibility.
++ """
++
++ def getparent(self):
++ return self._parent
++
+
+ class DomElementUnicodeResult:
+ CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
+
+From 1a18238c6287b66cb75d64766d018c95b1b60775 Mon Sep 17 00:00:00 2001
+From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
+Date: Mon, 1 Apr 2024 22:31:37 +0000
+Subject: [PATCH 2/8] remove _ElementStringResult and break lxml <5.2.0
+ compatibility
+
+---
+ extruct/xmldom.py | 19 +++----------------
+ 1 file changed, 3 insertions(+), 16 deletions(-)
+
+diff --git a/extruct/xmldom.py b/extruct/xmldom.py
+index c4beee0..93d4e7c 100644
+--- a/extruct/xmldom.py
++++ b/extruct/xmldom.py
+@@ -9,19 +9,6 @@
+ from lxml.etree import ElementBase, XPath, _ElementUnicodeResult, tostring
+ from lxml.html import HtmlElementClassLookup, HTMLParser
+
+-try:
+- from lxml.etree import _ElementStringResult
+-except ImportError:
+-
+- class _ElementStringResult(bytes):
+- """
+- _ElementStringResult is removed in lxml >= 5.1.0,
+- so we define it here for compatibility.
+- """
+-
+- def getparent(self):
+- return self._parent
+-
+
+ class DomElementUnicodeResult:
+ CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
+@@ -54,7 +41,7 @@ def lxmlDomNodeType(node):
+ if isinstance(node, ElementBase):
+ return Node.ELEMENT_NODE
+
+- elif isinstance(node, (_ElementStringResult, _ElementUnicodeResult)):
++ elif isinstance(node, _ElementUnicodeResult):
+ if node.is_attribute:
+ return Node.ATTRIBUTE_NODE
+ else:
+@@ -123,7 +110,7 @@ def childNodes_xpath(self):
+ if isinstance(n, ElementBase):
+ yield n
+
+- elif isinstance(n, (_ElementStringResult, _ElementUnicodeResult)):
++ elif isinstance(n, _ElementUnicodeResult):
+
+ if isinstance(n, _ElementUnicodeResult):
+ n = DomElementUnicodeResult(n)
+@@ -149,7 +136,7 @@ def getElementById(self, i):
+
+ @property
+ def data(self):
+- if isinstance(self, (_ElementStringResult, _ElementUnicodeResult)):
++ if isinstance(self, _ElementUnicodeResult):
+ return self
+ else:
+ raise RuntimeError
+
+From c6f0433b57f387d2a6e6549936c0539a5279f05a Mon Sep 17 00:00:00 2001
+From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
+Date: Mon, 1 Apr 2024 22:32:34 +0000
+Subject: [PATCH 3/8] pin lxml>=5.2.0 with html_clean
+
+---
+ requirements.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/requirements.txt b/requirements.txt
+index aefa8e3..0a042ef 100644
+--- a/requirements.txt
++++ b/requirements.txt
+@@ -1,6 +1,6 @@
+ # project requirements, install them using following command:
+ # pip install -r requirements.txt
+-lxml
++lxml>=5.2.0[html_clean]
+ requests
+ rdflib>=6.0.0; python_version>="3.7"
+ rdflib<6.0.0; python_version<"3.7"
+
+From 699c8584d0250c7ffd18ba6658624f30226f45dd Mon Sep 17 00:00:00 2001
+From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
+Date: Tue, 2 Apr 2024 13:41:32 +0000
+Subject: [PATCH 4/8] fix requirements format
+
+---
+ requirements.txt | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/requirements.txt b/requirements.txt
+index 0a042ef..0e9eb5e 100644
+--- a/requirements.txt
++++ b/requirements.txt
+@@ -1,6 +1,6 @@
+ # project requirements, install them using following command:
+ # pip install -r requirements.txt
+-lxml>=5.2.0[html_clean]
++lxml[html_clean]>=5.2.0
+ requests
+ rdflib>=6.0.0; python_version>="3.7"
+ rdflib<6.0.0; python_version<"3.7"
+
+From 3a00899affe2635438ddc10e7bde3cc51687edbb Mon Sep 17 00:00:00 2001
+From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
+Date: Thu, 4 Apr 2024 09:09:12 -0500
+Subject: [PATCH 5/8] re-add support for older lxml versions
+
+---
+ extruct/xmldom.py | 19 ++++++++++++++++---
+ requirements.txt | 2 +-
+ 2 files changed, 17 insertions(+), 4 deletions(-)
+
+diff --git a/extruct/xmldom.py b/extruct/xmldom.py
+index 93d4e7c..fe76ce6 100644
+--- a/extruct/xmldom.py
++++ b/extruct/xmldom.py
+@@ -9,6 +9,19 @@
+ from lxml.etree import ElementBase, XPath, _ElementUnicodeResult, tostring
+ from lxml.html import HtmlElementClassLookup, HTMLParser
+
++try:
++ from lxml.etree import _ElementStringResult
++except ImportError:
++
++ class _ElementStringResult(bytes): # type: ignore[no-redef]
++ """
++ _ElementStringResult is removed in lxml >= 5.1.0,
++ so we define it here for compatibility.
++ """
++
++ def getparent(self):
++ return self._parent # type: ignore[attr-defined]
++
+
+ class DomElementUnicodeResult:
+ CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE
+@@ -41,7 +54,7 @@ def lxmlDomNodeType(node):
+ if isinstance(node, ElementBase):
+ return Node.ELEMENT_NODE
+
+- elif isinstance(node, _ElementUnicodeResult):
++ elif isinstance(node, (_ElementStringResult, _ElementUnicodeResult)):
+ if node.is_attribute:
+ return Node.ATTRIBUTE_NODE
+ else:
+@@ -110,7 +123,7 @@ def childNodes_xpath(self):
+ if isinstance(n, ElementBase):
+ yield n
+
+- elif isinstance(n, _ElementUnicodeResult):
++ elif isinstance(n, (_ElementStringResult, _ElementUnicodeResult)):
+
+ if isinstance(n, _ElementUnicodeResult):
+ n = DomElementUnicodeResult(n)
+@@ -136,7 +149,7 @@ def getElementById(self, i):
+
+ @property
+ def data(self):
+- if isinstance(self, _ElementUnicodeResult):
++ if isinstance(self, (_ElementStringResult, _ElementUnicodeResult)):
+ return self
+ else:
+ raise RuntimeError
+diff --git a/requirements.txt b/requirements.txt
+index 0e9eb5e..6665ca9 100644
+--- a/requirements.txt
++++ b/requirements.txt
+@@ -1,6 +1,6 @@
+ # project requirements, install them using following command:
+ # pip install -r requirements.txt
+-lxml[html_clean]>=5.2.0
++lxml[html_clean]
+ requests
+ rdflib>=6.0.0; python_version>="3.7"
+ rdflib<6.0.0; python_version<"3.7"
+
+From fd6460575d88395a4011ec76fed1def6c2e8cfd6 Mon Sep 17 00:00:00 2001
+From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
+Date: Thu, 4 Apr 2024 15:54:33 -0500
+Subject: [PATCH 6/8] update tests for breaking mf2py changes
+
+---
+ tests/samples/misc/microformat_flat_test.json | 5 ++++-
+ tests/samples/misc/microformat_test.json | 6 +++++-
+ tests/test_uniform.py | 5 ++++-
+ 3 files changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/tests/samples/misc/microformat_flat_test.json b/tests/samples/misc/microformat_flat_test.json
+index 28344b3..60ad482 100644
+--- a/tests/samples/misc/microformat_flat_test.json
++++ b/tests/samples/misc/microformat_flat_test.json
+@@ -32,7 +32,10 @@
+ "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas"
+ ],
+ "photo": [
+- "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg"
++ {
++ "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas",
++ "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg"
++ }
+ ]
+ }
+ ]
+diff --git a/tests/samples/misc/microformat_test.json b/tests/samples/misc/microformat_test.json
+index a485db0..5eea845 100644
+--- a/tests/samples/misc/microformat_test.json
++++ b/tests/samples/misc/microformat_test.json
+@@ -27,7 +27,11 @@
+ {
+ "properties": {
+ "photo": [
+- "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg"
++ {
++ "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas",
++ "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg"
++
++ }
+ ],
+ "name": [
+ "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas"
+diff --git a/tests/test_uniform.py b/tests/test_uniform.py
+index ca27dd2..fc44227 100644
+--- a/tests/test_uniform.py
++++ b/tests/test_uniform.py
+@@ -167,7 +167,10 @@ def test_umicroformat(self):
+ "Canvas"
+ ],
+ "photo": [
+- "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg"
++ {
++ "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas",
++ "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg"
++ }
+ ],
+ },
+ ],
+
+From d7df9160e72ac84636c7536b4d2a726d22ada1f7 Mon Sep 17 00:00:00 2001
+From: Michael Genson <71845777+michael-genson@users.noreply.github.com>
+Date: Fri, 5 Apr 2024 08:57:18 -0500
+Subject: [PATCH 7/8] fixed wrong tab size
+
+---
+ tests/test_uniform.py | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tests/test_uniform.py b/tests/test_uniform.py
+index fc44227..81d9284 100644
+--- a/tests/test_uniform.py
++++ b/tests/test_uniform.py
+@@ -168,8 +168,8 @@ def test_umicroformat(self):
+ ],
+ "photo": [
+ {
+- "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas",
+- "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg"
++ "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas",
++ "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg",
+ }
+ ],
+ },
+
+From 0a320551026bcc966ba2d45858703fa2c46358ce Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= <adrian@chaves.io>
+Date: Thu, 11 Apr 2024 18:39:17 +0200
+Subject: [PATCH 8/8] Update extruct/xmldom.py
+
+Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com>
+---
+ extruct/xmldom.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/extruct/xmldom.py b/extruct/xmldom.py
+index fe76ce6..018919c 100644
+--- a/extruct/xmldom.py
++++ b/extruct/xmldom.py
+@@ -15,7 +15,7 @@
+
+ class _ElementStringResult(bytes): # type: ignore[no-redef]
+ """
+- _ElementStringResult is removed in lxml >= 5.1.0,
++ _ElementStringResult is removed in lxml >= 5.1.1,
+ so we define it here for compatibility.
+ """
+
diff --git a/community/py3-extruct/APKBUILD b/community/py3-extruct/APKBUILD
index def300d506b..8e7b9996dfd 100644
--- a/community/py3-extruct/APKBUILD
+++ b/community/py3-extruct/APKBUILD
@@ -4,7 +4,7 @@ pkgname=py3-extruct
#_pkgreal is used by apkbuild-pypi to find modules at PyPI
_pkgreal=extruct
pkgver=0.16.0
-pkgrel=2
+pkgrel=3
pkgdesc="Extract embedded metadata from HTML markup"
url="https://pypi.python.org/project/extruct"
license="BSD-3-Clause"
@@ -21,7 +21,11 @@ depends="
"
checkdepends="py3-pytest"
makedepends="py3-setuptools py3-gpep517 py3-wheel"
-source="$pkgname-$pkgver.tar.gz::https://github.com/scrapinghub/extruct/archive/refs/tags/v$pkgver.tar.gz"
+source="
+ $pkgname-$pkgver.tar.gz::https://github.com/scrapinghub/extruct/archive/refs/tags/v$pkgver.tar.gz
+ 217_add-support-lxml-5.1.0.patch
+ 214_fix-syntaxwarning.patch
+"
builddir="$srcdir/$_pkgreal-$pkgver"
subpackages="$pkgname-pyc"
@@ -34,10 +38,7 @@ build() {
check() {
python3 -m venv --clear --without-pip --system-site-packages .testenv
.testenv/bin/python3 -m installer "$builddir"/.dist/*.whl
- # FAILED tests/test_extruct_uniform.py::TestFlatten::test_microformat - AssertionError: Lists differ: [{'@t[41 chars]'], '@context': 'http://microformats.org/wiki/[851 chars]'}]}] != [{'@t[41 chars]'], 'name': [''], '@context': 'http://microfor[769 chars]0']}]
- # FAILED tests/test_microformat.py::TestMicroformat::test_microformat - AssertionError: Lists differ: [{'type': ['h-hidden-phone', 'h-hidden-table[856 chars]}]}}] != [{'properties': {'name': ['']}, 'type': ['h-[774 chars]y']}]
- # FAILED tests/test_uniform.py::TestUniform::test_umicroformat - AssertionError: Lists differ: [{'@type': ['h-hidden-phone', 'h-hidden-table[898 chars]'}]}] != [{'@context': 'http://microformats.org/wiki/'[816 chars].']}]
- .testenv/bin/python3 -m pytest -v -k 'not test_microformat and not test_umicroformat'
+ .testenv/bin/python3 -m pytest -v
}
package() {
@@ -47,4 +48,6 @@ package() {
sha512sums="
bb9f831a84e28f7bb6b9a1464fdc76801d72a57014ce83b2459e1acf9cf5f28e45ae94b5a89e5c239e9fd57d0585baa02e7f9c3650bb46cc5861df4362eda724 py3-extruct-0.16.0.tar.gz
+8d317c0f18b7acc895f9fd18171389303eb82c592c08b7d77e605f950a329938da33892e4d5241bda623e6dd55e8d302a5cf7c9eef583c0b228e3ee3a203ef15 217_add-support-lxml-5.1.0.patch
+cd923aaecdfe7c49c94b947b285e799474810a379fb62cc82b2388a88feadd6d9b27161a38b786ffec18511b86ece72443d68d2adcf0065cc2793e747449ae3d 214_fix-syntaxwarning.patch
"