diff options
author | Antoine Martin <dev@ayakael.net> | 2024-04-25 23:09:24 -0400 |
---|---|---|
committer | J0WI <1715-J0WI@users.gitlab.alpinelinux.org> | 2024-04-26 14:01:20 +0000 |
commit | 0b7be63bfa8c893385949d42fa4c546ca304edad (patch) | |
tree | e64b62c9e298d3c249d6e6167077fae62f98a1a3 | |
parent | 0981eb9bbf1be10cd34932caebd1624d22a05879 (diff) |
community/py3-extruct: fix lxml 5.1.0 support
-rw-r--r-- | community/py3-extruct/214_fix-syntaxwarning.patch | 46 | ||||
-rw-r--r-- | community/py3-extruct/217_add-support-lxml-5.1.0.patch | 333 | ||||
-rw-r--r-- | community/py3-extruct/APKBUILD | 15 |
3 files changed, 388 insertions, 6 deletions
diff --git a/community/py3-extruct/214_fix-syntaxwarning.patch b/community/py3-extruct/214_fix-syntaxwarning.patch new file mode 100644 index 00000000000..98e6d6046da --- /dev/null +++ b/community/py3-extruct/214_fix-syntaxwarning.patch @@ -0,0 +1,46 @@ +From 22fa7887383187260db8a8506c3b7f4c8b21e5b2 Mon Sep 17 00:00:00 2001 +From: dmo <marillat@users.noreply.github.com> +Date: Mon, 12 Feb 2024 09:40:31 +0100 +Subject: [PATCH 1/2] Fix SyntaxWarning in #213 + +--- + extruct/rdfa.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extruct/rdfa.py b/extruct/rdfa.py +index 273b0b5..58a0dd9 100644 +--- a/extruct/rdfa.py ++++ b/extruct/rdfa.py +@@ -85,7 +85,7 @@ def _replaceNS(self, prop, html_element, head_element): + + match = None + if head_element.get("prefix"): +- match = re.search(prefix + ": [^\s]+", head_element.get("prefix")) ++ match = re.search(prefix + ": [^\\s]+", head_element.get("prefix")) + + # if namespace taken from prefix attribute in head tag + if match: + +From dc8ef20b99d02ce720284e731e842e1cd7fd6da1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= <adrian@chaves.io> +Date: Fri, 12 Apr 2024 13:31:48 +0200 +Subject: [PATCH 2/2] Update extruct/rdfa.py + +Co-authored-by: Andrey Rakhmatullin <wrar@wrar.name> +--- + extruct/rdfa.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extruct/rdfa.py b/extruct/rdfa.py +index 58a0dd9..c313e2b 100644 +--- a/extruct/rdfa.py ++++ b/extruct/rdfa.py +@@ -85,7 +85,7 @@ def _replaceNS(self, prop, html_element, head_element): + + match = None + if head_element.get("prefix"): +- match = re.search(prefix + ": [^\\s]+", head_element.get("prefix")) ++ match = re.search(prefix + r": [^\s]+", head_element.get("prefix")) + + # if namespace taken from prefix attribute in head tag + if match: diff --git a/community/py3-extruct/217_add-support-lxml-5.1.0.patch b/community/py3-extruct/217_add-support-lxml-5.1.0.patch new file mode 100644 index 00000000000..340b9d9f524 --- /dev/null +++ b/community/py3-extruct/217_add-support-lxml-5.1.0.patch @@ -0,0 +1,333 @@ +From 046b85e31b1ff40d6855578500cc1c8e94156adb Mon Sep 17 00:00:00 2001 +From: Michael Genson <71845777+michael-genson@users.noreply.github.com> +Date: Mon, 1 Apr 2024 22:18:32 +0000 +Subject: [PATCH 1/8] add support for lxml >= 5.1 + +--- + extruct/xmldom.py | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/extruct/xmldom.py b/extruct/xmldom.py +index fe0a03b..c4beee0 100644 +--- a/extruct/xmldom.py ++++ b/extruct/xmldom.py +@@ -6,15 +6,22 @@ + from xml.dom import Node + from xml.dom.minidom import Attr, NamedNodeMap + +-from lxml.etree import ( +- ElementBase, +- XPath, +- _ElementStringResult, +- _ElementUnicodeResult, +- tostring, +-) ++from lxml.etree import ElementBase, XPath, _ElementUnicodeResult, tostring + from lxml.html import HtmlElementClassLookup, HTMLParser + ++try: ++ from lxml.etree import _ElementStringResult ++except ImportError: ++ ++ class _ElementStringResult(bytes): ++ """ ++ _ElementStringResult is removed in lxml >= 5.1.0, ++ so we define it here for compatibility. ++ """ ++ ++ def getparent(self): ++ return self._parent ++ + + class DomElementUnicodeResult: + CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE + +From 1a18238c6287b66cb75d64766d018c95b1b60775 Mon Sep 17 00:00:00 2001 +From: Michael Genson <71845777+michael-genson@users.noreply.github.com> +Date: Mon, 1 Apr 2024 22:31:37 +0000 +Subject: [PATCH 2/8] remove _ElementStringResult and break lxml <5.2.0 + compatibility + +--- + extruct/xmldom.py | 19 +++---------------- + 1 file changed, 3 insertions(+), 16 deletions(-) + +diff --git a/extruct/xmldom.py b/extruct/xmldom.py +index c4beee0..93d4e7c 100644 +--- a/extruct/xmldom.py ++++ b/extruct/xmldom.py +@@ -9,19 +9,6 @@ + from lxml.etree import ElementBase, XPath, _ElementUnicodeResult, tostring + from lxml.html import HtmlElementClassLookup, HTMLParser + +-try: +- from lxml.etree import _ElementStringResult +-except ImportError: +- +- class _ElementStringResult(bytes): +- """ +- _ElementStringResult is removed in lxml >= 5.1.0, +- so we define it here for compatibility. +- """ +- +- def getparent(self): +- return self._parent +- + + class DomElementUnicodeResult: + CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE +@@ -54,7 +41,7 @@ def lxmlDomNodeType(node): + if isinstance(node, ElementBase): + return Node.ELEMENT_NODE + +- elif isinstance(node, (_ElementStringResult, _ElementUnicodeResult)): ++ elif isinstance(node, _ElementUnicodeResult): + if node.is_attribute: + return Node.ATTRIBUTE_NODE + else: +@@ -123,7 +110,7 @@ def childNodes_xpath(self): + if isinstance(n, ElementBase): + yield n + +- elif isinstance(n, (_ElementStringResult, _ElementUnicodeResult)): ++ elif isinstance(n, _ElementUnicodeResult): + + if isinstance(n, _ElementUnicodeResult): + n = DomElementUnicodeResult(n) +@@ -149,7 +136,7 @@ def getElementById(self, i): + + @property + def data(self): +- if isinstance(self, (_ElementStringResult, _ElementUnicodeResult)): ++ if isinstance(self, _ElementUnicodeResult): + return self + else: + raise RuntimeError + +From c6f0433b57f387d2a6e6549936c0539a5279f05a Mon Sep 17 00:00:00 2001 +From: Michael Genson <71845777+michael-genson@users.noreply.github.com> +Date: Mon, 1 Apr 2024 22:32:34 +0000 +Subject: [PATCH 3/8] pin lxml>=5.2.0 with html_clean + +--- + requirements.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/requirements.txt b/requirements.txt +index aefa8e3..0a042ef 100644 +--- a/requirements.txt ++++ b/requirements.txt +@@ -1,6 +1,6 @@ + # project requirements, install them using following command: + # pip install -r requirements.txt +-lxml ++lxml>=5.2.0[html_clean] + requests + rdflib>=6.0.0; python_version>="3.7" + rdflib<6.0.0; python_version<"3.7" + +From 699c8584d0250c7ffd18ba6658624f30226f45dd Mon Sep 17 00:00:00 2001 +From: Michael Genson <71845777+michael-genson@users.noreply.github.com> +Date: Tue, 2 Apr 2024 13:41:32 +0000 +Subject: [PATCH 4/8] fix requirements format + +--- + requirements.txt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/requirements.txt b/requirements.txt +index 0a042ef..0e9eb5e 100644 +--- a/requirements.txt ++++ b/requirements.txt +@@ -1,6 +1,6 @@ + # project requirements, install them using following command: + # pip install -r requirements.txt +-lxml>=5.2.0[html_clean] ++lxml[html_clean]>=5.2.0 + requests + rdflib>=6.0.0; python_version>="3.7" + rdflib<6.0.0; python_version<"3.7" + +From 3a00899affe2635438ddc10e7bde3cc51687edbb Mon Sep 17 00:00:00 2001 +From: Michael Genson <71845777+michael-genson@users.noreply.github.com> +Date: Thu, 4 Apr 2024 09:09:12 -0500 +Subject: [PATCH 5/8] re-add support for older lxml versions + +--- + extruct/xmldom.py | 19 ++++++++++++++++--- + requirements.txt | 2 +- + 2 files changed, 17 insertions(+), 4 deletions(-) + +diff --git a/extruct/xmldom.py b/extruct/xmldom.py +index 93d4e7c..fe76ce6 100644 +--- a/extruct/xmldom.py ++++ b/extruct/xmldom.py +@@ -9,6 +9,19 @@ + from lxml.etree import ElementBase, XPath, _ElementUnicodeResult, tostring + from lxml.html import HtmlElementClassLookup, HTMLParser + ++try: ++ from lxml.etree import _ElementStringResult ++except ImportError: ++ ++ class _ElementStringResult(bytes): # type: ignore[no-redef] ++ """ ++ _ElementStringResult is removed in lxml >= 5.1.0, ++ so we define it here for compatibility. ++ """ ++ ++ def getparent(self): ++ return self._parent # type: ignore[attr-defined] ++ + + class DomElementUnicodeResult: + CDATA_SECTION_NODE = Node.CDATA_SECTION_NODE +@@ -41,7 +54,7 @@ def lxmlDomNodeType(node): + if isinstance(node, ElementBase): + return Node.ELEMENT_NODE + +- elif isinstance(node, _ElementUnicodeResult): ++ elif isinstance(node, (_ElementStringResult, _ElementUnicodeResult)): + if node.is_attribute: + return Node.ATTRIBUTE_NODE + else: +@@ -110,7 +123,7 @@ def childNodes_xpath(self): + if isinstance(n, ElementBase): + yield n + +- elif isinstance(n, _ElementUnicodeResult): ++ elif isinstance(n, (_ElementStringResult, _ElementUnicodeResult)): + + if isinstance(n, _ElementUnicodeResult): + n = DomElementUnicodeResult(n) +@@ -136,7 +149,7 @@ def getElementById(self, i): + + @property + def data(self): +- if isinstance(self, _ElementUnicodeResult): ++ if isinstance(self, (_ElementStringResult, _ElementUnicodeResult)): + return self + else: + raise RuntimeError +diff --git a/requirements.txt b/requirements.txt +index 0e9eb5e..6665ca9 100644 +--- a/requirements.txt ++++ b/requirements.txt +@@ -1,6 +1,6 @@ + # project requirements, install them using following command: + # pip install -r requirements.txt +-lxml[html_clean]>=5.2.0 ++lxml[html_clean] + requests + rdflib>=6.0.0; python_version>="3.7" + rdflib<6.0.0; python_version<"3.7" + +From fd6460575d88395a4011ec76fed1def6c2e8cfd6 Mon Sep 17 00:00:00 2001 +From: Michael Genson <71845777+michael-genson@users.noreply.github.com> +Date: Thu, 4 Apr 2024 15:54:33 -0500 +Subject: [PATCH 6/8] update tests for breaking mf2py changes + +--- + tests/samples/misc/microformat_flat_test.json | 5 ++++- + tests/samples/misc/microformat_test.json | 6 +++++- + tests/test_uniform.py | 5 ++++- + 3 files changed, 13 insertions(+), 3 deletions(-) + +diff --git a/tests/samples/misc/microformat_flat_test.json b/tests/samples/misc/microformat_flat_test.json +index 28344b3..60ad482 100644 +--- a/tests/samples/misc/microformat_flat_test.json ++++ b/tests/samples/misc/microformat_flat_test.json +@@ -32,7 +32,10 @@ + "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas" + ], + "photo": [ +- "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg" ++ { ++ "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas", ++ "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg" ++ } + ] + } + ] +diff --git a/tests/samples/misc/microformat_test.json b/tests/samples/misc/microformat_test.json +index a485db0..5eea845 100644 +--- a/tests/samples/misc/microformat_test.json ++++ b/tests/samples/misc/microformat_test.json +@@ -27,7 +27,11 @@ + { + "properties": { + "photo": [ +- "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg" ++ { ++ "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas", ++ "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg" ++ ++ } + ], + "name": [ + "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas" +diff --git a/tests/test_uniform.py b/tests/test_uniform.py +index ca27dd2..fc44227 100644 +--- a/tests/test_uniform.py ++++ b/tests/test_uniform.py +@@ -167,7 +167,10 @@ def test_umicroformat(self): + "Canvas" + ], + "photo": [ +- "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg" ++ { ++ "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas", ++ "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg" ++ } + ], + }, + ], + +From d7df9160e72ac84636c7536b4d2a726d22ada1f7 Mon Sep 17 00:00:00 2001 +From: Michael Genson <71845777+michael-genson@users.noreply.github.com> +Date: Fri, 5 Apr 2024 08:57:18 -0500 +Subject: [PATCH 7/8] fixed wrong tab size + +--- + tests/test_uniform.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tests/test_uniform.py b/tests/test_uniform.py +index fc44227..81d9284 100644 +--- a/tests/test_uniform.py ++++ b/tests/test_uniform.py +@@ -168,8 +168,8 @@ def test_umicroformat(self): + ], + "photo": [ + { +- "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas", +- "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg" ++ "alt": "aJ Styles FastLane 2018 15 x 17 Framed Plaque w/ Ring Canvas", ++ "value": "/on/demandware.static/-/Sites-main/default/dwa3227ee6/images/small/CN1148.jpg", + } + ], + }, + +From 0a320551026bcc966ba2d45858703fa2c46358ce Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= <adrian@chaves.io> +Date: Thu, 11 Apr 2024 18:39:17 +0200 +Subject: [PATCH 8/8] Update extruct/xmldom.py + +Co-authored-by: James Addison <55152140+jayaddison@users.noreply.github.com> +--- + extruct/xmldom.py | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/extruct/xmldom.py b/extruct/xmldom.py +index fe76ce6..018919c 100644 +--- a/extruct/xmldom.py ++++ b/extruct/xmldom.py +@@ -15,7 +15,7 @@ + + class _ElementStringResult(bytes): # type: ignore[no-redef] + """ +- _ElementStringResult is removed in lxml >= 5.1.0, ++ _ElementStringResult is removed in lxml >= 5.1.1, + so we define it here for compatibility. + """ + diff --git a/community/py3-extruct/APKBUILD b/community/py3-extruct/APKBUILD index def300d506b..8e7b9996dfd 100644 --- a/community/py3-extruct/APKBUILD +++ b/community/py3-extruct/APKBUILD @@ -4,7 +4,7 @@ pkgname=py3-extruct #_pkgreal is used by apkbuild-pypi to find modules at PyPI _pkgreal=extruct pkgver=0.16.0 -pkgrel=2 +pkgrel=3 pkgdesc="Extract embedded metadata from HTML markup" url="https://pypi.python.org/project/extruct" license="BSD-3-Clause" @@ -21,7 +21,11 @@ depends=" " checkdepends="py3-pytest" makedepends="py3-setuptools py3-gpep517 py3-wheel" -source="$pkgname-$pkgver.tar.gz::https://github.com/scrapinghub/extruct/archive/refs/tags/v$pkgver.tar.gz" +source=" + $pkgname-$pkgver.tar.gz::https://github.com/scrapinghub/extruct/archive/refs/tags/v$pkgver.tar.gz + 217_add-support-lxml-5.1.0.patch + 214_fix-syntaxwarning.patch +" builddir="$srcdir/$_pkgreal-$pkgver" subpackages="$pkgname-pyc" @@ -34,10 +38,7 @@ build() { check() { python3 -m venv --clear --without-pip --system-site-packages .testenv .testenv/bin/python3 -m installer "$builddir"/.dist/*.whl - # FAILED tests/test_extruct_uniform.py::TestFlatten::test_microformat - AssertionError: Lists differ: [{'@t[41 chars]'], '@context': 'http://microformats.org/wiki/[851 chars]'}]}] != [{'@t[41 chars]'], 'name': [''], '@context': 'http://microfor[769 chars]0']}] - # FAILED tests/test_microformat.py::TestMicroformat::test_microformat - AssertionError: Lists differ: [{'type': ['h-hidden-phone', 'h-hidden-table[856 chars]}]}}] != [{'properties': {'name': ['']}, 'type': ['h-[774 chars]y']}] - # FAILED tests/test_uniform.py::TestUniform::test_umicroformat - AssertionError: Lists differ: [{'@type': ['h-hidden-phone', 'h-hidden-table[898 chars]'}]}] != [{'@context': 'http://microformats.org/wiki/'[816 chars].']}] - .testenv/bin/python3 -m pytest -v -k 'not test_microformat and not test_umicroformat' + .testenv/bin/python3 -m pytest -v } package() { @@ -47,4 +48,6 @@ package() { sha512sums=" bb9f831a84e28f7bb6b9a1464fdc76801d72a57014ce83b2459e1acf9cf5f28e45ae94b5a89e5c239e9fd57d0585baa02e7f9c3650bb46cc5861df4362eda724 py3-extruct-0.16.0.tar.gz +8d317c0f18b7acc895f9fd18171389303eb82c592c08b7d77e605f950a329938da33892e4d5241bda623e6dd55e8d302a5cf7c9eef583c0b228e3ee3a203ef15 217_add-support-lxml-5.1.0.patch +cd923aaecdfe7c49c94b947b285e799474810a379fb62cc82b2388a88feadd6d9b27161a38b786ffec18511b86ece72443d68d2adcf0065cc2793e747449ae3d 214_fix-syntaxwarning.patch " |