guix/gnu/packages/patches/python-sgmllib3k-assertions.patch

Restores compatibility with Python >=3.9,
which removed the custom .error() method in
https://github.com/python/cpython/commit/e34bbfd61f405eef89e8aa50672b0b25022de320

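Despite the big diff, the only real changes are the removal of the
error() method and a new try…except clause wrapped around the body of
goahead(); everything else is re-indentation.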

--- source/sgmllib.py	2023-03-18 08:57:58.726240606 +0100
+++ source/sgmllib.py	2023-03-18 09:02:01.667568916 +0100
@@ -101,113 +101,116 @@
         """Handle the remaining data."""
         self.goahead(1)
 
-    def error(self, message):
-        raise SGMLParseError(message)
-
     # Internal -- handle data as far as reasonable.  May leave state
     # and data to be processed by a subsequent call.  If 'end' is
     # true, force handling all data as if followed by EOF marker.
     def goahead(self, end):
-        rawdata = self.rawdata
-        i = 0
-        n = len(rawdata)
-        while i < n:
-            if self.nomoretags:
-                self.handle_data(rawdata[i:n])
-                i = n
-                break
-            match = interesting.search(rawdata, i)
-            if match: j = match.start()
-            else: j = n
-            if i < j:
-                self.handle_data(rawdata[i:j])
-            i = j
-            if i == n: break
-            if rawdata[i] == '<':
-                if starttagopen.match(rawdata, i):
+        try:
+            rawdata = self.rawdata
+            i = 0
+            n = len(rawdata)
+            while i < n:
+                if self.nomoretags:
+                    self.handle_data(rawdata[i:n])
+                    i = n
+                    break
+                match = interesting.search(rawdata, i)
+                if match: j = match.start()
+                else: j = n
+                if i < j:
+                    self.handle_data(rawdata[i:j])
+                i = j
+                if i == n: break
+                if rawdata[i] == '<':
+                    if starttagopen.match(rawdata, i):
+                        if self.literal:
+                            self.handle_data(rawdata[i])
+                            i = i+1
+                            continue
+                        k = self.parse_starttag(i)
+                        if k < 0: break
+                        i = k
+                        continue
+                    if rawdata.startswith("</", i):
+                        k = self.parse_endtag(i)
+                        if k < 0: break
+                        i = k
+                        self.literal = 0
+                        continue
+                    if self.literal:
+                        if n > (i + 1):
+                            self.handle_data("<")
+                            i = i+1
+                        else:
+                            # incomplete
+                            break
+                        continue
+                    if rawdata.startswith("<!--", i):
+                            # Strictly speaking, a comment is --.*--
+                            # within a declaration tag <!...>.
+                            # This should be removed,
+                            # and comments handled only in parse_declaration.
+                        k = self.parse_comment(i)
+                        if k < 0: break
+                        i = k
+                        continue
+                    if rawdata.startswith("<?", i):
+                        k = self.parse_pi(i)
+                        if k < 0: break
+                        i = i+k
+                        continue
+                    if rawdata.startswith("<!", i):
+                        # This is some sort of declaration; in "HTML as
+                        # deployed," this should only be the document type
+                        # declaration ("<!DOCTYPE html...>").
+                        k = self.parse_declaration(i)
+                        if k < 0: break
+                        i = k
+                        continue
+                elif rawdata[i] == '&':
                     if self.literal:
                         self.handle_data(rawdata[i])
                         i = i+1
                         continue
-                    k = self.parse_starttag(i)
-                    if k < 0: break
-                    i = k
-                    continue
-                if rawdata.startswith("</", i):
-                    k = self.parse_endtag(i)
-                    if k < 0: break
-                    i = k
-                    self.literal = 0
-                    continue
-                if self.literal:
-                    if n > (i + 1):
-                        self.handle_data("<")
-                        i = i+1
-                    else:
-                        # incomplete
-                        break
-                    continue
-                if rawdata.startswith("<!--", i):
-                        # Strictly speaking, a comment is --.*--
-                        # within a declaration tag <!...>.
-                        # This should be removed,
-                        # and comments handled only in parse_declaration.
-                    k = self.parse_comment(i)
-                    if k < 0: break
-                    i = k
-                    continue
-                if rawdata.startswith("<?", i):
-                    k = self.parse_pi(i)
-                    if k < 0: break
-                    i = i+k
-                    continue
-                if rawdata.startswith("<!", i):
-                    # This is some sort of declaration; in "HTML as
-                    # deployed," this should only be the document type
-                    # declaration ("<!DOCTYPE html...>").
-                    k = self.parse_declaration(i)
-                    if k < 0: break
-                    i = k
-                    continue
-            elif rawdata[i] == '&':
-                if self.literal:
+                    match = charref.match(rawdata, i)
+                    if match:
+                        name = match.group(1)
+                        self.handle_charref(name)
+                        i = match.end(0)
+                        if rawdata[i-1] != ';': i = i-1
+                        continue
+                    match = entityref.match(rawdata, i)
+                    if match:
+                        name = match.group(1)
+                        self.handle_entityref(name)
+                        i = match.end(0)
+                        if rawdata[i-1] != ';': i = i-1
+                        continue
+                else:
+                    self.error('neither < nor & ??')
+                # We get here only if incomplete matches but
+                # nothing else
+                match = incomplete.match(rawdata, i)
+                if not match:
                     self.handle_data(rawdata[i])
                     i = i+1
                     continue
-                match = charref.match(rawdata, i)
-                if match:
-                    name = match.group(1)
-                    self.handle_charref(name)
-                    i = match.end(0)
-                    if rawdata[i-1] != ';': i = i-1
-                    continue
-                match = entityref.match(rawdata, i)
-                if match:
-                    name = match.group(1)
-                    self.handle_entityref(name)
-                    i = match.end(0)
-                    if rawdata[i-1] != ';': i = i-1
-                    continue
-            else:
-                self.error('neither < nor & ??')
-            # We get here only if incomplete matches but
-            # nothing else
-            match = incomplete.match(rawdata, i)
-            if not match:
-                self.handle_data(rawdata[i])
-                i = i+1
-                continue
-            j = match.end(0)
-            if j == n:
-                break # Really incomplete
-            self.handle_data(rawdata[i:j])
-            i = j
-        # end while
-        if end and i < n:
-            self.handle_data(rawdata[i:n])
-            i = n
-        self.rawdata = rawdata[i:]
-        # XXX if end: check for empty stack
+                j = match.end(0)
+                if j == n:
+                    break # Really incomplete
+                self.handle_data(rawdata[i:j])
+                i = j
+            # end while
+            if end and i < n:
+                self.handle_data(rawdata[i:n])
+                i = n
+            self.rawdata = rawdata[i:]
+            # XXX if end: check for empty stack
+        except AssertionError as e:
+            # The .error() method, which raised the custom SGMLParseError, was
+            # removed (see https://github.com/python/cpython/issues/76025), so we
+            # catch _markupbase's AssertionError and re-raise it as the old one.
+            raise SGMLParseError (e.args[0]) from e
 
     # Extensions for the DOCTYPE scanner:
     _decl_otherchars = '='
gnu: python-sgmllib3k: Add Python >=3.9 compatibility.

* gnu/packages/patches/python-sgmllib3k-assertions.patch: New file.
* gnu/local.mk: Register it.
* gnu/packages/python-xyz.scm (python-sgmllib3k): Use it.

											
										
										
											2023-03-18 04:49:51 -04:00
+								Restores compatibility with Python >=3.9,
 								which removed the custom .error() method in
 								https://github.com/python/cpython/commit/e34bbfd61f405eef89e8aa50672b0b25022de320
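 								Despite the big diff, the only real changes are the removal of the error() method and a new try…except clause wrapped around the body of goahead(); everything else is re-indentation.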
 								--- source/sgmllib.py	2023-03-18 08:57:58.726240606 +0100
 								+++ source/sgmllib.py	2023-03-18 09:02:01.667568916 +0100
@@ -101,113 +101,116 @@
 								         """Handle the remaining data."""
 								         self.goahead(1)
 								-    def error(self, message):
 								-        raise SGMLParseError(message)
 								-
 								     # Internal -- handle data as far as reasonable.  May leave state
 								     # and data to be processed by a subsequent call.  If 'end' is
 								     # true, force handling all data as if followed by EOF marker.
 								     def goahead(self, end):
 								-        rawdata = self.rawdata
 								-        i = 0
 								-        n = len(rawdata)
 								-        while i < n:
 								-            if self.nomoretags:
 								-                self.handle_data(rawdata[i:n])
 								-                i = n
 								-                break
 								-            match = interesting.search(rawdata, i)
 								-            if match: j = match.start()
 								-            else: j = n
 								-            if i < j:
 								-                self.handle_data(rawdata[i:j])
 								-            i = j
 								-            if i == n: break
 								-            if rawdata[i] == '<':
 								-                if starttagopen.match(rawdata, i):
 								+        try:
 								+            rawdata = self.rawdata
 								+            i = 0
 								+            n = len(rawdata)
 								+            while i < n:
 								+                if self.nomoretags:
 								+                    self.handle_data(rawdata[i:n])
 								+                    i = n
 								+                    break
 								+                match = interesting.search(rawdata, i)
 								+                if match: j = match.start()
 								+                else: j = n
 								+                if i < j:
 								+                    self.handle_data(rawdata[i:j])
 								+                i = j
 								+                if i == n: break
 								+                if rawdata[i] == '<':
 								+                    if starttagopen.match(rawdata, i):
 								+                        if self.literal:
 								+                            self.handle_data(rawdata[i])
 								+                            i = i+1
 								+                            continue
 								+                        k = self.parse_starttag(i)
 								+                        if k < 0: break
 								+                        i = k
 								+                        continue
 								+                    if rawdata.startswith("</", i):
 								+                        k = self.parse_endtag(i)
 								+                        if k < 0: break
 								+                        i = k
 								+                        self.literal = 0
 								+                        continue
 								+                    if self.literal:
 								+                        if n > (i + 1):
 								+                            self.handle_data("<")
 								+                            i = i+1
 								+                        else:
 								+                            # incomplete
 								+                            break
 								+                        continue
 								+                    if rawdata.startswith("<!--", i):
 								+                            # Strictly speaking, a comment is --.*--
 								+                            # within a declaration tag <!...>.
 								+                            # This should be removed,
 								+                            # and comments handled only in parse_declaration.
 								+                        k = self.parse_comment(i)
 								+                        if k < 0: break
 								+                        i = k
 								+                        continue
 								+                    if rawdata.startswith("<?", i):
 								+                        k = self.parse_pi(i)
 								+                        if k < 0: break
 								+                        i = i+k
 								+                        continue
 								+                    if rawdata.startswith("<!", i):
 								+                        # This is some sort of declaration; in "HTML as
 								+                        # deployed," this should only be the document type
 								+                        # declaration ("<!DOCTYPE html...>").
 								+                        k = self.parse_declaration(i)
 								+                        if k < 0: break
 								+                        i = k
 								+                        continue
 								+                elif rawdata[i] == '&':
 								                     if self.literal:
 								                         self.handle_data(rawdata[i])
 								                         i = i+1
 								                         continue
 								-                    k = self.parse_starttag(i)
 								-                    if k < 0: break
 								-                    i = k
 								-                    continue
 								-                if rawdata.startswith("</", i):
 								-                    k = self.parse_endtag(i)
 								-                    if k < 0: break
 								-                    i = k
 								-                    self.literal = 0
 								-                    continue
 								-                if self.literal:
 								-                    if n > (i + 1):
 								-                        self.handle_data("<")
 								-                        i = i+1
 								-                    else:
 								-                        # incomplete
 								-                        break
 								-                    continue
 								-                if rawdata.startswith("<!--", i):
 								-                        # Strictly speaking, a comment is --.*--
 								-                        # within a declaration tag <!...>.
 								-                        # This should be removed,
 								-                        # and comments handled only in parse_declaration.
 								-                    k = self.parse_comment(i)
 								-                    if k < 0: break
 								-                    i = k
 								-                    continue
 								-                if rawdata.startswith("<?", i):
 								-                    k = self.parse_pi(i)
 								-                    if k < 0: break
 								-                    i = i+k
 								-                    continue
 								-                if rawdata.startswith("<!", i):
 								-                    # This is some sort of declaration; in "HTML as
 								-                    # deployed," this should only be the document type
 								-                    # declaration ("<!DOCTYPE html...>").
 								-                    k = self.parse_declaration(i)
 								-                    if k < 0: break
 								-                    i = k
 								-                    continue
 								-            elif rawdata[i] == '&':
 								-                if self.literal:
 								+                    match = charref.match(rawdata, i)
 								+                    if match:
 								+                        name = match.group(1)
 								+                        self.handle_charref(name)
 								+                        i = match.end(0)
 								+                        if rawdata[i-1] != ';': i = i-1
 								+                        continue
 								+                    match = entityref.match(rawdata, i)
 								+                    if match:
 								+                        name = match.group(1)
 								+                        self.handle_entityref(name)
 								+                        i = match.end(0)
 								+                        if rawdata[i-1] != ';': i = i-1
 								+                        continue
 								+                else:
 								+                    self.error('neither < nor & ??')
 								+                # We get here only if incomplete matches but
 								+                # nothing else
 								+                match = incomplete.match(rawdata, i)
 								+                if not match:
 								                     self.handle_data(rawdata[i])
 								                     i = i+1
 								                     continue
 								-                match = charref.match(rawdata, i)
 								-                if match:
 								-                    name = match.group(1)
 								-                    self.handle_charref(name)
 								-                    i = match.end(0)
 								-                    if rawdata[i-1] != ';': i = i-1
 								-                    continue
 								-                match = entityref.match(rawdata, i)
 								-                if match:
 								-                    name = match.group(1)
 								-                    self.handle_entityref(name)
 								-                    i = match.end(0)
 								-                    if rawdata[i-1] != ';': i = i-1
 								-                    continue
 								-            else:
 								-                self.error('neither < nor & ??')
 								-            # We get here only if incomplete matches but
 								-            # nothing else
 								-            match = incomplete.match(rawdata, i)
 								-            if not match:
 								-                self.handle_data(rawdata[i])
 								-                i = i+1
 								-                continue
 								-            j = match.end(0)
 								-            if j == n:
 								-                break # Really incomplete
 								-            self.handle_data(rawdata[i:j])
 								-            i = j
 								-        # end while
 								-        if end and i < n:
 								-            self.handle_data(rawdata[i:n])
 								-            i = n
 								-        self.rawdata = rawdata[i:]
 								-        # XXX if end: check for empty stack
 								+                j = match.end(0)
 								+                if j == n:
 								+                    break # Really incomplete
 								+                self.handle_data(rawdata[i:j])
 								+                i = j
 								+            # end while
 								+            if end and i < n:
 								+                self.handle_data(rawdata[i:n])
 								+                i = n
 								+            self.rawdata = rawdata[i:]
 								+            # XXX if end: check for empty stack
 								+        except AssertionError as e:
+								            # The .error() method, which raised the custom SGMLParseError, was
+								            # removed (see https://github.com/python/cpython/issues/76025), so we
+								            # catch _markupbase's AssertionError and re-raise it as the old one.
 								+            raise SGMLParseError (e.args[0]) from e
 								     # Extensions for the DOCTYPE scanner:
 								     _decl_otherchars = '='