diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c380f27..78eeea95 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 ### Fixed
 
 - `TypeError` when PDF object reference cannot be parsed as int ([#972](https://github.com/pdfminer/pdfminer.six/pull/972))])
+- `TypeError` when PDF literal cannot be converted to str ([#978](https://github.com/pdfminer/pdfminer.six/pull/978))
 
 ### Removed
 
diff --git a/pdfminer/pdfinterp.py b/pdfminer/pdfinterp.py
index 3ff2c144..4052a443 100644
--- a/pdfminer/pdfinterp.py
+++ b/pdfminer/pdfinterp.py
@@ -947,7 +947,7 @@ def do_EI(self, obj: PDFStackT) -> None:
 
     def do_Do(self, xobjid_arg: PDFStackT) -> None:
         """Invoke named XObject"""
-        xobjid = cast(str, literal_name(xobjid_arg))
+        xobjid = literal_name(xobjid_arg)
         try:
             xobj = stream_value(self.xobjmap[xobjid])
         except KeyError:
diff --git a/pdfminer/psparser.py b/pdfminer/psparser.py
index 36172c7a..0839f1f0 100755
--- a/pdfminer/psparser.py
+++ b/pdfminer/psparser.py
@@ -117,23 +117,21 @@ def intern(self, name: PSLiteral.NameType) -> _SymbolT:
 KEYWORD_DICT_END = KWD(b">>")
 
 
-def literal_name(x: object) -> Any:
-    if not isinstance(x, PSLiteral):
+def literal_name(x: Any) -> str:
+    if isinstance(x, PSLiteral):
+        if isinstance(x.name, str):
+            return x.name
+        try:
+            return str(x.name, "utf-8")
+        except UnicodeDecodeError:
+            return str(x.name)
+    else:
         if settings.STRICT:
             raise PSTypeError(f"Literal required: {x!r}")
-        else:
-            name = x
-    else:
-        name = x.name
-        if not isinstance(name, str):
-            try:
-                name = str(name, "utf-8")
-            except Exception:
-                pass
-    return name
+        return str(x)
 
 
-def keyword_name(x: object) -> Any:
+def keyword_name(x: Any) -> Any:
     if not isinstance(x, PSKeyword):
         if settings.STRICT:
             raise PSTypeError("Keyword required: %r" % x)
@@ -523,12 +521,13 @@ def nexttoken(self) -> Tuple[int, PSBaseParserToken]:
 
 
 # Stack slots may by occupied by any of:
+# * the name of a literal
 # * the PSBaseParserToken types
 # * list (via KEYWORD_ARRAY)
 # * dict (via KEYWORD_DICT)
 # * subclass-specific extensions (e.g. PDFStream, PDFObjRef) via ExtraT
 ExtraT = TypeVar("ExtraT")
-PSStackType = Union[float, bool, PSLiteral, bytes, List, Dict, ExtraT]
+PSStackType = Union[str, float, bool, PSLiteral, bytes, List, Dict, ExtraT]
 PSStackEntry = Tuple[int, PSStackType[ExtraT]]
 
 