Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: Replace warning with logging.error #2377

Merged
merged 1 commit into from
Jan 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/user/suppress-warnings.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ pypdf makes use of 3 mechanisms to show that something went wrong:
* **Log messages** are informative messages that can be used for post-mortem
analysis. Most of the time, users can ignore them. They come in different
*levels*, such as info / warning / error indicating the severity.
Examples are non-standard compliant PDF files which pypdf can deal with.
Examples are non-standard compliant PDF files which pypdf can deal with or
a missing implementation that leads to a part of the text not being extracted.
* **Warnings** are avoidable issues, such as using deprecated classes /
functions / parameters. Another example is missing capabilities of pypdf.
In those cases, pypdf users should adjust their code. Warnings
Expand Down
13 changes: 4 additions & 9 deletions pypdf/_cmap.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import warnings
from binascii import unhexlify
from math import ceil
from typing import Any, Dict, List, Tuple, Union, cast

from ._codecs import adobe_glyphs, charset_encoding
from ._utils import b_, logger_warning
from .errors import PdfReadWarning
from ._utils import b_, logger_error, logger_warning
from .generic import (
DecodedStreamObject,
DictionaryObject,
Expand Down Expand Up @@ -180,18 +178,15 @@
else:
raise Exception("not found")
except Exception:
warnings.warn(
f"Advanced encoding {enc} not implemented yet",
PdfReadWarning,
)
logger_error(f"Advanced encoding {enc} not implemented yet", __name__)
encoding = enc
elif isinstance(enc, DictionaryObject) and "/BaseEncoding" in enc:
try:
encoding = charset_encoding[cast(str, enc["/BaseEncoding"])].copy()
except Exception:
warnings.warn(
logger_error(

Check warning on line 187 in pypdf/_cmap.py

View check run for this annotation

Codecov / codecov/patch

pypdf/_cmap.py#L187

Added line #L187 was not covered by tests
f"Advanced encoding {encoding} not implemented yet",
PdfReadWarning,
__name__,
)
encoding = charset_encoding["/StandardCoding"].copy()
else:
Expand Down
12 changes: 12 additions & 0 deletions pypdf/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,18 @@ def deprecation_no_replacement(name: str, removed_in: str) -> None:
deprecation(DEPR_MSG_NO_REPLACEMENT_HAPPENED.format(name, removed_in))


def logger_error(msg: str, src: str) -> None:
    """
    Emit ``msg`` at ERROR level on the logger named ``src``.

    Call this helper rather than ``logger.error`` directly, so that users
    have a single function they can overwrite.

    For guidance on when to use log messages versus warnings, see:
    https://pypdf.readthedocs.io/en/latest/user/suppress-warnings.html

    Args:
        msg: The text to log.
        src: Name of the logger to emit on, typically the caller's
            ``__name__``.
    """
    source_logger = logging.getLogger(src)
    source_logger.error(msg)


def logger_warning(msg: str, src: str) -> None:
"""
Use this instead of logger.warning directly.
Expand Down
9 changes: 4 additions & 5 deletions tests/test_cmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

from pypdf import PdfReader
from pypdf._cmap import build_char_map
from pypdf.errors import PdfReadWarning

from . import get_data_from_url

Expand Down Expand Up @@ -85,11 +84,11 @@ def test_text_extraction_fast(caplog, url: str, name: str, strict: bool):


@pytest.mark.enable_socket()
def test_parse_encoding_advanced_encoding_not_implemented():
def test_parse_encoding_advanced_encoding_not_implemented(caplog):
reader = PdfReader(BytesIO(get_data_from_url(name="tika-957144.pdf")))
with pytest.warns(PdfReadWarning, match="Advanced encoding .* not implemented yet"):
for page in reader.pages:
page.extract_text()
for page in reader.pages:
page.extract_text()
assert "Advanced encoding /WinAnsEncoding not implemented yet" in caplog.text


@pytest.mark.enable_socket()
Expand Down
Loading