Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Refactor oEmbed previews #10814

Merged
merged 21 commits into from
Sep 21, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions synapse/rest/media/v1/oembed.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class OEmbedResult:
# The Open Graph result (converted from the oEmbed result).
open_graph_result: JsonDict
# Number of seconds to cache the content.
clokep marked this conversation as resolved.
Show resolved Hide resolved
#
# This will be None if no cache-age is provided in the oEmbed response (or
# if the oEmbed response cannot be turned into an Open Graph response).
cache_age: Optional[int]
clokep marked this conversation as resolved.
Show resolved Hide resolved


Expand Down Expand Up @@ -118,31 +121,31 @@ def parse_oembed_response(self, url: str, raw_body: str) -> OEmbedResult:
cache_age = int(cache_age)

# The results.
ppen_graph_response = {"og:title": oembed.get("title")}
open_graph_response = {"og:title": oembed.get("title")}

# If a thumbnail exists, use it. Note that dimensions will be calculated later.
if "thumbnail_url" in oembed:
ppen_graph_response["og:image"] = oembed["thumbnail_url"]
open_graph_response["og:image"] = oembed["thumbnail_url"]

# Process each type separately.
oembed_type = oembed["type"]
if oembed_type == "rich":
calc_description_and_urls(ppen_graph_response, oembed["html"])
calc_description_and_urls(open_graph_response, oembed["html"])

elif oembed_type == "photo":
# If this is a photo, use the full image, not the thumbnail.
ppen_graph_response["og:image"] = oembed["url"]
open_graph_response["og:image"] = oembed["url"]

else:
raise RuntimeError(f"Unknown oEmbed type: {oembed_type}")

except Exception as e:
# Trap any exception and let the code follow as usual.
logger.warning(f"Error parsing oEmbed metadata from {url}: {e!r}")
ppen_graph_response = {}
open_graph_response = {}
cache_age = None

return OEmbedResult(ppen_graph_response, cache_age)
return OEmbedResult(open_graph_response, cache_age)


def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None:
Expand Down
56 changes: 31 additions & 25 deletions synapse/rest/media/v1/preview_url_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
OG_TAG_VALUE_MAXLEN = 1000

ONE_HOUR = 60 * 60 * 1000
ONE_DAY = 24 * ONE_HOUR


@attr.s(slots=True, frozen=True, auto_attribs=True)
Expand Down Expand Up @@ -265,8 +266,8 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes:

logger.debug("got media_info of '%s'", media_info)

# The timestamp of when this media expires.
expiration_ts_ms = media_info.expires + media_info.created_ts_ms
# The number of milliseconds that the response should be considered valid.
expiration_ms = media_info.expires

if _is_media(media_info.media_type):
file_id = media_info.filesystem_id
Expand Down Expand Up @@ -311,7 +312,7 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes:

# Use the cache age from the oEmbed result, instead of the HTTP response.
if oembed_response.cache_age is not None:
expiration_ts_ms = oembed_response.cache_age + media_info.created_ts_ms
expiration_ms = oembed_response.cache_age

await self._precache_image_url(user, media_info, og)

Expand All @@ -335,12 +336,15 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes:

jsonog = json_encoder.encode(og)

# Cap the amount of time to consider a response valid.
expiration_ms = min(expiration_ms, ONE_DAY)

# store OG in history-aware DB cache
await self.store.store_url_cache(
url,
media_info.response_code,
media_info.etag,
expiration_ts_ms,
media_info.created_ts_ms + expiration_ms,
jsonog,
media_info.filesystem_id,
media_info.created_ts_ms,
Expand Down Expand Up @@ -448,32 +452,34 @@ async def _precache_image_url(
media_info: The media being previewed.
og: The Open Graph dictionary. This is modified with image information.
"""
#
# If there's no image or it is blank, there's nothing to do.
if "og:image" not in og or not og["og:image"]:
return

# FIXME: it might be cleaner to use the same flow as the main /preview_url
# request itself and benefit from the same caching etc. But for now we
# just rely on the caching on the master request to speed things up.
if "og:image" in og and og["og:image"]:
image_info = await self._download_url(
_rebase_url(og["og:image"], media_info.uri), user
)

if _is_media(image_info.media_type):
# TODO: make sure we don't choke on white-on-transparent images
file_id = image_info.filesystem_id
dims = await self.media_repo._generate_thumbnails(
None, file_id, file_id, image_info.media_type, url_cache=True
)
if dims:
og["og:image:width"] = dims["width"]
og["og:image:height"] = dims["height"]
else:
logger.warning("Couldn't get dims for %s", og["og:image"])
image_info = await self._download_url(
_rebase_url(og["og:image"], media_info.uri), user
)

og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
og["og:image:type"] = image_info.media_type
og["matrix:image:size"] = image_info.media_length
if _is_media(image_info.media_type):
# TODO: make sure we don't choke on white-on-transparent images
file_id = image_info.filesystem_id
dims = await self.media_repo._generate_thumbnails(
None, file_id, file_id, image_info.media_type, url_cache=True
)
if dims:
og["og:image:width"] = dims["width"]
og["og:image:height"] = dims["height"]
else:
del og["og:image"]
logger.warning("Couldn't get dims for %s", og["og:image"])

og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
og["og:image:type"] = image_info.media_type
og["matrix:image:size"] = image_info.media_length
else:
del og["og:image"]

def _start_expire_url_cache_data(self) -> Deferred:
return run_as_background_process(
Expand Down
4 changes: 2 additions & 2 deletions tests/rest/media/v1/test_url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ def test_oembed_photo(self):
client.dataReceived(
(
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
b'Content-Type: image/png; charset="utf8"\r\n\r\n'
b"Content-Type: image/png\r\n\r\n"
)
% (len(SMALL_PNG),)
+ SMALL_PNG
Expand All @@ -624,7 +624,7 @@ def test_oembed_photo(self):
self.assertTrue(channel.json_body["og:image"].startswith("mxc://"))
self.assertEqual(channel.json_body["og:image:height"], 1)
self.assertEqual(channel.json_body["og:image:width"], 1)
self.assertTrue(channel.json_body["og:image:type"].startswith("image/png"))
self.assertEqual(channel.json_body["og:image:type"], "image/png")

def test_oembed_rich(self):
"""Test an oEmbed endpoint which returns HTML content via the 'rich' type."""
Expand Down