matrix-org · clokep · Sep 21, 2021 · Sep 8, 2021 · Sep 8, 2021 · Sep 8, 2021
@@ -32,6 +32,9 @@ class OEmbedResult:
     # The Open Graph result (converted from the oEmbed result).
     open_graph_result: JsonDict
     # Number of seconds to cache the content.
+    #
+    # This will be None if no cache-age is provided in the oEmbed response (or
+    # if the oEmbed response cannot be turned into an Open Graph response).
     cache_age: Optional[int]
 
 
@@ -118,31 +121,31 @@ def parse_oembed_response(self, url: str, raw_body: str) -> OEmbedResult:
                 cache_age = int(cache_age)
 
             # The results.
-            ppen_graph_response = {"og:title": oembed.get("title")}
+            open_graph_response = {"og:title": oembed.get("title")}
 
             # If a thumbnail exists, use it. Note that dimensions will be calculated later.
             if "thumbnail_url" in oembed:
-                ppen_graph_response["og:image"] = oembed["thumbnail_url"]
+                open_graph_response["og:image"] = oembed["thumbnail_url"]
 
             # Process each type separately.
             oembed_type = oembed["type"]
             if oembed_type == "rich":
-                calc_description_and_urls(ppen_graph_response, oembed["html"])
+                calc_description_and_urls(open_graph_response, oembed["html"])
 
             elif oembed_type == "photo":
                 # If this is a photo, use the full image, not the thumbnail.
-                ppen_graph_response["og:image"] = oembed["url"]
+                open_graph_response["og:image"] = oembed["url"]
 
             else:
                 raise RuntimeError(f"Unknown oEmbed type: {oembed_type}")
 
         except Exception as e:
             # Trap any exception and let the code follow as usual.
             logger.warning(f"Error parsing oEmbed metadata from {url}: {e:r}")
-            ppen_graph_response = {}
+            open_graph_response = {}
             cache_age = None
 
-        return OEmbedResult(ppen_graph_response, cache_age)
+        return OEmbedResult(open_graph_response, cache_age)
 
 
 def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None:

@@ -73,6 +73,7 @@
 OG_TAG_VALUE_MAXLEN = 1000
 
 ONE_HOUR = 60 * 60 * 1000
+ONE_DAY = 24 * ONE_HOUR
 
 
 @attr.s(slots=True, frozen=True, auto_attribs=True)
@@ -265,8 +266,8 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes:
 
         logger.debug("got media_info of '%s'", media_info)
 
-        # The timestamp of when this media expires.
-        expiration_ts_ms = media_info.expires + media_info.created_ts_ms
+        # The number of milliseconds that the response should be considered valid.
+        expiration_ms = media_info.expires
 
         if _is_media(media_info.media_type):
             file_id = media_info.filesystem_id
@@ -311,7 +312,7 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes:
 
             # Use the cache age from the oEmbed result, instead of the HTTP response.
             if oembed_response.cache_age is not None:
-                expiration_ts_ms = oembed_response.cache_age + media_info.created_ts_ms
+                expiration_ms = oembed_response.cache_age * 1000  # seconds -> ms
 
             await self._precache_image_url(user, media_info, og)
 
@@ -335,12 +336,15 @@ async def _do_preview(self, url: str, user: str, ts: int) -> bytes:
 
         jsonog = json_encoder.encode(og)
 
+        # Cap the amount of time to consider a response valid.
+        expiration_ms = min(expiration_ms, ONE_DAY)
+
         # store OG in history-aware DB cache
         await self.store.store_url_cache(
             url,
             media_info.response_code,
             media_info.etag,
-            expiration_ts_ms,
+            media_info.created_ts_ms + expiration_ms,
             jsonog,
             media_info.filesystem_id,
             media_info.created_ts_ms,
@@ -448,32 +452,34 @@ async def _precache_image_url(
             media_info: The media being previewed.
             og: The Open Graph dictionary. This is modified with image information.
         """
-        #
+        # If there's no image or it is blank, there's nothing to do.
+        if "og:image" not in og or not og["og:image"]:
+            return
+
         # FIXME: it might be cleaner to use the same flow as the main /preview_url
         # request itself and benefit from the same caching etc.  But for now we
         # just rely on the caching on the master request to speed things up.
-        if "og:image" in og and og["og:image"]:
-            image_info = await self._download_url(
-                _rebase_url(og["og:image"], media_info.uri), user
-            )
-
-            if _is_media(image_info.media_type):
-                # TODO: make sure we don't choke on white-on-transparent images
-                file_id = image_info.filesystem_id
-                dims = await self.media_repo._generate_thumbnails(
-                    None, file_id, file_id, image_info.media_type, url_cache=True
-                )
-                if dims:
-                    og["og:image:width"] = dims["width"]
-                    og["og:image:height"] = dims["height"]
-                else:
-                    logger.warning("Couldn't get dims for %s", og["og:image"])
+        image_info = await self._download_url(
+            _rebase_url(og["og:image"], media_info.uri), user
+        )
 
-                og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
-                og["og:image:type"] = image_info.media_type
-                og["matrix:image:size"] = image_info.media_length
+        if _is_media(image_info.media_type):
+            # TODO: make sure we don't choke on white-on-transparent images
+            file_id = image_info.filesystem_id
+            dims = await self.media_repo._generate_thumbnails(
+                None, file_id, file_id, image_info.media_type, url_cache=True
+            )
+            if dims:
+                og["og:image:width"] = dims["width"]
+                og["og:image:height"] = dims["height"]
             else:
-                del og["og:image"]
+                logger.warning("Couldn't get dims for %s", og["og:image"])
+
+            og["og:image"] = f"mxc://{self.server_name}/{image_info.filesystem_id}"
+            og["og:image:type"] = image_info.media_type
+            og["matrix:image:size"] = image_info.media_length
+        else:
+            del og["og:image"]
 
     def _start_expire_url_cache_data(self) -> Deferred:
         return run_as_background_process(

@@ -608,7 +608,7 @@ def test_oembed_photo(self):
         client.dataReceived(
             (
                 b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
-                b'Content-Type: image/png; charset="utf8"\r\n\r\n'
+                b"Content-Type: image/png\r\n\r\n"
             )
             % (len(SMALL_PNG),)
             + SMALL_PNG
@@ -624,7 +624,7 @@ def test_oembed_photo(self):
         self.assertTrue(channel.json_body["og:image"].startswith("mxc://"))
         self.assertEqual(channel.json_body["og:image:height"], 1)
         self.assertEqual(channel.json_body["og:image:width"], 1)
-        self.assertTrue(channel.json_body["og:image:type"].startswith("image/png"))
+        self.assertEqual(channel.json_body["og:image:type"], "image/png")
 
     def test_oembed_rich(self):
         """Test an oEmbed endpoint which returns HTML content via the 'rich' type."""