From d2be98ca93d75a0d57fb096a2227646309ebd2fa Mon Sep 17 00:00:00 2001 From: Diana <67607236+dbelokon@users.noreply.github.com> Date: Tue, 18 Jan 2022 17:27:27 -0500 Subject: [PATCH] Implement videos as a type of posts (#2581) YouTube videos are tracked like regular blogposts, as they also come from a type of RSS feed. We make some changes to adjust to the type of RSS response we get so we can extract the necessary data. --- src/api/posts/src/data/post.js | 20 ++ src/api/posts/test/posts.test.js | 44 +++- src/backend/data/post.js | 22 ++ src/backend/feed/processor.js | 6 + test/fixtures.js | 17 ++ test/post.test.js | 29 ++- test/test_files/humphd-yt-channel.xml | 360 ++++++++++++++++++++++++++ 7 files changed, 495 insertions(+), 3 deletions(-) create mode 100644 test/test_files/humphd-yt-channel.xml diff --git a/src/api/posts/src/data/post.js b/src/api/posts/src/data/post.js index dd711c4f5e..df2c43df81 100644 --- a/src/api/posts/src/data/post.js +++ b/src/api/posts/src/data/post.js @@ -35,6 +35,25 @@ function ensureFeed(feed) { return feed instanceof Feed ? Promise.resolve(feed) : Feed.byId(feed); } +/** + * @param {string} url + * @returns {"video" | "blogpost"} the post's type + */ +function determinePostType(url) { + try { + const associatedLink = new URL(url); + + if (associatedLink.hostname.includes('youtube.com')) { + return 'video'; + } + // Assume that we are dealing with a blogpost if we + // are not dealing with videos + return 'blogpost'; + } catch { + return 'blogpost'; + } +} + class Post { constructor(title, html, datePublished, dateUpdated, postUrl, guid, feed) { // Use the post's guid as our unique identifier @@ -46,6 +65,7 @@ class Post { // create an absolute url if postURL is relative this.url = new URL(postUrl, feed.url).href; this.guid = guid; + this.type = determinePostType(this.url); if (!(feed instanceof Feed)) { throw new Error(`expected feed to be a Feed Object, got '${feed}'`); diff --git a/src/api/posts/test/posts.test.js b/src/api/posts/test/posts.test.js index ac3082f136..558b840eb6 100644 --- a/src/api/posts/test/posts.test.js +++ b/src/api/posts/test/posts.test.js @@ -82,6 +82,7 @@ describe('/posts', () => { describe('test /posts/:id responses', () => { const missingGuid = 'http://missing-guid'; + const youtubeGuid = 'http://youtube.com'; const randomGuid = 'http://random-guid'; const feed1 = new Feed( @@ -93,7 +94,16 @@ describe('test /posts/:id responses', () => { null ); - beforeAll(() => Promise.resolve(addFeed(feed1))); + const youtubeFeed = new Feed( + 'YouTube Author', + 'http://youtube.com/feed/videos.xml', + 'user', + 'https://youtube.com/', + null, + null + ); + + beforeAll(() => Promise.all([addFeed(feed1), addFeed(youtubeFeed)])); const addedPost1 = new Post( 'Post Title', @@ -105,11 +115,23 @@ describe('test /posts/:id responses', () => { feed1 ); - beforeAll(() => Promise.resolve(addPost(addedPost1))); + const addedVideo1 = new Post( + 'YouTube Video Title', + 'YouTube Video Description', + new Date('2009-09-07T22:20:00.000Z'), + new Date('2009-09-07T22:23:00.000Z'), + 'https://youtube.com/watch', + youtubeGuid, + youtubeFeed + ); + + beforeAll(() => Promise.all([addPost(addedPost1), addPost(addedVideo1)])); beforeAll(() => { feed1.save(); + youtubeFeed.save(); addedPost1.save(); + addedVideo1.save(); }); test('A post with an id should be returned and match the id of a post from redis', async () => { @@ -160,4 +182,22 @@ describe('test /posts/:id responses', () => { expect(res.status).toEqual(404); expect(res.get('Content-length')).toEqual('46'); }); + + test('request a post with type equal to "blogpost"', async () => { + const res = await request(app).get(`/${addedPost1.id}`).set('Accept', 'application/json'); + const post = await getPost(`${addedPost1.id}`); + expect(res.status).toEqual(200); + expect(res.get('Content-type')).toContain('application/json'); + expect(res.body.id).toEqual(post.id); + expect(res.body.type).toEqual('blogpost'); + }); + + test('request a post with type equal to "video"', async () => { + const res = await request(app).get(`/${addedVideo1.id}`).set('Accept', 'application/json'); + const post = await getPost(`${addedVideo1.id}`); + expect(res.status).toEqual(200); + expect(res.get('Content-type')).toContain('application/json'); + expect(res.body.id).toEqual(post.id); + expect(res.body.type).toEqual('video'); + }); }); diff --git a/src/backend/data/post.js b/src/backend/data/post.js index 098337d3c1..3ae82f8dcf 100644 --- a/src/backend/data/post.js +++ b/src/backend/data/post.js @@ -39,6 +39,25 @@ function ensureFeed(feed) { return feed instanceof Feed ? Promise.resolve(feed) : Feed.byId(feed); } +/** + * @param {string} url + * @returns {"video" | "blogpost"} the post's type + */ +function determinePostType(url) { + try { + const associatedLink = new URL(url); + + if (associatedLink.hostname.includes('youtube.com')) { + return 'video'; + } + // Assume that we are dealing with a blogpost if we + // are not dealing with videos + return 'blogpost'; + } catch { + return 'blogpost'; + } +} + class Post { constructor(title, html, datePublished, dateUpdated, postUrl, guid, feed) { // Use the post's guid as our unique identifier @@ -50,6 +69,7 @@ class Post { // create an absolute url if postURL is relative this.url = new URL(postUrl, feed.url).href; this.guid = guid; + this.type = determinePostType(this.url); // We expect to get a real Feed vs. a feed id if (!(feed instanceof Feed)) { @@ -96,6 +116,8 @@ class Post { if (article.contentEncoded) article.content = article.contentEncoded; + if (article.mediaGroup) article.content = article.mediaGroup['media:description']; + // A valid RSS/Atom feed can have missing fields that we care about. // Keep track of any that are missing, and throw if necessary. const missing = []; diff --git a/src/backend/feed/processor.js b/src/backend/feed/processor.js index 0f9860ec7e..62102fbbc1 100644 --- a/src/backend/feed/processor.js +++ b/src/backend/feed/processor.js @@ -184,13 +184,19 @@ module.exports = async function processor(job) { ['pubDate', 'pubdate'], ['creator', 'author'], ['content:encoded', 'contentEncoded'], + ['updated', 'date'], + ['id', 'guid'], + ['media:group', 'mediaGroup'], + ['published', 'pubdate'], ], }, }, feed ) ); + const articles = await parser.parseURL(feed.url); + // Transform the list of articles to a list of Post objects await articlesToPosts(articles.items, feed); diff --git a/test/fixtures.js b/test/fixtures.js index a6c9c55356..19a06715c7 100644 --- a/test/fixtures.js +++ b/test/fixtures.js @@ -18,6 +18,12 @@ const getRealWorldRssUri = () => 'https://blog.humphd.org/tag/seneca/rss/'; const getRealWorldRssBody = () => fs.readFileSync(path.join(__dirname, './test_files/blog.humphd.org.rss')); +// Use David Humphrey's channel for a realistic test case of YouTube channel +const getRealWorldYouTubeFeedUri = () => + 'https://www.youtube.com/feeds/videos.xml?channel_id=UCqaMbMDf01BLttof1lHAo2A'; +const getRealWorldYouTubeFeedBody = () => + fs.readFileSync(path.join(__dirname, './test_files/humphd-yt-channel.xml')); + // Portion of https://www.feedforall.com/sample.xml const getValidFeedBody = () => ` @@ -131,6 +137,7 @@ exports.getAtomUri = getAtomUri; exports.getRssUri = getRssUri; exports.getHtmlUri = getHtmlUri; exports.getRealWorldRssUri = getRealWorldRssUri; +exports.getRealWorldYouTubeFeedUri = getRealWorldYouTubeFeedUri; exports.stripProtocol = stripProtocol; exports.getInvalidDescription = getInvalidDescription; @@ -162,4 +169,14 @@ exports.nockRealWorldRssResponse = function (headers = {}) { nockResponse(getRealWorldRssUri(), getRealWorldRssBody(), 200, 'application/rss+xml', headers); }; +exports.nockRealWorldYouTubeFeedResponse = function (headers = {}) { + nockResponse( + getRealWorldYouTubeFeedUri(), + getRealWorldYouTubeFeedBody(), + 200, + 'application/rss+xml', + headers + ); +}; + exports.createMockJobObjectFromFeedId = (id) => ({ data: { id } }); diff --git a/test/post.test.js b/test/post.test.js index b9d295e061..31afc5f951 100644 --- a/test/post.test.js +++ b/test/post.test.js @@ -6,12 +6,18 @@ const parse = new Parser({ ['pubDate', 'pubdate'], ['creator', 'author'], ['content:encoded', 'contentEncoded'], + ['updated', 'date'], + ['id', 'guid'], + ['media:group', 'mediaGroup'], + ['published', 'pubdate'], ], }, }); const { nockRealWorldRssResponse, + nockRealWorldYouTubeFeedResponse, + getRealWorldYouTubeFeedUri, getRealWorldRssUri, getInvalidDescription, } = require('./fixtures'); @@ -32,6 +38,7 @@ describe('Post data class tests', () => { url: 'https://user.post.com/?post-id=123', guid: 'https://user.post.com/?post-id=123&guid', id: hash('https://user.post.com/?post-id=123&guid'), + type: 'blogpost', }; beforeAll(async () => { @@ -175,7 +182,7 @@ describe('Post data class tests', () => { expect(result).toBe(null); }); - describe('Post.createFromArticle() tests', () => { + describe('Post.createFromArticle() with blog feeds tests', () => { let articles; beforeEach(async () => { nockRealWorldRssResponse(); @@ -282,4 +289,24 @@ describe('Post data class tests', () => { await expect(Post.createFromArticle(article, feed)).rejects.toThrow(); }); }); + + describe('Post.createFromArticle() with youtube feeds tests', () => { + let articles; + beforeEach(async () => { + nockRealWorldYouTubeFeedResponse(); + articles = await parse.parseURL(getRealWorldYouTubeFeedUri()); + expect(Array.isArray(articles.items)).toBe(true); + expect(articles.items.length).toBe(15); + }); + + test('Post.createFromArticle() should create Post with YouTube video article', async () => { + const article = articles.items[0]; + const id = await Post.createFromArticle(article, feed); + const videoPost = await Post.byId(id); + + expect(videoPost.title).toBe('DPS909 OSD600 Week 03 - Fixing a Bug in the Azure JS SDK'); + expect(videoPost.url).toBe('https://www.youtube.com/watch?v=mNuHA7vH6Wc'); + expect(videoPost.type).toBe('video'); + }); + }); }); diff --git a/test/test_files/humphd-yt-channel.xml b/test/test_files/humphd-yt-channel.xml new file mode 100644 index 0000000000..de3184fb7b --- /dev/null +++ b/test/test_files/humphd-yt-channel.xml @@ -0,0 +1,360 @@ + + + + yt:channel:UCqaMbMDf01BLttof1lHAo2A + UCqaMbMDf01BLttof1lHAo2A + David Humphrey + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2015-07-11T20:00:29+00:00 + + yt:video:mNuHA7vH6Wc + mNuHA7vH6Wc + UCqaMbMDf01BLttof1lHAo2A + DPS909 OSD600 Week 03 - Fixing a Bug in the Azure JS SDK + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-09-24T19:04:25+00:00 + 2021-10-27T10:46:10+00:00 + + DPS909 OSD600 Week 03 - Fixing a Bug in the Azure JS SDK + + + Walkthrough and discussion of fixing a bug in https://github.com/Azure/azure-sdk-for-js. Issue at https://github.com/Azure/azure-sdk-for-js/issues/15772. PR at https://github.com/Azure/azure-sdk-for-js/pull/17820. + + + + + + + + yt:video:GUXjyPp433M + GUXjyPp433M + UCqaMbMDf01BLttof1lHAo2A + DPS909/OSD600 Fall 2021 Week 01 Part 2 - Week 1 Overview + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-09-06T12:00:06+00:00 + 2021-09-16T13:20:23+00:00 + + DPS909/OSD600 Fall 2021 Week 01 Part 2 - Week 1 Overview + + + Overview of https://github.com/Seneca-CDOT/topics-in-open-source-2021 + + + + + + + + yt:video:rOBX6vRch7U + rOBX6vRch7U + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 12 WebSockets Part 1 - Intro and Writing the Server + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-04-04T13:00:30+00:00 + 2021-11-06T04:28:19+00:00 + + WEB422 Week 12 WebSockets Part 1 - Intro and Writing the Server + + + Introduction to Web Sockets and writing a Web Socket Server for an Emoji Chat App. Code at https://github.com/humphd/web422-week12 + + + + + + + + yt:video:lU9db7Dd95I + lU9db7Dd95I + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 12 WebSockets Part 2 - Writing the React Front-End + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-04-04T13:00:30+00:00 + 2021-04-05T23:01:11+00:00 + + WEB422 Week 12 WebSockets Part 2 - Writing the React Front-End + + + Creating the React Front-End for our Emoji Chat App. Code at https://github.com/humphd/web422-week12 + + + + + + + + yt:video:motlCbDr6c4 + motlCbDr6c4 + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 05 Part 4 - Deploying React Apps + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-02-07T14:00:13+00:00 + 2021-03-11T03:07:39+00:00 + + WEB422 Week 05 Part 4 - Deploying React Apps + + + Deploying a Create-React-App (https://create-react-app.dev/docs/deployment) to various services: GitHub Pages (https://pages.github.com/), Vercel (https://vercel.com/), Netlify (https://www.netlify.com/), Begin (https://begin.com/). + +Example deploy of (https://github.com/humphd/web422-week-05-artsy) to Vercel https://web422-week-05-artsy.vercel.app/ and GitHub Pages https://humphd.github.io/web422-week-05-artsy/build/ + + + + + + + + yt:video:TaSAzbTltUo + TaSAzbTltUo + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 05 Part 3 - Using Formik, Joi, and Yup with React + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-02-07T14:00:07+00:00 + 2021-08-19T07:07:52+00:00 + + WEB422 Week 05 Part 3 - Using Formik, Joi, and Yup with React + + + Example of working with Form data in the browser with Formik, and server using Express, and how to validate the data using Joi and Yup on both ends. Code available at https://github.com/humphd/web422-week05-formik. + + + + + + + + yt:video:NPf-Y2Ek6a4 + NPf-Y2Ek6a4 + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 05 Part 1 - Intro to React Forms + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-02-07T14:00:06+00:00 + 2021-05-08T09:50:22+00:00 + + WEB422 Week 05 Part 1 - Intro to React Forms + + + Introduction to React Forms, discussion of code in notes https://web422.ca/notes/react-forms. See also the official React docs on forms https://reactjs.org/docs/forms.html + + + + + + + + yt:video:5i10iPnrfmw + 5i10iPnrfmw + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 05 Part 2 - Using a Form to Load API Data + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-02-07T14:00:01+00:00 + 2021-11-03T23:24:37+00:00 + + WEB422 Week 05 Part 2 - Using a Form to Load API Data + + + Building an interactive Form in React that uses the Art Institute of Chicago REST API (http://api.artic.edu/docs/). Code available at https://github.com/humphd/web422-week-05-artsy + + + + + + + + yt:video:zcJA5YBzJG8 + zcJA5YBzJG8 + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 04 Part 3 - Using Third Party React Components for Routing and UI + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-01-31T14:00:30+00:00 + 2021-03-11T03:10:12+00:00 + + WEB422 Week 04 Part 3 - Using Third Party React Components for Routing and UI + + + Discussion of React Routing using React Router (https://reactrouter.com/web) and how we can use 3rd Party React Components like https://react-bootstrap.github.io/ + + + + + + + + yt:video:hgew3p5RriY + hgew3p5RriY + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 04 Part 1 - React Events + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-01-31T14:00:12+00:00 + 2021-03-11T04:53:59+00:00 + + WEB422 Week 04 Part 1 - React Events + + + Discussion of React Events and how they related to DOM Events (notes: https://web422.ca/notes/react-events-and-data). Code from video available at https://gist.github.com/humphd/e353ab107e561c496bf9eec78fa8cac4 + + + + + + + + yt:video:dDMgZ7TfPaI + dDMgZ7TfPaI + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 04 Part 4 - GitHub API Example + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-01-31T14:00:09+00:00 + 2021-03-11T04:50:40+00:00 + + WEB422 Week 04 Part 4 - GitHub API Example + + + GitHub API Example with React Router, Bootstrap React, and useSWR 3rd party Components. Code available at https://github.com/humphd/web422-week04-github-example. + + + + + + + + yt:video:WdT_coWe4ms + WdT_coWe4ms + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 04 Part 2 - React Events and Data Loading + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-01-31T14:00:06+00:00 + 2021-10-23T12:22:30+00:00 + + WEB422 Week 04 Part 2 - React Events and Data Loading + + + Working with Events, Data Loading, and Conditional Rendering in React. Code available at https://github.com/humphd/web422-week04-events-data-loading + + + + + + + + yt:video:rx4KuxqD3CA + rx4KuxqD3CA + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 03 Part 2 - Modern JavaScript in React + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-01-24T14:00:31+00:00 + 2021-09-12T04:43:29+00:00 + + WEB422 Week 03 Part 2 - Modern JavaScript in React + + + Discussion of some of the newer syntax in JavaScript that will be used extensively in React. See https://reactjs.org/docs/getting-started.html#javascript-resources and https://developer.mozilla.org/en-US/docs/Web/JavaScript/A_re-introduction_to_JavaScript and + + + + + + + + yt:video:wC0TOPzrLTI + wC0TOPzrLTI + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 03 Part 1 - React Intro + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-01-24T14:00:13+00:00 + 2021-01-24T14:00:13+00:00 + + WEB422 Week 03 Part 1 - React Intro + + + Intro to React. Discussion of some of the main ideas and philosophies of building web applications with React. + + + + + + + + yt:video:n1A52kPfPtI + n1A52kPfPtI + UCqaMbMDf01BLttof1lHAo2A + WEB422 Week 03 Part 4 - Writing an App with React + + + David Humphrey + https://www.youtube.com/channel/UCqaMbMDf01BLttof1lHAo2A + + 2021-01-24T14:00:12+00:00 + 2021-01-28T21:28:06+00:00 + + WEB422 Week 03 Part 4 - Writing an App with React + + + Rewrite of the Week 1 reqres.in API front-end (https://github.com/humphd/web422-week01) in React. Code available at https://github.com/humphd/web422-week03-react. Live version at https://humphd.github.io/web422-week03-react/dist/ + + + + + + + \ No newline at end of file