Skip to content

Commit

Permalink
Migrated most of /scripts/lib to TypeScript (#42)
Browse files Browse the repository at this point in the history
* Migrated most of `/scripts/lib` to TypeScript

* Working through more types

* Got everything working

* Migrated from config file to dotenv so things run on CI
  • Loading branch information
jwngr committed Jul 14, 2024
1 parent f56291b commit 6b8e148
Show file tree
Hide file tree
Showing 57 changed files with 1,364 additions and 1,143 deletions.
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
DARK_SKY_API_KEY=""
SENTRY_DSN=""
IS_SENTRY_ENABLED=""
4 changes: 4 additions & 0 deletions .github/workflows/scripts-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ jobs:
- name: Lint
run: npm run lint
working-directory: scripts
env:
DARK_SKY_API_KEY: ${{ secrets.DARK_SKY_API_KEY }}
SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
IS_SENTRY_ENABLED: ${{ secrets.IS_SENTRY_ENABLED }}
6 changes: 1 addition & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ coverage/
dist/

# Configuration files
config/config.json
.env

# npm / yarn
npm-debug.log*
Expand All @@ -21,7 +21,3 @@ firebase-debug.log

# Miscellaneous
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local
9 changes: 0 additions & 9 deletions config/example.config.json

This file was deleted.

9 changes: 7 additions & 2 deletions docs/server-setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,20 @@ server for Six Degrees of Wikipedia.
$ git push # Enter password for jwngr-ops
```

1. Add a `.env` file to the root of the repo to set environment variables:

```bash
$ cp .env.example .env
# Fill in DARK_SKY_API_KEY and SENTRY_DSN in .env, and make sure to enable Sentry via IS_SENTRY_ENABLED!
```

1. Install the required npm dependencies:

```bash
$ cd scripts/
$ npm install
```

1. Copy the `config.json` into the repo's `config/` directory, making sure to enable Sentry.
1. Run `crontab -e` and add the following cron jobs to that file:

```
Expand Down
12 changes: 7 additions & 5 deletions scripts/archive/fetchUniqueLocations.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import {transformForAllSeasons} from '../../website/src/resources/schedules';
import {Logger} from '../lib/logger';

Logger.info('Fetching unique locations...');
const logger = new Logger({isSentryEnabled: false});

logger.info('Fetching unique locations...');

let gamesCount = 0;
let locations = new Set();
Expand All @@ -16,12 +18,12 @@ transformForAllSeasons((gameData) => {
locations.add(`${city}|||${state || country}|||${stadium || ''}`);
});

Logger.info('GAMES COUNT:', gamesCount);
Logger.info('LOCATIONS COUNT:', locations.size);
logger.info('GAMES COUNT:', gamesCount);
logger.info('LOCATIONS COUNT:', locations.size);

locations.forEach((location) => {
const [city, stateOrCountry, stadium] = location.split('|||');
Logger.info(`${city}\t${stateOrCountry}\t${stadium}`);
logger.info(`${city}\t${stateOrCountry}\t${stadium}`);
});

Logger.success('Unique locations fetched!');
logger.success('Unique locations fetched!');
37 changes: 20 additions & 17 deletions scripts/archive/generateDecadeCsvs.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,32 @@ import path from 'path';

import format from 'date-fns/format';
import _ from 'lodash';
import range from 'lodash/range';

import {getForSeason} from '../../website/src/resources/schedules';
import {CURRENT_SEASON} from '../lib/constants';
import {Logger} from '../lib/logger';

const logger = new Logger({isSentryEnabled: false});

const OUTPUT_DATA_DIRECTORY = path.resolve(__dirname, '../../data/decadeCsvs');

const DECADES = [
_.range(1887, 1890),
_.range(1892, 1900),
_.range(1900, 1910),
_.range(1910, 1920),
_.range(1920, 1930),
_.range(1930, 1940),
_.range(1940, 1950),
_.range(1950, 1960),
_.range(1960, 1970),
_.range(1970, 1980),
_.range(1980, 1990),
_.range(1990, 2000),
_.range(2000, 2010),
_.range(2010, 2020),
_.range(2020, CURRENT_SEASON),
range(1887, 1890),
range(1892, 1900),
range(1900, 1910),
range(1910, 1920),
range(1920, 1930),
range(1930, 1940),
range(1940, 1950),
range(1950, 1960),
range(1960, 1970),
range(1970, 1980),
range(1980, 1990),
range(1990, 2000),
range(2000, 2010),
range(2010, 2020),
range(2020, CURRENT_SEASON),
];

const stats = [
Expand All @@ -48,7 +51,7 @@ const stats = [
{name: 'possession', text: 'Possession'},
];

Logger.info('Generating CSVs...');
logger.info('Generating CSVs...');

DECADES.forEach((seasons) => {
const firstYear = seasons[0];
Expand Down Expand Up @@ -90,4 +93,4 @@ DECADES.forEach((seasons) => {
fs.writeFileSync(outputFilename, lines.join('\n'));
});

Logger.success('CSVs generated!');
logger.success('CSVs generated!');
26 changes: 14 additions & 12 deletions scripts/archive/scrapeGameDetails_WP.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ import fs from 'fs';
import path from 'path';

import cheerio from 'cheerio';
import _ from 'lodash';
import range from 'lodash/range';
import request from 'request-promise';

import {Logger} from '../lib/logger';

const logger = new Logger({isSentryEnabled: false});

const INPUT_DATA_DIRECTORY = path.resolve(__dirname, '../../data/schedules');

const getHtmlForUrl = (url) => {
Expand All @@ -19,7 +21,7 @@ const getHtmlForUrl = (url) => {
};

const fetchGameDetailsForYear = (year) => {
Logger.info(`Fetching year ${year}.`);
logger.info(`Fetching year ${year}.`);

return getHtmlForUrl(
`https://en.wikipedia.org/wiki/${year}_Notre_Dame_Fighting_Irish_football_team`
Expand Down Expand Up @@ -53,7 +55,7 @@ const fetchGameDetailsForYear = (year) => {
let rowCellText = $(elem).text().trim();

// Fix formatting issue in 1961 site data.
if (j === headerNames.indexOf('Site') && !_.includes(rowCellText, ' • ')) {
if (j === headerNames.indexOf('Site') && !rowCellText.includes(' • ')) {
let lastCharWasLowercase = false;
rowCellText.split('').forEach((char, k) => {
if (char >= 'A' && char <= 'Z' && lastCharWasLowercase) {
Expand All @@ -65,7 +67,7 @@ const fetchGameDetailsForYear = (year) => {
}

if (j === headerNames.indexOf('Site')) {
if (!_.includes(rowCellText, ' • ')) {
if (!rowCellText.includes(' • ')) {
rowCellText = ` • ${rowCellText}`;
}
}
Expand Down Expand Up @@ -97,9 +99,9 @@ const fetchGameDetailsForYear = (year) => {
}
if (opponentIndex !== -1) {
const opponent = rowCellValues[opponentIndex];
const isHomeGame = !_.startsWith(opponent, 'at');
const isHomeGame = !opponent.startsWith('at');
if (gamesData[i - 1].isHomeGame !== isHomeGame) {
// Logger.info('HOME / AWAY MISMATCH:', year, i - 1, opponent);
// logger.info('HOME / AWAY MISMATCH:', year, i - 1, opponent);
}
}

Expand All @@ -108,7 +110,7 @@ const fetchGameDetailsForYear = (year) => {
if (resultIndex !== -1) {
const result = rowCellValues[resultIndex][0];
if (gamesData[i - 1].result !== result) {
// Logger.info('RESULT MISMATCH:', year, i - 1);
// logger.info('RESULT MISMATCH:', year, i - 1);
}
}

Expand All @@ -119,7 +121,7 @@ const fetchGameDetailsForYear = (year) => {
let city;
let state;
let stateAndParens;
if (_.includes(location, ',')) {
if (location.includes(',')) {
[city, stateAndParens] = location.split(', ');
state = stateAndParens.split(' (')[0];
} else {
Expand Down Expand Up @@ -152,25 +154,25 @@ const fetchGameDetailsForYear = (year) => {
}
}
} catch (error) {
Logger.error(`Failed to parse schedule for ${year}:`, {error, rowCellValues});
logger.error(`Failed to parse schedule for ${year}:`, {error, rowCellValues});
throw error;
}
}
});

fs.writeFileSync(filename, JSON.stringify(gamesData, null, 2));

Logger.info(`Success ${year}!`);
logger.info(`Success ${year}!`);
})
.catch((error) => {
let errorMessage = error.message;
if (error.statusCode === 404) {
errorMessage = '404 page not found.';
}
Logger.error(`Failed to fetch schedule for ${year}:`, {errorMessage});
logger.error(`Failed to fetch schedule for ${year}:`, {errorMessage});
});
};

_.range(1900, 2017).forEach((year) => {
range(1900, 2017).forEach((year) => {
fetchGameDetailsForYear(year);
});
8 changes: 5 additions & 3 deletions scripts/archive/scrapeGameIdsAndApRankings_SR.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import request from 'request-promise';
import {CURRENT_SEASON} from '../lib/constants';
import {Logger} from '../lib/logger';

const logger = new Logger({isSentryEnabled: false});

const INPUT_DATA_DIRECTORY = path.resolve(__dirname, '../../website/src/resources/schedules');

const SPORTS_REFERENCE_GAME_STATS_START_YEAR = 2000;
Expand Down Expand Up @@ -58,7 +60,7 @@ const promises = years.map((year) => {
};
})
.catch((error) => {
Logger.error(`Error fetching game IDs and AP rankings for ${year}`, {error});
logger.error(`Error fetching game IDs and AP rankings for ${year}`, {error});
});
});

Expand Down Expand Up @@ -105,8 +107,8 @@ Promise.all(promises)
fs.writeFileSync(filename, JSON.stringify(yearData, null, 2));
});

Logger.success('Success!');
logger.success('Success!');
})
.catch((error) => {
Logger.fail(`Error fetching all game IDs`, error);
logger.fail(`Error fetching all game IDs`, error);
});
10 changes: 6 additions & 4 deletions scripts/archive/scrapeGameStats_SR.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import puppeteer from 'puppeteer';
import {CURRENT_SEASON} from '../lib/constants';
import {Logger} from '../lib/logger';

const logger = new Logger({isSentryEnabled: false});

const INPUT_DATA_DIRECTORY = path.resolve(__dirname, '../../website/src/resources/schedules');

process.setMaxListeners(Infinity);
Expand All @@ -18,7 +20,7 @@ const scrapeGameStats = async (gameId) => {

const url = `https://www.sports-reference.com/cfb/boxscores/${gameId}.html`;

Logger.info(`Scraping ${url}`);
logger.info(`Scraping ${url}`);

await page.goto(url, {
waitUntil: 'networkidle2',
Expand Down Expand Up @@ -77,7 +79,7 @@ const fn = async () => {
const promises = _.map(yearData, (gameData) => {
if ('sportsReferenceGameId' in gameData) {
return scrapeGameStats(gameData.sportsReferenceGameId).catch((error) => {
Logger.error(`Failed to scrape game stats for ${gameData.sportsReferenceGameId}:`, {error});
logger.error(`Failed to scrape game stats for ${gameData.sportsReferenceGameId}:`, {error});
throw error;
});
} else {
Expand All @@ -96,9 +98,9 @@ const fn = async () => {

fs.writeFileSync(filename, JSON.stringify(yearData, null, 2));

Logger.success('Scraped game stats');
logger.success('Scraped game stats');
} catch (error) {
Logger.error('Failed to scrape game stats', {error});
logger.error('Failed to scrape game stats', {error});
} finally {
browser.close();
}
Expand Down
11 changes: 6 additions & 5 deletions scripts/archive/scrapeHeadCoachesAndBowlGames_WP.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ import fs from 'fs';
import path from 'path';

import cheerio from 'cheerio';
import _ from 'lodash';
import request from 'request-promise';

import {Logger} from '../lib/logger';

const logger = new Logger({isSentryEnabled: false});

const INPUT_DATA_DIRECTORY = path.resolve(__dirname, '../../data/schedules');

const getHtmlForUrl = (url) => {
Expand All @@ -18,7 +19,7 @@ const getHtmlForUrl = (url) => {
});
};

Logger.info(`Fetching head coaches and bowl games.`);
logger.info(`Fetching head coaches and bowl games.`);

getHtmlForUrl(`https://en.wikipedia.org/wiki/List_of_Notre_Dame_Fighting_Irish_football_seasons`)
.then(($) => {
Expand Down Expand Up @@ -59,7 +60,7 @@ getHtmlForUrl(`https://en.wikipedia.org/wiki/List_of_Notre_Dame_Fighting_Irish_f
let bowlGame = rowCellValues[5];
if (bowlGame) {
bowlGame = bowlGame.slice(2).replace('†', '');
if (!_.includes(bowlGame, ' ') || bowlGame === 'Champs Sports') {
if (!bowlGame.includes(' ') || bowlGame === 'Champs Sports') {
bowlGame += ' Bowl';
}
gamesData[gamesData.length - 1].isBowlGame = true;
Expand All @@ -71,8 +72,8 @@ getHtmlForUrl(`https://en.wikipedia.org/wiki/List_of_Notre_Dame_Fighting_Irish_f
}
});

Logger.success('Fetched head coaches and bowl games');
logger.success('Fetched head coaches and bowl games');
})
.catch((error) => {
Logger.error(`Failed to fetch head coaches and bowl games:`, error.message);
logger.error(`Failed to fetch head coaches and bowl games:`, error.message);
});
Loading

0 comments on commit 6b8e148

Please sign in to comment.