Adding title fallback + Bubo version to output + formatting

This commit is contained in:
George Mandis 2021-12-05 13:34:59 -08:00
parent 4afe47bf4f
commit 16a2525518
3 changed files with 123 additions and 74 deletions

View File

@ -15,13 +15,22 @@ import Parser from "rss-parser";
import { Feeds, FeedItem } from "./@types/bubo"; import { Feeds, FeedItem } from "./@types/bubo";
import { Response } from "node-fetch"; import { Response } from "node-fetch";
import { render } from "./renderer.js"; import { render } from "./renderer.js";
import { getLink, getTitle, getTimestamp, parseFeed, getFeedList } from "./utilities.js"; import {
getLink,
getTitle,
getTimestamp,
parseFeed,
getFeedList,
getBuboInfo
} from "./utilities.js";
import { writeFile } from "fs/promises"; import { writeFile } from "fs/promises";
import chalk from "chalk"; import chalk from "chalk";
const buboInfo = await getBuboInfo();
const parser = new Parser(); const parser = new Parser();
const feedList = await getFeedList(); const feedList = await getFeedList();
const feedListLength = Object.entries(feedList).flat(2).length - Object.keys(feedList).length; const feedListLength =
Object.entries(feedList).flat(2).length - Object.keys(feedList).length;
/** /**
* contentFromAllFeeds = Contains normalized, aggregated feed data and is passed to template renderer at the end * contentFromAllFeeds = Contains normalized, aggregated feed data and is passed to template renderer at the end
@ -32,7 +41,8 @@ const errors: unknown[] = [];
// benchmarking data + utility // benchmarking data + utility
const initTime = Date.now(); const initTime = Date.now();
const benchmark = (startTime: number) => chalk.cyanBright.bold(`(${(Date.now() - startTime) / 1000} seconds)`); const benchmark = (startTime: number) =>
chalk.cyanBright.bold(`${(Date.now() - startTime) / 1000} seconds`);
/** /**
* These values are used to control throttling/batching the fetches: * These values are used to control throttling/batching the fetches:
@ -49,7 +59,6 @@ const success = chalk.bold.green;
// to feedListLength and know when we're finished. // to feedListLength and know when we're finished.
let completed = 0; let completed = 0;
/** /**
* finishBuild * finishBuild
* -- * --
@ -62,12 +71,17 @@ const finishBuild: () => void = async () => {
// generate the static HTML output from our template renderer // generate the static HTML output from our template renderer
const output = render({ const output = render({
data: contentFromAllFeeds, data: contentFromAllFeeds,
errors: errors errors: errors,
info: buboInfo
}); });
// write the output to public/index.html // write the output to public/index.html
await writeFile("./public/index.html", output); await writeFile("./public/index.html", output);
console.log(`Finished writing to output. ${benchmark(initTime)}`); console.log(
`\nFinished writing to output:\n- ${feedListLength} feeds in ${benchmark(
initTime
)}\n- ${errors.length} errors`
);
}; };
/** /**
@ -77,43 +91,54 @@ const finishBuild: () => void = async () => {
* @param { group, feed, startTime} * @param { group, feed, startTime}
* @returns Promise<void> * @returns Promise<void>
*/ */
const processFeed = ( const processFeed =
{ ({
group, feed, startTime group,
}: { group: string; feed: string, startTime: number } feed,
) => async (response: Response): Promise<void> => { startTime
}: {
group: string;
feed: string;
startTime: number;
}) =>
async (response: Response): Promise<void> => {
const body = await parseFeed(response); const body = await parseFeed(response);
completed++; completed++;
// skip to the next one if this didn't work out // skip to the next one if this didn't work out
if (!body) return; if (!body) return;
try { try {
const contents: FeedItem = const contents: FeedItem = (
(typeof body === "string" ? (await parser.parseString(body)) : body) as FeedItem; typeof body === "string" ? await parser.parseString(body) : body
) as FeedItem;
contents.feed = feed; contents.feed = feed;
contents.title = getTitle(contents); contents.title = getTitle(contents);
contents.link = getLink(contents); contents.link = getLink(contents);
// try to normalize date attribute naming // try to normalize date attribute naming
contents?.items?.forEach((item) => { contents?.items?.forEach(item => {
item.timestamp = getTimestamp(item); item.timestamp = getTimestamp(item);
item.title = getTitle(item); item.title = getTitle(item);
item.link = getLink(item); item.link = getLink(item);
}); });
contentFromAllFeeds[group].push(contents as object); contentFromAllFeeds[group].push(contents as object);
console.log(`${success("Successfully fetched:")} ${feed} ${benchmark(startTime)}`); console.log(
`${success("Successfully fetched:")} ${feed} - ${benchmark(startTime)}`
);
} catch (err) { } catch (err) {
console.log(`${error("Error processing:")} ${feed} ${benchmark(startTime)}`); console.log(
errors.push(`Error processing: ${feed} | ${err}`); `${error("Error processing:")} ${feed} - ${benchmark(
startTime
)}\n${err}`
);
errors.push(`Error processing: ${feed}\n\t${err}`);
} }
// if this is the last feed, go ahead and build the output // if this is the last feed, go ahead and build the output
(completed === feedListLength) && finishBuild(); completed === feedListLength && finishBuild();
}; };
// go through each group of feeds and process // go through each group of feeds and process
const processFeeds = () => { const processFeeds = () => {
@ -127,17 +152,18 @@ const processFeeds = () => {
setTimeout(() => { setTimeout(() => {
console.log(`Fetching: ${feed}...`); console.log(`Fetching: ${feed}...`);
fetch(feed).then(processFeed({ group, feed, startTime })).catch(err => { fetch(feed)
console.log(error(`Error fetching ${feed} ${benchmark(startTime)}`)); .then(processFeed({ group, feed, startTime }))
.catch(err => {
console.log(
error(`Error fetching ${feed} ${benchmark(startTime)}`)
);
errors.push(`Error fetching ${feed} ${err.toString()}`); errors.push(`Error fetching ${feed} ${err.toString()}`);
}); });
}, (idx % (feedListLength / MAX_CONNECTIONS)) * DELAY_MS); }, (idx % (feedListLength / MAX_CONNECTIONS)) * DELAY_MS);
idx++; idx++;
} }
} }
}; };
processFeeds(); processFeeds();

View File

@ -1,13 +1,13 @@
/* /*
* Return our renderer. * Return our renderer.
* Using Nunjucks out of the box. * Using Nunjucks out of the box.
* https://mozilla.github.io/nunjucks/ * https://mozilla.github.io/nunjucks/
*/ */
import nunjucks from "nunjucks"; import nunjucks from "nunjucks";
const env: nunjucks.Environment = nunjucks.configure({ autoescape: true }); const env: nunjucks.Environment = nunjucks.configure({ autoescape: true });
import { readFile } from "fs/promises"; import { readFile } from "fs/promises";
import { Feeds } from "./@types/bubo"; import { Feeds, JSONValue } from "./@types/bubo";
/** /**
* Global filters for my Nunjucks templates * Global filters for my Nunjucks templates
@ -17,21 +17,28 @@ env.addFilter("formatDate", function (dateString): string {
return !isNaN(date.getTime()) ? date.toLocaleDateString() : dateString; return !isNaN(date.getTime()) ? date.toLocaleDateString() : dateString;
}); });
env.addGlobal("now", (new Date()).toUTCString()); env.addGlobal("now", new Date().toUTCString());
// load the template // load the template
const template: string = const template: string = (
(await readFile( await readFile(new URL("../config/template.html", import.meta.url))
new URL("../config/template.html", import.meta.url) ).toString();
)).toString();
// generate the static HTML output from our template renderer // generate the static HTML output from our template renderer
const render = ({ data, errors }: { data: Feeds; errors: unknown[] }) => { const render = ({
data,
errors,
info
}: {
data: Feeds;
errors: unknown[];
info?: JSONValue;
}) => {
return env.renderString(template, { return env.renderString(template, {
data, data,
errors errors,
info
}); });
}; };
export { render }; export { render };

View File

@ -1,5 +1,5 @@
/* /*
There's a little inconcistency with how feeds report certain things like There's a little inconsistency with how feeds report certain things like
title, links and timestamps. These helpers try to normalize that bit and title, links and timestamps. These helpers try to normalize that bit and
provide an order-of-operations list of properties to look for. provide an order-of-operations list of properties to look for.
@ -13,27 +13,37 @@ import { FeedItem, JSONValue } from "./@types/bubo";
export const getLink = (obj: FeedItem): string => { export const getLink = (obj: FeedItem): string => {
const link_values: string[] = ["link", "url", "guid", "home_page_url"]; const link_values: string[] = ["link", "url", "guid", "home_page_url"];
const keys: string[] = Object.keys(obj); const keys: string[] = Object.keys(obj);
const link_property: string | undefined = link_values.find(link_value => keys.includes(link_value)); const link_property: string | undefined = link_values.find(link_value =>
return link_property ? obj[link_property] as string : ""; keys.includes(link_value)
);
return link_property ? (obj[link_property] as string) : "";
}; };
// fallback to URL for the title if not present
// fallback to URL for the title if not present (coupled to my template) // (title -> url -> link)
export const getTitle = (obj: FeedItem): string => { export const getTitle = (obj: FeedItem): string => {
const title_values: string[] = ["title", "url", "link"]; // fallback to url/link as title if omitted const title_values: string[] = ["title", "url", "link"];
const keys: string[] = Object.keys(obj); const keys: string[] = Object.keys(obj);
const title_property: string | undefined = title_values.find(title_value => keys.includes(title_value));
return title_property ? obj[title_property] as string : ""; // if title is empty for some reason, fall back on url or link
const title_property: string | undefined = title_values.find(
title_value => keys.includes(title_value) && obj[title_value]
);
return title_property ? (obj[title_property] as string) : "";
}; };
// More dependable way to get timestamps // More dependable way to get timestamps
export const getTimestamp = (obj: FeedItem): string => { export const getTimestamp = (obj: FeedItem): string => {
const dateString: string = (obj.pubDate || obj.isoDate || obj.date || obj.date_published).toString(); const dateString: string = (
obj.pubDate ||
obj.isoDate ||
obj.date ||
obj.date_published
).toString();
const timestamp: number = new Date(dateString).getTime(); const timestamp: number = new Date(dateString).getTime();
return isNaN(timestamp) ? dateString : timestamp.toString(); return isNaN(timestamp) ? dateString : timestamp.toString();
}; };
// parse RSS/XML or JSON feeds // parse RSS/XML or JSON feeds
export async function parseFeed(response: Response): Promise<JSONValue> { export async function parseFeed(response: Response): Promise<JSONValue> {
const contentType = response.headers.get("content-type")?.split(";")[0]; const contentType = response.headers.get("content-type")?.split(";")[0];
@ -56,19 +66,25 @@ export async function parseFeed(response: Response): Promise<JSONValue> {
const jsonFeed = [contentType] const jsonFeed = [contentType]
.map(item => .map(item =>
["application/json", "application/feed+json"].includes(item) ? response.json() as Promise<JSONValue> : false ["application/json", "application/feed+json"].includes(item)
? (response.json() as Promise<JSONValue>)
: false
) )
.filter(_ => _)[0]; .filter(_ => _)[0];
return (rssFeed && rssFeed) || (jsonFeed && jsonFeed) || {}; return (rssFeed && rssFeed) || (jsonFeed && jsonFeed) || {};
} }
export const getFeedList = async (): Promise<JSONValue> => { export const getFeedList = async (): Promise<JSONValue> => {
return JSON.parse( return JSON.parse(
(await readFile( (
new URL("../config/feeds.json", import.meta.url) await readFile(new URL("../config/feeds.json", import.meta.url))
)).toString() ).toString()
); );
}; };
/**
 * Load the project's package manifest.
 *
 * Reads `../package.json`, resolved relative to this module's URL, and
 * returns its contents parsed as JSON.
 *
 * @returns The parsed contents of package.json.
 */
export const getBuboInfo = async (): Promise<JSONValue> => {
  // Resolve against import.meta.url rather than the working directory.
  const manifestUrl = new URL("../package.json", import.meta.url);
  const manifestBuffer = await readFile(manifestUrl);
  return JSON.parse(manifestBuffer.toString());
};