feat: add support for multiple RSS feeds

This commit is contained in:
aserper
2025-12-13 00:27:36 -05:00
parent 8703e30957
commit 9439b6c8fb
5 changed files with 174 additions and 33 deletions

View File

@@ -88,6 +88,28 @@ Then run:
docker-compose up -d
```
### Multiple Feeds
To monitor multiple feeds, you can either:
- Use the `RSS_FEEDS` environment variable (comma-separated list)
- Use the `FEEDS_FILE` environment variable (path to a file with one URL per line; blank lines and lines starting with `#` are skipped)
#### Run with Multiple Feeds (Docker)
```bash
docker run -d \
--name masto-rss-bot \
-e MASTODON_CLIENT_ID="your_client_id" \
-e MASTODON_CLIENT_SECRET="your_client_secret" \
-e MASTODON_ACCESS_TOKEN="your_access_token" \
-e MASTODON_INSTANCE_URL="https://mastodon.social" \
-e RSS_FEEDS="https://feed1.com/rss,https://feed2.com/rss" \
-e TOOT_VISIBILITY="public" \
-e CHECK_INTERVAL="300" \
-v /path/to/state:/state \
amitserper/masto-rss:latest
```
## Configuration
All configuration is done via environment variables:
@@ -98,9 +120,14 @@ All configuration is done via environment variables:
| `MASTODON_CLIENT_SECRET` | Mastodon application client secret | Yes | `xyz789...` |
| `MASTODON_ACCESS_TOKEN` | Mastodon access token | Yes | `token123...` |
| `MASTODON_INSTANCE_URL` | URL of your Mastodon instance | Yes | `https://mastodon.social` |
| `RSS_FEED_URL` | URL of the RSS/Atom feed to monitor | Yes | `https://example.com/feed.xml` |
| `RSS_FEED_URL` | Single RSS/Atom feed URL (Legacy) | No* | `https://example.com/feed.xml` |
| `RSS_FEEDS` | Comma-separated list of feed URLs | No* | `https://site1.com,https://site2.com` |
| `FEEDS_FILE` | Path to file containing list of feed URLs | No* | `/config/feeds.txt` |
| `TOOT_VISIBILITY` | Post visibility level | Yes | `public`, `unlisted`, `private`, or `direct` |
| `CHECK_INTERVAL` | Seconds between feed checks | Yes | `300` (5 minutes) |
| `PROCESSED_ENTRIES_FILE` | Custom path for state file | No | `/state/processed.txt` |
\* At least one of `RSS_FEED_URL`, `RSS_FEEDS`, or `FEEDS_FILE` must be provided. When more than one is set, the feeds from all of them are combined and duplicate URLs are dropped.
### Getting Mastodon API Credentials

62
bot.py
View File

@@ -16,7 +16,7 @@ class MastodonRSSBot:
client_secret: str,
access_token: str,
instance_url: str,
feed_url: str,
feed_urls: list[str],
toot_visibility: str = "public",
check_interval: int = 300,
state_file: str = "/state/processed_entries.txt",
@@ -29,12 +29,12 @@ class MastodonRSSBot:
client_secret: Mastodon application client secret
access_token: Mastodon access token
instance_url: URL of the Mastodon instance
feed_url: URL of the RSS/Atom feed to monitor
feed_urls: List of URLs of the RSS/Atom feeds to monitor
toot_visibility: Visibility level for posts ('public', 'unlisted', 'private', 'direct')
check_interval: Seconds between feed checks
state_file: Path to file storing processed entry URLs
"""
self.feed_url = feed_url
self.feed_urls = feed_urls
self.toot_visibility = toot_visibility
self.check_interval = check_interval
self.state_file = state_file
@@ -73,20 +73,23 @@ class MastodonRSSBot:
with open(self.state_file, "w") as file:
file.write("\n".join(sorted(processed_entries)))
def parse_feed(self, feed_url: str) -> Optional[feedparser.FeedParserDict]:
    """
    Fetch and parse a single RSS/Atom feed.

    Args:
        feed_url: URL of the feed to parse

    Returns:
        Parsed feed object, or None if parsing raised an exception.
        A feed that carries a ``bozo_exception`` is still returned; the
        problem is only reported as a warning.
    """
    try:
        feed = feedparser.parse(feed_url)
        # A recorded parsing problem is reported but is not fatal: the
        # feed object is handed back to the caller regardless.
        if hasattr(feed, "bozo_exception"):
            print(f"Warning: Feed parsing issue for {feed_url}: {feed.bozo_exception}")
        return feed
    except Exception as e:
        # Report which feed failed so one bad URL among many is easy to spot.
        print(f"Error parsing feed {feed_url}: {e}")
        return None
def format_status(self, entry: feedparser.FeedParserDict) -> str:
@@ -120,22 +123,21 @@ class MastodonRSSBot:
print(f"Error posting to Mastodon: {e}")
return False
def process_new_entries(self) -> int:
def process_feed(self, feed_url: str, processed_entries: Set[str]) -> int:
"""
Check for new feed entries and post them to Mastodon.
Process a single feed for new entries.
Args:
feed_url: URL of the feed to process
processed_entries: Set of already processed entry URLs
Returns:
Number of new entries posted
"""
print("Checking for new RSS items...")
# Load processed entries
processed_entries = self.load_processed_entries()
# Parse feed
feed = self.parse_feed()
print(f"Checking feed: {feed_url}")
feed = self.parse_feed(feed_url)
if not feed or not hasattr(feed, "entries"):
print("No entries found in feed")
print(f"No entries found in feed: {feed_url}")
return 0
new_entries_count = 0
@@ -161,11 +163,31 @@ class MastodonRSSBot:
else:
print(f"Failed to post entry: {title}")
# Save updated state
self.save_processed_entries(processed_entries)
return new_entries_count
def process_new_entries(self) -> int:
    """
    Check for new feed entries in all feeds and post them to Mastodon.

    The processed-entries state is loaded once, shared across every feed in
    ``self.feed_urls``, and written back after each feed that posted
    something. Saving per feed (rather than once at the very end) means an
    interruption while a later feed is being processed does not discard the
    record of entries already posted from earlier feeds.

    Returns:
        Total number of new entries posted across all feeds
    """
    print("Checking for new RSS items...")

    # One shared set, so an entry seen via one feed is not re-posted by another.
    processed_entries = self.load_processed_entries()

    total_new_entries = 0
    for feed_url in self.feed_urls:
        new_entries = self.process_feed(feed_url, processed_entries)
        if new_entries > 0:
            # Persist immediately; skip the write when nothing changed.
            self.save_processed_entries(processed_entries)
        total_new_entries += new_entries

    return total_new_entries
def run(self) -> None:
"""
Main loop: continuously monitor the feed and post new entries.

39
main.py
View File

@@ -9,12 +9,45 @@ def main():
print("Starting Mastodon RSS Bot...")
# Load configuration from environment variables
feed_urls = []
# 1. Legacy single feed URL
if os.environ.get("RSS_FEED_URL"):
feed_urls.append(os.environ["RSS_FEED_URL"])
# 2. Comma-separated list of feeds
if os.environ.get("RSS_FEEDS"):
feeds = [url.strip() for url in os.environ["RSS_FEEDS"].split(",") if url.strip()]
feed_urls.extend(feeds)
# 3. File containing list of feeds
feeds_file = os.environ.get("FEEDS_FILE")
if feeds_file and os.path.exists(feeds_file):
try:
with open(feeds_file, "r") as f:
file_feeds = [line.strip() for line in f if line.strip() and not line.startswith("#")]
feed_urls.extend(file_feeds)
except Exception as e:
print(f"Error reading feeds file {feeds_file}: {e}")
# Deduplicate while preserving order
unique_feed_urls = []
seen = set()
for url in feed_urls:
if url not in seen:
unique_feed_urls.append(url)
seen.add(url)
if not unique_feed_urls:
print("Error: No RSS feeds configured. Please set RSS_FEED_URL, RSS_FEEDS, or FEEDS_FILE.")
return
bot = MastodonRSSBot(
client_id=os.environ["MASTODON_CLIENT_ID"],
client_secret=os.environ["MASTODON_CLIENT_SECRET"],
access_token=os.environ["MASTODON_ACCESS_TOKEN"],
instance_url=os.environ["MASTODON_INSTANCE_URL"],
feed_url=os.environ["RSS_FEED_URL"],
feed_urls=unique_feed_urls,
toot_visibility=os.environ.get("TOOT_VISIBILITY", "public"),
check_interval=int(os.environ.get("CHECK_INTERVAL", "300")),
state_file=os.environ.get(
@@ -24,7 +57,9 @@ def main():
print("Bot configured successfully:")
print(f" Instance: {os.environ['MASTODON_INSTANCE_URL']}")
print(f" Feed URL: {os.environ['RSS_FEED_URL']}")
print(f" Monitoring {len(unique_feed_urls)} feed(s):")
for url in unique_feed_urls:
print(f" - {url}")
print(f" Visibility: {os.environ.get('TOOT_VISIBILITY', 'public')}")
print(f" Check interval: {os.environ.get('CHECK_INTERVAL', '300')} seconds")
print(

View File

@@ -18,7 +18,7 @@ class TestMastodonRSSBot(unittest.TestCase):
"client_secret": "test_client_secret",
"access_token": "test_access_token",
"instance_url": "https://mastodon.test",
"feed_url": "https://example.com/feed.xml",
"feed_urls": ["https://example.com/feed.xml"],
"toot_visibility": "public",
"check_interval": 60,
"state_file": tempfile.mktemp(),
@@ -34,7 +34,7 @@ class TestMastodonRSSBot(unittest.TestCase):
"""Test bot initializes with correct configuration"""
bot = MastodonRSSBot(**self.test_config)
self.assertEqual(bot.feed_url, self.test_config["feed_url"])
self.assertEqual(bot.feed_urls, self.test_config["feed_urls"])
self.assertEqual(bot.toot_visibility, self.test_config["toot_visibility"])
self.assertEqual(bot.check_interval, self.test_config["check_interval"])
self.assertEqual(bot.state_file, self.test_config["state_file"])
@@ -170,10 +170,10 @@ class TestMastodonRSSBot(unittest.TestCase):
mock_parse.return_value = mock_feed
bot = MastodonRSSBot(**self.test_config)
feed = bot.parse_feed()
feed = bot.parse_feed("https://example.com/feed.xml")
self.assertIsNotNone(feed)
mock_parse.assert_called_once_with(self.test_config["feed_url"])
mock_parse.assert_called_once_with("https://example.com/feed.xml")
@patch("bot.feedparser.parse")
@patch("bot.Mastodon")
@@ -182,7 +182,7 @@ class TestMastodonRSSBot(unittest.TestCase):
mock_parse.side_effect = Exception("Network error")
bot = MastodonRSSBot(**self.test_config)
feed = bot.parse_feed()
feed = bot.parse_feed("https://example.com/feed.xml")
self.assertIsNone(feed)
@@ -226,6 +226,31 @@ class TestMastodonRSSBot(unittest.TestCase):
saved_entries = bot.load_processed_entries()
self.assertEqual(len(saved_entries), 3)
@patch("bot.feedparser.parse")
@patch("bot.Mastodon")
def test_process_new_entries_multiple_feeds(self, mock_mastodon, mock_parse):
    """Test that every configured feed is fetched and its entries are counted"""
    feed_urls = ["http://feed1.com", "http://feed2.com"]
    self.test_config["feed_urls"] = feed_urls

    # One distinct entry per feed, so the total proves both feeds contributed.
    entries_by_feed = {
        "http://feed1.com": [{"title": "1", "link": "http://link1.com"}],
        "http://feed2.com": [{"title": "2", "link": "http://link2.com"}],
    }

    def side_effect(url):
        # An unexpected URL raises KeyError here, which surfaces as a
        # failed parse and therefore a wrong count below.
        return Mock(entries=entries_by_feed[url])

    mock_parse.side_effect = side_effect
    mock_mastodon.return_value = Mock()

    bot = MastodonRSSBot(**self.test_config)
    count = bot.process_new_entries()

    self.assertEqual(count, 2)
    self.assertEqual(mock_parse.call_count, 2)
    # Each configured URL must have been fetched exactly once, in order.
    fetched = [c.args[0] for c in mock_parse.call_args_list]
    self.assertEqual(fetched, feed_urls)
    # Entries from both feeds must have been persisted to the state file.
    self.assertEqual(len(bot.load_processed_entries()), 2)
@patch("bot.feedparser.parse")
@patch("bot.Mastodon")
def test_process_new_entries_some_processed(self, mock_mastodon, mock_parse):
@@ -330,8 +355,8 @@ class TestMainEntry(unittest.TestCase):
},
)
@patch("main.MastodonRSSBot")
def test_main_loads_environment_config(self, mock_bot_class):
"""Test that main() loads configuration from environment"""
def test_main_loads_legacy_environment_config(self, mock_bot_class):
"""Test that main() loads configuration from legacy environment variable"""
from main import main
mock_bot_instance = Mock()
@@ -349,12 +374,44 @@ class TestMainEntry(unittest.TestCase):
client_secret="test_secret",
access_token="test_token",
instance_url="https://mastodon.test",
feed_url="https://example.com/feed.xml",
feed_urls=["https://example.com/feed.xml"],
toot_visibility="unlisted",
check_interval=120,
state_file="/tmp/test_state.txt",
)
@patch.dict(
    os.environ,
    {
        "MASTODON_CLIENT_ID": "test_id",
        "MASTODON_CLIENT_SECRET": "test_secret",
        "MASTODON_ACCESS_TOKEN": "test_token",
        "MASTODON_INSTANCE_URL": "https://mastodon.test",
        "RSS_FEEDS": "http://feed1.com, http://feed2.com",
        # No RSS_FEED_URL
        "TOOT_VISIBILITY": "public",
    },
)
@patch("main.MastodonRSSBot")
def test_main_loads_multiple_feeds_env(self, mock_bot_class):
    """Test that main() loads multiple feeds from environment variable"""
    # Drop any other feed source inherited from the real environment so that
    # RSS_FEEDS is the only one in play. patch.dict restores os.environ when
    # the test ends, so these removals do not leak into other tests.
    os.environ.pop("RSS_FEED_URL", None)
    os.environ.pop("FEEDS_FILE", None)

    from main import main

    mock_bot_class.return_value = Mock()

    # Call main() directly: an unexpected exception should fail the test
    # instead of being swallowed.
    main()

    mock_bot_class.assert_called_once()
    _, kwargs = mock_bot_class.call_args
    # Whitespace around the comma-separated items must have been stripped.
    self.assertEqual(kwargs["feed_urls"], ["http://feed1.com", "http://feed2.com"])
if __name__ == "__main__":
unittest.main()

View File

@@ -17,7 +17,7 @@ class TestRSSFeedIntegration(unittest.TestCase):
"client_secret": "test_client_secret",
"access_token": "test_access_token",
"instance_url": "https://mastodon.test",
"feed_url": "https://example.com/feed.xml",
"feed_urls": ["https://example.com/feed.xml"],
"toot_visibility": "public",
"check_interval": 1,
"state_file": tempfile.mktemp(),
@@ -187,7 +187,7 @@ class TestMastodonAPIIntegration(unittest.TestCase):
"client_secret": "test_client_secret",
"access_token": "test_access_token",
"instance_url": "https://mastodon.test",
"feed_url": "https://example.com/feed.xml",
"feed_urls": ["https://example.com/feed.xml"],
"toot_visibility": "public",
"check_interval": 1,
"state_file": tempfile.mktemp(),