diff --git a/handler/feed_check.go b/handler/feed_check.go
index bcbcd8a..3a5bdf5 100644
--- a/handler/feed_check.go
+++ b/handler/feed_check.go
@@ -57,12 +57,22 @@ func (h *Handler) OnCheck() {
 		lastEntry = &abonnement.LastEntry.String
 	}
 
-	feed, err := abonnement.Feed.Check(lastEntry)
+	result, err := abonnement.Feed.Check(lastEntry)
 	if err != nil {
 		log.Printf("%s: %s", abonnement.Feed.Url, err)
 		return
 	}
 
+	// Update cache headers if we got new ones
+	if result.ETag != nil || result.LastModified != nil {
+		err = h.DB.Abonnements.SetCacheHeaders(abonnement.Feed.Url, result.ETag, result.LastModified)
+		if err != nil {
+			log.Printf("%s: failed to update cache headers: %s", abonnement.Feed.Url, err)
+		}
+	}
+
+	feed := result.Feed
+
 	for _, entry := range reverse(feed.Items) {
 		templateData := &TemplateData{}
 		if entry.Title != "" {
diff --git a/storage/abonnements.go b/storage/abonnements.go
index 2b4d093..dd7b3fb 100644
--- a/storage/abonnements.go
+++ b/storage/abonnements.go
@@ -1,11 +1,16 @@
 package storage
 
 import (
+	"compress/gzip"
 	"context"
 	"database/sql"
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+
 	"github.com/jmoiron/sqlx"
 	"github.com/mmcdole/gofeed"
-	"time"
 )
 
 type (
@@ -17,6 +22,7 @@ type (
 		GetByUser(chatId int64) ([]Feed, error)
 		GetAll() ([]Abonnement, error)
 		SetLastEntry(feedUrl string, lastEntry *string) error
+		SetCacheHeaders(feedUrl string, etag *string, lastModified *string) error
 	}
 
 	Abonnements struct {
@@ -35,11 +41,20 @@ type (
 	}
 
 	Feed struct {
-		ID        int64          `db:"id"`
-		Url       string         `db:"url"`
-		LastEntry sql.NullString `db:"last_entry"`
-		CreatedAt time.Time      `db:"created_at"`
-		UpdatedAt sql.NullTime   `db:"updated_at"`
+		ID           int64          `db:"id"`
+		Url          string         `db:"url"`
+		LastEntry    sql.NullString `db:"last_entry"`
+		ETag         sql.NullString `db:"etag"`
+		LastModified sql.NullString `db:"last_modified"`
+		CreatedAt    time.Time      `db:"created_at"`
+		UpdatedAt    sql.NullTime   `db:"updated_at"`
+	}
+
+	// FeedCheckResult contains the parsed feed and caching headers
+	FeedCheckResult struct {
+		Feed         *gofeed.Feed
+		ETag         *string
+		LastModified *string
 	}
 )
 
@@ -214,20 +229,153 @@ WHERE feeds.url = ?`
 	return err
 }
 
-func (feedToCheck Feed) Check(lastEntry *string) (*gofeed.Feed, error) {
-	feed, err := gofeed.NewParser().ParseURL(feedToCheck.Url)
+// SetCacheHeaders persists the HTTP validators (ETag / Last-Modified)
+// for a feed so subsequent checks can make conditional requests.
+func (db *Abonnements) SetCacheHeaders(feedUrl string, etag *string, lastModified *string) error {
+	const query = `UPDATE feeds
+SET feeds.etag = ?, feeds.last_modified = ?
+WHERE feeds.url = ?`
+
+	_, err := db.Exec(query, etag, lastModified, feedUrl)
+	return err
+}
+
+// fetchFeedWithCaching fetches a feed using HTTP with smart caching headers
+func fetchFeedWithCaching(feedURL string, etag *string, lastModified *string) (*http.Response, error) {
+	client := &http.Client{
+		Timeout: 30 * time.Second,
+	}
+
+	req, err := http.NewRequest("GET", feedURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	// Set User-Agent to identify the bot
+	req.Header.Set("User-Agent", "RSSBot/2.0 (+https://github.com/Brawl345/rssbot)")
+
+	// Only advertise encodings we can actually decode: readResponseBody
+	// handles gzip but not deflate, and setting this header manually
+	// disables net/http's transparent decompression.
+	req.Header.Set("Accept-Encoding", "gzip")
+
+	// Add caching headers if we have them
+	if etag != nil && *etag != "" {
+		req.Header.Set("If-None-Match", *etag)
+	}
+	if lastModified != nil && *lastModified != "" {
+		req.Header.Set("If-Modified-Since", *lastModified)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to fetch feed: %w", err)
+	}
+
+	return resp, nil
+}
+
+// readResponseBody reads and decompresses the response body if needed
+func readResponseBody(resp *http.Response) (io.ReadCloser, error) {
+	var reader io.ReadCloser
+	var err error
+
+	switch resp.Header.Get("Content-Encoding") {
+	case "gzip":
+		reader, err = gzip.NewReader(resp.Body)
+		if err != nil {
+			resp.Body.Close()
+			return nil, fmt.Errorf("failed to create gzip reader: %w", err)
+		}
+	default:
+		reader = resp.Body
+	}
+
+	return reader, nil
+}
+
+// Check fetches the feed conditionally (If-None-Match / If-Modified-Since),
+// returning the parsed feed trimmed to entries newer than lastEntry plus
+// the validators to store for the next check.
+func (feedToCheck Feed) Check(lastEntry *string) (*FeedCheckResult, error) {
+	var etag *string
+	var lastModified *string
+
+	if feedToCheck.ETag.Valid {
+		etag = &feedToCheck.ETag.String
+	}
+	if feedToCheck.LastModified.Valid {
+		lastModified = &feedToCheck.LastModified.String
+	}
+
+	resp, err := fetchFeedWithCaching(feedToCheck.Url, etag, lastModified)
 	if err != nil {
 		return nil, err
 	}
+	defer resp.Body.Close()
+
+	// Extract new caching headers from response
+	var newETag *string
+	var newLastModified *string
+
+	if etagHeader := resp.Header.Get("ETag"); etagHeader != "" {
+		newETag = &etagHeader
+	}
+	if lastModHeader := resp.Header.Get("Last-Modified"); lastModHeader != "" {
+		newLastModified = &lastModHeader
+	}
+
+	// Handle HTTP 304 Not Modified - no new content
+	if resp.StatusCode == http.StatusNotModified {
+		// RFC 9110 allows a 304 to carry updated validators; prefer those,
+		// otherwise keep the ones the server just confirmed as still valid.
+		if newETag == nil {
+			newETag = etag
+		}
+		if newLastModified == nil {
+			newLastModified = lastModified
+		}
+		// Return empty feed with no items to indicate nothing new
+		return &FeedCheckResult{
+			Feed:         &gofeed.Feed{Items: []*gofeed.Item{}},
+			ETag:         newETag,
+			LastModified: newLastModified,
+		}, nil
+	}
+
+	// Handle HTTP 429 Too Many Requests or 503 Service Unavailable
+	if resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable {
+		retryAfter := resp.Header.Get("Retry-After")
+		if retryAfter != "" {
+			return nil, fmt.Errorf("server returned %d, retry after: %s", resp.StatusCode, retryAfter)
+		}
+		return nil, fmt.Errorf("server returned %d", resp.StatusCode)
+	}
+
+	// Handle other non-200 status codes
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+	}
+
+	// Read and decompress body if needed
+	reader, err := readResponseBody(resp)
+	if err != nil {
+		return nil, err
+	}
+	defer reader.Close()
+
+	// Parse the feed using gofeed's Parse method
+	parser := gofeed.NewParser()
+	feed, err := parser.Parse(reader)
+	if err != nil {
+		return nil, fmt.Errorf("failed to parse feed: %w", err)
+	}
+
 	// Filter out items we've already seen
 	if lastEntry != nil {
 		for i, item := range feed.Items {
 			if item.GUID == *lastEntry {
 				feed.Items = feed.Items[:i]
-				return feed, nil
+				break
 			}
 		}
 	}
 
-	return feed, nil
+	return &FeedCheckResult{
+		Feed:         feed,
+		ETag:         newETag,
+		LastModified: newLastModified,
+	}, nil
 }
diff --git a/storage/feed_caching_test.go b/storage/feed_caching_test.go
new file mode 100644
index 0000000..3454be1
--- /dev/null
+++ b/storage/feed_caching_test.go
@@ -0,0 +1,764 @@
+package storage
+
+import (
+	"compress/gzip"
+	"database/sql"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+const sampleRSSFeed = `<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+	<channel>
+		<title>Test Feed</title>
+		<link>https://example.com</link>
+		<description>A test feed</description>
+		<item>
+			<title>Test Item 1</title>
+			<link>https://example.com/item1</link>
+			<guid>item1</guid>
+			<description>Test description 1</description>
+			<pubDate>Mon, 01 Jan 2024 00:00:00 GMT</pubDate>
+		</item>
+		<item>
+			<title>Test Item 2</title>
+			<link>https://example.com/item2</link>
+			<guid>item2</guid>
+			<description>Test description 2</description>
+			<pubDate>Mon, 02 Jan 2024 00:00:00 GMT</pubDate>
+		</item>
+	</channel>
+</rss>`
+
+const sampleRSSFeedUpdated = `<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+	<channel>
+		<title>Test Feed</title>
+		<link>https://example.com</link>
+		<description>A test feed</description>
+		<item>
+			<title>Test Item 3</title>
+			<link>https://example.com/item3</link>
+			<guid>item3</guid>
+			<description>Test description 3</description>
+			<pubDate>Mon, 03 Jan 2024 00:00:00 GMT</pubDate>
+		</item>
+		<item>
+			<title>Test Item 1</title>
+			<link>https://example.com/item1</link>
+			<guid>item1</guid>
+			<description>Test description 1</description>
+			<pubDate>Mon, 01 Jan 2024 00:00:00 GMT</pubDate>
+		</item>
+		<item>
+			<title>Test Item 2</title>
+			<link>https://example.com/item2</link>
+			<guid>item2</guid>
+			<description>Test description 2</description>
+			<pubDate>Mon, 02 Jan 2024 00:00:00 GMT</pubDate>
+		</item>
+	</channel>
+</rss>`
+
+// TestFetchFeedWithETag tests that ETag is properly sent in If-None-Match header
+func TestFetchFeedWithETag(t *testing.T) {
+	etag := `"test-etag-123"`
+	requestReceived := false
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requestReceived = true
+
+		// Verify the If-None-Match header is sent
+		if r.Header.Get("If-None-Match") != etag {
+			t.Errorf("Expected If-None-Match header to be %s, got %s", etag, r.Header.Get("If-None-Match"))
+		}
+
+		// Return 200 with new ETag
+		w.Header().Set("ETag", `"new-etag-456"`)
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+		ETag: sql.NullString{
+			String: etag,
+			Valid:  true,
+		},
+	}
+
+	result, err := feed.Check(nil)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if !requestReceived {
+		t.Error("Request was not received by test server")
+	}
+
+	if result.ETag == nil || *result.ETag != `"new-etag-456"` {
+		t.Errorf("Expected new ETag to be \"new-etag-456\", got %v", result.ETag)
+	}
+
+	if result.Feed == nil {
+		t.Error("Expected feed to be parsed")
+	}
+
+	if len(result.Feed.Items) != 2 {
+		t.Errorf("Expected 2 items, got %d", len(result.Feed.Items))
+	}
+}
+
+// TestFetchFeedWithLastModified tests that Last-Modified is properly sent in If-Modified-Since header
+func TestFetchFeedWithLastModified(t *testing.T) {
+	lastModified := "Mon, 01 Jan 2024 00:00:00 GMT"
+	requestReceived := false
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		requestReceived = true
+
+		// Verify the If-Modified-Since header is sent
+		if r.Header.Get("If-Modified-Since") != lastModified {
+			t.Errorf("Expected If-Modified-Since header to be %s, got %s", lastModified, r.Header.Get("If-Modified-Since"))
+		}
+
+		// Return 200 with new Last-Modified
+		newLastModified := "Mon, 02 Jan 2024 00:00:00 GMT"
+		w.Header().Set("Last-Modified", newLastModified)
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+		LastModified: sql.NullString{
+			String: lastModified,
+			Valid:  true,
+		},
+	}
+
+	result, err := feed.Check(nil)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if !requestReceived {
+		t.Error("Request was not received by test server")
+	}
+
+	if result.LastModified == nil || *result.LastModified != "Mon, 02 Jan 2024 00:00:00 GMT" {
+		t.Errorf("Expected new Last-Modified to be Mon, 02 Jan 2024 00:00:00 GMT, got %v", result.LastModified)
+	}
+}
+
+// TestFetchFeedWith304NotModified tests that 304 responses are handled correctly
+func TestFetchFeedWith304NotModified(t *testing.T) {
+	etag := `"test-etag-123"`
+	lastModified := "Mon, 01 Jan 2024 00:00:00 GMT"
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Verify headers are sent
+		if r.Header.Get("If-None-Match") != etag {
+			t.Errorf("Expected If-None-Match header to be %s", etag)
+		}
+		if r.Header.Get("If-Modified-Since") != lastModified {
+			t.Errorf("Expected If-Modified-Since header to be %s", lastModified)
+		}
+
+		// Return 304 Not Modified
+		w.WriteHeader(http.StatusNotModified)
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+		ETag: sql.NullString{
+			String: etag,
+			Valid:  true,
+		},
+		LastModified: sql.NullString{
+			String: lastModified,
+			Valid:  true,
+		},
+	}
+
+	result, err := feed.Check(nil)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	// 304 should return empty feed
+	if result.Feed == nil {
+		t.Error("Expected feed to be not nil")
+	}
+
+	if len(result.Feed.Items) != 0 {
+		t.Errorf("Expected 0 items for 304 response, got %d", len(result.Feed.Items))
+	}
+
+	// Cache headers should be preserved
+	if result.ETag == nil || *result.ETag != etag {
+		t.Errorf("Expected ETag to be preserved as %s, got %v", etag, result.ETag)
+	}
+
+	if result.LastModified == nil || *result.LastModified != lastModified {
+		t.Errorf("Expected Last-Modified to be preserved as %s, got %v", lastModified, result.LastModified)
+	}
+}
+
+// TestFetchFeedWithGzipCompression tests that gzip-compressed feeds are handled correctly
+func TestFetchFeedWithGzipCompression(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Verify Accept-Encoding header
+		acceptEncoding := r.Header.Get("Accept-Encoding")
+		if !strings.Contains(acceptEncoding, "gzip") {
+			t.Errorf("Expected Accept-Encoding to contain gzip, got %s", acceptEncoding)
+		}
+
+		// Return gzipped content
+		w.Header().Set("Content-Encoding", "gzip")
+		w.Header().Set("Content-Type", "application/rss+xml")
+
+		gz := gzip.NewWriter(w)
+		defer gz.Close()
+		gz.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	result, err := feed.Check(nil)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if result.Feed == nil {
+		t.Error("Expected feed to be parsed")
+	}
+
+	if len(result.Feed.Items) != 2 {
+		t.Errorf("Expected 2 items, got %d", len(result.Feed.Items))
+	}
+}
+
+// TestFetchFeedWithUserAgent tests that User-Agent is properly set
+func TestFetchFeedWithUserAgent(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		userAgent := r.Header.Get("User-Agent")
+		if !strings.Contains(userAgent, "RSSBot") {
+			t.Errorf("Expected User-Agent to contain RSSBot, got %s", userAgent)
+		}
+
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	_, err := feed.Check(nil)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+}
+
+// TestFetchFeedWith429TooManyRequests tests handling of 429 status code
+func TestFetchFeedWith429TooManyRequests(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Retry-After", "60")
+		w.WriteHeader(http.StatusTooManyRequests)
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	_, err := feed.Check(nil)
+	if err == nil {
+		t.Fatal("Expected error for 429 status code")
+	}
+
+	if !strings.Contains(err.Error(), "429") {
+		t.Errorf("Expected error to mention 429, got %v", err)
+	}
+
+	if !strings.Contains(err.Error(), "60") {
+		t.Errorf("Expected error to mention Retry-After value, got %v", err)
+	}
+}
+
+// TestFetchFeedWith503ServiceUnavailable tests handling of 503 status code
+func TestFetchFeedWith503ServiceUnavailable(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Retry-After", "120")
+		w.WriteHeader(http.StatusServiceUnavailable)
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	_, err := feed.Check(nil)
+	if err == nil {
+		t.Fatal("Expected error for 503 status code")
+	}
+
+	if !strings.Contains(err.Error(), "503") {
+		t.Errorf("Expected error to mention 503, got %v", err)
+	}
+
+	if !strings.Contains(err.Error(), "120") {
+		t.Errorf("Expected error to mention Retry-After value, got %v", err)
+	}
+}
+
+// TestFetchFeedWithNoCacheHeaders tests that feeds without cache headers work correctly
+func TestFetchFeedWithNoCacheHeaders(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Don't set any cache headers
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	result, err := feed.Check(nil)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if result.Feed == nil {
+		t.Error("Expected feed to be parsed")
+	}
+
+	// Cache headers should be nil when not provided by server
+	if result.ETag != nil {
+		t.Error("Expected ETag to be nil when server doesn't provide it")
+	}
+
+	if result.LastModified != nil {
+		t.Error("Expected Last-Modified to be nil when server doesn't provide it")
+	}
+}
+
+// TestFetchFeedWithLastEntryFilter tests that items after lastEntry are filtered
+func TestFetchFeedWithLastEntryFilter(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeedUpdated))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	// Set lastEntry to item2
+	lastEntry := "item2"
+	result, err := feed.Check(&lastEntry)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if result.Feed == nil {
+		t.Error("Expected feed to be parsed")
+	}
+
+	// Should only return item3 and item1 (items before item2)
+	if len(result.Feed.Items) != 2 {
+		t.Errorf("Expected 2 items (before item2), got %d", len(result.Feed.Items))
+	}
+
+	// Verify that item2 is not included
+	for _, item := range result.Feed.Items {
+		if item.GUID == "item2" {
+			t.Error("Expected item2 to be filtered out")
+		}
+	}
+}
+
+// TestFetchFeedWithInvalidURL tests error handling for invalid URLs
+func TestFetchFeedWithInvalidURL(t *testing.T) {
+	feed := Feed{
+		Url: "http://invalid-url-that-does-not-exist-12345.com/feed",
+	}
+
+	_, err := feed.Check(nil)
+	if err == nil {
+		t.Fatal("Expected error for invalid URL")
+	}
+}
+
+// TestFetchFeedWithInvalidXML tests error handling for invalid RSS/XML
+func TestFetchFeedWithInvalidXML(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("invalid xml content"))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	_, err := feed.Check(nil)
+	if err == nil {
+		t.Fatal("Expected error for invalid XML")
+	}
+
+	if !strings.Contains(err.Error(), "failed to parse feed") {
+		t.Errorf("Expected parse error, got %v", err)
+	}
+}
+
+// TestFetchFeedWith404NotFound tests handling of 404 status code
+func TestFetchFeedWith404NotFound(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	_, err := feed.Check(nil)
+	if err == nil {
+		t.Fatal("Expected error for 404 status code")
+	}
+
+	if !strings.Contains(err.Error(), "404") {
+		t.Errorf("Expected error to mention 404, got %v", err)
+	}
+}
+
+// TestFetchFeedWithTimeout tests timeout handling
+func TestFetchFeedWithTimeout(t *testing.T) {
+	if testing.Short() {
+		// The client timeout is hard-coded to 30s in fetchFeedWithCaching,
+		// so this test necessarily takes half a minute.
+		t.Skip("skipping 30s timeout test in short mode")
+	}
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Sleep longer than the client timeout
+		time.Sleep(35 * time.Second)
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	_, err := feed.Check(nil)
+	if err == nil {
+		t.Fatal("Expected timeout error")
+	}
+}
+
+// TestFetchFeedBothETagAndLastModified tests that both headers work together
+func TestFetchFeedBothETagAndLastModified(t *testing.T) {
+	etag := `"test-etag"`
+	lastModified := "Mon, 01 Jan 2024 00:00:00 GMT"
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Verify both headers are sent
+		if r.Header.Get("If-None-Match") != etag {
+			t.Errorf("Expected If-None-Match header to be %s", etag)
+		}
+		if r.Header.Get("If-Modified-Since") != lastModified {
+			t.Errorf("Expected If-Modified-Since header to be %s", lastModified)
+		}
+
+		// Return both new headers
+		w.Header().Set("ETag", `"new-etag"`)
+		w.Header().Set("Last-Modified", "Mon, 02 Jan 2024 00:00:00 GMT")
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+		ETag: sql.NullString{
+			String: etag,
+			Valid:  true,
+		},
+		LastModified: sql.NullString{
+			String: lastModified,
+			Valid:  true,
+		},
+	}
+
+	result, err := feed.Check(nil)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	// Verify both cache headers were updated
+	if result.ETag == nil || *result.ETag != `"new-etag"` {
+		t.Errorf("Expected ETag to be updated to \"new-etag\", got %v", result.ETag)
+	}
+
+	if result.LastModified == nil || *result.LastModified != "Mon, 02 Jan 2024 00:00:00 GMT" {
+		t.Errorf("Expected Last-Modified to be updated, got %v", result.LastModified)
+	}
+}
+
+// BenchmarkFetchFeedWith304 benchmarks the performance of 304 responses
+func BenchmarkFetchFeedWith304(b *testing.B) {
+	etag := `"test-etag"`
+
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNotModified)
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+		ETag: sql.NullString{
+			String: etag,
+			Valid:  true,
+		},
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_, err := feed.Check(nil)
+		if err != nil {
+			b.Fatalf("Unexpected error: %v", err)
+		}
+	}
+}
+
+// BenchmarkFetchFeedWithGzip benchmarks the performance of gzipped feeds
+func BenchmarkFetchFeedWithGzip(b *testing.B) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Encoding", "gzip")
+		w.Header().Set("Content-Type", "application/rss+xml")
+
+		gz := gzip.NewWriter(w)
+		defer gz.Close()
+		gz.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_, err := feed.Check(nil)
+		if err != nil {
+			b.Fatalf("Unexpected error: %v", err)
+		}
+	}
+}
+
+// TestConcurrentFeedFetches tests concurrent feed fetches
+func TestConcurrentFeedFetches(t *testing.T) {
+	// The handler runs on multiple goroutines, so the counter must be
+	// atomic to keep this test race-free under `go test -race`.
+	var requestCount int64
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		atomic.AddInt64(&requestCount, 1)
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	// Fetch feed concurrently
+	const numGoroutines = 10
+	errors := make(chan error, numGoroutines)
+
+	for i := 0; i < numGoroutines; i++ {
+		go func() {
+			_, err := feed.Check(nil)
+			errors <- err
+		}()
+	}
+
+	// Check for errors
+	for i := 0; i < numGoroutines; i++ {
+		err := <-errors
+		if err != nil {
+			t.Errorf("Concurrent fetch failed: %v", err)
+		}
+	}
+
+	if got := atomic.LoadInt64(&requestCount); got != numGoroutines {
+		t.Errorf("Expected %d requests, got %d", numGoroutines, got)
+	}
+}
+
+// TestReadResponseBodyWithoutCompression tests reading uncompressed responses
+func TestReadResponseBodyWithoutCompression(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	result, err := feed.Check(nil)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if result.Feed == nil {
+		t.Error("Expected feed to be parsed")
+	}
+
+	if result.Feed.Title != "Test Feed" {
+		t.Errorf("Expected feed title 'Test Feed', got %s", result.Feed.Title)
+	}
+}
+
+// TestFetchFeedWithEmptyETag tests that empty ETag is not sent
+func TestFetchFeedWithEmptyETag(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Verify that If-None-Match is not sent when ETag is empty
+		if r.Header.Get("If-None-Match") != "" {
+			t.Error("Expected If-None-Match header to not be sent when ETag is empty")
+		}
+
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+		ETag: sql.NullString{
+			String: "",
+			Valid:  true,
+		},
+	}
+
+	_, err := feed.Check(nil)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+}
+
+// TestFetchFeedPreservesNewItemsAfterLastEntry tests filtering logic
+func TestFetchFeedPreservesNewItemsAfterLastEntry(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		// Feed has items: item3, item1, item2 (in that order)
+		w.Write([]byte(sampleRSSFeedUpdated))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	// Last seen was item1, so we should only get item3
+	lastEntry := "item1"
+	result, err := feed.Check(&lastEntry)
+	if err != nil {
+		t.Fatalf("Expected no error, got %v", err)
+	}
+
+	if len(result.Feed.Items) != 1 {
+		t.Errorf("Expected 1 new item, got %d", len(result.Feed.Items))
+	}
+
+	if len(result.Feed.Items) > 0 && result.Feed.Items[0].GUID != "item3" {
+		t.Errorf("Expected first item to be item3, got %s", result.Feed.Items[0].GUID)
+	}
+}
+
+// TestReadResponseBodyGzipError tests error handling when gzip decompression fails
+func TestReadResponseBodyGzipError(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Encoding", "gzip")
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		// Write invalid gzip data
+		w.Write([]byte("not gzipped data"))
+	}))
+	defer server.Close()
+
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	_, err := feed.Check(nil)
+	if err == nil {
+		t.Fatal("Expected error for invalid gzip data")
+	}
+
+	if !strings.Contains(err.Error(), "gzip") {
+		t.Errorf("Expected error to mention gzip, got %v", err)
+	}
+}
+
+// Example test showing typical usage pattern
+func ExampleFeed_Check() {
+	// Create a test server that returns an RSS feed
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("ETag", `"example-etag"`)
+		w.Header().Set("Last-Modified", "Mon, 01 Jan 2024 00:00:00 GMT")
+		w.Header().Set("Content-Type", "application/rss+xml")
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte(sampleRSSFeed))
+	}))
+	defer server.Close()
+
+	// Create a feed
+	feed := Feed{
+		Url: server.URL,
+	}
+
+	// First check - no cache headers
+	result, err := feed.Check(nil)
+	if err != nil {
+		fmt.Printf("Error: %v\n", err)
+		return
+	}
+
+	fmt.Printf("Items: %d\n", len(result.Feed.Items))
+	fmt.Printf("Has ETag: %v\n", result.ETag != nil)
+	fmt.Printf("Has Last-Modified: %v\n", result.LastModified != nil)
+
+	// Output:
+	// Items: 2
+	// Has ETag: true
+	// Has Last-Modified: true
+}
diff --git a/storage/migrations/0003-feed-caching.sql b/storage/migrations/0003-feed-caching.sql
new file mode 100644
index 0000000..25f76c3
--- /dev/null
+++ b/storage/migrations/0003-feed-caching.sql
@@ -0,0 +1,5 @@
+-- +migrate Up
+
+ALTER TABLE `feeds`
+    ADD COLUMN `etag` VARCHAR(255) DEFAULT NULL AFTER `last_entry`,
+    ADD COLUMN `last_modified` VARCHAR(255) DEFAULT NULL AFTER `etag`;