commit 1259b13c404fbe6969e923f62b457f3b1a07d345 from: amacleod date: Fri Jan 19 21:38:10 2024 UTC Initial commit commit - /dev/null commit + 1259b13c404fbe6969e923f62b457f3b1a07d345 blob - /dev/null blob + 1c363d2841cdff3fecddc69509795cad4461aabf (mode 644) --- /dev/null +++ LICENCE @@ -0,0 +1,14 @@ +BSD Zero Clause License + +Copyright (c) Alisdair MacLeod + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. blob - /dev/null blob + c2ef43640f7bcc324f9886f7cc1f5eb6c5912b62 (mode 644) --- /dev/null +++ README.md @@ -0,0 +1,14 @@ +Eris +==== + +Eris is a tiny RSS/Atom planet generator. + +Pass it an OPML with feeds and it will go and gather the latest 250 posts from across all feed sources and output an HTML page with links to them. + +Run it on a simple cron job to have your own static RSS planet. + +```shell +eris feeds.opml > feeds.html +``` + +I love this little tool, but my god are people inconsistent with date formats on their RSS feeds. \ No newline at end of file blob - /dev/null blob + 2f7e34df7b61bb83eac6150e25a4dd7f15a7d473 (mode 644) --- /dev/null +++ eris.go @@ -0,0 +1,306 @@ +// Copyright (c) Alisdair MacLeod +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +// REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +// AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +// INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +// LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +// OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +// PERFORMANCE OF THIS SOFTWARE. + +package main + +import ( + "bytes" + "encoding/xml" + "errors" + "fmt" + "html/template" + "io" + "log" + "net/http" + "os" + "sort" + "strings" + "sync" + "time" + + "golang.org/x/net/html/charset" +) + +const ( + // HTTP client connection timeout. 15 seconds is an arbitrary number to try + // to limit the amount of time wasted on servers with poor connections. + clientTimeout = 15 * time.Second + // Maximum number of concurrent connections allowed per host. Lots of feeds + // (especially podcasts) use the same host, and so we can get forced resets + // if we try to connect too fast. + connsPerHost = 20 + // Maximum number of entries to include in the HTML output. + maxEntries = 250 +) + +const ( + feedTmpl = ` + + +Eris Feeds +{{range .}}

{{.EntryTitle}}

+{{end -}}` +) + +type Entry struct { + EntryTitle string + Link string + Description string + Time time.Time +} + +type node struct { + XMLName xml.Name + Attrs []xml.Attr `xml:"-"` + Content []byte `xml:",innerxml"` + Nodes []node `xml:",any"` +} + +type rss struct { + Items []item `xml:"channel>item"` +} + +type item struct { + Title string `xml:"title"` + PubDate string `xml:"pubDate"` + Link string `xml:"link"` + Description string `xml:"description"` +} + +type atom struct { + Entries []entry `xml:"entry"` +} + +type entry struct { + Title string `xml:"title"` + Updated string `xml:"updated"` + Link link `xml:"link"` +} + +type link struct { + Href string `xml:"href,attr"` +} + +type opml struct { + XMLName xml.Name `xml:"opml"` + Outlines []outline `xml:"body>outline"` +} + +type outline struct { + Type string `xml:"type,attr"` + Text string `xml:"text,attr"` + XmlUrl string `xml:"xmlUrl,attr"` + Outlines []outline `xml:"outline"` +} + +func parseFeed(feed []byte) ([]Entry, error) { + var unknownFeed node + if err := unmarshal(feed, &unknownFeed); err != nil { + return nil, fmt.Errorf("unmarshaling unknown feed: %w", err) + } + var ret []Entry + switch strings.ToLower(unknownFeed.XMLName.Local) { + case "feed": + var f atom + if err := unmarshal(feed, &f); err != nil { + return nil, fmt.Errorf("unmarshaling atom feed: %w", err) + } + for _, entry := range f.Entries { + date, err := parseDate(entry.Updated) + switch { + case errors.Is(err, errNoDate): + date = time.Now() + case err != nil: + return nil, fmt.Errorf("parse Updated node for atom entry: %w", err) + } + ret = append(ret, Entry{ + EntryTitle: entry.Title, + Link: entry.Link.Href, + Time: date, + }) + } + return ret, nil + case "rdf": + fallthrough + case "rss": + var f rss + if err := unmarshal(feed, &f); err != nil { + return nil, fmt.Errorf("unmarshaling rss feed: %w", err) + } + for _, item := range f.Items { + date, err := parseDate(item.PubDate) + switch { + case errors.Is(err, errNoDate): + date = time.Now() + case err != nil: + return nil, fmt.Errorf("parse pubDate node for rss item: %w", err) + } + ret = append(ret, Entry{ + EntryTitle: item.Title, + Link: item.Link, + Description: item.Description, + Time: date, + }) + } + return ret, nil + default: + return nil, errors.New("unknown feed type") + } +} + +var dateFormats = []string{ + time.RFC822, + time.RFC822Z, + time.RFC1123, + time.RFC1123Z, + time.RFC3339, + "02 Jan 2006 15:04:05 MST", // RFC822 with full year and seconds + "02 Jan 2006 15:04:05 -0700", // RFC822Z with full year and seconds + "2 Jan 2006 15:04:05 -0700", // RFC822Z with full year, seconds and without padded day + "Mon, 2 Jan 2006 15:04:05 MST", // RFC1123 without padded day + "Mon, 2 Jan 2006 15:04:05 -0700", // RFC1123Z without padded day + "2006-01-02", // RFC3339 date only + "2006-01-02 15:04:05", // A common attempt at RFC3339 but with no timezone or 'T' delimiter +} + +var errNoDate = errors.New("no date specified") + +func parseDate(dateString string) (time.Time, error) { + dateString = strings.TrimSpace(dateString) + if dateString == "" { + return time.Time{}, errNoDate + } + for _, format := range dateFormats { + if t, err := time.Parse(format, dateString); err == nil { + return t, nil + } + } + return time.Time{}, fmt.Errorf("cannot parse date string: %q", dateString) +} + +func unmarshal(data []byte, v interface{}) error { + decoder := xml.NewDecoder(bytes.NewReader(data)) + decoder.Strict = false + decoder.CharsetReader = charset.NewReaderLabel + return decoder.Decode(v) +} + +func parseOPML(oo []outline) []string { + var ret []string + for _, o := range oo { + if o.Type == "rss" { + ret = append(ret, o.XmlUrl) + } + ret = append(ret, parseOPML(o.Outlines)...) + } + return ret +} + +func main() { + if len(os.Args) < 2 { + fmt.Println("Please specify an opml file to read feeds from.") + os.Exit(1) + } + log.SetOutput(os.Stderr) + feedFile, err := os.Open(os.Args[1]) + if err != nil { + fmt.Printf("Could not open file %q: %v\n", os.Args[1], err) + os.Exit(1) + } + var OPML opml + if err := xml.NewDecoder(feedFile).Decode(&OPML); err != nil { + fmt.Printf("Could not parse OPML: %v\n", err) + os.Exit(1) + } + feedUrls := parseOPML(OPML.Outlines) + tmpl := template.Must(template.New("feeds").Parse(feedTmpl)) + client := &http.Client{ + Timeout: clientTimeout, + Transport: &http.Transport{ + MaxConnsPerHost: connsPerHost, + }, + } + + entryChan := make(chan []Entry) + var wg sync.WaitGroup + for _, text := range feedUrls { + wg.Add(1) + go func(url string) { + defer wg.Done() + req, err := http.NewRequest("GET", url, nil) + if err != nil { + log.Printf("error creating request for %q: %v\n", url, err) + return + } + req.Header.Add("User-Agent", "eris (https://github.com/admacleod/eris)") + res, err := client.Do(req) + if err != nil { + // Ignore HTTP errors, all they do is clog up logs when servers + // temporarily go offline. + return + } + if res.StatusCode != http.StatusOK { + log.Printf("non-OK status code from %q: %d %s", url, res.StatusCode, res.Status) + return + } + defer func() { + if err := res.Body.Close(); err != nil { + log.Printf("error closing request body for %q: %v\n", url, err) + } + }() + rawFeed, err := io.ReadAll(res.Body) + if err != nil { + log.Printf("error reading feeds for %q: %v\n", url, err) + return + } + parsedEntries, err := parseFeed(rawFeed) + if err != nil { + log.Printf("error gathering feed entries for %q: %v\n", url, err) + return + } + entryChan <- parsedEntries + }(text) + } + + entrySet := make(map[string]Entry) + done := make(chan struct{}) + go func() { + for entries := range entryChan { + for _, entry := range entries { + entrySet[entry.Link] = entry + } + } + close(done) + }() + + wg.Wait() + close(entryChan) + <-done + + var entries []Entry + for _, entry := range entrySet { + entries = append(entries, entry) + } + + sort.SliceStable(entries, func(i, j int) bool { + return entries[i].Time.After(entries[j].Time) + }) + + if len(entries) > maxEntries { + entries = entries[:maxEntries] + } + + if err := tmpl.Execute(os.Stdout, entries); err != nil { + log.Fatalf("error executing html template: %v\n", err) + } +} blob - /dev/null blob + 192f6c15aa7fd445f30d026d0174bef2020a5420 (mode 644) --- /dev/null +++ go.mod @@ -0,0 +1,7 @@ +module github.com/admacleod/eris + +go 1.17 + +require golang.org/x/net v0.0.0-20220111093109-d55c255bac03 + +require golang.org/x/text v0.3.7 // indirect blob - /dev/null blob + 536ae5f6563e6769b3c635390a7c42fdfd29960c (mode 644) --- /dev/null +++ go.sum @@ -0,0 +1,9 @@ +golang.org/x/net v0.0.0-20220111093109-d55c255bac03 h1:0FB83qp0AzVJm+0wcIlauAjJ+tNdh7jLuacRYCIVv7s= +golang.org/x/net v0.0.0-20220111093109-d55c255bac03/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=