diff --git a/analyst.go b/analyst.go
index e455d9a..53ee808 100644
--- a/analyst.go
+++ b/analyst.go
@@ -1,85 +1,67 @@
package main
import (
- "context"
- "fmt"
- "strings"
-
"github.com/mmcdole/gofeed"
- openai "github.com/sashabaranov/go-openai"
- "github.com/spf13/viper"
)
-const defaultLimit = 20 // default number of articles per feed for analysis
-var model = openai.GPT4o
+const (
+ // analystTag is appended to an item's link to build the key that
+ // collectArticlesForAnalysis passes to store.IsSeen and store.MarkAsSeen.
+ analystTag = "#analyst"
+ // defaultLimit is the number of items kept from each analyst feed.
+ defaultLimit = 20
+)
-func generateAnalysis(fp *gofeed.Parser, writer Writer) {
- if !viper.IsSet("analyst_feeds") || !viper.IsSet("analyst_prompt") {
+// RunAnalyst writes a "Daily Analysis" section produced by llm from the
+// articles collected out of cfg.AnalystFeeds.
+//
+// It returns without writing anything when cfg has no analyst feeds or no
+// analyst prompt, when llm is nil, when no articles are collected, or when
+// llm.Analyze returns an empty string.
+func RunAnalyst(cfg *Config, store *Storage, llm *LLMClient, writer Writer, fp *gofeed.Parser) {
+ // NOTE(review): AnalystPrompt is only checked here, never passed on;
+ // presumably llm already carries the prompt and model - confirm.
+ if len(cfg.AnalystFeeds) == 0 || cfg.AnalystPrompt == "" {
return
}
-
- analystFeeds := viper.GetStringSlice("analyst_feeds")
- analystPrompt := viper.GetString("analyst_prompt")
- analystModel := viper.GetString("analyst_model")
-
- var articleTitles []string
- for _, feedURL := range analystFeeds {
- parsedFeed := parseFeed(fp, feedURL, defaultLimit)
- if parsedFeed == nil {
- continue
- }
- for _, item := range parsedFeed.Items {
- seen, seen_today, summary := isSeenArticle(item, "#analyst")
- if seen {
- continue
- }
- articleTitles = append(articleTitles, item.Title+": "+item.Description) // add also description for better context
- if !seen_today {
- addToSeenTable(item.Link+"#analyst", summary)
- }
- }
+ if llm == nil {
+ return
}
- if len(articleTitles) == 0 {
+ articles := collectArticlesForAnalysis(cfg, store, fp)
+ if len(articles) == 0 {
return
}
- prompt := fmt.Sprintf("%s\n\n%s", analystPrompt, strings.Join(articleTitles, "\n"))
- analysis := getLLMAnalysis(prompt, analystModel)
-
+ analysis := llm.Analyze(articles)
if analysis != "" {
- writer.write("\n## Daily Analysis:\n")
- writer.write(analysis + "\n")
+ writer.Write("\n## Daily Analysis:\n")
+ writer.Write(analysis + "\n")
}
}
-func getLLMAnalysis(prompt string, analystModel string) string {
- clientConfig := openai.DefaultConfig(openaiApiKey)
- if openaiBaseURL != "" {
- clientConfig.BaseURL = openaiBaseURL
- }
- if analystModel != "" {
- model = analystModel
- }
- client := openai.NewClientWithConfig(clientConfig)
+// collectArticlesForAnalysis returns one "Title: Description" string for each
+// item of the feeds in cfg.AnalystFeeds that store.IsSeen does not report as seen.
+//
+// Only the first defaultLimit items of each feed are looked at. Items that
+// store.IsSeen does not report as seen today are recorded via store.MarkAsSeen
+// under the key item.Link + analystTag.
+func collectArticlesForAnalysis(cfg *Config, store *Storage, fp *gofeed.Parser) []string {
+ var articles []string
- resp, err := client.CreateChatCompletion(
- context.Background(),
- openai.ChatCompletionRequest{
- Model: model,
- Messages: []openai.ChatCompletionMessage{
- {
- Role: openai.ChatMessageRoleUser,
- Content: prompt,
- },
- },
- },
- )
+ for _, feedURL := range cfg.AnalystFeeds {
+ feed, err := fp.ParseURL(feedURL)
+ // NOTE(review): a feed that fails to parse is skipped without any
+ // message; the removed parseFeed helper printed the error.
+ if err != nil {
+ continue
+ }
- if err != nil {
- fmt.Printf("ChatCompletion error: %v\n", err)
- return ""
+ // Keep only the first defaultLimit items of the feed.
+ limit := defaultLimit
+ if len(feed.Items) > limit {
+ feed.Items = feed.Items[:limit]
+ }
+
+ for _, item := range feed.Items {
+ articleLink := item.Link + analystTag
+ seen, seenToday, summary := store.IsSeen(articleLink)
+
+ if seen {
+ continue
+ }
+
+ // The description is included alongside the title.
+ articleText := item.Title + ": " + item.Description
+ articles = append(articles, articleText)
+
+ if !seenToday {
+ // NOTE(review): this continue is the last statement of the loop
+ // body, so a MarkAsSeen failure is effectively ignored.
+ if err := store.MarkAsSeen(articleLink, summary); err != nil {
+ continue
+ }
+ }
+ }
}
- return resp.Choices[0].Message.Content
+ return articles
}
diff --git a/config.go b/config.go
index 690f4c3..0d57d76 100644
--- a/config.go
+++ b/config.go
@@ -1,7 +1,6 @@
package main
import (
- "database/sql"
"encoding/xml"
"flag"
"fmt"
@@ -44,47 +43,39 @@ analyst_prompt:
analyst_model:
`
-func parseOPML(xmlContent []byte) []RSS {
- o := Opml{}
- OpmlSlice := []RSS{}
- decoder := xml.NewDecoder(strings.NewReader(string(xmlContent)))
- decoder.Strict = false
- if err := decoder.Decode(&o); err != nil {
- log.Println(err)
- }
- for _, outline := range o.Body.Outline {
- if outline.XmlUrl != "" {
- OpmlSlice = append(OpmlSlice, RSS{url: outline.XmlUrl, limit: 20})
- }
- for _, feed := range outline.Outline {
- if feed.XmlUrl != "" {
- OpmlSlice = append(OpmlSlice, RSS{url: feed.XmlUrl, limit: 20})
- }
- }
- }
- return OpmlSlice
+// Config holds the settings that LoadConfig assembles from the viper-backed
+// configuration file and the command-line flags.
+type Config struct {
+ // Markdown output directory and file name prefix/suffix
+ // (markdown_dir_path, markdown_file_prefix, markdown_file_suffix).
+ MarkdownDirPath string
+ MarkdownFilePrefix string
+ MarkdownFileSuffix string
+ // Feeds is filled by loadFeeds.
+ Feeds []RSS
+ GoogleNewsKeywords string
+ Instapaper bool
+ WeatherLat float64
+ WeatherLon float64
+ // TerminalMode is true when terminal_mode is set in the config or -t is passed.
+ TerminalMode bool
+ ReadingTime bool
+ SunriseSunset bool
+ ShowImages bool
+ // OpenAI settings (openai_api_key, openai_base_url, openai_model, summary_prompt).
+ OpenAIKey string
+ OpenAIBaseURL string
+ OpenAIModel string
+ SummaryPrompt string
+ // Analyst settings; RunAnalyst does nothing unless AnalystFeeds and
+ // AnalystPrompt are both set.
+ AnalystFeeds []string
+ AnalystPrompt string
+ AnalystModel string
+ // DatabaseFilePath falls back to getDefaultDBPath() when database_file_path is empty.
+ DatabaseFilePath string
}
-func getFeedAndLimit(feedURL string) (string, int) {
- var limit = 20 // default limit
- chopped := strings.Split(feedURL, " ")
- if len(chopped) > 1 {
- var err error
- limit, err = strconv.Atoi(chopped[1])
- if err != nil {
- fatal(err)
- }
- }
- return chopped[0], limit
+// RSS describes one feed entry built by loadFeeds or parseOPML.
+type RSS struct {
+ // url is the feed address.
+ url string
+ // limit is the item limit attached to the feed (see getFeedAndLimit).
+ limit int
+ // summarize is set for entries that come from summary_feeds.
+ summarize bool
}
-func bootstrapConfig() {
- currentDir, direrr := os.Getwd()
- if direrr != nil {
- log.Println(direrr)
- }
- // if -t parameter is passed overwrite terminal_mode setting in config.yml
- flag.BoolVar(&terminalMode, "t", terminalMode, "Run Matcha in Terminal Mode, no markdown files will be created")
+func LoadConfig() (*Config, error) {
+ viper.SetDefault("limit", 20)
+
+ terminalMode := flag.Bool("t", false, "Run Matcha in Terminal Mode, no markdown files will be created")
configFile := flag.String("c", "", "Config file path (if you want to override the current directory config.yaml)")
opmlFile := flag.String("o", "", "OPML file path to append feeds from opml files")
build := flag.Bool("build", false, "Dev: Build matcha binaries in the bin directory")
@@ -95,125 +86,138 @@ func bootstrapConfig() {
os.Exit(0)
}
- // if -c parameter is passed overwrite config.yaml setting in config.yaml
- if len(*configFile) > 0 {
+ if *configFile != "" {
viper.SetConfigFile(*configFile)
} else {
viper.AddConfigPath(".")
- generateConfigFile(currentDir)
viper.SetConfigName("config")
+ // Generate default if not exists
+ if _, err := os.Stat("./config.yaml"); os.IsNotExist(err) {
+ generateConfigFile()
+ }
}
- err := viper.ReadInConfig()
- if err != nil {
- fmt.Print(err)
- panic("Error reading yaml configuration file")
+ if err := viper.ReadInConfig(); err != nil {
+ return nil, fmt.Errorf("error reading config: %w", err)
}
- if viper.IsSet("markdown_dir_path") {
- markdownDirPath = viper.Get("markdown_dir_path").(string)
- } else {
- markdownDirPath = currentDir
- }
- myFeeds = []RSS{}
- feeds := viper.Get("feeds")
- if viper.IsSet("weather_latitude") {
- lat = viper.Get("weather_latitude").(float64)
- }
- if viper.IsSet("weather_longitude") {
- lon = viper.Get("weather_longitude").(float64)
+ cfg := &Config{
+ MarkdownDirPath: viper.GetString("markdown_dir_path"),
+ MarkdownFilePrefix: viper.GetString("markdown_file_prefix"),
+ MarkdownFileSuffix: viper.GetString("markdown_file_suffix"),
+ GoogleNewsKeywords: viper.GetString("google_news_keywords"),
+ Instapaper: viper.GetBool("instapaper"),
+ WeatherLat: viper.GetFloat64("weather_latitude"),
+ WeatherLon: viper.GetFloat64("weather_longitude"),
+ TerminalMode: viper.GetBool("terminal_mode") || *terminalMode,
+ ReadingTime: viper.GetBool("reading_time"),
+ SunriseSunset: viper.GetBool("sunrise_sunset"),
+ ShowImages: viper.GetBool("show_images"),
+ OpenAIKey: viper.GetString("openai_api_key"),
+ OpenAIBaseURL: viper.GetString("openai_base_url"),
+ OpenAIModel: viper.GetString("openai_model"),
+ SummaryPrompt: viper.GetString("summary_prompt"),
+ AnalystFeeds: viper.GetStringSlice("analyst_feeds"),
+ AnalystPrompt: viper.GetString("analyst_prompt"),
+ AnalystModel: viper.GetString("analyst_model"),
+ DatabaseFilePath: viper.GetString("database_file_path"),
}
- if viper.IsSet("markdown_file_prefix") {
- mdPrefix = viper.Get("markdown_file_prefix").(string)
- }
- if viper.IsSet("markdown_file_suffix") {
- mdSuffix = viper.Get("markdown_file_suffix").(string)
- }
- if viper.IsSet("openai_api_key") {
- openaiApiKey = viper.Get("openai_api_key").(string)
- }
- if viper.IsSet("openai_base_url") {
- openaiBaseURL = viper.Get("openai_base_url").(string)
- }
- if viper.IsSet("openai_model") {
- openaiModel = viper.Get("openai_model").(string)
+
+ if cfg.MarkdownDirPath == "" {
+ wd, _ := os.Getwd()
+ cfg.MarkdownDirPath = wd
}
- if viper.IsSet("summary_prompt") {
- summaryPrompt = viper.Get("summary_prompt").(string)
+ if cfg.DatabaseFilePath == "" {
+ cfg.DatabaseFilePath = getDefaultDBPath()
}
- if viper.IsSet("summary_feeds") {
- summaryFeeds := viper.Get("summary_feeds")
- for _, summaryFeed := range summaryFeeds.([]any) {
- url, limit := getFeedAndLimit(summaryFeed.(string))
- myFeeds = append(myFeeds, RSS{url: url, limit: limit, summarize: true})
+ cfg.Feeds = loadFeeds(cfg, *opmlFile)
+
+ return cfg, nil
+}
+
+// loadFeeds builds the feed list in this order: summary_feeds entries (marked
+// summarize), feeds entries, feeds imported from OPML files, and finally a
+// Google News search feed when cfg.GoogleNewsKeywords is set.
+//
+// flagOpml is the path given with the -o flag; it may be empty.
+func loadFeeds(cfg *Config, flagOpml string) []RSS {
+ var feeds []RSS
+ // Summary feeds: each entry is "URL [LIMIT]".
+ // NOTE(review): both type assertions below are unchecked and panic when
+ // summary_feeds is not a list of strings.
+ rawSumFeeds := viper.Get("summary_feeds")
+ if rawSumFeeds != nil {
+ for _, f := range rawSumFeeds.([]interface{}) {
+ url, limit := getFeedAndLimit(f.(string))
+ feeds = append(feeds, RSS{url: url, limit: limit, summarize: true})
}
}
- if feeds != nil {
- for _, feed := range feeds.([]any) {
- url, limit := getFeedAndLimit(feed.(string))
- myFeeds = append(myFeeds, RSS{url: url, limit: limit})
- }
- }
- if viper.IsSet("google_news_keywords") {
- googleNewsKeywords := url.QueryEscape(viper.Get("google_news_keywords").(string))
- if googleNewsKeywords != "" {
- googleNewsUrl := "https://news.google.com/rss/search?hl=en-US&gl=US&ceid=US%3Aen&oc=11&q=" + strings.Join(strings.Split(googleNewsKeywords, "%2C"), "%20%7C%20")
- myFeeds = append(myFeeds, RSS{url: googleNewsUrl, limit: 15}) // #FIXME make it configurable
+ // Standard feeds: same "URL [LIMIT]" format, same unchecked assertions.
+ rawFeeds := viper.Get("feeds")
+ if rawFeeds != nil {
+ for _, f := range rawFeeds.([]interface{}) {
+ url, limit := getFeedAndLimit(f.(string))
+ feeds = append(feeds, RSS{url: url, limit: limit})
}
}
- // Import any config.opml file on current direcotory
- configPath := currentDir + "/" + "config.opml"
- if _, err := os.Stat(configPath); err == nil {
- xmlContent, _ := os.ReadFile(currentDir + "/" + "config.opml")
- myFeeds = append(myFeeds, parseOPML(xmlContent)...)
- }
- // Append any opml file added by -o parameter
- if len(*opmlFile) > 0 {
- xmlContent, _ := os.ReadFile(*opmlFile)
- myFeeds = append(myFeeds, parseOPML(xmlContent)...)
+ // OPML files, in order: config.opml (relative path), the opml_file_path
+ // config entry, then the -o flag value. Empty paths and files that
+ // cannot be read are skipped without any message.
+ opmlPaths := []string{"config.opml", viper.GetString("opml_file_path"), flagOpml}
+ for _, path := range opmlPaths {
+ if path != "" {
+ if content, err := os.ReadFile(path); err == nil {
+ feeds = append(feeds, parseOPML(content)...)
+ }
+ }
}
- // Append opml file from config.yml
- if viper.IsSet("opml_file_path") {
- xmlContent, _ := os.ReadFile(viper.Get("opml_file_path").(string))
- myFeeds = append(myFeeds, parseOPML(xmlContent)...)
+ // Google News: every escaped comma (%2C) in the keyword list is replaced
+ // by %20%7C%20 (" | ") before being appended to the search URL.
+ if cfg.GoogleNewsKeywords != "" {
+ escaped := url.QueryEscape(cfg.GoogleNewsKeywords)
+ googleNewsUrl := "https://news.google.com/rss/search?hl=en-US&gl=US&ceid=US%3Aen&oc=11&q=" + strings.Join(strings.Split(escaped, "%2C"), "%20%7C%20") // TODO
+ feeds = append(feeds, RSS{url: googleNewsUrl, limit: 15}) // #FIXME make it configurable
}
- instapaper = viper.GetBool("instapaper")
- reading_time = viper.GetBool("reading_time")
- show_images = viper.GetBool("show_images")
- sunrise_sunset = viper.GetBool("sunrise_sunset")
+ return feeds
+}
- // Overwrite terminal_mode from config file only if its not set through -t flag
- if !terminalMode {
- terminalMode = viper.GetBool("terminal_mode")
- }
+// getDefaultDBPath returns <user config dir>/brew/matcha.db and tries to
+// create the containing directory.
+//
+// Failures are logged instead of being discarded: the code this replaces
+// aborted on both errors, and dropping them silently would hide why the
+// database later cannot be opened.
+func getDefaultDBPath() string {
+	dir, err := os.UserConfigDir()
+	if err != nil {
+		// dir is empty here, so the path below is relative to the working directory.
+		log.Printf("could not determine user config dir: %v", err)
+	}
+	path := filepath.Join(dir, "brew", "matcha.db")
+	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+		log.Printf("could not create database directory %q: %v", filepath.Dir(path), err)
+	}
+	return path
+}
- databaseFilePath := viper.GetString("database_file_path")
- if databaseFilePath == "" {
- databaseDirPath, err := os.UserConfigDir()
- fatal(err)
- databaseFilePath = filepath.Join(databaseDirPath, "brew", "matcha.db")
- fatal(os.MkdirAll(filepath.Dir(databaseFilePath), os.ModePerm))
+// parseOPML decodes an OPML document and returns one RSS entry (limit 20)
+// for every outline that has an XmlUrl, looking at top-level outlines and
+// their direct children only.
+//
+// A decode error is logged and whatever was decoded up to that point is
+// still used; the result is never nil.
+func parseOPML(xmlContent []byte) []RSS {
+ o := Opml{}
+ OpmlSlice := []RSS{}
+ decoder := xml.NewDecoder(strings.NewReader(string(xmlContent)))
+ // Non-strict mode makes the decoder tolerant of some malformed XML.
+ decoder.Strict = false
+ if err := decoder.Decode(&o); err != nil {
+ log.Println(err)
}
-
- db, err = sql.Open("sqlite", databaseFilePath)
- fatal(err)
- err = applyMigrations(db)
- if err != nil {
- log.Println("Coudn't apply migrations:", err)
+ for _, outline := range o.Body.Outline {
+ if outline.XmlUrl != "" {
+ OpmlSlice = append(OpmlSlice, RSS{url: outline.XmlUrl, limit: 20})
+ }
+ // One level of nesting: outlines grouped under this outline.
+ for _, feed := range outline.Outline {
+ if feed.XmlUrl != "" {
+ OpmlSlice = append(OpmlSlice, RSS{url: feed.XmlUrl, limit: 20})
+ }
+ }
}
+ return OpmlSlice
+}
- if !terminalMode {
- markdown_file_name := mdPrefix + currentDate + mdSuffix + ".md"
- os.Remove(filepath.Join(markdownDirPath, markdown_file_name))
+// getFeedAndLimit splits a feed entry of the form "URL [LIMIT]" into the URL
+// and its item limit. The limit defaults to 20 when the entry has none.
+//
+// An empty or whitespace-only entry yields ("", 20). A LIMIT that is not an
+// integer terminates the program via log.Fatalf.
+func getFeedAndLimit(feedURL string) (string, int) {
+	const fallbackLimit = 20 // used when the entry carries no explicit limit
+	// strings.Fields tolerates tabs and repeated, leading or trailing
+	// whitespace; splitting on a single space turned those into empty tokens
+	// and a fatal Atoi("") error.
+	parts := strings.Fields(feedURL)
+	if len(parts) == 0 {
+		return "", fallbackLimit
+	}
+	if len(parts) == 1 {
+		return parts[0], fallbackLimit
+	}
+	limit, err := strconv.Atoi(parts[1])
+	if err != nil {
+		log.Fatalf("Error getting limit on feed: %v", err)
+	}
+	return parts[0], limit
}
-func generateConfigFile(currentDir string) {
+func generateConfigFile() {
+ currentDir, _ := os.Getwd()
configPath := currentDir + "/" + "config.yaml"
if _, err := os.Stat(configPath); err == nil {
// File exists, dont do anything
diff --git a/feeds_writer.go b/feeds_writer.go
deleted file mode 100644
index 2c35603..0000000
--- a/feeds_writer.go
+++ /dev/null
@@ -1,289 +0,0 @@
-package main
-
-import (
- "database/sql"
- "fmt"
- "log"
- "net/url"
- "strconv"
- "strings"
- "time"
-
- "github.com/PuerkitoBio/goquery"
- readability "github.com/go-shiori/go-readability"
- "github.com/mmcdole/gofeed"
-)
-
-var markdownDirPath string
-var mdPrefix, mdSuffix string
-var terminalMode bool = false
-var currentDate = time.Now().Format("2006-01-02")
-var lat, lon float64
-var instapaper bool
-var openaiApiKey string
-var openaiBaseURL string
-var openaiModel string
-var reading_time bool
-var show_images bool
-var sunrise_sunset bool
-var myFeeds []RSS
-var db *sql.DB
-var summaryPrompt string
-
-type RSS struct {
- url string
- limit int
- summarize bool
-}
-
-type Writer interface {
- write(body string)
- writeLink(title string, url string, newline bool, readingTime string) string
- writeSummary(content string, newline bool) string
- writeFavicon(s *gofeed.Feed) string
-}
-
-func getWriter() Writer {
- if terminalMode {
- return TerminalWriter{}
- }
- return MarkdownWriter{}
-}
-
-func fatal(e error) {
- if e != nil {
- log.Fatal(e)
- }
-}
-
-func getReadingTime(link string) string {
- article, err := readability.FromURL(link, 30*time.Second)
- if err != nil {
- return "" // Just dont display any reading time if can't get the article text
- }
-
- // get number of words in a string
- words := strings.Fields(article.TextContent)
-
- // assuming average reading time is 200 words per minute calculate reading time of the article
- readingTime := float64(len(words)) / float64(200)
- minutes := int(readingTime)
-
- // if minutes is zero return an empty string
- if minutes == 0 {
- return ""
- }
-
- return strconv.Itoa(minutes) + " min"
-}
-
-func (w MarkdownWriter) writeFavicon(s *gofeed.Feed) string {
- var src string
- if s.FeedLink == "" {
- // default feed favicon
- return "šµ"
-
- } else {
- u, err := url.Parse(s.FeedLink)
- if err != nil {
- fmt.Println(err)
- }
- src = "https://www.google.com/s2/favicons?sz=32&domain=" + u.Hostname()
- }
- // if s.Title contains "hacker news"
- if strings.Contains(s.Title, "Hacker News") {
- src = "https://news.ycombinator.com/favicon.ico"
- }
-
- //return html image tag of favicon
- return fmt.Sprintf("", src)
-}
-
-func ExtractImageTagFromHTML(htmlText string) string {
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlText))
- if err != nil {
- return "" // Error occurred while parsing HTML
- }
-
- imgTags := doc.Find("img")
-
- if imgTags.Length() == 0 {
- return "" // No img tag found, return empty string
- }
-
- firstImgTag := imgTags.First()
-
- width := firstImgTag.AttrOr("width", "")
- height := firstImgTag.AttrOr("height", "")
-
- // If both width and height are present, calculate the aspect ratio and set the maximum width
- if width != "" && height != "" {
- widthInt, _ := strconv.Atoi(width)
- heightInt, _ := strconv.Atoi(height)
-
- if widthInt > 0 && heightInt > 0 {
- aspectRatio := float64(widthInt) / float64(heightInt)
- maxWidth := 400
-
- if widthInt > maxWidth {
- widthInt = maxWidth
- heightInt = int(float64(widthInt) / aspectRatio)
- }
-
- firstImgTag.SetAttr("width", fmt.Sprintf("%d", widthInt))
- firstImgTag.SetAttr("height", fmt.Sprintf("%d", heightInt))
- }
- }
-
- html, err := goquery.OuterHtml(firstImgTag)
- if err != nil {
- return "" // Error occurred while extracting the HTML of the img tag
- }
-
- return html // Return the modified img tag
-}
-
-// Parses the feed URL and returns the feed object
-func parseFeed(fp *gofeed.Parser, url string, limit int) *gofeed.Feed {
- feed, err := fp.ParseURL(url)
- if err != nil {
- fmt.Printf("Error parsing %s with error: %s", url, err)
- return nil
- }
-
- if len(feed.Items) > limit {
- feed.Items = feed.Items[:limit]
- }
-
- return feed
-}
-
-// Generates the feed items and returns them as a string
-func generateFeedItems(w Writer, feed *gofeed.Feed, rss RSS) string {
- var items string
-
- for _, item := range feed.Items {
- seen, seen_today, summary := isSeenArticle(item, "")
- if seen {
- continue
- }
- title, link := getFeedTitleAndLink(item)
- if summary == "" {
- summary = getSummary(rss, item, link)
- }
- // Add the comments link if it's a Hacker News feed
- if strings.Contains(feed.Link, "news.ycombinator.com") {
- commentsLink, commentsCount := getCommentsInfo(item)
- if commentsCount < 100 {
- items += w.writeLink("š¬ ", commentsLink, false, "")
- } else {
- items += w.writeLink("š„ ", commentsLink, false, "")
- }
- }
-
- // Add the Instapaper link if enabled
- if instapaper && !terminalMode {
- items += getInstapaperLink(item.Link)
- }
-
- // Support RSS with no Title (such as Mastodon), use Description instead
- if title == "" {
- title = stripHtmlRegex(item.Description)
- }
-
- timeInMin := ""
- if reading_time {
- timeInMin = getReadingTime(link)
- }
-
- items += w.writeLink(title, link, true, timeInMin)
- if rss.summarize {
- items += w.writeSummary(summary, true)
- }
-
- if show_images && !terminalMode {
- img := ExtractImageTagFromHTML(item.Content)
- if img != "" {
- items += img + "\n"
- }
- }
-
- // Add the item to the seen table if not seen today
- if !seen_today {
- addToSeenTable(item.Link, summary)
- }
- }
-
- return items
-}
-
-// Writes the feed and its items
-func writeFeed(w Writer, feed *gofeed.Feed, items string) {
- w.write(fmt.Sprintf("\n### %s %s\n%s", w.writeFavicon(feed), feed.Title, items))
-}
-
-// Returns the title and link for the given feed item
-func getFeedTitleAndLink(item *gofeed.Item) (string, string) {
- return item.Title, item.Link
-}
-
-// Returns the summary for the given feed item
-func getSummary(rss RSS, item *gofeed.Item, link string) string {
- if !rss.summarize {
- return ""
- }
-
- summary := getSummaryFromLink(link)
- if summary == "" {
- summary = item.Description
- }
-
- return summary
-}
-
-// Returns the comments link and count for the given feed item
-func getCommentsInfo(item *gofeed.Item) (string, int) {
- first_index := strings.Index(item.Description, "Comments URL") + 23
- comments_url := item.Description[first_index : first_index+45]
- // Find Comments number
- first_comments_index := strings.Index(item.Description, "Comments:") + 10
- // replace