From a60c93b42b436db8acae7e3f84b4358746581ba4 Mon Sep 17 00:00:00 2001 From: Edi Piqoni Date: Sun, 30 Nov 2025 19:16:31 +0100 Subject: [PATCH] refactoring, weather fix --- analyst.go | 106 +++++++---------- config.go | 258 ++++++++++++++++++++-------------------- feeds_writer.go | 289 --------------------------------------------- llm.go | 110 +++++++++++++++++ main.go | 50 +++++--- markdown_writer.go | 78 +++++++++--- migrations.go | 58 ++++++++- processor.go | 205 ++++++++++++++++++++++++++++++++ summarize.go | 71 ----------- terminal_writer.go | 11 +- utils.go | 13 -- weather.go | 66 +++++++---- writer.go | 12 ++ 13 files changed, 695 insertions(+), 632 deletions(-) delete mode 100644 feeds_writer.go create mode 100644 llm.go create mode 100644 processor.go delete mode 100644 summarize.go delete mode 100644 utils.go create mode 100644 writer.go diff --git a/analyst.go b/analyst.go index e455d9a..53ee808 100644 --- a/analyst.go +++ b/analyst.go @@ -1,85 +1,67 @@ package main import ( - "context" - "fmt" - "strings" - "github.com/mmcdole/gofeed" - openai "github.com/sashabaranov/go-openai" - "github.com/spf13/viper" ) -const defaultLimit = 20 // default number of articles per feed for analysis -var model = openai.GPT4o +const ( + analystTag = "#analyst" + defaultLimit = 20 +) -func generateAnalysis(fp *gofeed.Parser, writer Writer) { - if !viper.IsSet("analyst_feeds") || !viper.IsSet("analyst_prompt") { +func RunAnalyst(cfg *Config, store *Storage, llm *LLMClient, writer Writer, fp *gofeed.Parser) { + if len(cfg.AnalystFeeds) == 0 || cfg.AnalystPrompt == "" { return } - - analystFeeds := viper.GetStringSlice("analyst_feeds") - analystPrompt := viper.GetString("analyst_prompt") - analystModel := viper.GetString("analyst_model") - - var articleTitles []string - for _, feedURL := range analystFeeds { - parsedFeed := parseFeed(fp, feedURL, defaultLimit) - if parsedFeed == nil { - continue - } - for _, item := range parsedFeed.Items { - seen, seen_today, summary := isSeenArticle(item, "#analyst") - if seen { - continue - } - articleTitles = append(articleTitles, item.Title+": "+item.Description) // add also description for better context - if !seen_today { - addToSeenTable(item.Link+"#analyst", summary) - } - } + if llm == nil { + return } - if len(articleTitles) == 0 { + articles := collectArticlesForAnalysis(cfg, store, fp) + if len(articles) == 0 { return } - prompt := fmt.Sprintf("%s\n\n%s", analystPrompt, strings.Join(articleTitles, "\n")) - analysis := getLLMAnalysis(prompt, analystModel) - + analysis := llm.Analyze(articles) if analysis != "" { - writer.write("\n## Daily Analysis:\n") - writer.write(analysis + "\n") + writer.Write("\n## Daily Analysis:\n") + writer.Write(analysis + "\n") } } -func getLLMAnalysis(prompt string, analystModel string) string { - clientConfig := openai.DefaultConfig(openaiApiKey) - if openaiBaseURL != "" { - clientConfig.BaseURL = openaiBaseURL - } - if analystModel != "" { - model = analystModel - } - client := openai.NewClientWithConfig(clientConfig) +func collectArticlesForAnalysis(cfg *Config, store *Storage, fp *gofeed.Parser) []string { + var articles []string - resp, err := client.CreateChatCompletion( - context.Background(), - openai.ChatCompletionRequest{ - Model: model, - Messages: []openai.ChatCompletionMessage{ - { - Role: openai.ChatMessageRoleUser, - Content: prompt, - }, - }, - }, - ) + for _, feedURL := range cfg.AnalystFeeds { + feed, err := fp.ParseURL(feedURL) + if err != nil { + continue + } - if err != nil { - fmt.Printf("ChatCompletion 
error: %v\n", err) - return "" + // Limit items + limit := defaultLimit + if len(feed.Items) > limit { + feed.Items = feed.Items[:limit] + } + + for _, item := range feed.Items { + articleLink := item.Link + analystTag + seen, seenToday, summary := store.IsSeen(articleLink) + + if seen { + continue + } + + articleText := item.Title + ": " + item.Description + articles = append(articles, articleText) + + if !seenToday { + if err := store.MarkAsSeen(articleLink, summary); err != nil { + continue + } + } + } } - return resp.Choices[0].Message.Content + return articles } diff --git a/config.go b/config.go index 690f4c3..0d57d76 100644 --- a/config.go +++ b/config.go @@ -1,7 +1,6 @@ package main import ( - "database/sql" "encoding/xml" "flag" "fmt" @@ -44,47 +43,39 @@ analyst_prompt: analyst_model: ` -func parseOPML(xmlContent []byte) []RSS { - o := Opml{} - OpmlSlice := []RSS{} - decoder := xml.NewDecoder(strings.NewReader(string(xmlContent))) - decoder.Strict = false - if err := decoder.Decode(&o); err != nil { - log.Println(err) - } - for _, outline := range o.Body.Outline { - if outline.XmlUrl != "" { - OpmlSlice = append(OpmlSlice, RSS{url: outline.XmlUrl, limit: 20}) - } - for _, feed := range outline.Outline { - if feed.XmlUrl != "" { - OpmlSlice = append(OpmlSlice, RSS{url: feed.XmlUrl, limit: 20}) - } - } - } - return OpmlSlice +type Config struct { + MarkdownDirPath string + MarkdownFilePrefix string + MarkdownFileSuffix string + Feeds []RSS + GoogleNewsKeywords string + Instapaper bool + WeatherLat float64 + WeatherLon float64 + TerminalMode bool + ReadingTime bool + SunriseSunset bool + ShowImages bool + OpenAIKey string + OpenAIBaseURL string + OpenAIModel string + SummaryPrompt string + AnalystFeeds []string + AnalystPrompt string + AnalystModel string + DatabaseFilePath string } -func getFeedAndLimit(feedURL string) (string, int) { - var limit = 20 // default limit - chopped := strings.Split(feedURL, " ") - if len(chopped) > 1 { - var err error - limit, err = strconv.Atoi(chopped[1]) - if err != nil { - fatal(err) - } - } - return chopped[0], limit +type RSS struct { + url string + limit int + summarize bool } -func bootstrapConfig() { - currentDir, direrr := os.Getwd() - if direrr != nil { - log.Println(direrr) - } - // if -t parameter is passed overwrite terminal_mode setting in config.yml - flag.BoolVar(&terminalMode, "t", terminalMode, "Run Matcha in Terminal Mode, no markdown files will be created") +func LoadConfig() (*Config, error) { + viper.SetDefault("limit", 20) + + terminalMode := flag.Bool("t", false, "Run Matcha in Terminal Mode, no markdown files will be created") configFile := flag.String("c", "", "Config file path (if you want to override the current directory config.yaml)") opmlFile := flag.String("o", "", "OPML file path to append feeds from opml files") build := flag.Bool("build", false, "Dev: Build matcha binaries in the bin directory") @@ -95,125 +86,138 @@ func bootstrapConfig() { os.Exit(0) } - // if -c parameter is passed overwrite config.yaml setting in config.yaml - if len(*configFile) > 0 { + if *configFile != "" { viper.SetConfigFile(*configFile) } else { viper.AddConfigPath(".") - generateConfigFile(currentDir) viper.SetConfigName("config") + // Generate default if not exists + if _, err := os.Stat("./config.yaml"); os.IsNotExist(err) { + generateConfigFile() + } } - err := viper.ReadInConfig() - if err != nil { - fmt.Print(err) - panic("Error reading yaml configuration file") + if err := viper.ReadInConfig(); err != nil { + return nil, 
fmt.Errorf("error reading config: %w", err) } - if viper.IsSet("markdown_dir_path") { - markdownDirPath = viper.Get("markdown_dir_path").(string) - } else { - markdownDirPath = currentDir - } - myFeeds = []RSS{} - feeds := viper.Get("feeds") - if viper.IsSet("weather_latitude") { - lat = viper.Get("weather_latitude").(float64) - } - if viper.IsSet("weather_longitude") { - lon = viper.Get("weather_longitude").(float64) + cfg := &Config{ + MarkdownDirPath: viper.GetString("markdown_dir_path"), + MarkdownFilePrefix: viper.GetString("markdown_file_prefix"), + MarkdownFileSuffix: viper.GetString("markdown_file_suffix"), + GoogleNewsKeywords: viper.GetString("google_news_keywords"), + Instapaper: viper.GetBool("instapaper"), + WeatherLat: viper.GetFloat64("weather_latitude"), + WeatherLon: viper.GetFloat64("weather_longitude"), + TerminalMode: viper.GetBool("terminal_mode") || *terminalMode, + ReadingTime: viper.GetBool("reading_time"), + SunriseSunset: viper.GetBool("sunrise_sunset"), + ShowImages: viper.GetBool("show_images"), + OpenAIKey: viper.GetString("openai_api_key"), + OpenAIBaseURL: viper.GetString("openai_base_url"), + OpenAIModel: viper.GetString("openai_model"), + SummaryPrompt: viper.GetString("summary_prompt"), + AnalystFeeds: viper.GetStringSlice("analyst_feeds"), + AnalystPrompt: viper.GetString("analyst_prompt"), + AnalystModel: viper.GetString("analyst_model"), + DatabaseFilePath: viper.GetString("database_file_path"), } - if viper.IsSet("markdown_file_prefix") { - mdPrefix = viper.Get("markdown_file_prefix").(string) - } - if viper.IsSet("markdown_file_suffix") { - mdSuffix = viper.Get("markdown_file_suffix").(string) - } - if viper.IsSet("openai_api_key") { - openaiApiKey = viper.Get("openai_api_key").(string) - } - if viper.IsSet("openai_base_url") { - openaiBaseURL = viper.Get("openai_base_url").(string) - } - if viper.IsSet("openai_model") { - openaiModel = viper.Get("openai_model").(string) + + if cfg.MarkdownDirPath == "" { + wd, _ := os.Getwd() + cfg.MarkdownDirPath = wd } - if viper.IsSet("summary_prompt") { - summaryPrompt = viper.Get("summary_prompt").(string) + if cfg.DatabaseFilePath == "" { + cfg.DatabaseFilePath = getDefaultDBPath() } - if viper.IsSet("summary_feeds") { - summaryFeeds := viper.Get("summary_feeds") - for _, summaryFeed := range summaryFeeds.([]any) { - url, limit := getFeedAndLimit(summaryFeed.(string)) - myFeeds = append(myFeeds, RSS{url: url, limit: limit, summarize: true}) + cfg.Feeds = loadFeeds(cfg, *opmlFile) + + return cfg, nil +} + +func loadFeeds(cfg *Config, flagOpml string) []RSS { + var feeds []RSS + // Summary Feeds + rawSumFeeds := viper.Get("summary_feeds") + if rawSumFeeds != nil { + for _, f := range rawSumFeeds.([]interface{}) { + url, limit := getFeedAndLimit(f.(string)) + feeds = append(feeds, RSS{url: url, limit: limit, summarize: true}) } } - if feeds != nil { - for _, feed := range feeds.([]any) { - url, limit := getFeedAndLimit(feed.(string)) - myFeeds = append(myFeeds, RSS{url: url, limit: limit}) - } - } - if viper.IsSet("google_news_keywords") { - googleNewsKeywords := url.QueryEscape(viper.Get("google_news_keywords").(string)) - if googleNewsKeywords != "" { - googleNewsUrl := "https://news.google.com/rss/search?hl=en-US&gl=US&ceid=US%3Aen&oc=11&q=" + strings.Join(strings.Split(googleNewsKeywords, "%2C"), "%20%7C%20") - myFeeds = append(myFeeds, RSS{url: googleNewsUrl, limit: 15}) // #FIXME make it configurable + // Standard Feeds + rawFeeds := viper.Get("feeds") + if rawFeeds != nil { + for _, f := range 
rawFeeds.([]interface{}) { + url, limit := getFeedAndLimit(f.(string)) + feeds = append(feeds, RSS{url: url, limit: limit}) } } - // Import any config.opml file on current direcotory - configPath := currentDir + "/" + "config.opml" - if _, err := os.Stat(configPath); err == nil { - xmlContent, _ := os.ReadFile(currentDir + "/" + "config.opml") - myFeeds = append(myFeeds, parseOPML(xmlContent)...) - } - // Append any opml file added by -o parameter - if len(*opmlFile) > 0 { - xmlContent, _ := os.ReadFile(*opmlFile) - myFeeds = append(myFeeds, parseOPML(xmlContent)...) + // OPML Files (Config dir + Flag) + opmlPaths := []string{"config.opml", viper.GetString("opml_file_path"), flagOpml} + for _, path := range opmlPaths { + if path != "" { + if content, err := os.ReadFile(path); err == nil { + feeds = append(feeds, parseOPML(content)...) + } + } } - // Append opml file from config.yml - if viper.IsSet("opml_file_path") { - xmlContent, _ := os.ReadFile(viper.Get("opml_file_path").(string)) - myFeeds = append(myFeeds, parseOPML(xmlContent)...) + if cfg.GoogleNewsKeywords != "" { + escaped := url.QueryEscape(cfg.GoogleNewsKeywords) + googleNewsUrl := "https://news.google.com/rss/search?hl=en-US&gl=US&ceid=US%3Aen&oc=11&q=" + strings.Join(strings.Split(escaped, "%2C"), "%20%7C%20") // TODO + feeds = append(feeds, RSS{url: googleNewsUrl, limit: 15}) // #FIXME make it configurable } - instapaper = viper.GetBool("instapaper") - reading_time = viper.GetBool("reading_time") - show_images = viper.GetBool("show_images") - sunrise_sunset = viper.GetBool("sunrise_sunset") + return feeds +} - // Overwrite terminal_mode from config file only if its not set through -t flag - if !terminalMode { - terminalMode = viper.GetBool("terminal_mode") - } +func getDefaultDBPath() string { + dir, _ := os.UserConfigDir() + path := filepath.Join(dir, "brew", "matcha.db") + _ = os.MkdirAll(filepath.Dir(path), 0755) + return path +} - databaseFilePath := viper.GetString("database_file_path") - if databaseFilePath == "" { - databaseDirPath, err := os.UserConfigDir() - fatal(err) - databaseFilePath = filepath.Join(databaseDirPath, "brew", "matcha.db") - fatal(os.MkdirAll(filepath.Dir(databaseFilePath), os.ModePerm)) +func parseOPML(xmlContent []byte) []RSS { + o := Opml{} + OpmlSlice := []RSS{} + decoder := xml.NewDecoder(strings.NewReader(string(xmlContent))) + decoder.Strict = false + if err := decoder.Decode(&o); err != nil { + log.Println(err) } - - db, err = sql.Open("sqlite", databaseFilePath) - fatal(err) - err = applyMigrations(db) - if err != nil { - log.Println("Coudn't apply migrations:", err) + for _, outline := range o.Body.Outline { + if outline.XmlUrl != "" { + OpmlSlice = append(OpmlSlice, RSS{url: outline.XmlUrl, limit: 20}) + } + for _, feed := range outline.Outline { + if feed.XmlUrl != "" { + OpmlSlice = append(OpmlSlice, RSS{url: feed.XmlUrl, limit: 20}) + } + } } + return OpmlSlice +} - if !terminalMode { - markdown_file_name := mdPrefix + currentDate + mdSuffix + ".md" - os.Remove(filepath.Join(markdownDirPath, markdown_file_name)) +func getFeedAndLimit(feedURL string) (string, int) { + var limit = 20 // default limit + chopped := strings.Split(feedURL, " ") + if len(chopped) > 1 { + var err error + limit, err = strconv.Atoi(chopped[1]) + if err != nil { + log.Fatalf("Error getting limit on feed: %v", err) + } } + return chopped[0], limit } -func generateConfigFile(currentDir string) { +func generateConfigFile() { + currentDir, _ := os.Getwd() configPath := currentDir + "/" + "config.yaml" if _, err := 
os.Stat(configPath); err == nil { // File exists, dont do anything diff --git a/feeds_writer.go b/feeds_writer.go deleted file mode 100644 index 2c35603..0000000 --- a/feeds_writer.go +++ /dev/null @@ -1,289 +0,0 @@ -package main - -import ( - "database/sql" - "fmt" - "log" - "net/url" - "strconv" - "strings" - "time" - - "github.com/PuerkitoBio/goquery" - readability "github.com/go-shiori/go-readability" - "github.com/mmcdole/gofeed" -) - -var markdownDirPath string -var mdPrefix, mdSuffix string -var terminalMode bool = false -var currentDate = time.Now().Format("2006-01-02") -var lat, lon float64 -var instapaper bool -var openaiApiKey string -var openaiBaseURL string -var openaiModel string -var reading_time bool -var show_images bool -var sunrise_sunset bool -var myFeeds []RSS -var db *sql.DB -var summaryPrompt string - -type RSS struct { - url string - limit int - summarize bool -} - -type Writer interface { - write(body string) - writeLink(title string, url string, newline bool, readingTime string) string - writeSummary(content string, newline bool) string - writeFavicon(s *gofeed.Feed) string -} - -func getWriter() Writer { - if terminalMode { - return TerminalWriter{} - } - return MarkdownWriter{} -} - -func fatal(e error) { - if e != nil { - log.Fatal(e) - } -} - -func getReadingTime(link string) string { - article, err := readability.FromURL(link, 30*time.Second) - if err != nil { - return "" // Just dont display any reading time if can't get the article text - } - - // get number of words in a string - words := strings.Fields(article.TextContent) - - // assuming average reading time is 200 words per minute calculate reading time of the article - readingTime := float64(len(words)) / float64(200) - minutes := int(readingTime) - - // if minutes is zero return an empty string - if minutes == 0 { - return "" - } - - return strconv.Itoa(minutes) + " min" -} - -func (w MarkdownWriter) writeFavicon(s *gofeed.Feed) string { - var src string - if s.FeedLink == "" { - // default feed favicon - return "šŸµ" - - } else { - u, err := url.Parse(s.FeedLink) - if err != nil { - fmt.Println(err) - } - src = "https://www.google.com/s2/favicons?sz=32&domain=" + u.Hostname() - } - // if s.Title contains "hacker news" - if strings.Contains(s.Title, "Hacker News") { - src = "https://news.ycombinator.com/favicon.ico" - } - - //return html image tag of favicon - return fmt.Sprintf("", src) -} - -func ExtractImageTagFromHTML(htmlText string) string { - doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlText)) - if err != nil { - return "" // Error occurred while parsing HTML - } - - imgTags := doc.Find("img") - - if imgTags.Length() == 0 { - return "" // No img tag found, return empty string - } - - firstImgTag := imgTags.First() - - width := firstImgTag.AttrOr("width", "") - height := firstImgTag.AttrOr("height", "") - - // If both width and height are present, calculate the aspect ratio and set the maximum width - if width != "" && height != "" { - widthInt, _ := strconv.Atoi(width) - heightInt, _ := strconv.Atoi(height) - - if widthInt > 0 && heightInt > 0 { - aspectRatio := float64(widthInt) / float64(heightInt) - maxWidth := 400 - - if widthInt > maxWidth { - widthInt = maxWidth - heightInt = int(float64(widthInt) / aspectRatio) - } - - firstImgTag.SetAttr("width", fmt.Sprintf("%d", widthInt)) - firstImgTag.SetAttr("height", fmt.Sprintf("%d", heightInt)) - } - } - - html, err := goquery.OuterHtml(firstImgTag) - if err != nil { - return "" // Error occurred while extracting the HTML of 
the img tag - } - - return html // Return the modified img tag -} - -// Parses the feed URL and returns the feed object -func parseFeed(fp *gofeed.Parser, url string, limit int) *gofeed.Feed { - feed, err := fp.ParseURL(url) - if err != nil { - fmt.Printf("Error parsing %s with error: %s", url, err) - return nil - } - - if len(feed.Items) > limit { - feed.Items = feed.Items[:limit] - } - - return feed -} - -// Generates the feed items and returns them as a string -func generateFeedItems(w Writer, feed *gofeed.Feed, rss RSS) string { - var items string - - for _, item := range feed.Items { - seen, seen_today, summary := isSeenArticle(item, "") - if seen { - continue - } - title, link := getFeedTitleAndLink(item) - if summary == "" { - summary = getSummary(rss, item, link) - } - // Add the comments link if it's a Hacker News feed - if strings.Contains(feed.Link, "news.ycombinator.com") { - commentsLink, commentsCount := getCommentsInfo(item) - if commentsCount < 100 { - items += w.writeLink("šŸ’¬ ", commentsLink, false, "") - } else { - items += w.writeLink("šŸ”„ ", commentsLink, false, "") - } - } - - // Add the Instapaper link if enabled - if instapaper && !terminalMode { - items += getInstapaperLink(item.Link) - } - - // Support RSS with no Title (such as Mastodon), use Description instead - if title == "" { - title = stripHtmlRegex(item.Description) - } - - timeInMin := "" - if reading_time { - timeInMin = getReadingTime(link) - } - - items += w.writeLink(title, link, true, timeInMin) - if rss.summarize { - items += w.writeSummary(summary, true) - } - - if show_images && !terminalMode { - img := ExtractImageTagFromHTML(item.Content) - if img != "" { - items += img + "\n" - } - } - - // Add the item to the seen table if not seen today - if !seen_today { - addToSeenTable(item.Link, summary) - } - } - - return items -} - -// Writes the feed and its items -func writeFeed(w Writer, feed *gofeed.Feed, items string) { - w.write(fmt.Sprintf("\n### %s %s\n%s", w.writeFavicon(feed), feed.Title, items)) -} - -// Returns the title and link for the given feed item -func getFeedTitleAndLink(item *gofeed.Item) (string, string) { - return item.Title, item.Link -} - -// Returns the summary for the given feed item -func getSummary(rss RSS, item *gofeed.Item, link string) string { - if !rss.summarize { - return "" - } - - summary := getSummaryFromLink(link) - if summary == "" { - summary = item.Description - } - - return summary -} - -// Returns the comments link and count for the given feed item -func getCommentsInfo(item *gofeed.Item) (string, int) { - first_index := strings.Index(item.Description, "Comments URL") + 23 - comments_url := item.Description[first_index : first_index+45] - // Find Comments number - first_comments_index := strings.Index(item.Description, "Comments:") + 10 - // replace
&#160; with empty string
-	comments_number := strings.Replace(item.Description[first_comments_index:], "&#160;
\n", "", -1) - comments_number_int, _ := strconv.Atoi(comments_number) - // return the link and the number of comments - return comments_url, comments_number_int -} - -func addToSeenTable(link string, summary string) { - stmt, err := db.Prepare("INSERT INTO seen(url, date, summary) values(?,?,?)") - fatal(err) - res, err := stmt.Exec(link, currentDate, summary) - fatal(err) - _ = res - stmt.Close() -} - -func getInstapaperLink(link string) string { - return "[](https://www.instapaper.com/hello2?url=" + link + ")" -} - -func isSeenArticle(item *gofeed.Item, postfix string) (seen bool, today bool, summaryText string) { - var url string - var date string - var summary sql.NullString - err := db.QueryRow("SELECT url, date, summary FROM seen WHERE url=?", item.Link+postfix).Scan(&url, &date, &summary) - if err != nil && err != sql.ErrNoRows { - fmt.Println(err) - return false, false, "" - } - - if summary.Valid { - summaryText = summary.String - } else { - summaryText = "" - } - - seen = url != "" && date != currentDate - today = url != "" && date == currentDate - return seen, today, summaryText -} diff --git a/llm.go b/llm.go new file mode 100644 index 0000000..746aeeb --- /dev/null +++ b/llm.go @@ -0,0 +1,110 @@ +package main + +import ( + "context" + "fmt" + "strings" + + openai "github.com/sashabaranov/go-openai" +) + +type LLMClient struct { + client *openai.Client + config *Config +} + +func NewLLMClient(cfg *Config) *LLMClient { + cConfig := openai.DefaultConfig(cfg.OpenAIKey) + if cfg.OpenAIBaseURL != "" { + cConfig.BaseURL = cfg.OpenAIBaseURL + } + return &LLMClient{ + client: openai.NewClientWithConfig(cConfig), + config: cfg, + } +} + +func (l *LLMClient) Summarize(text string) string { + fmt.Println("summarize invoked") + if l == nil || l.client == nil { + return "" + } + + const maxCharactersToSummarize = 5000 + const minCharactersToSummarize = 200 + + if len(text) > maxCharactersToSummarize { + text = text[:maxCharactersToSummarize] + } + + // Don't summarize if the article is too short + if len(text) < minCharactersToSummarize { + return "" + } + + prompt := l.config.SummaryPrompt + if prompt == "" { + prompt = "Summarize the following text:" + } + + model := l.config.OpenAIModel + if model == "" { + model = openai.GPT3Dot5Turbo // TODO: change this + } + + resp, err := l.client.CreateChatCompletion( + context.Background(), + openai.ChatCompletionRequest{ + Model: model, + Messages: []openai.ChatCompletionMessage{ + { + Role: openai.ChatMessageRoleSystem, + Content: prompt, + }, + { + Role: openai.ChatMessageRoleUser, + Content: text, + }, + }, + }, + ) + + if err != nil { + fmt.Printf("Summarization error: %v\n", err) + return "" + } + + if len(resp.Choices) == 0 { + return "" + } + + return resp.Choices[0].Message.Content +} + +func (l *LLMClient) Analyze(articles []string) string { + if l == nil || len(articles) == 0 { + return "" + } + + model := l.config.AnalystModel + if model == "" { + model = openai.GPT4o + } + + prompt := fmt.Sprintf("%s\n\n%s", l.config.AnalystPrompt, strings.Join(articles, "\n")) + + resp, err := l.client.CreateChatCompletion( + context.Background(), + openai.ChatCompletionRequest{ + Model: model, + Messages: []openai.ChatCompletionMessage{ + {Role: openai.ChatMessageRoleUser, Content: prompt}, + }, + }, + ) + if err != nil { + fmt.Printf("Analysis failed: %v\n", err) + return "" + } + return resp.Choices[0].Message.Content +} diff --git a/main.go b/main.go index ae6a04d..4b14887 100644 --- a/main.go +++ b/main.go @@ -1,31 +1,45 @@ package main import 
( + "log" + "os" + "github.com/mmcdole/gofeed" _ "modernc.org/sqlite" ) func main() { - bootstrapConfig() + cfg, err := LoadConfig() + if err != nil { + log.Fatalf("Failed to load config: %v", err) + } + + store, err := NewStorage(cfg.DatabaseFilePath) + if err != nil { + log.Fatalf("Failed to init DB: %v", err) + } + defer store.Close() + + llm := NewLLMClient(cfg) + + var writer Writer + if cfg.TerminalMode { + writer = TerminalWriter{} + } else { + mw := NewMarkdownWriter(cfg) + os.Remove(mw.FilePath) + writer = mw + } fp := gofeed.NewParser() - writer := getWriter() - displayWeather(writer) - displaySunriseSunset(writer) - generateAnalysis(fp, writer) - - for _, feed := range myFeeds { - parsedFeed := parseFeed(fp, feed.url, feed.limit) - - if parsedFeed == nil { - continue - } - - items := generateFeedItems(writer, parsedFeed, feed) - if items != "" { - writeFeed(writer, parsedFeed, items) - } + + DisplayWeather(cfg, writer) + DisplaySunriseSunset(cfg, writer) + + RunAnalyst(cfg, store, llm, writer, fp) + + for _, feedConfig := range cfg.Feeds { + ProcessFeed(feedConfig, cfg, store, llm, writer, fp) } - defer db.Close() } diff --git a/markdown_writer.go b/markdown_writer.go index 75f1457..b7ad0ae 100644 --- a/markdown_writer.go +++ b/markdown_writer.go @@ -1,45 +1,87 @@ package main import ( + "fmt" "log" + "net/url" "os" "path/filepath" + "strings" + "time" + + "github.com/mmcdole/gofeed" ) -type MarkdownWriter struct{} +type MarkdownWriter struct { + FilePath string +} + +func NewMarkdownWriter(cfg *Config) *MarkdownWriter { + date := time.Now().Format("2006-01-02") + fname := cfg.MarkdownFilePrefix + date + cfg.MarkdownFileSuffix + ".md" + return &MarkdownWriter{ + FilePath: filepath.Join(cfg.MarkdownDirPath, fname), + } +} -func (w MarkdownWriter) write(body string) { - markdown_file_name := mdPrefix + currentDate + mdSuffix + ".md" - f, err := os.OpenFile(filepath.Join(markdownDirPath, markdown_file_name), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) +func (w MarkdownWriter) Write(body string) { + f, err := os.OpenFile(w.FilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) if err != nil { - log.Fatal(err) + log.Printf("Error opening file %s: %v", w.FilePath, err) + return } + defer f.Close() + if _, err := f.Write([]byte(body)); err != nil { - log.Fatal(err) - } - if err := f.Close(); err != nil { - log.Fatal(err) + log.Printf("Error writing to file: %v", err) } } -func (w MarkdownWriter) writeLink(title string, url string, newline bool, readingTime string) string { - var content string - content = "[" + title + "](" + url + ")" +func (w MarkdownWriter) WriteLink(title string, url string, newLine bool, readingTime string) string { + content := fmt.Sprintf("[%s](%s)", title, url) if readingTime != "" { - content += " (" + readingTime + ")" + content += fmt.Sprintf(" (%s)", readingTime) } - if newline { - content += "\n" + + if newLine { + content += " \n" } return content } -func (w MarkdownWriter) writeSummary(content string, newline bool) string { +func (w MarkdownWriter) WriteSummary(content string, newLine bool) string { if content == "" { - return content + return "" } - if newline { + + if newLine { content += " \n\n" } return content } + +func (w MarkdownWriter) WriteHeader(feed *gofeed.Feed) string { + favicon := w.getFaviconHTML(feed) + return fmt.Sprintf("\n### %s %s\n", favicon, feed.Title) +} + +// Helper method specifically for MarkdownWriter +func (w MarkdownWriter) getFaviconHTML(s *gofeed.Feed) string { + var src string + + // Hacker news is a special case + if 
strings.Contains(s.Title, "Hacker News") { + src = "https://news.ycombinator.com/favicon.ico" + } else if s.FeedLink == "" { + return "šŸµ" + } else { + u, err := url.Parse(s.FeedLink) + if err != nil { + // If URL parsing fails, just return emoji + return "šŸµ" + } + src = "https://www.google.com/s2/favicons?sz=32&domain=" + u.Hostname() + } + + return fmt.Sprintf(``, src) +} diff --git a/migrations.go b/migrations.go index 451dcd5..16f6178 100644 --- a/migrations.go +++ b/migrations.go @@ -1,24 +1,44 @@ package main -import "database/sql" +import ( + "database/sql" + "time" +) -func applyMigrations(db *sql.DB) error { +type Storage struct { + db *sql.DB +} + +func NewStorage(dbPath string) (*Storage, error) { + db, err := sql.Open("sqlite", dbPath) + if err != nil { + return nil, err + } + + s := &Storage{db: db} + if err := s.applyMigrations(); err != nil { + return nil, err + } + return s, nil +} + +func (s *Storage) applyMigrations() error { // create new table on database var err error - _, err = db.Exec("CREATE TABLE IF NOT EXISTS seen (url TEXT, date TEXT, summary TEXT)") + _, err = s.db.Exec("CREATE TABLE IF NOT EXISTS seen (url TEXT, date TEXT, summary TEXT)") if err != nil { return err } - err = addSummaryColumnIfNotExists(db) + err = s.addSummaryColumnIfNotExists() if err != nil { return err } return nil } -func addSummaryColumnIfNotExists(db *sql.DB) error { - tx, err := db.Begin() +func (s *Storage) addSummaryColumnIfNotExists() error { + tx, err := s.db.Begin() if err != nil { return err } @@ -64,3 +84,29 @@ func addSummaryColumnIfNotExists(db *sql.DB) error { } return nil } + +func (s *Storage) MarkAsSeen(url, summary string) error { + today := time.Now().Format("2006-01-02") + _, err := s.db.Exec("INSERT INTO seen(url, date, summary) values(?,?,?)", url, today, summary) + return err +} + +// IsSeen returns (seen, seen_today, summary) +func (s *Storage) IsSeen(link string) (bool, bool, string) { + var urlStr, date, summary sql.NullString + err := s.db.QueryRow("SELECT url, date, summary FROM seen WHERE url=?", link).Scan(&urlStr, &date, &summary) + + if err != nil { + return false, false, "" + } + + today := time.Now().Format("2006-01-02") + isSeen := urlStr.Valid && date.String != today + isSeenToday := urlStr.Valid && date.String == today + + return isSeen, isSeenToday, summary.String +} + +func (s *Storage) Close() { + s.db.Close() +} diff --git a/processor.go b/processor.go new file mode 100644 index 0000000..0828f33 --- /dev/null +++ b/processor.go @@ -0,0 +1,205 @@ +package main + +import ( + "fmt" + "log" + "regexp" + "strconv" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + readability "github.com/go-shiori/go-readability" + "github.com/mmcdole/gofeed" +) + +func ProcessFeed(rss RSS, cfg *Config, store *Storage, llm *LLMClient, w Writer, fp *gofeed.Parser) { + feed, err := fp.ParseURL(rss.url) + if err != nil { + log.Printf("Error parsing %s: %v", rss.url, err) + return + } + + if len(feed.Items) > rss.limit { + feed.Items = feed.Items[:rss.limit] + } + + var outputBuffer string + itemsFound := false + + for _, item := range feed.Items { + // Check DB for seen status + seen, seenToday, prevSummary := store.IsSeen(item.Link) + + // If we've seen it before (and not today), skip it + if seen { + continue + } + + itemsFound = true + + title := item.Title + if title == "" { + title = stripHtmlRegex(item.Description) + } + + summary := prevSummary + + if summary == "" && rss.summarize { + summary = getSummary(llm, item, cfg) + } + + var readingTime string + 
if cfg.ReadingTime { + readingTime = getReadingTime(item.Link) + } + + if strings.Contains(feed.Link, "news.ycombinator.com") { + outputBuffer += formatHackerNewsLinks(w, item) + } + + if cfg.Instapaper && !cfg.TerminalMode { + outputBuffer += getInstapaperLink(item.Link) + } + + outputBuffer += w.WriteLink(title, item.Link, true, readingTime) + + if rss.summarize { + outputBuffer += w.WriteSummary(summary, true) + } + + if cfg.ShowImages && !cfg.TerminalMode { + img := extractImageTagFromHTML(item.Content) + if img != "" { + outputBuffer += img + "\n" + } + } + + if !seenToday { + store.MarkAsSeen(item.Link, summary) + } + } + + if itemsFound && outputBuffer != "" { + header := w.WriteHeader(feed) + w.Write(header + outputBuffer) + } +} + +func getSummary(llm *LLMClient, item *gofeed.Item, cfg *Config) string { + fmt.Println("outside") + if llm != nil { + scrapedText, err := readability.FromURL(item.Link, 30*time.Second) + content := item.Description + if err == nil { + content = scrapedText.TextContent + } + + fmt.Println("we have crawled") + + return llm.Summarize(content) + } + + return item.Description +} + +func getReadingTime(link string) string { + article, err := readability.FromURL(link, 30*time.Second) + if err != nil { + return "" + } + + words := strings.Fields(article.TextContent) + if len(words) == 0 { + return "" + } + + // 200 wpm + minutes := len(words) / 200 + if minutes == 0 { + return "" // < 1 min + } + return strconv.Itoa(minutes) + " min" +} + +func extractImageTagFromHTML(htmlText string) string { + doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlText)) + if err != nil { + return "" + } + + imgTags := doc.Find("img") + if imgTags.Length() == 0 { + return "" + } + + firstImgTag := imgTags.First() + + // Resize logic + width := firstImgTag.AttrOr("width", "") + height := firstImgTag.AttrOr("height", "") + + if width != "" && height != "" { + wInt, _ := strconv.Atoi(width) + hInt, _ := strconv.Atoi(height) + if wInt > 0 && hInt > 0 { + aspectRatio := float64(wInt) / float64(hInt) + const maxWidth = 400 + if wInt > maxWidth { + wInt = maxWidth + hInt = int(float64(wInt) / aspectRatio) + } + firstImgTag.SetAttr("width", fmt.Sprintf("%d", wInt)) + firstImgTag.SetAttr("height", fmt.Sprintf("%d", hInt)) + } + } + + html, err := goquery.OuterHtml(firstImgTag) + if err != nil { + return "" + } + return html +} + +func formatHackerNewsLinks(w Writer, item *gofeed.Item) string { + desc := item.Description + + commentsURL := "" + if start := strings.Index(desc, "Comments URL"); start != -1 { + safeStart := start + 23 + if safeStart+45 < len(desc) { + commentsURL = desc[safeStart : safeStart+45] + } + } + + // Find count + count := 0 + if start := strings.Index(desc, "Comments:"); start != -1 { + s := desc[start+10:] + s = strings.Replace(s, "
&#160;
\n", "", -1) + s = strings.TrimSpace(s) + count, _ = strconv.Atoi(s) + } + + icon := "šŸ’¬ " + if count >= 100 { + icon = "šŸ”„ " + } + + // If parsing failed, default to item.Link (often the comments page for text posts) + if commentsURL == "" { + commentsURL = item.Link + } + + return w.WriteLink(icon, commentsURL, false, "") +} + +func getInstapaperLink(link string) string { + return fmt.Sprintf(`[](https://www.instapaper.com/hello2?url=%s)`, link) +} + +func stripHtmlRegex(s string) string { + const regex = `<.*?>` + r := regexp.MustCompile(regex) + return r.ReplaceAllString(s, "") +} diff --git a/summarize.go b/summarize.go deleted file mode 100644 index d362e97..0000000 --- a/summarize.go +++ /dev/null @@ -1,71 +0,0 @@ -package main - -import ( - "context" - "fmt" - "time" - - readability "github.com/go-shiori/go-readability" - openai "github.com/sashabaranov/go-openai" -) - -func getSummaryFromLink(url string) string { - article, err := readability.FromURL(url, 30*time.Second) - if err != nil { - fmt.Printf("Failed to parse %s, %v\n", url, err) - } - - return summarize(article.TextContent) - -} - -func summarize(text string) string { - // Not sending everything to preserve Openai tokens in case the article is too long - maxCharactersToSummarize := 5000 - if len(text) > maxCharactersToSummarize { - text = text[:maxCharactersToSummarize] - } - - // Dont summarize if the article is too short - if len(text) < 200 { - return "" - } - - prompt := summaryPrompt - if prompt == "" { - prompt = "Summarize the following text:" - } - - clientConfig := openai.DefaultConfig(openaiApiKey) - if openaiBaseURL != "" { - clientConfig.BaseURL = openaiBaseURL - } - model := openai.GPT3Dot5Turbo - if openaiModel != "" { - model = openaiModel - } - client := openai.NewClientWithConfig(clientConfig) - resp, err := client.CreateChatCompletion( - context.Background(), - openai.ChatCompletionRequest{ - Model: model, - Messages: []openai.ChatCompletionMessage{ - { - Role: openai.ChatMessageRoleAssistant, - Content: prompt, - }, - { - Role: openai.ChatMessageRoleUser, - Content: text, - }, - }, - }, - ) - - if err != nil { - fmt.Printf("ChatCompletion error: %v\n", err) - return "" - } - - return resp.Choices[0].Message.Content -} diff --git a/terminal_writer.go b/terminal_writer.go index e1db9de..139555e 100644 --- a/terminal_writer.go +++ b/terminal_writer.go @@ -9,11 +9,11 @@ import ( type TerminalWriter struct{} -func (w TerminalWriter) write(body string) { +func (w TerminalWriter) Write(body string) { fmt.Println(body) } -func (w TerminalWriter) writeLink(title string, url string, newline bool, readingTime string) string { +func (w TerminalWriter) WriteLink(title string, url string, newline bool, readingTime string) string { var content string content = termlink.Link(title, url) if readingTime != "" { @@ -25,13 +25,14 @@ func (w TerminalWriter) writeLink(title string, url string, newline bool, readin return content } -func (w TerminalWriter) writeSummary(content string, newline bool) string { +func (w TerminalWriter) WriteSummary(content string, newline bool) string { + if newline { content += "\n" } return content } -func (w TerminalWriter) writeFavicon(s *gofeed.Feed) string { - return "" +func (w TerminalWriter) WriteHeader(feed *gofeed.Feed) string { + return fmt.Sprintf("\n### šŸµ %s\n", feed.Title) } diff --git a/utils.go b/utils.go deleted file mode 100644 index 3cb329b..0000000 --- a/utils.go +++ /dev/null @@ -1,13 +0,0 @@ -package main - -import ( - "regexp" -) - -const regex = `<.*?>` - -// This 
method uses a regular expresion to remove HTML tags. -func stripHtmlRegex(s string) string { - r := regexp.MustCompile(regex) - return r.ReplaceAllString(s, "") -} diff --git a/weather.go b/weather.go index 689ec19..3f742a4 100644 --- a/weather.go +++ b/weather.go @@ -20,46 +20,66 @@ func (c *UserAgentTransport) RoundTrip(r *http.Request) (*http.Response, error) return c.RoundTripper.RoundTrip(r) } -func displayWeather(w Writer) { - // Display weather if lat and lon are set - if lat != 0 && lon != 0 { - w.write(getWeather(lat, lon)) +func DisplayWeather(cfg *Config, w Writer) { + if cfg.WeatherLat == 0 && cfg.WeatherLon == 0 { + return + } + + weatherStr := getWeather(cfg.WeatherLat, cfg.WeatherLon) + if weatherStr != "" { + w.Write(weatherStr) } } -func displaySunriseSunset(w Writer) { - if sunrise_sunset && lat != 0 && lon != 0 { - rise, set := sunrise.SunriseSunset( - lat, lon, - time.Now().Year(), time.Now().Month(), time.Now().Day(), - ) - w.write(fmt.Sprintf("šŸŒ… %s šŸŒ‡ %s", rise.Local().Format("15:04"), set.Local().Format("15:04"))) +func DisplaySunriseSunset(cfg *Config, w Writer) { + if !cfg.SunriseSunset || cfg.WeatherLat == 0 || cfg.WeatherLon == 0 { + return } + + now := time.Now() + rise, set := sunrise.SunriseSunset( + cfg.WeatherLat, cfg.WeatherLon, + now.Year(), now.Month(), now.Day(), + ) + + w.Write(fmt.Sprintf("šŸŒ… %s šŸŒ‡ %s\n", rise.Local().Format("15:04"), set.Local().Format("15:04"))) } func getWeather(lat, lon float64) string { client := &http.Client{ Transport: &UserAgentTransport{http.DefaultTransport}, + Timeout: 10 * time.Second, } - resp, err := client.Get(fmt.Sprintf("https://api.met.no/weatherapi/locationforecast/2.0/compact?lat=%.2f&lon=%.2f", lat, lon)) + url := fmt.Sprintf("https://api.met.no/weatherapi/locationforecast/2.0/compact?lat=%.2f&lon=%.2f", lat, lon) + resp, err := client.Get(url) if err != nil { - panic(err) + fmt.Printf("Error fetching weather: %v\n", err) + return "" } defer resp.Body.Close() body, err := io.ReadAll(resp.Body) if err != nil { - panic(err) + fmt.Printf("Error reading weather response: %v\n", err) + return "" + } + + var res AutoGenerated + if err := json.Unmarshal(body, &res); err != nil { + fmt.Printf("Error parsing weather data: %v\n", err) + return "" + } + + if len(res.Properties.Timeseries) == 0 { + return "" } - res := AutoGenerated{} - json.Unmarshal([]byte(body), &res) - var temperature float64 = res.Properties.Timeseries[0].Data.Instant.Details.AirTemperature - var next_12_hours string = res.Properties.Timeseries[0].Data.Next12Hours.Summary.SymbolCode - var weatherEmoji string = determineWeatherEmoji(next_12_hours) - return fmt.Sprintf("# %d°C %s ļø", int(temperature+0.5), weatherEmoji) + temperature := res.Properties.Timeseries[0].Data.Instant.Details.AirTemperature + symbolCode := res.Properties.Timeseries[0].Data.Next12Hours.Summary.SymbolCode + weatherEmoji := determineWeatherEmoji(symbolCode) + return fmt.Sprintf("# %d°C %s\n", int(temperature+0.5), weatherEmoji) } func determineWeatherEmoji(desc string) string { @@ -81,7 +101,7 @@ func determineWeatherEmoji(desc string) string { case strings.Contains(desc, "fair_night") || strings.Contains(desc, "fair_day"): return "🌤" default: - fmt.Println("Unknown weather: " + desc) + fmt.Printf("Unknown weather symbol: %s\n", desc) return "" } } @@ -89,8 +109,8 @@ func determineWeatherEmoji(desc string) string { type AutoGenerated struct { Type string `json:"type"` Geometry struct { - Type string `json:"type"` - Coordinates []int `json:"coordinates"` + Type string 
`json:"type"` + Coordinates []float32 `json:"coordinates"` } `json:"geometry"` Properties struct { Meta struct { diff --git a/writer.go b/writer.go new file mode 100644 index 0000000..f6919eb --- /dev/null +++ b/writer.go @@ -0,0 +1,12 @@ +package main + +import ( + "github.com/mmcdole/gofeed" +) + +type Writer interface { + Write(body string) + WriteLink(title string, url string, newLine bool, readingTime string) string + WriteSummary(content string, newLine bool) string + WriteHeader(feed *gofeed.Feed) string +}