diff --git a/README.md b/README.md
index 6b77932..08fcb51 100644
--- a/README.md
+++ b/README.md
@@ -19,9 +19,44 @@
***
+## Technical Details
+
+### Content Generation Pipeline
+
+```mermaid
+flowchart TD
+ A["BlogPostTitleSuggestion.generate_content()"] --> B["init_blog_post_content_generation()"]
+ B --> C["AI: generate outline section titles
(Introduction + middle sections + Conclusion)"]
+ C --> D["DB: create GeneratedBlogPost + GeneratedBlogPostSection rows"]
+
+ D --> E{"For each middle section"}
+ E --> F["Task: generate research questions for section
(local: 1 question)"]
+
+ F --> G{"For each research question"}
+ G --> H["Task: Exa search for links
(local: 2 links)"]
+
+ H --> I{"For each research link"}
+ I --> J["Task: scrape link with Jina Reader"]
+ J --> K["Task: analyze link (AI)
summary + contextual summary + answer"]
+
+ K --> L{"All links attempted/analyzed?"}
+ L -- no --> K
+ L -- yes --> M["Task: synthesize middle section contents (AI)"]
+
+ M --> N{"All middle sections have content?"}
+ N -- yes --> O["Task: generate Introduction + Conclusion (AI)"]
+ N -- no --> M
+
+ O --> P{"All sections (incl. intro/conclusion) have content?"}
+ P -- yes --> Q["Task: populate GeneratedBlogPost.content
(code: combine sections into final markdown)"]
+ P -- no --> O
+```
+
## TOC
- [Overview](#overview)
+- [Technical Details](#technical-details)
+ - [Content Generation Pipeline](#content-generation-pipeline)
- [TOC](#toc)
- [Deployment](#deployment)
- [Render](#render)
@@ -39,9 +74,8 @@
[](https://render.com/deploy?repo=https://github.com/rasulkireev/tuxseo)
The only required env vars are:
-- OPENAI_API_KEY
-- TAVILY_API_KEY
- GEMINI_API_KEY
+- EXA_API_KEY
- PERPLEXITY_API_KEY
- JINA_READER_API_KEY
- KEYWORDS_EVERYWHERE_API_KEY
diff --git a/content_generation/__init__.py b/content_generation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/content_generation/admin.py b/content_generation/admin.py
new file mode 100644
index 0000000..846f6b4
--- /dev/null
+++ b/content_generation/admin.py
@@ -0,0 +1 @@
+# Register your models here.
diff --git a/content_generation/apps.py b/content_generation/apps.py
new file mode 100644
index 0000000..f88ddd6
--- /dev/null
+++ b/content_generation/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class ContentGenerationConfig(AppConfig):
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "content_generation"
diff --git a/content_generation/migrations/__init__.py b/content_generation/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/content_generation/models.py b/content_generation/models.py
new file mode 100644
index 0000000..4875573
--- /dev/null
+++ b/content_generation/models.py
@@ -0,0 +1,369 @@
+from urllib.request import urlopen
+
+import replicate
+import requests
+from django.conf import settings
+from django.core.files.base import ContentFile
+from django.db import models
+
+from core.agents import (
+ create_insert_links_agent,
+)
+from core.agents.schemas import (
+ GeneratedBlogPostSchema,
+ LinkInsertionContext,
+ ProjectPageContext,
+)
+from core.base_models import BaseModel
+from core.choices import (
+ OGImageStyle,
+)
+from core.models import AutoSubmissionSetting, BlogPostTitleSuggestion, Project
+from core.utils import (
+ get_og_image_prompt,
+ get_relevant_external_pages_for_blog_post,
+)
+from tuxseo.utils import get_tuxseo_logger
+
+logger = get_tuxseo_logger(__name__)
+
+
+class GeneratedBlogPost(BaseModel):
+ project = models.ForeignKey(
+ Project,
+ null=True,
+ blank=True,
+ on_delete=models.CASCADE,
+ related_name="generated_blog_posts",
+ )
+ title_suggestion = models.ForeignKey(
+ BlogPostTitleSuggestion,
+ null=True,
+ blank=True,
+ on_delete=models.CASCADE,
+ related_name="generated_blog_posts",
+ )
+
+ # Final Output Items
+ title = models.CharField(max_length=250)
+ description = models.TextField(blank=True)
+ slug = models.SlugField(max_length=250)
+ tags = models.TextField()
+ content = models.TextField()
+ icon = models.ImageField(upload_to="generated_blog_post_icons/", blank=True)
+ image = models.ImageField(upload_to="generated_blog_post_images/", blank=True)
+
+ # Preparation
+ # GeneratedBlogPostSection model
+
+ # Other
+ posted = models.BooleanField(default=False)
+ date_posted = models.DateTimeField(null=True, blank=True)
+
+ def __str__(self):
+ return f"{self.project.name}: {self.title}"
+
+ @classmethod
+ def blog_post_structure_rules(cls):
+ return """
+ - Use markdown.
+        - Start with the title as h1 (#). Do not include any other metadata (description, slug, etc.)
+        - Then do an intro, starting with `## Introduction`, then a paragraph of text.
+ - Continue with h2 (##) topics as you see fit.
+ - Do not go deeper than h2 (##) for post structure.
+        - Never include placeholder items (insert image here, link suggestions, etc.)
+ - Do not have `References` section, insert all the links into the post directly, organically.
+ - Do not include a call to action paragraph at the end of the post.
+ - Finish the post with a conclusion.
+ - Instead of using links as a reference, try to insert them into the post directly, organically.
+ """ # noqa: E501
+
+ @property
+ def generated_blog_post_schema(self):
+ return GeneratedBlogPostSchema(
+ description=self.description,
+ slug=self.slug,
+ tags=self.tags,
+ content=self.content,
+ )
+
+ def submit_blog_post_to_endpoint(self):
+ from core.utils import replace_placeholders
+
+ project = self.project
+ submission_settings = (
+ AutoSubmissionSetting.objects.filter(project=project).order_by("-id").first()
+ )
+
+ if not submission_settings or not submission_settings.endpoint_url:
+ logger.warning(
+ "No AutoSubmissionSetting or endpoint_url found for project", project_id=project.id
+ )
+ return False
+
+ url = submission_settings.endpoint_url
+ headers = replace_placeholders(submission_settings.header, self)
+ body = replace_placeholders(submission_settings.body, self)
+
+ logger.info(
+ "[Submit Blog Post] Submitting blog post to endpoint",
+ project_id=project.id,
+ profile_id=project.profile.id,
+ endpoint_url=url,
+ headers_configured=bool(headers),
+ body_configured=bool(body),
+ )
+
+ try:
+ session = requests.Session()
+ session.cookies.clear()
+
+ if headers is None:
+ headers = {}
+
+ if "content-type" not in headers and "Content-Type" not in headers:
+ headers["Content-Type"] = "application/json"
+
+ response = session.post(url, json=body, headers=headers, timeout=15)
+ response.raise_for_status()
+ return True
+
+ except requests.RequestException as e:
+ logger.error(
+ "[Submit Blog Post to Endpoint] Request error",
+ error=str(e),
+ url=url,
+ headers=headers,
+ exc_info=True,
+ )
+ return False
+
+ def generate_og_image(self) -> tuple[bool, str]:
+ """
+ Generate an Open Graph image for a blog post using Replicate flux-schnell model.
+
+        Takes no arguments: uses this instance's fields and the
+        REPLICATE_API_TOKEN from Django settings for authentication.
+        Returns early if an image already exists for this post.
+
+ Returns:
+ A tuple of (success: bool, message: str)
+ """
+
+ if not settings.REPLICATE_API_TOKEN:
+ logger.error(
+ "[GenerateOGImage] Replicate API token not configured",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ )
+ return False, "Replicate API token not configured"
+
+ if self.image:
+ logger.info(
+ "[GenerateOGImage] Image already exists for blog post",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ )
+ return True, f"Image already exists for blog post {self.id}"
+
+ try:
+ blog_post_category = (
+ self.title_suggestion.category if self.title_suggestion.category else "technology"
+ )
+
+ project_og_style = self.project.og_image_style or OGImageStyle.MODERN_GRADIENT
+ prompt = get_og_image_prompt(project_og_style, blog_post_category)
+
+ logger.info(
+ "[GenerateOGImage] Starting image generation",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ category=blog_post_category,
+ og_style=project_og_style,
+ prompt=prompt,
+ )
+
+ replicate_client = replicate.Client(api_token=settings.REPLICATE_API_TOKEN)
+
+ output = replicate_client.run(
+ "black-forest-labs/flux-schnell",
+ input={
+ "prompt": prompt,
+ "aspect_ratio": "16:9",
+ "output_format": "png",
+ "output_quality": 90,
+ },
+ )
+
+ if not output:
+ logger.error(
+ "[GenerateOGImage] No output from Replicate",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ )
+ return False, f"Failed to generate image for blog post {self.id}"
+
+ file_output = output[0] if isinstance(output, list) else output
+ image_url = str(file_output)
+
+ logger.info(
+ "[GenerateOGImage] Image generated successfully",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ image_url=image_url,
+ )
+
+ image_response = urlopen(image_url)
+ image_content = ContentFile(image_response.read())
+
+ filename = f"og-image-{self.id}.png"
+ self.image.save(filename, image_content, save=True)
+
+ logger.info(
+ "[GenerateOGImage] Image saved to blog post",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ saved_url=self.image.url,
+ )
+
+ return True, f"Successfully generated and saved OG image for blog post {self.id}"
+
+ except replicate.exceptions.ReplicateError as replicate_error:
+ logger.error(
+ "[GenerateOGImage] Replicate API error",
+ error=str(replicate_error),
+ exc_info=True,
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ )
+ return False, f"Replicate API error: {str(replicate_error)}"
+ except Exception as error:
+ logger.error(
+ "[GenerateOGImage] Unexpected error during image generation",
+ error=str(error),
+ exc_info=True,
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ )
+ return False, f"Unexpected error: {str(error)}"
+
+ def insert_links_into_post(self, max_pages=4, max_external_pages=3):
+ """
+ Insert links from project pages into the blog post content organically.
+ Uses PydanticAI to intelligently place links without modifying the content.
+
+ Args:
+ max_pages: Maximum number of internal project pages to use for linking (default: 4)
+ max_external_pages: Maximum number of external project pages to use for linking (default: 3)
+
+ Returns:
+ str: The blog post content with links inserted
+ """ # noqa: E501
+ from core.utils import (
+ get_relevant_pages_for_blog_post,
+ run_agent_synchronously,
+ )
+
+ if not self.title_suggestion:
+ logger.warning(
+ "[InsertLinksIntoPost] No title suggestion found for blog post",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ )
+ return self.content
+
+ # Get internal project pages
+ manually_selected_project_pages = list(self.project.project_pages.filter(always_use=True))
+ relevant_project_pages = list(
+ get_relevant_pages_for_blog_post(
+ self.project,
+ self.title_suggestion.suggested_meta_description,
+ max_pages=max_pages,
+ )
+ )
+
+ all_project_pages = manually_selected_project_pages + relevant_project_pages
+
+ # Get external project pages if link exchange is enabled
+ external_project_pages = []
+ if self.project.particiate_in_link_exchange:
+ external_project_pages = list(
+ get_relevant_external_pages_for_blog_post(
+ meta_description=self.title_suggestion.suggested_meta_description,
+ exclude_project=self.project,
+ max_pages=max_external_pages,
+ )
+ )
+ # Filter to only include pages from projects that also participate in link exchange
+ external_project_pages = [
+ page for page in external_project_pages if page.project.particiate_in_link_exchange
+ ]
+
+ all_pages_to_link = all_project_pages + external_project_pages
+
+ if not all_pages_to_link:
+ logger.info(
+ "[InsertLinksIntoPost] No pages found for link insertion",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ )
+ return self.content
+
+ project_page_contexts = [
+ ProjectPageContext(
+ url=page.url,
+ title=page.title,
+ description=page.description,
+ summary=page.summary,
+ )
+ for page in all_pages_to_link
+ ]
+
+ # Extract URLs for logging
+ urls_to_insert = [page.url for page in all_pages_to_link]
+ internal_urls = [page.url for page in all_project_pages]
+ external_urls = [page.url for page in external_project_pages]
+
+ link_insertion_context = LinkInsertionContext(
+ blog_post_content=self.content,
+ project_pages=project_page_contexts,
+ )
+
+ insert_links_agent = create_insert_links_agent()
+
+ prompt = "Insert the provided project page links into the blog post content organically. Do not modify the existing content, only add links where appropriate." # noqa: E501
+
+ logger.info(
+ "[InsertLinksIntoPost] Running link insertion agent",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ num_total_pages=len(project_page_contexts),
+ num_internal_pages=len(all_project_pages),
+ num_external_pages=len(external_project_pages),
+ num_always_use_pages=len(manually_selected_project_pages),
+ participate_in_link_exchange=self.project.particiate_in_link_exchange,
+ urls_to_insert=urls_to_insert,
+ internal_urls=internal_urls,
+ external_urls=external_urls,
+ )
+
+ result = run_agent_synchronously(
+ insert_links_agent,
+ prompt,
+ deps=link_insertion_context,
+ function_name="insert_links_into_post",
+ model_name="GeneratedBlogPost",
+ )
+
+ content_with_links = result.output
+
+ self.content = content_with_links
+ self.save(update_fields=["content"])
+
+ logger.info(
+ "[InsertLinksIntoPost] Links inserted successfully",
+ blog_post_id=self.id,
+ project_id=self.project_id,
+ )
+
+ return content_with_links
diff --git a/content_generation/tests.py b/content_generation/tests.py
new file mode 100644
index 0000000..a39b155
--- /dev/null
+++ b/content_generation/tests.py
@@ -0,0 +1 @@
+# Create your tests here.
diff --git a/content_generation/views.py b/content_generation/views.py
new file mode 100644
index 0000000..60f00ef
--- /dev/null
+++ b/content_generation/views.py
@@ -0,0 +1 @@
+# Create your views here.
diff --git a/core/agents/blog_post_outline_agent.py b/core/agents/blog_post_outline_agent.py
new file mode 100644
index 0000000..1f15e44
--- /dev/null
+++ b/core/agents/blog_post_outline_agent.py
@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+from pydantic_ai import Agent
+
+from core.agents.schemas import BlogPostGenerationContext
+from core.agents.system_prompts import (
+ add_language_specification,
+ add_project_details,
+ add_target_keywords,
+ add_title_details,
+ add_todays_date,
+)
+from core.choices import get_default_ai_model
+
+
+class BlogPostOutlineSection(BaseModel):
+ title: str = Field(description="Section title (use plain text, no markdown prefixes)")
+
+
+class BlogPostOutline(BaseModel):
+ sections: list[BlogPostOutlineSection] = Field(
+ description=(
+ "Ordered list of 4-8 section titles that will be used as H2 (##) headers in the blog post." # noqa: E501
+ )
+ )
+
+
+BLOG_POST_OUTLINE_SYSTEM_PROMPT = """
+You are an expert content strategist.
+
+Your task: propose only the middle-section outline for the blog post.
+
+Requirements:
+- Generate 4-8 main topics that will be used as H2 (##) sections.
+- Do NOT include markdown symbols in section titles (no leading #, ##, -, etc.).
+- Keep titles short and descriptive.
+- Do NOT include 'Introduction' or 'Conclusion' yet.
+
+Output must be a structured list of section titles only.
+"""
+
+
+def create_blog_post_outline_agent(model: str | None = None) -> Agent:
+ agent = Agent(
+ model or get_default_ai_model(),
+ output_type=BlogPostOutline,
+ deps_type=BlogPostGenerationContext,
+ system_prompt=BLOG_POST_OUTLINE_SYSTEM_PROMPT,
+ retries=2,
+ model_settings={"temperature": 0.7},
+ )
+
+ agent.system_prompt(add_project_details)
+ agent.system_prompt(add_title_details)
+ agent.system_prompt(add_todays_date)
+ agent.system_prompt(add_language_specification)
+ agent.system_prompt(add_target_keywords)
+
+ return agent
+
+
+class BlogPostSectionResearchQuestions(BaseModel):
+ questions: list[str] = Field(
+ default_factory=list,
+ description="3-6 concrete research questions for a single section",
+ )
+
+
+BLOG_POST_SECTION_QUESTIONS_SYSTEM_PROMPT = """
+You are an expert content researcher.
+
+Given a blog post section title, generate 3-6 specific research questions to investigate.
+
+Requirements:
+- Questions should be specific and searchable.
+- Prefer questions that lead to concrete examples, comparisons, metrics, pitfalls, and best practices.
+- Avoid vague or overly broad questions.
+""" # noqa: E501
+
+
+def create_blog_post_section_research_questions_agent(model: str | None = None) -> Agent:
+ agent = Agent(
+ model or get_default_ai_model(),
+ output_type=BlogPostSectionResearchQuestions,
+ deps_type=BlogPostGenerationContext,
+ system_prompt=BLOG_POST_SECTION_QUESTIONS_SYSTEM_PROMPT,
+ retries=2,
+ model_settings={"temperature": 0.7},
+ )
+
+ agent.system_prompt(add_project_details)
+ agent.system_prompt(add_title_details)
+ agent.system_prompt(add_todays_date)
+ agent.system_prompt(add_language_specification)
+ agent.system_prompt(add_target_keywords)
+
+ return agent
diff --git a/core/agents/generate_blog_post_intro_conclusion_agent.py b/core/agents/generate_blog_post_intro_conclusion_agent.py
new file mode 100644
index 0000000..281454f
--- /dev/null
+++ b/core/agents/generate_blog_post_intro_conclusion_agent.py
@@ -0,0 +1,100 @@
+from django.utils import timezone
+from pydantic_ai import Agent
+
+from core.agents.schemas import (
+ BlogPostIntroConclusionGenerationContext,
+ GeneratedBlogPostIntroConclusionSchema,
+)
+from core.choices import ContentType, get_default_ai_model
+from core.prompts import GENERATE_CONTENT_SYSTEM_PROMPTS
+
+INTRO_CONCLUSION_SYSTEM_PROMPT = """
+You are an expert blog post writer.
+
+Your task: write BOTH the Introduction and the Conclusion for a blog post in a single response.
+
+Rules:
+- Return two fields only: introduction and conclusion.
+- Do NOT include markdown headings for either section. No leading '#', '##', or '###'.
+- Use the existing section contents as the source of truth for what the post covers.
+- The introduction should set up the promise and smoothly lead into the first middle section.
+- The conclusion should summarize the key takeaways and close cleanly without adding new topics.
+- Do not add placeholders.
+"""
+
+
+def create_generate_blog_post_intro_conclusion_agent(
+ content_type: ContentType = ContentType.SHARING, model=None
+):
+ """
+ Create an agent to generate a blog post Introduction + Conclusion in one call.
+ """
+ agent = Agent(
+ model or get_default_ai_model(),
+ output_type=GeneratedBlogPostIntroConclusionSchema,
+ deps_type=BlogPostIntroConclusionGenerationContext,
+ system_prompt=(
+ INTRO_CONCLUSION_SYSTEM_PROMPT
+ + "\n\n"
+ + (GENERATE_CONTENT_SYSTEM_PROMPTS.get(content_type, "") or "")
+ ),
+ retries=2,
+ model_settings={"max_tokens": 6000, "temperature": 0.7},
+ )
+
+ @agent.system_prompt
+ def add_intro_conclusion_context(ctx) -> str:
+ intro_conclusion_context: BlogPostIntroConclusionGenerationContext = ctx.deps
+ generation_context = intro_conclusion_context.blog_post_generation_context
+ project_details = generation_context.project_details
+ title_suggestion = generation_context.title_suggestion
+ target_keywords = title_suggestion.target_keywords or []
+
+ section_titles_text = (
+ "\n".join(
+ [
+ f"- {title}"
+ for title in (intro_conclusion_context.section_titles_in_order or [])
+ if title
+ ]
+ )
+ or "- (none)"
+ )
+
+ sections_text = ""
+ for index, section in enumerate(intro_conclusion_context.sections_in_order or [], start=1):
+ sections_text += f"\nSection {index}: {section.title}\n{section.content}\n"
+
+ if not sections_text.strip():
+ sections_text = "\n(none)\n"
+
+ return f"""
+Today's date: {timezone.now().strftime("%Y-%m-%d")}
+
+Project details:
+- Project name: {project_details.name}
+- Project type: {project_details.type}
+- Project summary: {project_details.summary}
+- Blog theme: {project_details.blog_theme}
+- Key features: {project_details.key_features}
+- Target audience: {project_details.target_audience_summary}
+- Pain points: {project_details.pain_points}
+- Product usage: {project_details.product_usage}
+
+Blog post title suggestion:
+- Title: {title_suggestion.title}
+- Category: {title_suggestion.category}
+- Description: {title_suggestion.description}
+- Suggested meta description: {title_suggestion.suggested_meta_description}
+- Target keywords: {", ".join(target_keywords) if target_keywords else "None"}
+
+Outline:
+{section_titles_text}
+
+All existing section contents (use this as the truth of what the post covers):
+{sections_text}
+
+Language: Write in {project_details.language}.
+"""
+
+ return agent
diff --git a/core/agents/generate_blog_post_section_content_agent.py b/core/agents/generate_blog_post_section_content_agent.py
new file mode 100644
index 0000000..7c03082
--- /dev/null
+++ b/core/agents/generate_blog_post_section_content_agent.py
@@ -0,0 +1,123 @@
+from django.utils import timezone
+from pydantic_ai import Agent
+
+from core.agents.schemas import (
+ BlogPostSectionContentGenerationContext,
+ GeneratedBlogPostSectionContentSchema,
+)
+from core.choices import ContentType, get_default_ai_model
+from core.prompts import GENERATE_CONTENT_SYSTEM_PROMPTS
+
+SECTION_CONTENT_SYSTEM_PROMPT = """
+You are an expert blog post writer.
+
+Your task: write the content for ONE blog post section (the body of the section only).
+
+Rules:
+- Do NOT write the Introduction or Conclusion.
+- Do NOT include the section title as a markdown header. No leading '#', '##', or '###'.
+- Avoid markdown headings entirely. Use paragraphs, bullet lists, and numbered lists only when useful.
+- Use the provided "Previous sections" to maintain continuity and avoid repetition.
+- Use the provided research link outputs as factual grounding. Do not invent sources or cite URLs.
+- Keep the section coherent with the overall outline and the order position provided.
+- Do not add placeholders.
+"""
+
+
+def create_generate_blog_post_section_content_agent(
+ content_type: ContentType = ContentType.SHARING, model=None
+):
+ """
+ Create an agent to generate the content for a single middle section of a blog post.
+ """
+ agent = Agent(
+ model or get_default_ai_model(),
+ output_type=GeneratedBlogPostSectionContentSchema,
+ deps_type=BlogPostSectionContentGenerationContext,
+ system_prompt=(
+ SECTION_CONTENT_SYSTEM_PROMPT
+ + "\n\n"
+ + (GENERATE_CONTENT_SYSTEM_PROMPTS.get(content_type, "") or "")
+ ),
+ retries=2,
+ model_settings={"max_tokens": 16000, "temperature": 0.7},
+ )
+
+ @agent.system_prompt
+ def add_section_content_context(ctx) -> str:
+ section_context: BlogPostSectionContentGenerationContext = ctx.deps
+ generation_context = section_context.blog_post_generation_context
+ project_details = generation_context.project_details
+ title_suggestion = generation_context.title_suggestion
+ target_keywords = title_suggestion.target_keywords or []
+
+ other_titles = [title for title in (section_context.other_section_titles or []) if title]
+ other_titles_text = "\n".join([f"- {title}" for title in other_titles]) or "- (none)"
+
+ previous_sections = section_context.previous_sections or []
+ previous_sections_text = ""
+ for previous_section_index, previous_section in enumerate(previous_sections, start=1):
+ previous_sections_text += (
+ f"\nPrevious section {previous_section_index}: {previous_section.title}\n"
+ f"{previous_section.content}\n"
+ )
+ if not previous_sections_text.strip():
+ previous_sections_text = "\n(none)\n"
+
+ research_questions_text = ""
+ for question_index, question in enumerate(
+ section_context.research_questions or [], start=1
+ ):
+ research_questions_text += (
+ f"\nResearch question {question_index}: {question.question}\n"
+ )
+ for link_index, link in enumerate(question.research_links or [], start=1):
+ research_questions_text += (
+ f"\nAnswered research link {link_index}:\n"
+ f"- summary_for_question_research:\n{link.summary_for_question_research}\n"
+ f"- general_summary:\n{link.general_summary}\n"
+ f"- answer_to_question:\n{link.answer_to_question}\n"
+ )
+
+ if not research_questions_text.strip():
+ research_questions_text = "\n(none)\n"
+
+ return f"""
+Today's date: {timezone.now().strftime("%Y-%m-%d")}
+
+Project details:
+- Project name: {project_details.name}
+- Project type: {project_details.type}
+- Project summary: {project_details.summary}
+- Blog theme: {project_details.blog_theme}
+- Key features: {project_details.key_features}
+- Target audience: {project_details.target_audience_summary}
+- Pain points: {project_details.pain_points}
+- Product usage: {project_details.product_usage}
+
+Blog post title suggestion:
+- Title: {title_suggestion.title}
+- Category: {title_suggestion.category}
+- Description: {title_suggestion.description}
+- Suggested meta description: {title_suggestion.suggested_meta_description}
+- Target keywords: {", ".join(target_keywords) if target_keywords else "None"}
+
+Outline coherence:
+- Other section titles:
+{other_titles_text}
+
+Current section to write:
+- Section title: {section_context.section_title}
+- Section order in outline: {section_context.section_order} / {section_context.total_sections}
+- Section order among middle sections: {section_context.research_section_order} / {section_context.total_research_sections}
+
+Previous sections (for continuity; do not repeat content):
+{previous_sections_text}
+
+Research answers for this section (only include content that is supported by these answers):
+{research_questions_text}
+
+Language: Write in {project_details.language}.
+"""
+
+ return agent
diff --git a/core/agents/research_link_summary_agent.py b/core/agents/research_link_summary_agent.py
new file mode 100644
index 0000000..75aedea
--- /dev/null
+++ b/core/agents/research_link_summary_agent.py
@@ -0,0 +1,123 @@
+from django.utils import timezone
+from pydantic_ai import Agent, RunContext
+
+from core.agents.schemas import (
+ ResearchLinkAnalysis,
+ ResearchLinkContextualSummaryContext,
+ TextSummary,
+ WebPageContent,
+)
+from core.choices import get_default_ai_model
+
+
+def _add_webpage_content_from_web_page_content(ctx: RunContext[WebPageContent]) -> str:
+ return (
+ "Web page content:\n"
+ f"Title: {ctx.deps.title}\n"
+ f"Description: {ctx.deps.description}\n"
+ f"Content: {ctx.deps.markdown_content}\n"
+ )
+
+
+def _add_webpage_content_from_contextual_deps(
+ ctx: RunContext[ResearchLinkContextualSummaryContext],
+) -> str:
+ web_page_content = ctx.deps.web_page_content
+ return (
+ "Web page content:\n"
+ f"URL: {ctx.deps.url}\n"
+ f"Title: {web_page_content.title}\n"
+ f"Description: {web_page_content.description}\n"
+ f"Content: {web_page_content.markdown_content}\n"
+ )
+
+
+def _add_blog_post_research_context(ctx: RunContext[ResearchLinkContextualSummaryContext]) -> str:
+ blog_post_generation_context = ctx.deps.blog_post_generation_context
+ project_details = blog_post_generation_context.project_details
+ title_suggestion = blog_post_generation_context.title_suggestion
+ target_keywords = title_suggestion.target_keywords or []
+
+ return (
+ "Context for why we are summarizing this page:\n"
+ f"- Today's date: {timezone.now().strftime('%Y-%m-%d')}\n"
+ f"- Project: {project_details.name}\n"
+ f"- Project summary: {project_details.summary}\n"
+ f"- Blog post title: {ctx.deps.blog_post_title}\n"
+ f"- Blog post section: {ctx.deps.section_title}\n"
+ f"- Research question: {ctx.deps.research_question}\n"
+ f"- Target keywords: {', '.join(target_keywords) if target_keywords else 'None'}\n"
+ "\n"
+ "You must tailor the summary to help the writer answer the research question for that section.\n" # noqa: E501
+ )
+
+
+def create_general_research_link_summary_agent(model=None):
+ agent = Agent(
+ model or get_default_ai_model(),
+ output_type=TextSummary,
+ deps_type=WebPageContent,
+ system_prompt=(
+ "You are an expert content summarizer. Summarize the web page content provided.\n"
+ "Return a concise 2-3 sentence summary that captures the main purpose and key information.\n" # noqa: E501
+ "Focus on what the page is about and its main value proposition.\n"
+ ),
+ retries=2,
+ model_settings={"temperature": 0.4},
+ )
+ agent.system_prompt(_add_webpage_content_from_web_page_content)
+ return agent
+
+
+def create_contextual_research_link_summary_agent(model=None):
+ agent = Agent(
+ model or get_default_ai_model(),
+ output_type=TextSummary,
+ deps_type=ResearchLinkContextualSummaryContext,
+ system_prompt=(
+ "You are a research assistant helping write a blog post.\n"
+ "Summarize the page in a way that is maximally useful for answering the research question.\n" # noqa: E501
+ "Prefer concrete facts, definitions, steps, examples, and any notable stats. If the page is not relevant, say so clearly.\n" # noqa: E501
+ "Output markdown that includes:\n"
+ "- A short paragraph summary\n"
+ "- 'Key takeaways' as 3-7 bullet points\n"
+ "- 'How this helps our section' as 1-3 bullet points\n"
+ ),
+ retries=2,
+ model_settings={"temperature": 0.3},
+ )
+ agent.system_prompt(_add_blog_post_research_context)
+ agent.system_prompt(_add_webpage_content_from_contextual_deps)
+ return agent
+
+
+def create_research_link_analysis_agent(model=None):
+ """
+ Analyze a research link in a single model call and return:
+ - a general page summary
+ - a contextual summary tailored to the blog post section's research question
+ - a direct answer to the research question (if possible from the page)
+ """
+ agent = Agent(
+ model or get_default_ai_model(),
+ output_type=ResearchLinkAnalysis,
+ deps_type=ResearchLinkContextualSummaryContext,
+ system_prompt=(
+ "You are a research assistant helping write a blog post.\n"
+ "Using only the web page content provided, produce three outputs:\n"
+ "1) general_summary: a context-free 2-3 sentence summary of what the page is about.\n"
+ "2) summary_for_question_research: a markdown summary tailored to the research question. "
+ "Include: a short paragraph summary, 'Key takeaways' (3-7 bullets), and "
+ "'How this helps our section' (1-3 bullets).\n"
+ "3) answer_to_question: directly answer the research question in 1-6 sentences. "
+ "If the page does not answer it (or is irrelevant), say so clearly.\n"
+ "\n"
+ "Be concrete and avoid speculation. Prefer facts, definitions, steps, examples, and stats "
+ "that are present in the page.\n"
+ ),
+ retries=2,
+ model_settings={"temperature": 0.3},
+ )
+ agent.system_prompt(_add_blog_post_research_context)
+ agent.system_prompt(_add_webpage_content_from_contextual_deps)
+ return agent
diff --git a/core/agents/schemas.py b/core/agents/schemas.py
index 6852bfc..872597d 100644
--- a/core/agents/schemas.py
+++ b/core/agents/schemas.py
@@ -14,6 +14,31 @@ class WebPageContent(BaseModel):
markdown_content: str
+class TextSummary(BaseModel):
+ summary: str = Field(description="A concise summary of the provided content")
+
+
+class ResearchLinkAnalysis(BaseModel):
+ general_summary: str = Field(
+ description=(
+ "A general, context-free summary of the page content. Keep it to 2-3 sentences."
+ )
+ )
+ summary_for_question_research: str = Field(
+ description=(
+ "A markdown summary tailored to the blog post's research question. Include: "
+ "a short paragraph summary, 'Key takeaways' (3-7 bullets), and "
+ "'How this helps our section' (1-3 bullets)."
+ )
+ )
+ answer_to_question: str = Field(
+ description=(
+ "A direct answer to the research question, based strictly on the page content. "
+ "If the page does not answer the question, say so clearly."
+ )
+ )
+
+
class ProjectDetails(BaseModel):
name: str = Field(description="Official name of the project or organization")
type: str = Field(
@@ -189,6 +214,92 @@ class BlogPostGenerationContext(BaseModel):
content_type: str = Field(description="Type of content to generate (SEO or SHARING)")
+class ResearchLinkContextualSummaryContext(BaseModel):
+    """Agent deps for research-link analysis: the page plus blog-post context."""
+
+    url: str = Field(description="Source URL of the research page")
+    web_page_content: WebPageContent
+    blog_post_generation_context: BlogPostGenerationContext
+    blog_post_title: str = Field(description="Title of the blog post being written")
+    section_title: str = Field(description="Title of the blog post section being written")
+    research_question: str = Field(description="Research question we are trying to answer")
+
+
+class ResearchLinkAnswerSnippet(BaseModel):
+    """Condensed analysis of one answered research link, fed into section synthesis."""
+
+    summary_for_question_research: str = Field(
+        description="A markdown summary tailored to the research question"
+    )
+    general_summary: str = Field(description="A general, context-free 2-3 sentence page summary")
+    answer_to_question: str = Field(description="A direct answer to the research question")
+
+
+class ResearchQuestionWithAnsweredLinks(BaseModel):
+    """A research question paired with only its answered link snippets."""
+
+    question: str = Field(description="The research question we were answering")
+    research_links: list[ResearchLinkAnswerSnippet] = Field(
+        default_factory=list,
+        description="Only research links that include a non-empty answer_to_question",
+    )
+
+
+class PriorSectionContext(BaseModel):
+    """Title and markdown body of an already-written section, for narrative coherence."""
+
+    title: str = Field(description="Section title")
+    content: str = Field(description="Section content (markdown)")
+
+
+class BlogPostSectionContentGenerationContext(BaseModel):
+    """Agent deps for writing the body of one middle (research) section."""
+
+    blog_post_generation_context: BlogPostGenerationContext
+    blog_post_title: str = Field(description="Title of the blog post being written")
+    section_title: str = Field(description="Title of the section to write")
+    section_order: int = Field(description="Order of this section in the overall outline")
+    total_sections: int = Field(description="Total number of sections in the outline")
+    research_section_order: int = Field(
+        description="1-based order of this section among the middle (non-intro/non-conclusion) sections"
+    )
+    total_research_sections: int = Field(
+        description="Total number of middle (non-intro/non-conclusion) sections"
+    )
+    other_section_titles: list[str] = Field(
+        default_factory=list,
+        description="Titles of the other sections in the blog post (for coherence)",
+    )
+    previous_sections: list[PriorSectionContext] = Field(
+        default_factory=list,
+        description="Previously generated section content (in order) to keep the narrative coherent",
+    )
+    research_questions: list[ResearchQuestionWithAnsweredLinks] = Field(
+        default_factory=list,
+        description="Research questions for this section, with only answered research links included",
+    )
+
+
+class GeneratedBlogPostSectionContentSchema(BaseModel):
+    """Structured agent output: the markdown body of a single section."""
+
+    content: str = Field(
+        description=(
+            "Markdown content for the section body only (do not include the section title as a header)"
+        )
+    )
+
+
+class BlogPostIntroConclusionGenerationContext(BaseModel):
+    """Agent deps for generating the Introduction and Conclusion in one pass."""
+
+    blog_post_generation_context: BlogPostGenerationContext
+    blog_post_title: str = Field(description="Title of the blog post being written")
+    section_titles_in_order: list[str] = Field(
+        default_factory=list,
+        description="All section titles in outline order (including Introduction and Conclusion)",
+    )
+    sections_in_order: list[PriorSectionContext] = Field(
+        default_factory=list,
+        description="All existing section contents in order (including middle sections) to base intro/conclusion on",
+    )
+
+
+class GeneratedBlogPostIntroConclusionSchema(BaseModel):
+    """Structured agent output: intro and conclusion bodies (markdown, no headings)."""
+
+    introduction: str = Field(
+        description="Markdown content for the Introduction section body only (no heading)"
+    )
+    conclusion: str = Field(
+        description="Markdown content for the Conclusion section body only (no heading)"
+    )
+
+
class GeneratedBlogPostSchema(BaseModel):
description: str = Field(
description="Meta description (150-160 characters) optimized for search engines"
diff --git a/core/content_generator/__init__.py b/core/content_generator/__init__.py
new file mode 100644
index 0000000..976578a
--- /dev/null
+++ b/core/content_generator/__init__.py
@@ -0,0 +1,7 @@
+"""
+Content generation pipeline package.
+
+This package contains:
+- `pipeline.py`: pipeline "steps" that orchestrate content generation + research.
+- `utils.py`: small reusable helpers for the pipeline.
+"""
diff --git a/core/content_generator/pipeline.py b/core/content_generator/pipeline.py
new file mode 100644
index 0000000..88226e4
--- /dev/null
+++ b/core/content_generator/pipeline.py
@@ -0,0 +1,1263 @@
+from __future__ import annotations
+
+from django.conf import settings
+from django.core.cache import cache
+from django.db import transaction
+from django.utils import timezone
+from django.utils.dateparse import parse_datetime
+from django.utils.text import slugify
+from django_q.tasks import async_task
+from exa_py import Exa
+
+from core.agents.blog_post_outline_agent import (
+ create_blog_post_outline_agent,
+ create_blog_post_section_research_questions_agent,
+)
+from core.agents.generate_blog_post_intro_conclusion_agent import (
+ create_generate_blog_post_intro_conclusion_agent,
+)
+from core.agents.generate_blog_post_section_content_agent import (
+ create_generate_blog_post_section_content_agent,
+)
+from core.agents.research_link_summary_agent import (
+ create_research_link_analysis_agent,
+)
+from core.agents.schemas import (
+ BlogPostGenerationContext,
+ BlogPostIntroConclusionGenerationContext,
+ BlogPostSectionContentGenerationContext,
+ GeneratedBlogPostIntroConclusionSchema,
+ GeneratedBlogPostSectionContentSchema,
+ PriorSectionContext,
+ ResearchLinkAnswerSnippet,
+ ResearchLinkContextualSummaryContext,
+ ResearchQuestionWithAnsweredLinks,
+ WebPageContent,
+)
+from core.choices import ContentType
+from core.content_generator.utils import get_exa_date_range_iso_strings
+from core.models import (
+ GeneratedBlogPost,
+ GeneratedBlogPostResearchLink,
+ GeneratedBlogPostResearchQuestion,
+ GeneratedBlogPostSection,
+)
+from core.utils import get_markdown_content, run_agent_synchronously
+from tuxseo.utils import get_tuxseo_logger
+
+logger = get_tuxseo_logger(__name__)
+
+
+INTRODUCTION_SECTION_TITLE = "Introduction"
+CONCLUSION_SECTION_TITLE = "Conclusion"
+NON_RESEARCH_SECTION_TITLES = {INTRODUCTION_SECTION_TITLE, CONCLUSION_SECTION_TITLE}
+MAX_RESEARCH_LINK_MARKDOWN_CHARS_FOR_SUMMARY = 25_000
+LOCAL_MAX_RESEARCH_QUESTIONS_PER_SECTION = 1
+SECTION_SYNTHESIS_RETRY_CACHE_TTL_SECONDS = 6 * 60 * 60
+
+
+def _create_blog_post_generation_context(
+    *, title_suggestion, content_type_to_use: str
+) -> BlogPostGenerationContext:
+    """Build the shared BlogPostGenerationContext used by every agent in the pipeline.
+
+    Args:
+        title_suggestion: BlogPostTitleSuggestion-like object; must have a project
+            and provide get_blog_post_keywords() / title_suggestion_schema.
+        content_type_to_use: Resolved content type string (e.g. SEO or SHARING).
+
+    Returns:
+        A BlogPostGenerationContext with project details, title suggestion,
+        keywords, and content type; project_pages is intentionally left empty.
+    """
+    keywords_to_use = title_suggestion.get_blog_post_keywords()
+    return BlogPostGenerationContext(
+        project_details=title_suggestion.project.project_details,
+        title_suggestion=title_suggestion.title_suggestion_schema,
+        project_keywords=keywords_to_use,
+        project_pages=[],
+        content_type=content_type_to_use,
+    )
+
+
+def generate_sections_to_create(*, title_suggestion, content_type: str | None = None) -> list[str]:
+    """
+    Step 1: Generate the section titles we will create (one AI query).
+
+    Args:
+        title_suggestion: The title suggestion driving this blog post; required
+            and must be associated to a project.
+        content_type: Optional override; falls back to the suggestion's own
+            content_type, then to ContentType.SHARING.
+
+    Returns:
+        Section titles in order: "Introduction", the AI-generated middle
+        sections, then "Conclusion".
+
+    Raises:
+        ValueError: If title_suggestion is None or has no project.
+    """
+    if title_suggestion is None:
+        raise ValueError("title_suggestion is required")
+
+    if not title_suggestion.project_id:
+        raise ValueError("title_suggestion must be associated to a project")
+
+    content_type_to_use = content_type or title_suggestion.content_type or ContentType.SHARING
+    outline_context = _create_blog_post_generation_context(
+        title_suggestion=title_suggestion,
+        content_type_to_use=content_type_to_use,
+    )
+
+    outline_agent = create_blog_post_outline_agent()
+    outline_result = run_agent_synchronously(
+        outline_agent,
+        "Generate the blog post outline sections.",
+        deps=outline_context,
+        function_name="generate_sections_to_create",
+        model_name="GeneratedBlogPost",
+    )
+
+    # Tolerate a missing/empty agent result: fall back to an empty middle list.
+    outline_sections = (
+        outline_result.output.sections if outline_result and outline_result.output else []
+    )
+
+    # Drop blank/whitespace-only titles returned by the model.
+    middle_section_titles = [
+        (section.title or "").strip()
+        for section in outline_sections
+        if (section.title or "").strip()
+    ]
+
+    return [INTRODUCTION_SECTION_TITLE, *middle_section_titles, CONCLUSION_SECTION_TITLE]
+
+
+def create_blog_post_and_sections(
+    *, title_suggestion, section_titles: list[str], content_type: str | None = None
+):
+    """
+    Step 1b: Persist the GeneratedBlogPost + GeneratedBlogPostSection rows.
+
+    Args:
+        title_suggestion: The title suggestion this post is generated from.
+        section_titles: Ordered section titles (including Introduction/Conclusion).
+        content_type: Optional override, only used for logging here.
+
+    Returns:
+        The created GeneratedBlogPost (sections created with empty content).
+    """
+    content_type_to_use = content_type or title_suggestion.content_type or ContentType.SHARING
+    tags = ", ".join(title_suggestion.target_keywords) if title_suggestion.target_keywords else ""
+
+    # Single transaction so a failure never leaves a post without its sections.
+    with transaction.atomic():
+        blog_post = GeneratedBlogPost.objects.create(
+            project=title_suggestion.project,
+            title_suggestion=title_suggestion,
+            title=title_suggestion.title,
+            description=title_suggestion.suggested_meta_description,
+            slug=slugify(title_suggestion.title),
+            tags=tags,
+            content="",
+        )
+
+        for section_order, section_title in enumerate(section_titles):
+            GeneratedBlogPostSection.objects.create(
+                blog_post=blog_post,
+                # Truncated to 250 chars — presumably the model field's
+                # max_length; confirm against GeneratedBlogPostSection.title.
+                title=(section_title or "")[:250],
+                content="",
+                order=section_order,
+            )
+
+    logger.info(
+        "[ContentGenerator] Blog post initialized",
+        blog_post_id=blog_post.id,
+        title_suggestion_id=title_suggestion.id,
+        project_id=title_suggestion.project_id,
+        num_sections_created=len(section_titles),
+        content_type=content_type_to_use,
+    )
+
+    return blog_post
+
+
+def queue_research_question_generation_for_sections(*, blog_post_id: int) -> int:
+    """
+    Step 2: Queue one task per (research) section to generate questions.
+
+    Introduction/Conclusion sections are excluded — they are written later from
+    the finished middle sections rather than from research.
+
+    Returns:
+        The number of research sections for which a task was queued.
+
+    Raises:
+        ValueError: If the blog post does not exist.
+    """
+    blog_post = (
+        GeneratedBlogPost.objects.prefetch_related("blog_post_sections")
+        .filter(id=blog_post_id)
+        .first()
+    )
+    if not blog_post:
+        raise ValueError(f"GeneratedBlogPost not found: {blog_post_id}")
+
+    blog_post_sections = list(blog_post.blog_post_sections.all())
+    research_sections = [
+        section
+        for section in blog_post_sections
+        if (section.title or "").strip() not in NON_RESEARCH_SECTION_TITLES
+    ]
+
+    for section in research_sections:
+        async_task(
+            "core.content_generator.tasks.generate_research_questions_for_section_task",
+            section.id,
+            group="Generate Research Questions",
+        )
+
+    logger.info(
+        "[ContentGenerator] Queued research question generation tasks",
+        blog_post_id=blog_post.id,
+        num_sections=len(blog_post_sections),
+        num_research_sections=len(research_sections),
+    )
+
+    return len(research_sections)
+
+
+def init_blog_post_content_generation(title_suggestion, content_type: str | None = None):
+    """
+    Pipeline entrypoint (currently stops after queuing tasks).
+
+    Step 1: generate sections we will create
+    Step 2: queue tasks to generate questions for each section
+    Step 3: (handled by the tasks) queue tasks to fetch Exa links for each generated question
+    Step 4: next steps later
+
+    Args:
+        title_suggestion: The BlogPostTitleSuggestion to expand into a post.
+        content_type: Optional content-type override, threaded through all steps.
+
+    Returns:
+        The newly created GeneratedBlogPost (content filled in asynchronously).
+    """
+    section_titles = generate_sections_to_create(
+        title_suggestion=title_suggestion, content_type=content_type
+    )
+    blog_post = create_blog_post_and_sections(
+        title_suggestion=title_suggestion,
+        section_titles=section_titles,
+        content_type=content_type,
+    )
+    queue_research_question_generation_for_sections(blog_post_id=blog_post.id)
+    return blog_post
+
+
+def populate_research_links_for_question_from_exa(
+    research_question_id: int,
+    num_results_per_question: int = 2,
+    months_back: int = 6,
+):
+    """
+    Step 3: Get links for one question from Exa (called via a task per question).
+
+    Args:
+        research_question_id: PK of the GeneratedBlogPostResearchQuestion to search for.
+        num_results_per_question: Max Exa results requested per question.
+        months_back: Restrict results to pages crawled/published this recently.
+
+    Returns:
+        Number of research links upserted (0 if the question text is empty).
+
+    Raises:
+        ValueError: If the question or its blog post cannot be loaded.
+    """
+    research_question = (
+        GeneratedBlogPostResearchQuestion.objects.select_related("blog_post")
+        .filter(id=research_question_id)
+        .first()
+    )
+    if not research_question:
+        raise ValueError(f"GeneratedBlogPostResearchQuestion not found: {research_question_id}")
+
+    blog_post = research_question.blog_post
+    if not blog_post:
+        raise ValueError(f"GeneratedBlogPost missing on research question: {research_question_id}")
+
+    research_question_text = (research_question.question or "").strip()
+    if not research_question_text:
+        return 0
+
+    start_date_iso_format, end_date_iso_format = get_exa_date_range_iso_strings(
+        months_back=months_back
+    )
+    exa = Exa(api_key=settings.EXA_API_KEY)
+
+    exa_response = exa.search(
+        research_question_text,
+        end_crawl_date=end_date_iso_format,
+        end_published_date=end_date_iso_format,
+        start_crawl_date=start_date_iso_format,
+        start_published_date=start_date_iso_format,
+        num_results=num_results_per_question,
+        type="auto",
+    )
+
+    # The Exa SDK may return a typed response object (with .results) or a plain
+    # dict; support both shapes defensively.
+    exa_results = (
+        exa_response.results
+        if hasattr(exa_response, "results")
+        else (exa_response or {}).get("results", [])
+    )
+    exa_results = exa_results or []
+
+    num_links_upserted = 0
+    num_scrape_tasks_queued = 0
+
+    for result in exa_results:
+        # Same object-vs-dict defensiveness per result item.
+        if hasattr(result, "url"):
+            url = getattr(result, "url", "") or ""
+            title = getattr(result, "title", "") or ""
+            author = getattr(result, "author", "") or ""
+            published_date_raw = getattr(result, "publishedDate", None)
+        else:
+            url = (result or {}).get("url", "") or ""
+            title = (result or {}).get("title", "") or ""
+            author = (result or {}).get("author", "") or ""
+            published_date_raw = (result or {}).get("publishedDate") or (result or {}).get(
+                "published_date"
+            )
+
+        url = url.strip()
+        if not url.startswith(("http://", "https://")):
+            continue
+
+        # Skip over-long URLs — presumably matches the url field's max_length
+        # of 200; confirm against GeneratedBlogPostResearchLink.url.
+        if len(url) > 200:
+            continue
+
+        # Normalize the published date to an aware datetime before saving.
+        published_date = parse_datetime(published_date_raw) if published_date_raw else None
+        if published_date and timezone.is_naive(published_date):
+            published_date = timezone.make_aware(
+                published_date, timezone=timezone.get_current_timezone()
+            )
+
+        # Idempotent upsert keyed on (blog_post, research_question, url).
+        research_link, _created = GeneratedBlogPostResearchLink.objects.update_or_create(
+            blog_post=blog_post,
+            research_question=research_question,
+            url=url,
+            defaults={
+                "title": title[:500],
+                "author": author[:250],
+                "published_date": published_date,
+            },
+        )
+
+        num_links_upserted += 1
+
+        # Only queue a scrape if we have no stored content yet for this link.
+        should_queue_scrape_task = not (research_link.content or "").strip()
+        if should_queue_scrape_task:
+            async_task(
+                "core.content_generator.tasks.scrape_research_link_content_task",
+                research_link.id,
+                group="Scrape Research Links",
+            )
+            num_scrape_tasks_queued += 1
+
+    logger.info(
+        "[ContentGenerator] Exa research link search completed (single question)",
+        blog_post_id=blog_post.id,
+        research_question_id=research_question.id,
+        num_links_upserted=num_links_upserted,
+        num_scrape_tasks_queued=num_scrape_tasks_queued,
+        num_results_per_question=num_results_per_question,
+        months_back=months_back,
+    )
+
+    # If Exa returned no links for this question, nothing will trigger scrape/analyze kicks.
+    # This "kick" is safe (it will only queue synthesis when the overall blog post is ready).
+    if num_links_upserted == 0:
+        maybe_queue_section_content_synthesis_for_blog_post(blog_post_id=blog_post.id)
+
+    return num_links_upserted
+
+
+def scrape_research_link_content(*, research_link_id: int) -> bool:
+    """
+    Step 4a: For a single research link, fetch the page content using Jina Reader and store it.
+
+    Args:
+        research_link_id: PK of the GeneratedBlogPostResearchLink to scrape.
+
+    Returns:
+        True if content is present after the operation, False otherwise.
+
+    Raises:
+        ValueError: If the link cannot be loaded or lacks a blog post / question.
+    """
+    research_link = (
+        GeneratedBlogPostResearchLink.objects.select_related(
+            "blog_post",
+            "blog_post__title_suggestion",
+            "blog_post__project",
+            "research_question",
+            "research_question__section",
+        )
+        .filter(id=research_link_id)
+        .first()
+    )
+    if not research_link:
+        raise ValueError(f"GeneratedBlogPostResearchLink not found: {research_link_id}")
+
+    url = (research_link.url or "").strip()
+    if not url.startswith(("http://", "https://")):
+        logger.info(
+            "[ContentGenerator] Skipping scrape/summarize for invalid research link url",
+            research_link_id=research_link.id,
+            url=url,
+        )
+        # Fix: previously returned 0, violating the documented bool return type.
+        return False
+
+    blog_post = research_link.blog_post
+    research_question = research_link.research_question
+    if not blog_post or not research_question:
+        raise ValueError(f"Research link missing blog_post/research_question: {research_link_id}")
+
+    # Idempotency: a link that already has content counts as success.
+    if (research_link.content or "").strip():
+        logger.info(
+            "[ContentGenerator] Research link already scraped; skipping",
+            research_link_id=research_link.id,
+            blog_post_id=blog_post.id,
+        )
+        return True
+
+    scraped_title, scraped_description, scraped_content = get_markdown_content(url)
+    if not scraped_content.strip():
+        logger.warning(
+            "[ContentGenerator] Jina Reader returned empty content for research link",
+            research_link_id=research_link.id,
+            blog_post_id=blog_post.id,
+            url=url,
+        )
+        return False
+
+    # Prefer freshly scraped metadata, but fall back to any stored values instead
+    # of clobbering them with empty strings (the old code dropped an existing
+    # description whenever Jina returned none). Title is truncated to 500 chars
+    # to match the field's assumed max_length — confirm against the model.
+    research_link.title = ((scraped_title or research_link.title) or "")[:500]
+    research_link.description = scraped_description or research_link.description or ""
+    research_link.content = scraped_content
+    research_link.date_scraped = timezone.now()
+
+    update_fields = ["title", "description", "content", "date_scraped"]
+    research_link.save(update_fields=update_fields)
+
+    logger.info(
+        "[ContentGenerator] Research link scraped",
+        research_link_id=research_link.id,
+        blog_post_id=blog_post.id,
+        research_question_id=research_question.id,
+        updated_fields=update_fields,
+        url=url,
+    )
+
+    return True
+
+
+def analyze_research_link_content(*, research_link_id: int) -> int:
+    """
+    Step 4b: For a single research link (that already has content), generate:
+    - a general page summary
+    - a blog-post-contextual summary for the research question/section
+    - an answer to the research question (answer_to_question)
+
+    Always marks the link analyzed (date_analyzed) and kicks the section
+    synthesis check, even when the link has no content or was already analyzed,
+    so the pipeline can make progress.
+
+    Returns: number of fields updated on the research link.
+
+    Raises:
+        ValueError: If the link, its blog post/question, or the blog post's
+            title suggestion cannot be loaded.
+    """
+    research_link = (
+        GeneratedBlogPostResearchLink.objects.select_related(
+            "blog_post",
+            "blog_post__title_suggestion",
+            "blog_post__project",
+            "research_question",
+            "research_question__section",
+        )
+        .filter(id=research_link_id)
+        .first()
+    )
+    if not research_link:
+        raise ValueError(f"GeneratedBlogPostResearchLink not found: {research_link_id}")
+
+    blog_post = research_link.blog_post
+    research_question = research_link.research_question
+    if not blog_post or not research_question:
+        raise ValueError(f"Research link missing blog_post/research_question: {research_link_id}")
+
+    url = (research_link.url or "").strip()
+    page_markdown_content = (research_link.content or "").strip()
+    if not page_markdown_content:
+        logger.info(
+            "[ContentGenerator] Research link has no content yet; skipping analysis",
+            research_link_id=research_link.id,
+            blog_post_id=blog_post.id,
+            url=url,
+        )
+        # Mark terminal even without content so the "all links attempted"
+        # readiness check can complete.
+        research_link.date_analyzed = timezone.now()
+        research_link.save(update_fields=["date_analyzed"])
+        maybe_queue_section_content_synthesis_for_blog_post(blog_post_id=blog_post.id)
+        return 0
+
+    # Idempotency: only (re)generate the outputs that are still empty.
+    should_run_general_summary = not (research_link.general_summary or "").strip()
+    should_run_contextual_summary = not (research_link.summary_for_question_research or "").strip()
+    should_run_answer_to_question = not (research_link.answer_to_question or "").strip()
+    if (
+        not should_run_general_summary
+        and not should_run_contextual_summary
+        and not should_run_answer_to_question
+    ):
+        logger.info(
+            "[ContentGenerator] Research link already analyzed; skipping",
+            research_link_id=research_link.id,
+            blog_post_id=blog_post.id,
+        )
+        maybe_queue_section_content_synthesis_for_blog_post(blog_post_id=blog_post.id)
+        return 0
+
+    webpage_content = WebPageContent(
+        title=(research_link.title or "").strip(),
+        description=(research_link.description or "").strip(),
+        # Truncate to bound the prompt size for the analysis agent.
+        markdown_content=page_markdown_content[:MAX_RESEARCH_LINK_MARKDOWN_CHARS_FOR_SUMMARY],
+    )
+
+    update_fields: list[str] = []
+
+    title_suggestion = blog_post.title_suggestion
+    if not title_suggestion:
+        raise ValueError(f"GeneratedBlogPost missing title_suggestion: {blog_post.id}")
+
+    content_type_to_use = title_suggestion.content_type or ContentType.SHARING
+    blog_post_generation_context = _create_blog_post_generation_context(
+        title_suggestion=title_suggestion,
+        content_type_to_use=content_type_to_use,
+    )
+
+    section_title = (getattr(research_question.section, "title", "") or "").strip()
+    research_question_text = (research_question.question or "").strip()
+
+    analysis_agent = create_research_link_analysis_agent()
+    analysis_deps = ResearchLinkContextualSummaryContext(
+        url=url,
+        web_page_content=webpage_content,
+        blog_post_generation_context=blog_post_generation_context,
+        blog_post_title=(blog_post.title or title_suggestion.title or "").strip(),
+        section_title=section_title,
+        research_question=research_question_text,
+    )
+    analysis_result = run_agent_synchronously(
+        analysis_agent,
+        "Analyze this page for blog-post research.",
+        deps=analysis_deps,
+        function_name="analyze_research_link_content.research_link_analysis",
+        model_name="GeneratedBlogPostResearchLink",
+    )
+
+    # NOTE(review): analysis_result.output is used without a None-guard here,
+    # unlike other call sites in this module — confirm run_agent_synchronously
+    # always returns a populated result or raises.
+    if should_run_general_summary:
+        research_link.general_summary = (analysis_result.output.general_summary or "").strip()
+        update_fields.append("general_summary")
+
+    if should_run_contextual_summary:
+        research_link.summary_for_question_research = (
+            analysis_result.output.summary_for_question_research or ""
+        ).strip()
+        update_fields.append("summary_for_question_research")
+
+    if should_run_answer_to_question:
+        research_link.answer_to_question = (analysis_result.output.answer_to_question or "").strip()
+        update_fields.append("answer_to_question")
+
+    research_link.date_analyzed = timezone.now()
+    update_fields.append("date_analyzed")
+
+    research_link.save(update_fields=list(dict.fromkeys(update_fields)))
+
+    logger.info(
+        "[ContentGenerator] Research link analyzed",
+        research_link_id=research_link.id,
+        blog_post_id=blog_post.id,
+        research_question_id=research_question.id,
+        updated_fields=update_fields,
+        url=url,
+    )
+
+    maybe_queue_section_content_synthesis_for_blog_post(blog_post_id=blog_post.id)
+
+    return len(set(update_fields))
+
+
+def generate_research_questions_for_section(*, section_id: int) -> list[int]:
+    """
+    Step 2 (task): Generate research questions for a single section.
+
+    Skips Introduction/Conclusion sections (they need no research). In DEBUG,
+    questions are capped at LOCAL_MAX_RESEARCH_QUESTIONS_PER_SECTION to keep
+    local runs cheap.
+
+    Returns: list of created GeneratedBlogPostResearchQuestion IDs.
+
+    Raises:
+        ValueError: If the section does not exist or lacks a blog post /
+            title suggestion.
+    """
+    section = (
+        GeneratedBlogPostSection.objects.select_related(
+            "blog_post",
+            "blog_post__title_suggestion",
+            "blog_post__project",
+        )
+        .filter(id=section_id)
+        .first()
+    )
+    if not section:
+        raise ValueError(f"GeneratedBlogPostSection not found: {section_id}")
+
+    section_title = (section.title or "").strip()
+    if section_title in NON_RESEARCH_SECTION_TITLES:
+        logger.info(
+            "[ContentGenerator] Skipping research question generation for non-research section",
+            section_id=section.id,
+            section_title=section_title,
+            blog_post_id=section.blog_post_id,
+        )
+        return []
+
+    blog_post = section.blog_post
+    if not blog_post or not blog_post.title_suggestion_id:
+        raise ValueError(f"Section is missing blog_post/title_suggestion: {section_id}")
+
+    title_suggestion = blog_post.title_suggestion
+    content_type_to_use = title_suggestion.content_type or ContentType.SHARING
+    outline_context = _create_blog_post_generation_context(
+        title_suggestion=title_suggestion,
+        content_type_to_use=content_type_to_use,
+    )
+
+    research_questions_agent = create_blog_post_section_research_questions_agent()
+    questions_result = run_agent_synchronously(
+        research_questions_agent,
+        f"Generate research questions for section: {section_title}",
+        deps=outline_context,
+        function_name="generate_research_questions_for_section",
+        model_name="GeneratedBlogPost",
+    )
+
+    questions = (
+        questions_result.output.questions if questions_result and questions_result.output else []
+    )
+
+    questions_to_create = []
+    for question in questions:
+        research_question_text = (question or "").strip()
+        if not research_question_text:
+            continue
+        questions_to_create.append(
+            GeneratedBlogPostResearchQuestion(
+                blog_post=blog_post,
+                section=section,
+                # Truncated to 250 chars — presumably the field max_length;
+                # confirm against the model.
+                question=research_question_text[:250],
+            )
+        )
+
+    if settings.DEBUG:
+        questions_to_create = questions_to_create[:LOCAL_MAX_RESEARCH_QUESTIONS_PER_SECTION]
+
+    # NOTE(review): bulk_create only populates PKs on backends that return
+    # IDs (e.g. PostgreSQL) — confirm the deployment database.
+    created_questions = GeneratedBlogPostResearchQuestion.objects.bulk_create(questions_to_create)
+    created_question_ids = [
+        created_question.id for created_question in created_questions if created_question.id
+    ]
+
+    logger.info(
+        "[ContentGenerator] Research questions generated",
+        section_id=section.id,
+        blog_post_id=blog_post.id,
+        num_questions_created=len(created_question_ids),
+    )
+
+    # If no questions were created, nothing else will trigger Exa/scrape/analysis tasks.
+    # In that case, kick section synthesis so the pipeline can still proceed.
+    if not created_question_ids:
+        maybe_queue_section_content_synthesis_for_blog_post(blog_post_id=blog_post.id)
+
+    return created_question_ids
+
+
+def _build_research_questions_with_answered_links_for_section(
+    *, section: GeneratedBlogPostSection
+) -> list[ResearchQuestionWithAnsweredLinks]:
+    """Collect this section's research questions, keeping only links whose
+    answer_to_question is non-empty (unanswered/irrelevant links are dropped).
+
+    Assumes the caller prefetched section.research_questions and their
+    research_links to avoid N+1 queries.
+    """
+    research_questions_with_answered_links: list[ResearchQuestionWithAnsweredLinks] = []
+
+    section_questions = list(section.research_questions.all())
+    for research_question in section_questions:
+        question_text = (research_question.question or "").strip()
+        if not question_text:
+            continue
+
+        research_links = list(research_question.research_links.all())
+        answered_links = [
+            research_link
+            for research_link in research_links
+            if (research_link.answer_to_question or "").strip()
+        ]
+
+        research_link_snippets = []
+        if answered_links:
+            research_link_snippets = [
+                ResearchLinkAnswerSnippet(
+                    summary_for_question_research=(
+                        (research_link.summary_for_question_research or "").strip()
+                    ),
+                    general_summary=(research_link.general_summary or "").strip(),
+                    answer_to_question=(research_link.answer_to_question or "").strip(),
+                )
+                for research_link in answered_links
+            ]
+
+        # A question with zero answered links is still included (empty list),
+        # so the model knows the question existed even without sources.
+        research_questions_with_answered_links.append(
+            ResearchQuestionWithAnsweredLinks(
+                question=question_text,
+                research_links=research_link_snippets,
+            )
+        )
+
+    return research_questions_with_answered_links
+
+
+def _build_prior_section_contexts(
+    *, sections_in_order: list[GeneratedBlogPostSection], current_section_order: int
+) -> list[PriorSectionContext]:
+    """Return earlier middle sections (with non-empty content) as context for
+    the section currently being written.
+
+    Excludes the current/later sections, Introduction/Conclusion, and any
+    section whose content is still empty.
+    """
+    prior_sections: list[PriorSectionContext] = []
+    for section in sections_in_order:
+        if section.order >= current_section_order:
+            continue
+        if (section.title or "").strip() in NON_RESEARCH_SECTION_TITLES:
+            continue
+        content = (section.content or "").strip()
+        if not content:
+            continue
+        prior_sections.append(
+            PriorSectionContext(title=(section.title or "").strip(), content=content)
+        )
+    return prior_sections
+
+
+def synthesize_section_contents_for_blog_post(*, blog_post_id: int) -> int:
+    """
+    Step 5: Synthesize content for each middle section sequentially (excluding Introduction/Conclusion).
+
+    Context passed to the model:
+    - Project details
+    - Title suggestion details
+    - Current section info
+    - Research link results (only for links with non-empty answer_to_question)
+    - Other section titles
+    - Section order + previous section content for coherence
+
+    Idempotent: sections that already have content are skipped, so this task
+    can safely be queued more than once.
+
+    Returns:
+        Number of sections whose content was generated in this run.
+
+    Raises:
+        ValueError: If the blog post or its title suggestion cannot be loaded.
+    """
+    blog_post = (
+        GeneratedBlogPost.objects.select_related(
+            "title_suggestion",
+            "project",
+        )
+        .prefetch_related(
+            "blog_post_sections__research_questions__research_links",
+        )
+        .filter(id=blog_post_id)
+        .first()
+    )
+    if not blog_post:
+        raise ValueError(f"GeneratedBlogPost not found: {blog_post_id}")
+
+    title_suggestion = blog_post.title_suggestion
+    if not title_suggestion:
+        raise ValueError(f"GeneratedBlogPost missing title_suggestion: {blog_post_id}")
+
+    content_type_to_use = title_suggestion.content_type or ContentType.SHARING
+    blog_post_generation_context = _create_blog_post_generation_context(
+        title_suggestion=title_suggestion,
+        content_type_to_use=content_type_to_use,
+    )
+
+    # Stable ordering: by outline order, with id as tiebreaker.
+    sections_in_order = sorted(
+        list(blog_post.blog_post_sections.all()),
+        key=lambda section: (section.order, section.id),
+    )
+
+    all_section_titles = [
+        (section.title or "").strip()
+        for section in sections_in_order
+        if (section.title or "").strip()
+    ]
+    total_sections = len(sections_in_order)
+
+    middle_sections_in_order = [
+        section
+        for section in sections_in_order
+        if (section.title or "").strip() not in NON_RESEARCH_SECTION_TITLES
+    ]
+    total_research_sections = len(middle_sections_in_order)
+
+    section_agent = create_generate_blog_post_section_content_agent(
+        content_type=content_type_to_use
+    )
+
+    num_sections_generated = 0
+    for research_section_index, section in enumerate(middle_sections_in_order, start=1):
+        section_title = (section.title or "").strip()
+        if not section_title:
+            continue
+
+        # Skip sections already written (idempotency across repeated runs).
+        existing_content = (section.content or "").strip()
+        if existing_content:
+            continue
+
+        research_questions = _build_research_questions_with_answered_links_for_section(
+            section=section
+        )
+        prior_sections = _build_prior_section_contexts(
+            sections_in_order=sections_in_order,
+            current_section_order=section.order,
+        )
+
+        section_context = BlogPostSectionContentGenerationContext(
+            blog_post_generation_context=blog_post_generation_context,
+            blog_post_title=(blog_post.title or title_suggestion.title or "").strip(),
+            section_title=section_title,
+            section_order=section.order,
+            total_sections=total_sections,
+            research_section_order=research_section_index,
+            total_research_sections=total_research_sections,
+            other_section_titles=all_section_titles,
+            previous_sections=prior_sections,
+            research_questions=research_questions,
+        )
+
+        prompt = f"Write the section body content for: {section_title}"
+        generation_result = run_agent_synchronously(
+            section_agent,
+            prompt,
+            deps=section_context,
+            function_name="synthesize_section_contents_for_blog_post.section_content",
+            model_name="GeneratedBlogPostSection",
+        )
+
+        generated_schema: GeneratedBlogPostSectionContentSchema | None = (
+            generation_result.output if generation_result and generation_result.output else None
+        )
+        generated_content = (generated_schema.content if generated_schema else "").strip()
+        if not generated_content:
+            # Empty output is logged but not fatal; the retry mechanism below
+            # may re-queue synthesis for sections left without content.
+            logger.warning(
+                "[ContentGenerator] Section content generation returned empty content",
+                blog_post_id=blog_post.id,
+                section_id=section.id,
+                section_title=section_title,
+            )
+            continue
+
+        section.content = generated_content
+        section.save(update_fields=["content"])
+        num_sections_generated += 1
+
+        logger.info(
+            "[ContentGenerator] Section content synthesized",
+            blog_post_id=blog_post.id,
+            section_id=section.id,
+            section_title=section_title,
+            section_order=section.order,
+            research_section_order=research_section_index,
+            total_research_sections=total_research_sections,
+            content_length=len(generated_content),
+        )
+
+    # Kick the next pipeline stages: intro/conclusion if ready, and a bounded
+    # retry if some middle sections are still empty.
+    maybe_queue_intro_conclusion_generation_for_blog_post(blog_post_id=blog_post.id)
+    maybe_queue_section_content_synthesis_retry_for_blog_post(blog_post_id=blog_post.id)
+    return num_sections_generated
+
+
+def _get_section_synthesis_retry_cache_key(*, blog_post_id: int) -> str:
+    """Cache key holding the per-blog-post section-synthesis retry counter."""
+    return f"content_generator:section_synthesis_retry_count:{blog_post_id}"
+
+
+def maybe_queue_section_content_synthesis_retry_for_blog_post(*, blog_post_id: int) -> bool:
+    """
+    Retry mechanism for Step 5:
+
+    If research is "done enough" (all links are in a terminal analyzed/attempted state),
+    but some middle sections still have empty content (e.g. a model returned empty output
+    or a task was missed), re-queue section synthesis a bounded number of times.
+
+    Returns:
+        True if a retry task was queued, False otherwise (missing post, nothing
+        to retry, links still pending, or max retries reached).
+    """
+    blog_post = (
+        GeneratedBlogPost.objects.prefetch_related("blog_post_sections")
+        .filter(id=blog_post_id)
+        .first()
+    )
+    if not blog_post:
+        return False
+
+    sections_in_order = _get_sections_in_order_for_blog_post(blog_post)
+    middle_sections = [
+        section
+        for section in sections_in_order
+        if (section.title or "").strip() not in NON_RESEARCH_SECTION_TITLES
+    ]
+    has_any_middle_section_missing_content = any(
+        not (section.content or "").strip() for section in middle_sections
+    )
+    if not has_any_middle_section_missing_content:
+        return False
+
+    # Only retry when link processing is "complete" (including failures).
+    # If there are links still being processed, let the normal kicks handle it.
+    links_queryset = GeneratedBlogPostResearchLink.objects.filter(blog_post_id=blog_post_id)
+    has_any_pending_link = links_queryset.filter(date_analyzed__isnull=True).exists()
+    if has_any_pending_link:
+        return False
+
+    # NOTE(review): the get/set pair below is not atomic — concurrent kicks
+    # could slightly exceed max_retries. Acceptable for best-effort retries;
+    # confirm, or switch to cache.incr if strictness is needed.
+    max_retries = 5 if settings.DEBUG else 2
+    retry_cache_key = _get_section_synthesis_retry_cache_key(blog_post_id=blog_post_id)
+    retry_count = cache.get(retry_cache_key, 0) or 0
+    if retry_count >= max_retries:
+        logger.warning(
+            "[ContentGenerator] Not retrying section synthesis; max retries reached",
+            blog_post_id=blog_post_id,
+            retry_count=retry_count,
+            max_retries=max_retries,
+            num_middle_sections=len(middle_sections),
+            num_links_total=links_queryset.count(),
+        )
+        return False
+
+    cache.set(retry_cache_key, retry_count + 1, timeout=SECTION_SYNTHESIS_RETRY_CACHE_TTL_SECONDS)
+    async_task(
+        "core.content_generator.tasks.synthesize_section_contents_for_blog_post_task",
+        blog_post_id,
+        group="Synthesize Section Content (Retry)",
+    )
+    logger.info(
+        "[ContentGenerator] Queued section content synthesis retry task",
+        blog_post_id=blog_post_id,
+        retry_count=retry_count + 1,
+        max_retries=max_retries,
+        num_middle_sections=len(middle_sections),
+        num_links_total=links_queryset.count(),
+    )
+    return True
+
+
+def maybe_queue_section_content_synthesis_for_blog_post(*, blog_post_id: int) -> bool:
+ """
+ Queue Step 5 once research work is in a terminal state for all required inputs.
+
+ This is intentionally best-effort + idempotent:
+ - It may queue more than once, but the synthesis step skips sections that already have content.
+ """
+ blog_post = (
+ GeneratedBlogPost.objects.prefetch_related(
+ "blog_post_sections__research_questions__research_links"
+ )
+ .filter(id=blog_post_id)
+ .first()
+ )
+ if not blog_post:
+ return False
+
+ sections_in_order = _get_sections_in_order_for_blog_post(blog_post)
+ middle_sections_missing_content = [
+ section
+ for section in sections_in_order
+ if (section.title or "").strip() not in NON_RESEARCH_SECTION_TITLES
+ and not (section.content or "").strip()
+ ]
+
+ num_pending_links = 0
+ num_scrape_tasks_queued = 0
+ num_analyze_tasks_queued = 0
+
+ for section in middle_sections_missing_content:
+ section_questions = list(section.research_questions.all())
+ for research_question in section_questions:
+ research_links = list(research_question.research_links.all())
+ for research_link in research_links:
+ # Terminal state: we attempted analysis for this link (even if it failed).
+ if research_link.date_analyzed is not None:
+ continue
+
+ num_pending_links += 1
+
+ link_content = (research_link.content or "").strip()
+ if not link_content:
+ # If we haven't scraped content yet (or it failed previously but wasn't marked),
+ # re-queue a scrape attempt. The scrape task will always queue analysis next.
+ async_task(
+ "core.content_generator.tasks.scrape_research_link_content_task",
+ research_link.id,
+ group="Scrape Research Links (Retry/Kick)",
+ )
+ num_scrape_tasks_queued += 1
+ continue
+
+ # Content exists, but analysis hasn't run yet: queue AI augmentation.
+ async_task(
+ "core.content_generator.tasks.analyze_research_link_content_task",
+ research_link.id,
+ group="Analyze Research Links (Retry/Kick)",
+ )
+ num_analyze_tasks_queued += 1
+
+ if num_pending_links > 0:
+ logger.info(
+ "[ContentGenerator] Not queuing section synthesis; research links still pending",
+ blog_post_id=blog_post_id,
+ num_middle_sections_missing_content=len(middle_sections_missing_content),
+ num_pending_links=num_pending_links,
+ num_scrape_tasks_queued=num_scrape_tasks_queued,
+ num_analyze_tasks_queued=num_analyze_tasks_queued,
+ )
+ return False
+
+ async_task(
+ "core.content_generator.tasks.synthesize_section_contents_for_blog_post_task",
+ blog_post_id,
+ group="Synthesize Section Content",
+ )
+ logger.info(
+ "[ContentGenerator] Queued section content synthesis task",
+ blog_post_id=blog_post_id,
+ num_middle_sections_missing_content=len(middle_sections_missing_content),
+ )
+ return True
+
+
+def _get_sections_in_order_for_blog_post(
+ blog_post: GeneratedBlogPost,
+) -> list[GeneratedBlogPostSection]:
+ return sorted(
+ list(blog_post.blog_post_sections.all()),
+ key=lambda section: (section.order, section.id),
+ )
+
+
+def generate_intro_and_conclusion_for_blog_post(*, blog_post_id: int) -> int:
+ """
+ Step 6: Generate Introduction + Conclusion in a single model call.
+
+ Runs only when all middle sections have content.
+ """
+ blog_post = (
+ GeneratedBlogPost.objects.select_related(
+ "title_suggestion",
+ "project",
+ )
+ .prefetch_related("blog_post_sections")
+ .filter(id=blog_post_id)
+ .first()
+ )
+ if not blog_post:
+ raise ValueError(f"GeneratedBlogPost not found: {blog_post_id}")
+
+ title_suggestion = blog_post.title_suggestion
+ if not title_suggestion:
+ raise ValueError(f"GeneratedBlogPost missing title_suggestion: {blog_post_id}")
+
+ content_type_to_use = title_suggestion.content_type or ContentType.SHARING
+ blog_post_generation_context = _create_blog_post_generation_context(
+ title_suggestion=title_suggestion,
+ content_type_to_use=content_type_to_use,
+ )
+
+ sections_in_order = _get_sections_in_order_for_blog_post(blog_post)
+ section_titles_in_order = [
+ (section.title or "").strip()
+ for section in sections_in_order
+ if (section.title or "").strip()
+ ]
+
+ intro_section = next(
+ (
+ section
+ for section in sections_in_order
+ if (section.title or "").strip() == INTRODUCTION_SECTION_TITLE
+ ),
+ None,
+ )
+ conclusion_section = next(
+ (
+ section
+ for section in sections_in_order
+ if (section.title or "").strip() == CONCLUSION_SECTION_TITLE
+ ),
+ None,
+ )
+ if not intro_section or not conclusion_section:
+ raise ValueError(f"Blog post is missing Introduction/Conclusion sections: {blog_post_id}")
+
+ should_generate_intro = not (intro_section.content or "").strip()
+ should_generate_conclusion = not (conclusion_section.content or "").strip()
+ if not should_generate_intro and not should_generate_conclusion:
+ return 0
+
+ middle_sections = [
+ section
+ for section in sections_in_order
+ if (section.title or "").strip() not in NON_RESEARCH_SECTION_TITLES
+ ]
+ has_any_middle_section_missing_content = any(
+ not (section.content or "").strip() for section in middle_sections
+ )
+ if has_any_middle_section_missing_content:
+ logger.info(
+ "[ContentGenerator] Skipping intro/conclusion generation; middle sections not ready",
+ blog_post_id=blog_post.id,
+ num_middle_sections=len(middle_sections),
+ )
+ return 0
+
+ existing_sections_context = [
+ PriorSectionContext(
+ title=(section.title or "").strip(),
+ content=(section.content or "").strip(),
+ )
+ for section in sections_in_order
+ if (section.title or "").strip() and (section.content or "").strip()
+ ]
+
+ intro_conclusion_context = BlogPostIntroConclusionGenerationContext(
+ blog_post_generation_context=blog_post_generation_context,
+ blog_post_title=(blog_post.title or title_suggestion.title or "").strip(),
+ section_titles_in_order=section_titles_in_order,
+ sections_in_order=existing_sections_context,
+ )
+
+ agent = create_generate_blog_post_intro_conclusion_agent(content_type=content_type_to_use)
+ result = run_agent_synchronously(
+ agent,
+ "Write the Introduction and Conclusion for this blog post.",
+ deps=intro_conclusion_context,
+ function_name="generate_intro_and_conclusion_for_blog_post.intro_conclusion",
+ model_name="GeneratedBlogPostSection",
+ )
+
+ output: GeneratedBlogPostIntroConclusionSchema | None = (
+ result.output if result and result.output else None
+ )
+ if not output:
+ return 0
+
+ num_sections_updated = 0
+ if should_generate_intro:
+ introduction_content = (output.introduction or "").strip()
+ if introduction_content:
+ intro_section.content = introduction_content
+ intro_section.save(update_fields=["content"])
+ num_sections_updated += 1
+
+ if should_generate_conclusion:
+ conclusion_content = (output.conclusion or "").strip()
+ if conclusion_content:
+ conclusion_section.content = conclusion_content
+ conclusion_section.save(update_fields=["content"])
+ num_sections_updated += 1
+
+ logger.info(
+ "[ContentGenerator] Intro/conclusion generated",
+ blog_post_id=blog_post.id,
+ intro_generated=bool((intro_section.content or "").strip()),
+ conclusion_generated=bool((conclusion_section.content or "").strip()),
+ num_sections_updated=num_sections_updated,
+ )
+
+ maybe_populate_generated_blog_post_content(blog_post_id=blog_post.id)
+ return num_sections_updated
+
+
+def maybe_queue_intro_conclusion_generation_for_blog_post(*, blog_post_id: int) -> bool:
+ """
+ Queue Step 6 only when all middle sections have content.
+
+ Best-effort + idempotent: if it queues multiple times, the generation step skips when already present.
+ """
+ blog_post = (
+ GeneratedBlogPost.objects.prefetch_related("blog_post_sections")
+ .filter(id=blog_post_id)
+ .first()
+ )
+ if not blog_post:
+ return False
+
+ sections_in_order = _get_sections_in_order_for_blog_post(blog_post)
+ middle_sections = [
+ section
+ for section in sections_in_order
+ if (section.title or "").strip() not in NON_RESEARCH_SECTION_TITLES
+ ]
+
+ if any(not (section.content or "").strip() for section in middle_sections):
+ return False
+
+ async_task(
+ "core.content_generator.tasks.generate_intro_and_conclusion_for_blog_post_task",
+ blog_post_id,
+ group="Generate Intro and Conclusion",
+ )
+ logger.info(
+ "[ContentGenerator] Queued intro/conclusion generation task",
+ blog_post_id=blog_post_id,
+ num_middle_sections=len(middle_sections),
+ )
+ return True
+
+
+def _build_full_blog_post_markdown(*, blog_post: GeneratedBlogPost) -> str:
+ blog_post_title = (blog_post.title or "").strip()
+ if not blog_post_title:
+ return ""
+
+ sections_in_order = _get_sections_in_order_for_blog_post(blog_post)
+ markdown_chunks = [f"# {blog_post_title}", ""]
+
+ for section in sections_in_order:
+ section_title = (section.title or "").strip()
+ section_content = (section.content or "").strip()
+ if not section_title or not section_content:
+ continue
+
+ markdown_chunks.append(f"## {section_title}")
+ markdown_chunks.append("")
+ markdown_chunks.append(section_content)
+ markdown_chunks.append("")
+
+ full_markdown = "\n".join(markdown_chunks).strip() + "\n"
+ return full_markdown
+
+
+def populate_generated_blog_post_content(*, blog_post_id: int) -> bool:
+ """
+ Step 7: Populate GeneratedBlogPost.content from the generated section contents.
+
+ Runs only when:
+ - All sections (including Introduction + Conclusion) have non-empty content
+ - GeneratedBlogPost.content is currently empty
+ """
+ blog_post = (
+ GeneratedBlogPost.objects.prefetch_related("blog_post_sections")
+ .filter(id=blog_post_id)
+ .first()
+ )
+ if not blog_post:
+ raise ValueError(f"GeneratedBlogPost not found: {blog_post_id}")
+
+ if (blog_post.content or "").strip():
+ return False
+
+ sections_in_order = _get_sections_in_order_for_blog_post(blog_post)
+ if any(not (section.content or "").strip() for section in sections_in_order):
+ logger.info(
+ "[ContentGenerator] Skipping blog_post.content population; not all sections have content",
+ blog_post_id=blog_post.id,
+ num_sections=len(sections_in_order),
+ )
+ return False
+
+ full_markdown = _build_full_blog_post_markdown(blog_post=blog_post)
+ if not full_markdown.strip():
+ logger.warning(
+ "[ContentGenerator] Skipping blog_post.content population; built markdown is empty",
+ blog_post_id=blog_post.id,
+ )
+ return False
+
+ blog_post.content = full_markdown
+ blog_post.save(update_fields=["content"])
+
+ logger.info(
+ "[ContentGenerator] Populated GeneratedBlogPost.content from sections",
+ blog_post_id=blog_post.id,
+ content_length=len(full_markdown),
+ )
+ return True
+
+
+def maybe_populate_generated_blog_post_content(*, blog_post_id: int) -> bool:
+ """
+ Queue Step 7 when the whole pipeline is done.
+
+ Best-effort + idempotent: population skips if blog_post.content is already non-empty.
+ """
+ blog_post = (
+ GeneratedBlogPost.objects.prefetch_related("blog_post_sections")
+ .filter(id=blog_post_id)
+ .first()
+ )
+ if not blog_post:
+ return False
+
+ if (blog_post.content or "").strip():
+ return False
+
+ sections_in_order = _get_sections_in_order_for_blog_post(blog_post)
+ if any(not (section.content or "").strip() for section in sections_in_order):
+ return False
+
+ async_task(
+ "core.content_generator.tasks.populate_generated_blog_post_content_task",
+ blog_post_id,
+ group="Finalize Generated Blog Post Content",
+ )
+ logger.info(
+ "[ContentGenerator] Queued blog_post.content population task",
+ blog_post_id=blog_post_id,
+ num_sections=len(sections_in_order),
+ )
+ return True
diff --git a/core/content_generator/tasks.py b/core/content_generator/tasks.py
new file mode 100644
index 0000000..5f2463c
--- /dev/null
+++ b/core/content_generator/tasks.py
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+from django.conf import settings
+from django_q.tasks import async_task
+
+from core.content_generator.pipeline import (
+ analyze_research_link_content,
+ generate_intro_and_conclusion_for_blog_post,
+ generate_research_questions_for_section,
+ populate_generated_blog_post_content,
+ populate_research_links_for_question_from_exa,
+ scrape_research_link_content,
+ synthesize_section_contents_for_blog_post,
+)
+from tuxseo.utils import get_tuxseo_logger
+
+logger = get_tuxseo_logger(__name__)
+
+LOCAL_NUM_EXA_RESULTS_PER_QUESTION = 2
+
+
+def populate_research_links_for_question_from_exa_task(
+ research_question_id: int,
+ num_results_per_question: int = 2,
+ months_back: int = 6,
+):
+ """
+ Populate Exa research links for one research question.
+ """
+ num_results_per_question_to_use = (
+ LOCAL_NUM_EXA_RESULTS_PER_QUESTION if settings.DEBUG else num_results_per_question
+ )
+ num_links = populate_research_links_for_question_from_exa(
+ research_question_id=research_question_id,
+ num_results_per_question=num_results_per_question_to_use,
+ months_back=months_back,
+ )
+ logger.info(
+ "[ContentGenerator Tasks] Populated Exa research links for question",
+ research_question_id=research_question_id,
+ num_links_upserted=num_links,
+ num_results_per_question=num_results_per_question_to_use,
+ months_back=months_back,
+ )
+ return f"Populated {num_links} research links for research question {research_question_id}"
+
+
+def scrape_research_link_content_task(research_link_id: int):
+ """
+ Fetch research link content using Jina Reader.
+ Always queue the analysis task after the scrape attempt.
+
+ Rationale:
+ - Jina can return empty content for some URLs (parsing failures).
+ - We still want the pipeline to progress and eventually synthesize sections using
+ whatever research succeeded, rather than stalling forever on a few bad links.
+ """
+ did_fetch_content = scrape_research_link_content(research_link_id=research_link_id)
+ logger.info(
+ "[ContentGenerator Tasks] Scraped research link",
+ research_link_id=research_link_id,
+ did_fetch_content=did_fetch_content,
+ )
+ async_task(
+ "core.content_generator.tasks.analyze_research_link_content_task",
+ research_link_id,
+ group="Analyze Research Links",
+ )
+ return f"Scraped research link {research_link_id} (did_fetch_content={did_fetch_content})"
+
+
+def analyze_research_link_content_task(research_link_id: int):
+ """
+ Analyze a research link that has already been scraped:
+ - generate general summary
+ - generate blog-post contextual summary for the research question/section
+ - generate an answer to the research question
+ """
+ num_fields_updated = analyze_research_link_content(research_link_id=research_link_id)
+ logger.info(
+ "[ContentGenerator Tasks] Analyzed research link",
+ research_link_id=research_link_id,
+ num_fields_updated=num_fields_updated,
+ )
+ return f"Analyzed research link {research_link_id} (updated_fields={num_fields_updated})"
+
+
+def synthesize_section_contents_for_blog_post_task(blog_post_id: int):
+ """
+ Synthesize the content for each middle section sequentially (excluding Introduction/Conclusion).
+ """
+ num_sections_generated = synthesize_section_contents_for_blog_post(blog_post_id=blog_post_id)
+ logger.info(
+ "[ContentGenerator Tasks] Synthesized section contents for blog post",
+ blog_post_id=blog_post_id,
+ num_sections_generated=num_sections_generated,
+ )
+ return f"Synthesized {num_sections_generated} section(s) for blog post {blog_post_id}"
+
+
+def generate_intro_and_conclusion_for_blog_post_task(blog_post_id: int):
+ """
+ Generate Introduction + Conclusion in one AI call.
+ Only runs once all middle sections have content.
+ """
+ num_sections_updated = generate_intro_and_conclusion_for_blog_post(blog_post_id=blog_post_id)
+ logger.info(
+ "[ContentGenerator Tasks] Generated intro and conclusion for blog post",
+ blog_post_id=blog_post_id,
+ num_sections_updated=num_sections_updated,
+ )
+ return (
+ f"Generated intro/conclusion (updated={num_sections_updated}) for blog post {blog_post_id}"
+ )
+
+
+def populate_generated_blog_post_content_task(blog_post_id: int):
+ """
+ Populate GeneratedBlogPost.content from the generated sections.
+ """
+ did_populate = populate_generated_blog_post_content(blog_post_id=blog_post_id)
+ logger.info(
+ "[ContentGenerator Tasks] Populated GeneratedBlogPost.content",
+ blog_post_id=blog_post_id,
+ did_populate=did_populate,
+ )
+ return f"Populated GeneratedBlogPost.content for blog post {blog_post_id} (did_populate={did_populate})"
+
+
+def generate_research_questions_for_section_task(section_id: int):
+ """
+ Generate research questions for one section, then queue Exa research link tasks for each
+ created question.
+ """
+ created_research_question_ids = generate_research_questions_for_section(section_id=section_id)
+
+ for research_question_id in created_research_question_ids:
+ async_task(
+ "core.content_generator.tasks.populate_research_links_for_question_from_exa_task",
+ research_question_id,
+ group="Populate Research Links",
+ )
+
+ logger.info(
+ "[ContentGenerator Tasks] Generated research questions for section",
+ section_id=section_id,
+ num_questions_created=len(created_research_question_ids),
+ )
+ return f"Generated {len(created_research_question_ids)} research questions for section {section_id}" # noqa: E501
diff --git a/core/content_generator/utils.py b/core/content_generator/utils.py
new file mode 100644
index 0000000..c637adb
--- /dev/null
+++ b/core/content_generator/utils.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from datetime import timedelta
+
+from django.utils import timezone
+
+
+def get_exa_date_range_iso_strings(*, months_back: int) -> tuple[str, str]:
+ """
+ Exa expects date filters as strings (YYYY-MM-DD).
+ """
+ current_datetime = timezone.now()
+ end_date_iso_format = current_datetime.date().isoformat()
+ start_date_iso_format = (current_datetime - timedelta(days=months_back * 30)).date().isoformat()
+ return start_date_iso_format, end_date_iso_format
diff --git a/core/models.py b/core/models.py
index c23e23f..0005af3 100644
--- a/core/models.py
+++ b/core/models.py
@@ -10,9 +10,7 @@
from django.db import models, transaction
from django.urls import reverse
from django.utils import timezone
-from django.utils.text import slugify
from django_q.tasks import async_task
-from gpt_researcher import GPTResearcher
from pgvector.django import HnswIndex, VectorField
from core.agents import (
@@ -60,9 +58,7 @@
get_og_image_prompt,
get_relevant_external_pages_for_blog_post,
get_relevant_pages_for_blog_post,
- process_generated_blog_content,
run_agent_synchronously,
- run_gptr_synchronously,
)
from tuxseo.utils import get_tuxseo_logger
@@ -805,85 +801,19 @@ def get_blog_post_keywords(self):
return keywords_to_use
def generate_content(self, content_type=ContentType.SHARING):
- # query defines the research question researcher will analyze
- # custom_prompt controls how the research findings are presented
-
- # Suggestion Instructions
- query = "Write a post from the following suggestion:\n"
- query += f"{self.title_suggestion_string_for_ai}\n\n"
-
- # Get keywords to use in the blog post
- project_keywords = list(
- self.project.project_keywords.filter(use=True).select_related("keyword")
- )
- project_keyword_texts = [keyword.keyword.keyword_text for keyword in project_keywords]
- post_suggestion_keywords = self.target_keywords or []
- keywords_to_use = list(set(project_keyword_texts + post_suggestion_keywords))
- newline_separator = "\n"
- keywords_list = newline_separator.join([f"- {keyword}" for keyword in keywords_to_use])
- query += "The following keywords should be used (organically) in the blog post:\n"
- query += keywords_list
- query += "\n\n"
-
- query += "Quick reminder. You are writing a blog post for this company."
- query += self.project.project_desctiption_string_for_ai
- query += ". Make it look good, as the best solution for anyone reading the post."
- query += "\n\n"
-
- # # Writing Instructions
- # query += GENERATE_CONTENT_SYSTEM_PROMPTS[content_type]
- # query += "\n"
- query += GeneratedBlogPost.blog_post_structure_rules()
-
- agent = GPTResearcher(
- query,
- report_type="deep",
- tone="Simple (written for young readers, using basic vocabulary and clear explanations)", # noqa: E501
- report_format="markdown",
- )
-
- result = run_gptr_synchronously(agent)
+ """
+ Backward-compatible wrapper around the content generation pipeline.
- # Create blog post with raw content first
- slug = slugify(self.title)
- tags = ", ".join(self.target_keywords) if self.target_keywords else ""
+ Historically, this method created the blog post content directly. It is now kept
+ as a thin wrapper to preserve existing call sites while the pipeline evolves.
+ """
+ from core.content_generator.pipeline import init_blog_post_content_generation
- blog_post = GeneratedBlogPost.objects.create(
- project=self.project,
+ return init_blog_post_content_generation(
title_suggestion=self,
- title=self.title, # Temporary title, will be updated after processing
- description=self.suggested_meta_description,
- slug=slug,
- tags=tags,
- content=result, # Raw content from GPTResearcher
+ content_type=content_type,
)
- # Insert links into the blog post content
- blog_post.insert_links_into_post()
-
- # Process content after link insertion (extract title, clean up sections)
- blog_post_title, blog_post_content = process_generated_blog_content(
- generated_content=blog_post.content, # Use content after link insertion
- fallback_title=self.title,
- title_suggestion_id=self.id,
- project_id=self.project.id,
- )
-
- # Update blog post with processed content and extracted title
- blog_post.title = blog_post_title
- blog_post.slug = slugify(blog_post_title)
- blog_post.content = blog_post_content
- blog_post.save(update_fields=["title", "slug", "content"])
-
- if self.project.enable_automatic_og_image_generation:
- async_task(
- "core.tasks.generate_og_image_for_blog_post",
- blog_post.id,
- group="Generate OG Image",
- )
-
- return blog_post
-
class AutoSubmissionSetting(BaseModel):
project = models.ForeignKey(
@@ -930,6 +860,8 @@ class GeneratedBlogPost(BaseModel):
on_delete=models.CASCADE,
related_name="generated_blog_posts",
)
+
+ # Final Output Items
title = models.CharField(max_length=250)
description = models.TextField(blank=True)
slug = models.SlugField(max_length=250)
@@ -938,6 +870,10 @@ class GeneratedBlogPost(BaseModel):
icon = models.ImageField(upload_to="generated_blog_post_icons/", blank=True)
image = models.ImageField(upload_to="generated_blog_post_images/", blank=True)
+ # Preparation data (outline, per-section research, and drafted content) lives in the
+ # related GeneratedBlogPostSection model (reverse accessor: blog_post_sections)
+
+ # Other
posted = models.BooleanField(default=False)
date_posted = models.DateTimeField(null=True, blank=True)
@@ -1250,6 +1186,73 @@ def insert_links_into_post(self, max_pages=4, max_external_pages=3):
return content_with_links
+class GeneratedBlogPostSection(BaseModel):
+ blog_post = models.ForeignKey(
+ GeneratedBlogPost,
+ null=True,
+ blank=True,
+ on_delete=models.CASCADE,
+ related_name="blog_post_sections",
+ )
+ title = models.CharField(max_length=250)
+ content = models.TextField(blank=True, default="")
+ order = models.IntegerField(default=0)
+ # Research questions for this section live in GeneratedBlogPostResearchQuestion (reverse accessor: research_questions)
+
+
+class GeneratedBlogPostResearchQuestion(BaseModel):
+ blog_post = models.ForeignKey(
+ GeneratedBlogPost,
+ null=True,
+ blank=True,
+ on_delete=models.CASCADE,
+ related_name="research_questions",
+ )
+ section = models.ForeignKey(
+ GeneratedBlogPostSection,
+ null=True,
+ blank=True,
+ on_delete=models.CASCADE,
+ related_name="research_questions",
+ )
+ question = models.CharField(max_length=250)
+
+
+class GeneratedBlogPostResearchLink(BaseModel):
+ blog_post = models.ForeignKey(
+ GeneratedBlogPost,
+ null=True,
+ blank=True,
+ on_delete=models.CASCADE,
+ related_name="research_links",
+ )
+ research_question = models.ForeignKey(
+ GeneratedBlogPostResearchQuestion,
+ null=True,
+ blank=True,
+ on_delete=models.CASCADE,
+ related_name="research_links",
+ )
+
+ # initial data
+ url = models.URLField(max_length=200)
+ title = models.CharField(max_length=500, blank=True, default="")
+ author = models.CharField(max_length=250, blank=True, default="")
+ published_date = models.DateTimeField(null=True, blank=True)
+
+ # jina augmentation — NOTE(review): date_scraped below uses auto_now_add, so it records row creation time, not the actual scrape time; confirm intent
+ date_scraped = models.DateTimeField(auto_now_add=True)
+ content = models.TextField(blank=True, default="")
+ description = models.TextField(blank=True, default="")
+
+ # ai augmentation
+ date_analyzed = models.DateTimeField(null=True, blank=True)
+ summary_for_question_research = models.TextField(blank=True, default="")
+ general_summary = models.TextField(blank=True)
+ general_summary_embedding = VectorField(dimensions=1024, default=None, null=True, blank=True)
+ answer_to_question = models.TextField(blank=True, default="")
+
+
class ProjectPage(BaseModel):
project = models.ForeignKey(
Project, null=True, blank=True, on_delete=models.CASCADE, related_name="project_pages"
@@ -1998,3 +2001,22 @@ class Meta:
def __str__(self):
return f"{self.email_type} to {self.email_address}"
+
+
+class Backlink(BaseModel):
+ linked_to_project_page = models.ForeignKey(
+ Project, null=True, blank=True, on_delete=models.CASCADE, related_name="backlinks_to"
+ )
+ linkning_to_project_page = models.ForeignKey(
+ ProjectPage, null=True, blank=True, on_delete=models.CASCADE, related_name="backlinks"
+ )
+
+ linked_from_project_page = models.ForeignKey(
+ Project, null=True, blank=True, on_delete=models.CASCADE, related_name="backlinks_from"
+ )
+ linking_from_blog_post = models.ForeignKey(
+ GeneratedBlogPost, null=True, blank=True, on_delete=models.CASCADE, related_name="backlinks"
+ )
+
+ def __str__(self):
+ return f"{self.linking_from_blog_post.title} -> {self.linked_to_project_page.url}"
diff --git a/core/urls.py b/core/urls.py
index 8df36d2..b90da66 100644
--- a/core/urls.py
+++ b/core/urls.py
@@ -56,6 +56,11 @@
views.GeneratedBlogPostDetailView.as_view(),
name="generated_blog_post_detail",
),
+ path(
+ "project/<int:project_pk>/title-suggestion/<int:pk>/research/",
+ views.BlogPostResearchProcessView.as_view(),
+ name="blog_post_research_process",
+ ),
path(
"project/<int:project_pk>/post/<int:pk>/download-pdf/",
views.download_blog_post_pdf,
diff --git a/core/views.py b/core/views.py
index 2524cfa..21d30c2 100644
--- a/core/views.py
+++ b/core/views.py
@@ -27,6 +27,7 @@
from core.models import (
AutoSubmissionSetting,
BlogPost,
+ BlogPostTitleSuggestion,
Competitor,
GeneratedBlogPost,
KeywordTrend,
@@ -38,6 +39,7 @@
track_event,
try_create_posthog_alias,
)
+from core.utils import get_relevant_external_pages_for_blog_post
from tuxseo.utils import get_tuxseo_logger
stripe.api_key = settings.STRIPE_SECRET_KEY
@@ -842,6 +844,7 @@ def get_queryset(self):
def get_context_data(self, **kwargs):
from urllib.parse import urlparse
+
from django.core.paginator import Paginator
context = super().get_context_data(**kwargs)
@@ -977,6 +980,213 @@ def get_context_data(self, **kwargs):
return context
+class BlogPostResearchProcessView(LoginRequiredMixin, DetailView):
+ model = BlogPostTitleSuggestion
+ template_name = "blog/blog_post_research_process.html"
+ context_object_name = "title_suggestion"
+
+ def get_queryset(self):
+ return BlogPostTitleSuggestion.objects.filter(
+ project__profile=self.request.user.profile,
+ project__pk=self.kwargs["project_pk"],
+ )
+
+ def _get_generated_blog_posts(self, title_suggestion: BlogPostTitleSuggestion):
+ return (
+ title_suggestion.generated_blog_posts.select_related("project", "title_suggestion")
+ .prefetch_related(
+ "blog_post_sections__research_questions__research_links",
+ "research_questions__research_links",
+ )
+ .order_by("-id")
+ )
+
+ def _build_generated_blog_posts_data(self, generated_blog_posts):
+ generated_blog_posts_data = []
+
+ for generated_blog_post in generated_blog_posts:
+ sections = sorted(
+ list(generated_blog_post.blog_post_sections.all()),
+ key=lambda section: (section.order, section.id),
+ )
+ blog_level_questions = sorted(
+ [
+ question
+ for question in generated_blog_post.research_questions.all()
+ if not question.section_id
+ ],
+ key=lambda question: question.id,
+ )
+
+ sections_data = []
+ for section in sections:
+ section_questions = sorted(
+ list(section.research_questions.all()),
+ key=lambda question: question.id,
+ )
+ section_questions_data = []
+ for section_question in section_questions:
+ research_links = sorted(
+ list(section_question.research_links.all()),
+ key=lambda research_link: research_link.id,
+ )
+ section_questions_data.append(
+ {
+ "id": section_question.id,
+ "question": section_question.question,
+ "links": research_links,
+ }
+ )
+
+ sections_data.append(
+ {
+ "id": section.id,
+ "order": section.order,
+ "title": section.title,
+ "content": section.content or "",
+ "questions": section_questions_data,
+ }
+ )
+
+ blog_level_questions_data = []
+ for blog_level_question in blog_level_questions:
+ research_links = sorted(
+ list(blog_level_question.research_links.all()),
+ key=lambda research_link: research_link.id,
+ )
+ blog_level_questions_data.append(
+ {
+ "id": blog_level_question.id,
+ "question": blog_level_question.question,
+ "links": research_links,
+ }
+ )
+
+ generated_blog_posts_data.append(
+ {
+ "id": generated_blog_post.id,
+ "project_id": generated_blog_post.project_id,
+ "title_suggestion_id": generated_blog_post.title_suggestion_id,
+ "title": generated_blog_post.title,
+ "description": generated_blog_post.description,
+ "slug": generated_blog_post.slug,
+ "tags": generated_blog_post.tags,
+ "posted": generated_blog_post.posted,
+ "date_posted": generated_blog_post.date_posted,
+ "content_length": len(generated_blog_post.content or ""),
+ "sections": sections_data,
+ "blog_level_questions": blog_level_questions_data,
+ }
+ )
+
+ return generated_blog_posts_data
+
+ def _get_internal_links(
+ self, title_suggestion: BlogPostTitleSuggestion, should_compute_links: bool
+ ):
+ manually_selected_project_pages = list(
+ title_suggestion.project.project_pages.filter(always_use=True)
+ )
+ if not should_compute_links:
+ return manually_selected_project_pages
+
+ if not settings.JINA_READER_API_KEY:
+ return manually_selected_project_pages
+
+ return title_suggestion.get_internal_links(max_pages=2)
+
+ def _get_external_links(
+ self, title_suggestion: BlogPostTitleSuggestion, should_compute_links: bool
+ ):
+ if not should_compute_links:
+ return []
+
+ if not settings.JINA_READER_API_KEY:
+ return []
+
+ meta_description = title_suggestion.suggested_meta_description or ""
+ external_pages = get_relevant_external_pages_for_blog_post(
+ meta_description=meta_description,
+ exclude_project=title_suggestion.project,
+ max_pages=3,
+ )
+ return list(external_pages)
+
+ def get_context_data(self, **kwargs):
+ context = super().get_context_data(**kwargs)
+ title_suggestion = self.object
+ project = title_suggestion.project
+ profile = self.request.user.profile
+
+ should_compute_links = self.request.GET.get("compute_links") == "true"
+
+ project_keywords = project.get_keywords()
+ title_suggestion.keywords_with_usage = []
+ if title_suggestion.target_keywords:
+ for keyword_text in title_suggestion.target_keywords:
+ keyword_info = project_keywords.get(
+ keyword_text.lower(),
+ {"keyword": None, "in_use": False, "project_keyword_id": None},
+ )
+ title_suggestion.keywords_with_usage.append(
+ {
+ "text": keyword_text,
+ "keyword": keyword_info["keyword"],
+ "in_use": keyword_info["in_use"],
+ "project_keyword_id": keyword_info["project_keyword_id"],
+ }
+ )
+
+ generated_blog_posts = self._get_generated_blog_posts(title_suggestion)
+ generated_blog_posts_data = self._build_generated_blog_posts_data(generated_blog_posts)
+
+ try:
+ keywords_to_use = title_suggestion.get_blog_post_keywords()
+ except (AttributeError, TypeError):
+ logger.warning(
+ "[BlogPostResearchProcessView] Failed to compute keywords_to_use",
+ title_suggestion_id=title_suggestion.id,
+ project_id=project.id,
+ exc_info=True,
+ )
+ keywords_to_use = []
+
+ try:
+ internal_links = self._get_internal_links(title_suggestion, should_compute_links)
+ except (AttributeError, TypeError, ValueError):
+ logger.warning(
+ "[BlogPostResearchProcessView] Failed to compute internal_links",
+ title_suggestion_id=title_suggestion.id,
+ project_id=project.id,
+ should_compute_links=should_compute_links,
+ exc_info=True,
+ )
+ internal_links = []
+
+ try:
+ external_links = self._get_external_links(title_suggestion, should_compute_links)
+ except (AttributeError, TypeError, ValueError):
+ logger.warning(
+ "[BlogPostResearchProcessView] Failed to compute external_links",
+ title_suggestion_id=title_suggestion.id,
+ project_id=project.id,
+ should_compute_links=should_compute_links,
+ exc_info=True,
+ )
+ external_links = []
+
+ context["project"] = project
+ context["has_pro_subscription"] = profile.is_on_pro_plan
+ context["jina_api_key_configured"] = bool(settings.JINA_READER_API_KEY)
+ context["should_compute_links"] = should_compute_links
+ context["keywords_to_use"] = keywords_to_use
+ context["internal_links"] = internal_links or []
+ context["external_links"] = external_links or []
+ context["generated_blog_posts"] = generated_blog_posts_data
+
+ return context
+
+
class CompetitorBlogPostDetailView(LoginRequiredMixin, DetailView):
model = Competitor
template_name = "project/competitor_blog_post_detail.html"
diff --git a/frontend/templates/blog/blog_post_research_process.html b/frontend/templates/blog/blog_post_research_process.html
new file mode 100644
index 0000000..d180a54
--- /dev/null
+++ b/frontend/templates/blog/blog_post_research_process.html
@@ -0,0 +1,336 @@
+{% extends "base_project.html" %}
+{% load static %}
+
+{% block meta %}
+Research Process - {{ title_suggestion.title }} - TuxSEO
+{% endblock meta %}
+
+{% block project_content %}
+
+
+
+
+
+
{{ title_suggestion.title }}
+ {% if title_suggestion.description %}
+
{{ title_suggestion.description }}
+ {% endif %}
+
+
+
+ {% if jina_api_key_configured %}
+ {% if should_compute_links %}
+
+ Hide computed links
+
+ {% else %}
+
+ Compute links
+
+ {% endif %}
+ {% else %}
+
+ Link computation requires Jina API key
+
+ {% endif %}
+
+
+
+
+
+
Title Suggestion
+
+
+
+
Suggestion ID
+
{{ title_suggestion.id }}
+
+
+
Content type
+
{{ title_suggestion.content_type }}
+
+
+
Category
+
{{ title_suggestion.category }}
+
+
+
Created
+
{{ title_suggestion.created_at|date:"F j, Y g:i A" }}
+
+
+
+ {% if title_suggestion.target_keywords %}
+
+
Target keywords
+
+ {% for keyword_data in title_suggestion.keywords_with_usage %}
+ {% include "components/keyword_chip.html" with keyword=keyword_data.text project_id=project.id keyword_in_use=keyword_data.in_use %}
+ {% endfor %}
+
+
+ {% endif %}
+
+ {% if title_suggestion.suggested_meta_description %}
+
+
Suggested meta description
+
+ {{ title_suggestion.suggested_meta_description }}
+
+
+ {% endif %}
+
+ {% if title_suggestion.prompt %}
+
+
Prompt
+
{{ title_suggestion.prompt }}
+
+ {% endif %}
+
+
+
+
+
Derived inputs
+
+ {% if should_compute_links %}
+ Computed links are enabled.
+ {% else %}
+ Computed links are disabled (fast mode).
+ {% endif %}
+
+
+
+
+
+
Keywords to use ({{ keywords_to_use|length }})
+ {% if keywords_to_use %}
+
+ {% for keyword_text in keywords_to_use %}
+
+ {{ keyword_text }}
+
+ {% endfor %}
+
+ {% else %}
+
No keywords computed.
+ {% endif %}
+
+
+
+
Internal links ({{ internal_links|length }})
+ {% if internal_links %}
+
+
+
+
+ | Title |
+ URL |
+ Always use |
+
+
+
+ {% for project_page in internal_links %}
+
+ | {{ project_page.title }} |
+
+
+ {{ project_page.url }}
+
+ |
+ {{ project_page.always_use|yesno:"Yes,No" }} |
+
+ {% endfor %}
+
+
+
+ {% else %}
+
No internal links available.
+ {% endif %}
+
+
+
+
External links ({{ external_links|length }})
+ {% if external_links %}
+
+
+
+
+ | Project |
+ Title |
+ URL |
+
+
+
+ {% for project_page in external_links %}
+
+ | {{ project_page.project.name }} |
+ {{ project_page.title }} |
+
+
+ {{ project_page.url }}
+
+ |
+
+ {% endfor %}
+
+
+
+ {% else %}
+
No external links available.
+ {% endif %}
+
+
+
+
+
+
Generated blog posts ({{ generated_blog_posts|length }})
+
+ {% if generated_blog_posts %}
+
+ {% for blog_post in generated_blog_posts %}
+
+
+
+
#{{ blog_post.id }} — {{ blog_post.title }}
+
+ content_length={{ blog_post.content_length }} · posted={{ blog_post.posted|yesno:"true,false" }}
+ {% if blog_post.slug %} · slug={{ blog_post.slug }}{% endif %}
+
+
+
+ {% if blog_post.content_length > 0 %}
+
+ View post
+
+ {% endif %}
+
+
+
+
+ {% if blog_post.description %}
+
+
Description
+
{{ blog_post.description }}
+
+ {% endif %}
+
+ {% if blog_post.sections %}
+
+
Sections ({{ blog_post.sections|length }})
+
+ {% for section in blog_post.sections %}
+
+
+ [{{ section.order }}] {{ section.title }}
+
+
+ {% if section.content %}
+
{{ section.content }}
+ {% endif %}
+
+
+
Research questions ({{ section.questions|length }})
+ {% if section.questions %}
+
+ {% for research_question in section.questions %}
+
+
Q{{ research_question.id }}: {{ research_question.question }}
+
+
Links ({{ research_question.links|length }})
+ {% if research_question.links %}
+
+ {% for research_link in research_question.links %}
+ -
+
{{ research_link.title|default:"(no title)" }}
+
+ {{ research_link.url }}
+
+
+ author={{ research_link.author|default:"" }}
+ {% if research_link.published_date %} · published={{ research_link.published_date|date:"Y-m-d" }}{% endif %}
+ {% if research_link.date_scraped %} · scraped={{ research_link.date_scraped|date:"Y-m-d H:i" }}{% endif %}
+ {% if research_link.date_analyzed %} · analyzed={{ research_link.date_analyzed|date:"Y-m-d H:i" }}{% endif %}
+
+
+ {% endfor %}
+
+ {% else %}
+
No links.
+ {% endif %}
+
+
+ {% endfor %}
+
+ {% else %}
+
No research questions.
+ {% endif %}
+
+
+
+ {% endfor %}
+
+
+ {% endif %}
+
+ {% if blog_post.blog_level_questions %}
+
+
Blog-level research questions ({{ blog_post.blog_level_questions|length }})
+
+ {% for research_question in blog_post.blog_level_questions %}
+
+
Q{{ research_question.id }}: {{ research_question.question }}
+
+
Links ({{ research_question.links|length }})
+ {% if research_question.links %}
+
+ {% for research_link in research_question.links %}
+ -
+
{{ research_link.title|default:"(no title)" }}
+
+ {{ research_link.url }}
+
+
+ author={{ research_link.author|default:"" }}
+ {% if research_link.published_date %} · published={{ research_link.published_date|date:"Y-m-d" }}{% endif %}
+ {% if research_link.date_scraped %} · scraped={{ research_link.date_scraped|date:"Y-m-d H:i" }}{% endif %}
+ {% if research_link.date_analyzed %} · analyzed={{ research_link.date_analyzed|date:"Y-m-d H:i" }}{% endif %}
+
+
+ {% endfor %}
+
+ {% else %}
+
No links.
+ {% endif %}
+
+
+ {% endfor %}
+
+
+ {% endif %}
+
+
+ {% endfor %}
+
+ {% else %}
+
No generated blog posts found for this title suggestion yet.
+ {% endif %}
+
+
+
+{% endblock project_content %}
diff --git a/frontend/templates/blog/generated_blog_post_detail.html b/frontend/templates/blog/generated_blog_post_detail.html
index c5daf0e..9dc8002 100644
--- a/frontend/templates/blog/generated_blog_post_detail.html
+++ b/frontend/templates/blog/generated_blog_post_detail.html
@@ -30,6 +30,20 @@
{{ generated_post.description }}
{% endif %}
+
+ {% if generated_post.title_suggestion_id %}
+
+ {% endif %}
diff --git a/frontend/templates/components/blog_post_suggestion_card.html b/frontend/templates/components/blog_post_suggestion_card.html
index 408b6ce..00ab42e 100644
--- a/frontend/templates/components/blog_post_suggestion_card.html
+++ b/frontend/templates/components/blog_post_suggestion_card.html
@@ -164,8 +164,18 @@
-
-
+
+
+
+ Research
+
+
+