Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ignore files in the Python virtual environment
/env/

# A convenient place to store mbox files downloaded from production
# to facilitate testing and automation.
/mboxes/
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# A handy target to reset the development environment back to a clean slate
# and run the development server. Run it from the repository root: every path
# below is relative to it.
#
# Steps: drop and recreate the "archives" database, apply the Django
# migrations, load one month of pgsql-hackers mail, then start the dev server.
#
# XXX: For now just use the single mbox file that was previously downloaded.
# Additional work in this area, for testing use cases, is needed.
#
# Declared phony because the target never produces a file of that name;
# without this, a stray file or directory called "dev-rebuild-and-run" would
# make the target look up to date and silently skip the recipe.
.PHONY: dev-rebuild-and-run
dev-rebuild-and-run:
	dropdb --if-exists archives
	createdb archives
	django/manage.py migrate
	loader/load_message.py --list pgsql-hackers --mbox mboxes/pgsql-hackers.202504 >/dev/null
	cd ./django && ./run_dev.py
77 changes: 77 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# PG archives

This application manages PostgreSQL mailing list archives. However, the search
feature is implemented in pgweb.

## The Application

This is a Django 4.2 application backed by PostgreSQL and running on Python 3.x.

## Getting Started

### Ubuntu instructions

First, prepare your development environment by installing python3, postgresql-server-dev-X.Y, formail (provided by the `procmail` package) and libtidy. Use `--no-install-recommends` to avoid installing postfix:

```bash
sudo apt install python3 postgresql-server-dev-14 procmail libtidy5deb1 --no-install-recommends
```

Next, configure your local environment with virtualenv and install local dependencies.

```bash
python3 -m venv env
source env/bin/activate
pip install -r dev_requirements.txt
```

Create a database for the application:

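```bash
createdb archives
# Run from the repository root so that the following steps still work.
# The migrations also create the pgsql-hackers list (with ID 1, if that ID is free).
django/manage.py migrate
```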

Create config for the loader scripts:

```bash
cp loader/archives.ini.sample loader/archives.ini
```

Load some emails from the actual PostgreSQL archives by downloading an mbox
file from <https://www.postgresql.org/message-id/pgsql-hackers/> and running the
following command. NOTE: it is fine if some of the emails fail to load.

```bash
loader/load_message.py --list pgsql-hackers --mbox /path/to/downloaded/mbox/file
```

Then go to the `django` directory, which is where the actual web application lives.

```bash
cd django
```

Create a local settings file (feel free to edit it):

```bash
cp archives/example_settings_local.py archives/settings_local.py
```

Finally, you're ready to start the web application:

```bash
./run_dev.py
```

Or, download the April 2025 pgsql-hackers mbox file from the PostgreSQL archives and save it as
`mboxes/pgsql-hackers.202504` in the repository root. Then, from the repository root, run:
```bash
make dev-rebuild-and-run
```

Then open <http://localhost:8001/list/pgsql-hackers> to view your local mailing
list archives.
2 changes: 2 additions & 0 deletions dev_requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-r requirements.txt
uwsgi
21 changes: 21 additions & 0 deletions django/archives/example_settings_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Example local settings for a development setup. The README copies this file
# to archives/settings_local.py; edit the copy to suit your environment.

# Enable more debugging information
DEBUG = True
# Prevent the logging configuration from trying to send emails to
# postgresql.org admins. Use the default Django logging settings instead.
LOGGING = None

# Database used by the development server: a PostgreSQL database named
# "archives" (the same name the README creates with `createdb archives`).
DATABASES = {
"default": {
"ENGINE": "django.db.backends.postgresql_psycopg2",
"NAME": "archives",
"USER": "postgres",
"PASSWORD": "postgres",
# NOTE(review): 0.0.0.0 is normally a wildcard *listen* address; using it as
# a client connection target may not work on every platform. Consider
# "localhost" or "127.0.0.1" -- TODO confirm.
"HOST": "0.0.0.0",
}
}

# Allow API access to all clients
PUBLIC_ARCHIVES = True
ALLOWED_HOSTS = ["*"]

# NOTE(review): presumably the base URL of the pgweb site; this is the same
# host and port the README gives for this application's own dev server --
# confirm that is intended.
PGWEB_ADDRESS = 'http://localhost:8001'
171 changes: 169 additions & 2 deletions django/archives/mailarchives/api.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from django.http import HttpResponse, HttpResponseForbidden
from django.shortcuts import get_object_or_404
from django.conf import settings
from django.db import connection
import ipaddress

from .views import cache
from .models import Message, List

import json

import requests
from django.http import JsonResponse

def is_host_allowed(request):
for ip_range in settings.API_CLIENTS:
Expand Down Expand Up @@ -117,9 +119,174 @@ def thread(request, msgid):
'date': m.date.isoformat(),
'from': m.mailfrom,
'subj': m.subject,
'atts': [{'id': a.id, 'name': a.filename} for a in m.attachment_set.all()],
'atts': [{'id': a.id, 'name': a.filename, 'is_patch': a.is_patch, 'content_type': a.contenttype}
for a in m.attachment_set.extra(select={'is_patch': 'attachments.is_patch'}).all()],
}
for m in mlist], resp)
if settings.PUBLIC_ARCHIVES:
resp['xkey'] = 'pgat_{0}'.format(msg.threadid)
return resp

def threads_with_patches(request):
    """List the ten threads that most recently received a patch.

    For every thread, the query picks the newest non-hidden message that
    carries at least one attachment accepted by the database-side
    ``is_patch()`` function, pairs it with the earliest message of the same
    thread, and keeps the ten entries with the newest patch date.

    Returns an ``application/json`` response containing a list of objects,
    or a 403 response when ``settings.PUBLIC_ARCHIVES`` is false.
    """
    if not settings.PUBLIC_ARCHIVES:
        return HttpResponseForbidden('No API access on private archives for now')

    query = """-- Find threads with patches
select *
from (
select distinct on (threadid)
pm.threadid,
pm.id,
pm._from,
pm.subject,
pm.messageid,
ma.patch_count,
tm.subject AS thread_subject,
pm.date AS patch_date,
tm.date AS thread_date,
tm.messageid AS thread_messageid
from messages AS pm --patch message
-- threadid is a shared value but not a foreign key to anything
-- in particular, it is not a self-join of messages
join lateral (
select *
from messages as im
where im.threadid = pm.threadid
order by im.date asc
limit 1
) AS tm on true --thread message is first known message
join lateral (
select count(*) as patch_count
from attachments
where pm.id = attachments.message and is_patch(attachments)
) as ma on true
where pm.has_attachment and ma.patch_count > 0 and pm.hiddenstatus is null
order by pm.threadid, pm.date desc
) as threads_with_patches
order by patch_date DESC
limit 10;
"""

    with connection.cursor() as cursor:
        cursor.execute(query)
        rows = cursor.fetchall()

    def _format_timestamp(value):
        # Missing dates are passed through as JSON null.
        if not value:
            return None
        return value.strftime('%Y-%m-%d %H:%M:%S')

    # Convert the SQL result into thread_list. The unpacking order mirrors
    # the column order of the inner select above.
    thread_list = []
    for (threadid, message_pk, sender, subject, messageid, patch_count,
         thread_subject, patch_date, thread_date, thread_messageid) in rows:
        thread_list.append({
            "thread_id": str(threadid),
            "message_id": message_pk,
            "file_count": patch_count,
            # These three fields are not known to the archives; they are
            # always emitted as null.
            "file_version": None,
            "commit_sha": None,
            "patch_id": None,
            "subject_line": subject,
            "thread_subject": thread_subject,
            "sender": sender,
            "id": message_pk,
            "patch_date": _format_timestamp(patch_date),
            "thread_date": _format_timestamp(thread_date),
            "message_code": messageid,
            "thread_code": thread_messageid,
        })

    resp = HttpResponse(content_type='application/json')
    json.dump(thread_list, resp)
    return resp

def get_patch_data_as_json(threadid, messageid):
    """Return a JSON string describing one patch message and its thread.

    The message is looked up by its primary key. The result combines the
    message itself, the earliest and the most recent message of the same
    thread, and the list of the message's attachments (each flagged by the
    database-side ``is_patch()`` function).

    Args:
        threadid: Accepted for interface compatibility but not used; the
            thread is derived from the message row itself.
        messageid: Value matched against ``messages.id`` (the primary key,
            not the RFC Message-ID header).

    Returns:
        A JSON-encoded object (``str``).

    Raises:
        Http404: If no message with that id exists. Previously this case
            crashed with ``TypeError`` when indexing the ``None`` row.
    """
    # Imported locally so this block does not depend on the file-level imports.
    from django.http import Http404

    with connection.cursor() as cursor:
        cursor.execute("""-- Find threads with patches
select
pm.threadid,
pm.id,
tm.messageid as thread_messageid,
mrm.mostrecent_messageid,
pm.messageid as patch_messageid,
ma.fileset,
pm._from as patch_from_author,
tm.date as thread_messagedate,
mrm.mostrecent_messagedate,
pm.date as patch_messagedate,
tm.subject as thread_subject_line,
mrm.most_recent_subject_line,
mrm.most_recent_from_author,
tm._from as thread_from_author
from messages AS pm --patch message
join lateral (
select *
from messages as im
where im.threadid = pm.threadid
order by im.date asc
limit 1
) AS tm on true --thread message is first known message
join lateral (
select
id as mostrecent_id,
messageid as mostrecent_messageid,
date as mostrecent_messagedate,
subject as most_recent_subject_line,
_from as most_recent_from_author
from messages
where threadid = pm.threadid
order by date desc limit 1
) as mrm on true
join lateral (
select jsonb_agg(
jsonb_build_object(
'attachment_id', a.id,
'filename', a.filename,
'content_type', a.contenttype,
'is_patch', is_patch(a)
) order by a.filename) as fileset
from attachments as a
where pm.id = a.message
) as ma on true
where pm.id = %s;
""",
                       (messageid,))
        row = cursor.fetchone()

    if row is None:
        raise Http404('No message with id {0}'.format(messageid))

    # Name the columns once instead of indexing the row positionally; the
    # order mirrors the select list above.
    (thread_id, message_pk, thread_messageid, mostrecent_messageid,
     patch_messageid, fileset, patch_from_author, thread_messagedate,
     mostrecent_messagedate, patch_messagedate, thread_subject_line,
     most_recent_subject_line, most_recent_from_author,
     thread_from_author) = row

    # jsonb_agg() yields NULL when the message has no attachments. The
    # original code json.loads()'d the value, so the driver evidently returns
    # jsonb as text here; also tolerate a driver that has already decoded it.
    if not fileset:
        fileset = []
    elif isinstance(fileset, (str, bytes, bytearray)):
        fileset = json.loads(fileset)

    def _iso(value):
        # Missing dates are passed through as JSON null.
        return value.isoformat() if value else None

    # Convert the SQL result into patch_data
    patch_data = {
        "thread_id": thread_id,
        "message_id": message_pk,
        "thread_message_id": thread_messageid,
        "most_recent_message_id": mostrecent_messageid,
        "patch_message_id": patch_messageid,
        "patch_from_author": patch_from_author,
        "fileset": fileset,
        "thread_message_date": _iso(thread_messagedate),
        "most_recent_message_date": _iso(mostrecent_messagedate),
        "patch_message_date": _iso(patch_messagedate),
        "thread_subject_line": thread_subject_line,
        "most_recent_subject_line": most_recent_subject_line,
        "most_recent_from_author": most_recent_from_author,
        "thread_from_author": thread_from_author,
    }

    return json.dumps(patch_data)

def create_cfapp_patch(request):
    """Proxy a "create patch" request to the external cfapp service.

    Expects a POST whose body is a JSON object with ``thread_id`` and
    ``message_id``. The patch metadata for that message is built by
    ``get_patch_data_as_json`` and forwarded to the external service; the
    service's JSON reply and HTTP status are relayed back to the caller.

    Returns:
        403 if ``settings.PUBLIC_ARCHIVES`` is false,
        405 for any method other than POST,
        400 if the body is not a JSON object with the required keys,
        500 if the request to the external service fails,
        otherwise the external service's JSON body and status code.
    """
    if not settings.PUBLIC_ARCHIVES:
        return HttpResponseForbidden('No API access on private archives for now')

    if request.method != 'POST':
        return JsonResponse({'error': 'Invalid request method'}, status=405)

    # Client-supplied input: reject anything malformed with a 400 instead of
    # letting the exception bubble up as a server error. ValueError covers
    # both undecodable bytes and invalid JSON; TypeError covers a JSON value
    # that is not an object (e.g. a list or a bare string).
    try:
        body_json = json.loads(request.body.decode("utf-8"))
        thread_id = body_json["thread_id"]
        message_id = body_json["message_id"]
    except (ValueError, KeyError, TypeError):
        return JsonResponse(
            {'error': 'Request body must be a JSON object with thread_id and message_id'},
            status=400,
        )

    payload = get_patch_data_as_json(thread_id, message_id)

    try:
        # Forward the patch description to the external service. Without a
        # timeout, requests waits forever and an unresponsive service would
        # tie up this worker indefinitely.
        response = requests.post(
            'http://localhost:8007/api/test/cfapp/create_patch',
            headers={'Content-Type': 'application/json'},
            data=payload,
            timeout=10,
        )

        # Return the response from the external service. safe=False lets a
        # non-object JSON reply (e.g. a list) be relayed instead of raising.
        return JsonResponse(response.json(), status=response.status_code, safe=False)
    except (requests.RequestException, ValueError) as e:
        # ValueError: older requests versions raise a plain ValueError when
        # the reply body is not valid JSON.
        return JsonResponse({'error': f'Failed to proxy request: {str(e)}'}, status=500)
Loading