Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 39 additions & 11 deletions dokuwiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,28 @@ def __init__(self, rootpath):
for subdir in [ self.meta, self.attic, self.pages]:
ensure_directory_exists(subdir)

def write_pages(self, pages, namespaces):
    """
    Given 'pages' as a list of mediawiki pages with revisions attached, export them to dokuwiki pages

    'namespaces' is an iterable of dictionaries, each providing the numeric
    namespace 'id' and the namespace name under the key '*'. Every page's
    numeric 'ns' attribute is translated to the matching name, which is then
    handed to _convert_page together with the page.

    Raises RuntimeError if a page refers to a namespace id that is neither 0
    nor listed in 'namespaces'.
    """
    # Build the id -> name lookup once instead of scanning 'namespaces' for every page.
    namespace_names = {}
    for entry in namespaces:
        namespace_names[entry['id']] = entry['*']
    # Namespace id 0 always maps to the empty name, whatever the list says.
    namespace_names[0] = ''

    for page in pages:
        ns = page['ns']
        namespace = namespace_names.get(ns)
        if namespace is None:
            # The format arguments must be a tuple: '%' binds tighter than ','.
            raise RuntimeError("Page '%s' has unknown namespace ID: %i" % (page['title'], ns))

        self._convert_page(page, namespace)

    self._aggregate_changes(self.meta, "_dokuwiki.changes")

def write_images(self, images, file_namespace, http_user=None, http_pass=None):
Expand Down Expand Up @@ -74,21 +90,33 @@ def write_images(self, images, file_namespace, http_user=None, http_pass=None):
# aggregate all the new changes to the media_meta/_media.changes file
self._aggregate_changes(os.path.join(self.data, "media_meta"), "_media.changes")

def _convert_page(self, page):
def _convert_page(self, page, namespace):
""" Convert the supplied mediawiki page to a Dokuwiki page """
print("Converting %d revisions of page '%s'..." %
(len(page["revisions"]), page['title']))
# print("Converting %d revisions of page '%s'..." %
# (len(page["revisions"]), page['title']))
# remove leading namespace specifier, if any
page_title=re.sub('^'+namespace,'',page['title'])

# Sanitise the mediawiki pagename to something matching the dokuwiki pagename convention
full_title = make_dokuwiki_pagename(page['title'])
pagename = re.sub(r'_*[/:]+_*','_',make_dokuwiki_pagename(page_title))
if namespace!='':
namesparr=(namespace.replace("/",":")).split(':')
namesparr=map(make_dokuwiki_pagename(),namesparr)
page_ns = ":".join(namesparr)
else:
page_ns=''
full_title=":".join((page_ns,pagename))

# Mediawiki pagenames can contain namespace :s, convert these to dokuwiki / paths on the filesystem (becoming : namespaces in dokuwiki)
subdir, pagename = os.path.split(full_title.replace(':','/'))
subdir = page_ns.replace(":","/")
pagedir = os.path.join(self.pages, subdir)
metadir = os.path.join(self.meta, subdir)
atticdir = os.path.join(self.attic, subdir)
for d in pagedir, metadir, atticdir:
ensure_directory_exists(d)

ensure_directory_exists(d)
print("Converting page '%s' to '%s' (%d revisions) ..." %
(page['title'],pagename,len(page["revisions"])))

# Walk through the list of revisions
revisions = list(reversed(page["revisions"])) # order as oldest first
for revision in revisions:
Expand All @@ -108,7 +136,7 @@ def _convert_page(self, page):
# create gzipped attic revision
atticname = "%s.%s.txt.gz" % (pagename, timestamp)
atticpath = os.path.join(atticdir, atticname)
with gzip.open(atticpath, "wb") as f:
with gzip.open(atticpath.encode("utf-8"), "wb") as f:
f.write(content.encode("utf-8"))
os.utime(atticpath, (timestamp,timestamp))
# append entry to page's 'changes' metadata index
Expand Down Expand Up @@ -192,7 +220,7 @@ def make_dokuwiki_pagename(mediawiki_name):
Any namespacing that is in the form of a / is replaced with a :
"""
result = mediawiki_name.replace(" ","_")
return names.clean_id(camel_to_underscore(result)).replace("/",":")
return names.clean_id(camel_to_underscore(result))

def camel_to_underscore(camelcase):
"""
Expand Down
12 changes: 12 additions & 0 deletions mediawiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def get_all_pages(self):
query = {'list' : 'allpages'}
print("Getting list of pages...")
pages = self._query(query, [ 'allpages' ])

print("Query page revisions...")
for page in pages:
page["revisions"] = self._get_revisions(page)
Expand Down Expand Up @@ -96,6 +97,17 @@ def _query(self, args, path_to_result):
except KeyError:
return result

def get_all_namespaces(self):
    """
    Return a list of dictionaries, each containing the keys
    'subpages' (non-empty=may contain subpages),'*' (Name),
    'id' (numeric), 'canonical' (canonical name)

    The data is taken from the 'namespaces' entry of a siteinfo query issued
    through self.mw.call.
    """
    query = { 'action' : 'query', 'meta' : 'siteinfo', 'siprop' : 'namespaces|namespacealiases' }
    result = self.mw.call(query)['query']
    # dict.values() is only a view on Python 3; materialise it so callers
    # really get the list the docstring promises.
    return list(result['namespaces'].values())

def get_file_namespaces(self):
"""
Return a tuple. First entry is the name used by default for the file namespace (which dokuwiki will also use.)
Expand Down
10 changes: 8 additions & 2 deletions names.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
Copyright (C) 2014 Angus Gratton
Licensed under New BSD License as described in the file LICENSE.
"""
import re, os.path, unicodedata
import re, os.path, unicodedata, urllib

def clean_id(name):
"""
Expand All @@ -13,6 +13,9 @@ def clean_id(name):
Ignores both slashes and colons as valid namespace choices (to convert slashes to colons,
call make_dokuwiki_pagename)
"""
# decode URL-encoded characters
name=urllib.unquote(name)

main,ext = os.path.splitext(name)

# remove accents
Expand All @@ -23,9 +26,12 @@ def clean_id(name):
no_accent = main # name was plaintext to begin with

# recombine without any other characters
result = (re.sub(r'[^\w/:]+', '_', no_accent) + ext).lower()
# result = (re.sub(r'[^\w/:]+', '_', no_accent) + ext).lower()
result = (re.sub(r'[^\w]+', '_', no_accent) + ext).lower()
while "__" in result:
result = result.replace("__", "_") # this is a hack, unsure why regex doesn't catch it
# remove heading and trailing underscores
result=re.sub('^_|_$', '', result)
return result

def clean_user(name):
Expand Down
37 changes: 26 additions & 11 deletions wikicontent.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ def set_file_namespaces(canonical_alias, aliases):
canonical_alias is the single namespace that dokuwiki will use (default File:)
aliases is a list of alternative namespace names that will be converted to the canonical alias
"""
print("match localised namespaces for files/images %s <- %s"%(canonical_alias,aliases))
global mw_file_namespace_aliases
global dw_file_namespace
dw_file_namespace = canonical_alias + ":"
mw_file_namespace_aliases = re.compile("^(%s):" % "|".join(aliases), re.IGNORECASE)
mw_file_namespace_aliases = re.compile("^(%s):" % "|".join([canonical_alias]+aliases), re.IGNORECASE)

def is_file_namespace(target):
"""
Expand Down Expand Up @@ -97,7 +98,8 @@ def convert(section, trailing_newline):
elif section.tagname == "@section":
level = section.level
heading = convert(section.children.pop(0), trailing_newline)
heading_boundary = "="*(6-level)
# DokuWiki's top-level heading uses six '=' signs, so a level-N heading gets 7-N of them
heading_boundary = "="*(7-level)
result = "\n%s %s %s\n" % (heading_boundary, heading, heading_boundary)
else:
print("Unknown tagname %s" % section.tagname)
Expand Down Expand Up @@ -131,10 +133,12 @@ def convert(url, trailing_newline):

@visitor.when(URL)
def convert(url, trailing_newline):
    """ Log the URL being converted and return its caption unchanged. """
    caption = url.caption
    print(' ... converting URL %s'%caption)
    return caption

@visitor.when(ImageLink)
def convert(link, trailing_newline):
print(' ... converting %s'%link.target)
suffix = ""
if link.width is not None:
if link.height is None:
Expand All @@ -150,11 +154,12 @@ def convert(link, trailing_newline):
prealign = " " if link.align in [ "center", "right" ] else ""
postalign = " " if link.align in [ "center", "left" ] else ""
target = canonicalise_file_namespace(link.target)
target = convert_internal_link(target)
target = ":".join(convert_internal_link(tg) for tg in target.split(":"))
return "{{%s%s%s%s}}" % (prealign, target, suffix, postalign)

@visitor.when(ArticleLink)
def convert(link, trailing_newline):
print(' ... converting %s'%link.target)
text = convert_children(link).strip(" ")
pagename = convert_internal_link(link.target)
if len(text):
Expand All @@ -164,21 +169,31 @@ def convert(link, trailing_newline):

@visitor.when(CategoryLink)
def convert(link, trailing_newline):
    """ Log the category link and drop it from the output (returns an empty string). """
    progress = ' ... converting %s'%link.target
    print(progress)
    # Category functionality can be implemented with plugin:tag, but not used here
    return ""

@visitor.when(NamespaceLink)
def convert(link, trailing_newline):
    """
    Convert a namespace link.

    One leading ':' is removed from the link target first. If the remaining
    target is in a file namespace, the result is a '{{target}}' or
    '{{target|caption}}' embed, with every ':'-separated part of the target
    passed through convert_internal_link. Any other namespace link is not
    converted: a warning is printed and only the converted children of the
    link are returned.
    """
    print(' ... converting %s'%link.target)
    target = re.sub(r'^:','',link.target)
    if not is_file_namespace(target):
        print("WARNING: Ignoring namespace link to " + link.target)
        return convert_children(link)

    # is a link to a file or image
    target = canonicalise_file_namespace(target)
    # non-detected file link has a caption: separate it
    # (re.match is anchored at the start of the string, so it could never find
    # a '|' that follows the file name; test for containment and split once so
    # additional '|' characters stay inside the caption instead of raising)
    if '|' in target:
        target, caption = target.split('|', 1)
    else:
        caption = convert_children(link).strip()
    filename = convert_internal_link(re.sub(r'.*[:/]','',target))
    target = ":".join(convert_internal_link(tg) for tg in target.split(":"))
    print(' ... is a file link to %s'%filename)
    if len(caption) > 0:
        return "{{%s|%s}}" % (target, caption)
    return "{{%s}}" % (target)


@visitor.when(ItemList)
Expand Down
18 changes: 10 additions & 8 deletions wikicontent_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@

"""
from __future__ import print_function, unicode_literals, absolute_import, division
import sys, os, codecs, inspect, traceback
import sys, os, codecs, inspect, traceback, difflib, unicodedata
from pprint import pprint
import wikicontent, yamdwe

DELIMITER="*"*40
DELIMITER="@"*40

def prep_difflines(content):
    """ Split 'content' at newlines and give every piece its own line terminator, the "readlines" style list that difflib takes as input """
    lines = []
    for piece in content.split("\n"):
        lines.append(piece + "\n")
    return lines

def run_test(testdir):
"""
Expand Down Expand Up @@ -59,12 +63,10 @@ def run_test(testdir):
print("Input Mediawiki:")
print(mw)
print(DELIMITER)
print("Expected Output:")
print(DELIMITER)
print(dw)
print(DELIMITER)
print("Actual Output:")
print(converted)

diff = difflib.unified_diff(prep_difflines(dw), prep_difflines(converted), fromfile='Expected Dokuwiki', tofile='Actual Dokuwiki', lineterm="\n")
sys.stdout.writelines(diff)
sys.stdout.write("\n")
print(DELIMITER)
return False

Expand Down
4 changes: 3 additions & 1 deletion yamdwe.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ def main():
canonical_file, aliases = importer.get_file_namespaces()
wikicontent.set_file_namespaces(canonical_file, aliases)


# Read all pages and page revisions
namespaces=importer.get_all_namespaces()
pages = importer.get_all_pages()
print("Found %d pages to export..." % len(pages))

Expand All @@ -58,7 +60,7 @@ def main():
page["revisions"].insert(0, latest)

# Export pages to Dokuwiki format
exporter.write_pages(pages)
exporter.write_pages(pages,namespaces)

# Bring over images
images = importer.get_all_images()
Expand Down