Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 34 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,35 @@
FROM ubuntu:trusty
MAINTAINER Alex Dergachev <alex@evolvingweb.ca>

EXPOSE 12736
WORKDIR /var/gdocs-export/

############################################################
# Gdocs export dependencies (Ruby, Pandoc, Latex)
############################################################

# check if the docker host is running squid-deb-proxy, and use it
RUN route -n | awk '/^0.0.0.0/ {print $2}' > /tmp/host_ip.txt
RUN echo "HEAD /" | nc `cat /tmp/host_ip.txt` 8000 | grep squid-deb-proxy && (echo "Acquire::http::Proxy \"http://$(cat /tmp/host_ip.txt):8000\";" > /etc/apt/apt.conf.d/30proxy) || echo "No squid-deb-proxy detected"

# install misc tools
RUN apt-get update -y && apt-get install -y curl wget git fontconfig make vim
RUN apt-get update -y && apt-get install -y curl wget git fontconfig make vim dialog apt-utils apache2 php5 nano
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

# RUN echo 'LC_ALL="en_US.UTF-8"' > /etc/default/locale
# Set locale variables
RUN apt-get install -y locales
RUN locale-gen en_US en_US.UTF-8
RUN dpkg-reconfigure locales

RUN echo 'LC_ALL="en_US.UTF-8"' > /etc/default/locale
RUN apt-get install -y ruby1.9.3

#### RVM
# RUN apt-get install software-properties-common -y
# RUN apt-add-repository -y ppa:rael-gc/rvm -y
# RUN apt-get update -y
# RUN apt-get install rvm -y

# get pandocfilters, a helper library for writing pandoc filters in python
RUN apt-get -y install python-pip
RUN pip install pandocfilters
Expand All @@ -33,5 +52,16 @@ RUN cd /tmp && bundle config build.nokogiri --use-system-libraries && bundle ins
# install pandoc 1.12 from manually downloaded trusty deb packages (saucy only has 1.11, which is too old)
RUN apt-get install -y pandoc

EXPOSE 12736
WORKDIR /var/gdocs-export/
############################################################
# Gdocs export server dependencies (Apache, PHP)
############################################################
RUN useradd -m -g www-data gdocs
# Update the default apache site with the config we created.
COPY server/apache-config.conf /etc/apache2/sites-available/000-default.conf
COPY server/envvars /etc/apache2/envvars

# By default, start Apache in the foreground; override with /bin/bash for an interactive shell.
CMD apachectl -D FOREGROUND

# Allow the gdocs user to run the web conversion script as root without a password.
# Append (>>) instead of overwriting (>) so any existing /etc/sudoers entries are
# kept, and terminate the rule with a newline so the file ends cleanly.
RUN printf "\ngdocs ALL=(root) NOPASSWD: /var/gdocs-export/server/scripts/web-convert-gdoc.sh\n" >> /etc/sudoers
122 changes: 66 additions & 56 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,108 +1,118 @@
include .env
#===============================================================================
# DEFAULT MAKE VARIABLES
#===============================================================================
AUTH_FILE=google-api-authorization.yaml

# defaults to "Test doc for gd-pandoc"
doc = https://docs.google.com/a/evolvingweb.ca/document/d/1dwYaiiy4P0KA7PvNwAP2fsPAf6qMMNzwaq8W66mwyds/edit#heading=h.4lk08p1hx3w
#DATE=$(eval DATE_DIR=$(shell date +%Y-%m))
DATE=$(shell date +%Y-%m)

outdir=build
doc_id = $(shell echo $(doc) | sed -e 's@^https.*document/d/@@' -e 's@/edit.*@@')
name = default
input_file = input/$(name).html
OUTPUT=$(outdir)/$(name)
auth_file = google-api-authorization.yaml
docker_workdir=/var/gdocs-export/
docker_run_cmd = docker run -t -i -v `pwd`:$(docker_workdir) -p 12736:12736 dergachev/gdocs-export
FILE_NAME=default
THEME=sample

INPUT_FILE_DIR=input/$(DATE)
INPUT_FILE=$(INPUT_FILE_DIR)/$(FILE_NAME).html

OUTPUT_DIR=build/$(DATE)
OUTPUT_FILE_DIR=$(OUTPUT_DIR)/$(FILE_NAME)

# directory containing customized header.tex, etc...
theme = sample

all: convert

#===============================================================================
# GOOGLE_DRIVE_API TARGETS
# run on Docker container
#===============================================================================

install_auth_file:
cp $(workdir)$(auth_file) ~/.google-api.yaml
@cp ${APP_DOCKER_DIR}/${AUTH_FILE} ${APACHE_USER_HOME_DIR}/.google-api.yml

# Download google-api-authorization.yaml
# usage:
# make api_auth
api_auth:
bundle exec ruby bin/authorize.rb \
$(CLIENT_ID) $(CLIENT_SECRET) \
${GOOGLE_CLIENT_ID} ${GOOGLE_CLIENT_SECRET} \
https://www.googleapis.com/auth/drive.readonly \
> $(auth_file)

api_download: install_auth_file
> $(AUTH_FILE)

# Download HTML version of the Google document and store it in INPUT_FILE_DIR
# usage:
# make api_download DOC_ID=xxxxx FILE_NAME=xxx
api_download: #install_auth_file
if [ ! -d input/$(DATE) ]; then mkdir input/$(DATE); fi;
# get DOC_ID from input
bundle exec google-api execute \
-u "https://docs.google.com/feeds/download/documents/export/Export?id=$(doc_id)&exportFormat=html" \
> $(input_file)
-u "https://docs.google.com/feeds/download/documents/export/Export?id=$(DOC_ID)&exportFormat=html" \
> $(INPUT_FILE_DIR)/$(FILE_NAME).html

#===============================================================================
# PANDOC TARGETS
# run on Docker container
#===============================================================================

latex:
mkdir -p $(OUTPUT)
cp assets/default/* $(OUTPUT)
test -z "$(theme)" || cp assets/$(theme)/* $(OUTPUT)
cp $(input_file) $(OUTPUT)/in.html
mkdir -p $(OUTPUT_FILE_DIR)
cp assets/default/* $(OUTPUT_FILE_DIR)
test -z "$(THEME)" || cp assets/$(THEME)/* $(OUTPUT_FILE_DIR)
cp $(INPUT_FILE) $(OUTPUT_FILE_DIR)/in.html

bundle exec ruby -C$(OUTPUT) "$$PWD/lib/pandoc-preprocess.rb" in.html > $(OUTPUT)/preprocessed.html
pandoc --parse-raw $(OUTPUT)/preprocessed.html -t json > $(OUTPUT)/pre.json
cat $(OUTPUT)/pre.json | ./lib/pandoc-filter.py > $(OUTPUT)/post.json
bundle exec ruby -C$(OUTPUT_FILE_DIR) "$$PWD/lib/pandoc-preprocess.rb" in.html > $(OUTPUT_FILE_DIR)/preprocessed.html
pandoc --parse-raw $(OUTPUT_FILE_DIR)/preprocessed.html -t json > $(OUTPUT_FILE_DIR)/pre.json
cat $(OUTPUT_FILE_DIR)/pre.json | ./lib/pandoc-filter.py > $(OUTPUT_FILE_DIR)/post.json

# use pandoc to create metadata.tex, main.tex (these are included by ew-template.tex)
pandoc $(OUTPUT)/post.json --no-wrap -t latex --template $(OUTPUT)/template-metadata.tex > $(OUTPUT)/metadata.tex
pandoc $(OUTPUT)/post.json --chapters --no-wrap -t latex > $(OUTPUT)/main.tex
pandoc $(OUTPUT_FILE_DIR)/post.json --no-wrap -t latex --template $(OUTPUT_FILE_DIR)/template-metadata.tex > $(OUTPUT_FILE_DIR)/metadata.tex
pandoc $(OUTPUT_FILE_DIR)/post.json --chapters --no-wrap -t latex > $(OUTPUT_FILE_DIR)/main.tex

# must use -o with the docx output format, since it is binary
pandoc $(OUTPUT)/post.json -s -t docx -o $(OUTPUT)/$(name).docx
pandoc $(OUTPUT)/post.json -s -t rtf -o $(OUTPUT)/$(name).rtf
pandoc $(OUTPUT_FILE_DIR)/post.json -s -t docx -o $(OUTPUT_FILE_DIR)/$(FILE_NAME).docx
pandoc $(OUTPUT_FILE_DIR)/post.json -s -t rtf -o $(OUTPUT_FILE_DIR)/$(FILE_NAME).rtf

pdf:
# convert latex to PDF
echo "Created $(OUTPUT)/$(name).tex, compiling into $(name).pdf"
echo "Created $(OUTPUT_FILE_DIR)/$(FILE_NAME).tex, compiling into $(FILE_NAME).pdf"
# rubber will set output PDF filename based on latex input filename
cp -f $(OUTPUT)/template.tex $(OUTPUT)/$(name).tex
( cd $(OUTPUT); latexmk -pdf $(name))
cp -f $(OUTPUT_FILE_DIR)/template.tex $(OUTPUT_FILE_DIR)/$(FILE_NAME).tex
( cd $(OUTPUT_FILE_DIR); rubber --pdf $(FILE_NAME))

convert: latex pdf

diff:
/usr/bin/perl "`which latexdiff`" --flatten $(outdir)/$(before)/$(before).tex $(OUTPUT)/$(name).tex > $(OUTPUT)/diff.tex
(cd $(OUTPUT); latexmk -pdf diff)
# diff:
# /usr/bin/perl "`which latexdiff`" --flatten $(outdir)/$(before)/$(before).tex $(OUTPUT)/$(name).tex > $(OUTPUT)/diff.tex
# (cd $(OUTPUT); latexmk -pdf diff)


#===============================================================================
# DOCKER TARGETS
#===============================================================================

build_docker:
@echo "Warning: building can take a while (~15m)."
dpkg -l squid-deb-proxy || sudo apt-get install -y squid-deb-proxy
docker build -t dergachev/gdocs-export .

docker_debug:
$(docker_run_cmd) /bin/bash
docker-compose up -d --build

latest:
docker run -t -i `docker images -q | head -n 1` /bin/bash
# Open an interactive bash shell inside the container as $(APACHE_USER).
# APACHE_USER and DOCKER_CONTAINER are not defined in this Makefile; presumably
# they come from the included .env file -- confirm.
# usage:
#   make access
.PHONY: access
access:
	docker exec -it --user ${APACHE_USER} ${DOCKER_CONTAINER} /bin/bash

docker_api_auth:
$(docker_run_cmd) make api_auth CLIENT_ID=$(CLIENT_ID) CLIENT_SECRET=$(CLIENT_SECRET)
# Stop the docker-compose services without removing their containers.
# usage:
#   make stop
.PHONY: stop
stop:
	docker-compose stop

docker_api_download:
$(docker_run_cmd) make api_download doc_id=$(doc_id) input_file=$(input_file) workdir=$(docker_workdir)

docker_convert:
$(docker_run_cmd) make convert OUTPUT=$(OUTPUT) name=$(name) input_file=$(input_file) theme=$(theme)

docker_diff:
docker run -t -i -v `pwd`:$(docker_workdir) -p 12736:12736 dergachev/gdocs-export make diff OUTPUT=$(OUTPUT) name=$(name) input_file=$(input_file) before=$(before)
# Stop the docker-compose services, then start them again.
# The second command only runs if the first one succeeds.
# usage:
#   make restart
.PHONY: restart
restart:
	docker-compose stop
	docker-compose start

#===============================================================================
# MISC TARGETS
#===============================================================================

# Run the RSpec test suite through Bundler.
# Declared phony so a file or directory named "test" cannot make this target
# look up to date and skip the run.
.PHONY: test
test:
	bundle exec rspec

#===============================================================================
# TEST
# Test build Alex's public document
# https://docs.google.com/document/d/1dwYaiiy4P0KA7PvNwAP2fsPAf6qMMNzwaq8W66mwyds/edit
#===============================================================================
# Download the public sample document and convert it with THEME=ew.
# DOC_ID and FILE_NAME are target-specific, so they apply only while building
# this target and do not leak into other goals of the same make invocation.
# usage:
#   make test_convert
.PHONY: test_convert
test_convert: DOC_ID = 1dwYaiiy4P0KA7PvNwAP2fsPAf6qMMNzwaq8W66mwyds
test_convert: FILE_NAME = sample2
test_convert:
	$(MAKE) api_download DOC_ID=$(DOC_ID) FILE_NAME=$(FILE_NAME)
	$(MAKE) convert FILE_NAME=$(FILE_NAME) THEME=ew
36 changes: 10 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,16 @@ Installation
------------

See below for how to get google drive API *client_id* and *client_secret*.
See the `Vagrantfile` for installation steps.
See the `Makefile` for usage.

Starts a VM with docker and squid-deb-proxy running, then builds the gdocs-export docker image:
Build and start the Docker container with:

```bash
vagrant up
vagrant ssh
cd /vagrant/

# pulls the image from index.docker.io (about ~2GB)
docker pull dergachev/gdocs-export
```

Alternatively, we can build the image from this repo, but this takes a while
and installing squid-deb-proxy to cache 'apt-get install' downloads is highly
recommended:

```bash
# optional, caches apt-get downloads in containers
apt-get install -y squid-deb-proxy

# takes 10-20 minutes
docker build -t dergachev/gdocs-export .
make build_docker
```

To run the tests, do the following:

```bash
bundle config build.nokogiri --use-system-libraries
bundle install

make test
```

Configuration
-------------
Expand Down Expand Up @@ -181,3 +157,11 @@ sudo tlmgr install latexmk
```

See http://mg.readthedocs.io/latexmk.html


# ====================

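Authorization issue: if authorization fails with the error below, port 12736 is already in use. Find the process holding the port and kill it: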
/usr/lib/ruby/1.9.1/webrick/utils.rb:85:in `initialize': Address already in use - bi
lsof -wni tcp:12736
kill -9 PID
Binary file not shown.
Binary file removed build/example/UNlogo.png
Binary file not shown.
47 changes: 0 additions & 47 deletions build/example/example.aux

This file was deleted.

Binary file removed build/example/example.docx
Binary file not shown.
Loading