diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..485dee6 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea diff --git a/Jenkinsfile b/Jenkinsfile index 106c636..7a465eb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -11,7 +11,9 @@ pipeline { stage('Build Jupyter images') { steps { script { - sh "docker build -t saagie/jupyter-python-nbk:v2_$buildVersion ." + sh "cd minimal && docker build -t saagie/jupyter-python-nbk:v2-minimal_$buildVersion ." + sh "cd base && docker build -t saagie/jupyter-python-nbk:v2-base_$buildVersion -t saagie/jupyter-python-nbk:v2_$buildVersion ." + sh "cd scipy && docker build -t saagie/jupyter-python-nbk:v2-scipy_$buildVersion ." } } } @@ -25,6 +27,9 @@ pipeline { passwordVariable: 'PASSWORD')]) { sh "docker login -u $USERNAME -p $PASSWORD" + sh "docker push saagie/jupyter-python-nbk:v2-minimal_$buildVersion" + sh "docker push saagie/jupyter-python-nbk:v2-base_$buildVersion" + sh "docker push saagie/jupyter-python-nbk:v2-scipy_$buildVersion" sh "docker push saagie/jupyter-python-nbk:v2_$buildVersion" } } diff --git a/README.md b/README.md index e90dfac..5a59436 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,54 @@ # Jupyter Datascience Notebook for python +## Images + +The Jupyter notebook for Python comes in several images: + + * saagie/jupyter-python-nbk:v2-minimal + * saagie/jupyter-python-nbk:v2-base / saagie/jupyter-python-nbk:v2 + * saagie/jupyter-python-nbk:v2-scipy + +### saagie/jupyter-python-nbk:v2-minimal +This image is based on the **jupyter/minimal-notebook** one, + +=> adapted to run smoothly on Saagie's platform + +=> with no additional data science libs preinstalled; it's up to you to add your own. + +### saagie/jupyter-python-nbk:v2-base +This is the official and main image, based on **jupyter/minimal-notebook** + +=> it comes with a bunch of additional libraries + +=> and is quite similar to **jupyter/scipy-notebook**, with even more features.
+ +This image is the same as **saagie/jupyter-python-nbk:v2** + +### saagie/jupyter-python-nbk:v2-scipy +This is the legacy (now deprecated) v2 image, initially based on **jupyter/scipy-notebook** + +=> it comes with a bunch of additional libraries + +=> but is now **deprecated** in favor of ***saagie/jupyter-python-nbk:v2-base*** + + ## Run with : - docker run -p 8888:8888 -v /path/to/data/notebooks/dir:/notebooks-dir saagie/jupyter-python-nbk:latest + +### Standalone image + + docker run -p 8888:8888 -v /path/to/data/notebooks/dir:/notebooks-dir saagie/jupyter-python-nbk:v2 Mounting volume is optional (-v /path/to/data/notebooks/dir:/notebooks-dir) but if you want to do it: * create your local directory with: `mkdir -P /path/to/data/notebooks/dir` * make Jovyan (Jupyter notebook default user) the owner of this directory with: `chown -R 1000:100 /path/to/data/notebooks/dir` +### On Saagie's platform + + * use port 8888 + * define the SAAGIE_BASE_PATH env var to set the notebook base path + * do not activate "rewrite url" + * optionally you can add a volume to map the /notebooks-dir folder + ## Libraries : * Data Processing * numpy @@ -44,5 +86,5 @@ ### For python 3 !pip install libraryName -### For python 2 - !pip2 install libraryName +/!\ Python 2 support has been dropped + diff --git a/base/Dockerfile b/base/Dockerfile new file mode 100644 index 0000000..b3ae1a6 --- /dev/null +++ b/base/Dockerfile @@ -0,0 +1,100 @@ +ARG PYTHON3_IMG="saagie/python:3.6.202005.84" + +ARG BASE_CONTAINER="saagie/jupyter-python-nbk:v2-minimal" + +FROM $PYTHON3_IMG AS PYTHON3 +FROM $BASE_CONTAINER + +MAINTAINER Saagie + +########################## LIBS PART BEGIN ########################## +USER root +# TODO check if all are necessary; there seem to be duplicates from the jupyter/scipy image +RUN apt-get update -qq && apt-get install -yqq --no-install-recommends \ + # replaces libpng3 for bionic + libpng16-16 \ + # replaces libgdal1-dev for bionic + libgdal-dev \ + # needed to compile psycopg2 + libpq-dev \ + libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr \ + flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig redis-server libpulse-dev \ + libfreetype6-dev libatlas-base-dev gfortran \ + sasl2-bin libsasl2-2 libsasl2-dev \ + libsasl2-modules unixodbc-dev python3-tk \ + qt5-default \ + libqt5webkit5-dev \ + libcurl4-openssl-dev \ + && rm -rf /var/lib/apt/lists/* +########################## LIBS PART END ########################## + + +################ Kernels / Conda envs / requirements PART BEGIN ################ +USER $NB_USER +# TODO check if all are necessary; there seem to be duplicates from the jupyter/scipy image +SHELL ["/bin/bash", "-c"] +# Add libs for python 3.6 env +# inherited from saagie/python:3.6 image +# installed via pip only +# installed via conda +COPY requirements_conda3.txt requirements_conda3.txt +COPY --from=PYTHON3 /requirements.txt ./requirements_python3.txt +COPY requirements_pip3.txt requirements_pip3.txt +RUN conda install -n py36 --quiet --yes --file requirements_conda3.txt \ + # Some installed libraries (scikit-learn) could not be removed, so use --ignore-installed \ + && sed -n '/scikit-learn/p' requirements_python3.txt >> requirements_python3_ignore-installed.txt \ + && sed -i '/scikit-learn/d' requirements_python3.txt \ + && .
activate py36 \ + && python -m pip install --no-cache-dir --ignore-installed -r requirements_python3_ignore-installed.txt \ + && python -m pip install --no-cache-dir -r requirements_python3.txt \ + && python -m pip install --no-cache-dir -r requirements_pip3.txt \ + && conda deactivate \ + && conda clean -ay \ + && rm -rf ~/.cache/pip +################ Kernels / Conda envs / requirements PART ENDS ################# + + +########################## CUDA PART BEGIN ########################## +USER root + +ENV PATH="${PATH}:/usr/local/nvidia/bin:/usr/local/cuda/bin" +ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" +# nvidia-container-runtime +ENV NVIDIA_VISIBLE_DEVICES all +ENV NVIDIA_DRIVER_CAPABILITIES compute,utility +ENV NVIDIA_REQUIRE_CUDA "cuda>=10.0 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=410,driver<411" + +ENV CUDA_VERSION 10.0.130 +ENV CUDA_PKG_VERSION 10-0=$CUDA_VERSION-1 +ENV NCCL_VERSION 2.4.2 +ENV CUDNN_VERSION 7.6.0.64 + +LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates apt-transport-https gnupg-curl && \ + rm -rf /var/lib/apt/lists/* && \ + NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ + NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \ + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \ + apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \ + echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \ + echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ + echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \ + # For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a + apt-get update && apt-get install -y --no-install-recommends \ + cuda-cudart-$CUDA_PKG_VERSION \ + cuda-libraries-$CUDA_PKG_VERSION \ + cuda-nvtx-$CUDA_PKG_VERSION \ + cuda-compat-10-0 \ + libnccl2=$NCCL_VERSION-1+cuda10.0 \ + libcudnn7=$CUDNN_VERSION-1+cuda10.0 \ + && apt-mark hold libnccl2 libcudnn7 \ + && ln -s cuda-10.0 /usr/local/cuda \ + && rm -rf /var/lib/apt/lists/* \ + # Path doesn't exist...
here for compatibility it seems https://gitlab.com/nvidia/container-images/cuda/issues/27 + && echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \ + && echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf +########################## CUDA PART END ########################## + +USER $NB_USER diff --git a/build.sh b/base/build.sh similarity index 100% rename from build.sh rename to base/build.sh diff --git a/base/python3_lib_test.py b/base/python3_lib_test.py new file mode 100644 index 0000000..b414cb4 --- /dev/null +++ b/base/python3_lib_test.py @@ -0,0 +1,103 @@ +import sys +print(sys.executable) +print(sys.version) +print(sys.version_info) + +### +### Test conda install +### FIXME find a way to test those installs +#from hdfs.hfile import Hfile +#import hdf5 + +### +### Test Jupyter specific +### +from PIL import Image +from google.protobuf import descriptor_pb2 + +### +### Test imports from python3 +### +import addok +import apiclient +import bs4 +import bokeh +import bs4 +from confluent_kafka import Producer +import crypto +import cython +import django +import dryscrape +import elasticsearch +import excel +from fastparquet import ParquetFile +import fiona +import folium +import gensim +import geopandas +import geopy +import graphviz +import h5py +import hdfs +import autosklearn.classification +import thrift_sasl +from pybrain.tools.shortcuts import buildNetwork +import ibis +from imblearn.over_sampling import RandomOverSampler +from impala.dbapi import connect +import ipywidgets +import jellyfish +import joblib +from kafka import KafkaConsumer +from keras.layers import Dense +import lime +import lxml +import matplotlib +import mpld3 +import mysql.connector +from neo4j import GraphDatabase +import networkx +import nltk +from numba import jit +import numpy +import cv2 +import openpyxl +import pandas +from pdfminer.psparser import * +import psycopg2 +from Crypto.Hash import SHA256 +import pycurl +import pydotplus +import pymongo +import pyodbc +import shapefile +import pytesseract +from Levenshtein import _levenshtein +from requests_kerberos import * +from skimage import data +from sklearn import datasets +import scipy +import scrapy +import seaborn +import shap +import shapely +import simplejson +import six +import spacy +from sqlalchemy import create_engine +import statsmodels +import tabula +import tensorflow as tf +print('Num GPUs Available: ', len(tf.config.experimental.list_physical_devices('GPU'))) +import tensorflow +import textract +import theano.tensor +import tika +import tokenizer +import torch +import torchvision +import tpot +import umap +from wand.image import Image +import xgboost +import xlwt diff --git a/base/requirements_conda3.txt b/base/requirements_conda3.txt new file mode 100644 index 0000000..6ce3105 --- /dev/null +++ b/base/requirements_conda3.txt @@ -0,0 +1,3 @@ +hdf5==1.10.1 +python-hdfs==2.0.16 +pycurl>=7.43,<7.44 diff --git a/base/requirements_pip3.txt b/base/requirements_pip3.txt new file mode 100644 index 0000000..1e39f6d --- /dev/null +++ b/base/requirements_pip3.txt @@ -0,0 +1,2 @@ +pillow==4.3.0 +protobuf==3.6.1 diff --git a/minimal/Dockerfile b/minimal/Dockerfile new file mode 100644 index 0000000..0e782fa --- /dev/null +++ b/minimal/Dockerfile @@ -0,0 +1,56 @@ +# use latest image with ubuntu 16.04 Xenial for CDH5 compatibility +# see (https://github.com/jupyter/docker-stacks/commits/master?after=04f7f60d34a674a2964d96a6cb97c57a7870a828+664) +FROM jupyter/minimal-notebook:f9e77e3ddd6f + +MAINTAINER Saagie + +ENV 
PATH="${PATH}:/home/$NB_USER/.local/bin" + +# Starts by cleaning useless npm cache & other files +RUN npm cache clean --force \ + && conda clean -ay \ + && rm -rf $CONDA_DIR/share/jupyter/lab/staging +# Not necessary to apt-get clean it seems + +########################## LIBS PART BEGIN ########################## +USER root +# TODO check if all necessary seems there are duplicate from jupyter/scipy image +RUN apt-get update -qq && apt-get install -yqq --no-install-recommends \ + curl \ + && rm -rf /var/lib/apt/lists/* +########################## LIBS PART END ########################## + + +################ Kernels / Conda envs / requirements PART BEGIN ################ +USER $NB_USER +# Uninstall python3 kernel +RUN jupyter kernelspec remove -f python3 + +# Update conda to latest version +#RUN conda update -n root conda \ +RUN conda clean -ay + +# seems there's sometimesa problem with pyzmq so need to reinstall it... +RUN conda create -n py36 python=3.6 \ + && bash -c "source activate py36 && pip uninstall pyzmq -y && pip install pyzmq && conda install notebook ipykernel -y && ipython kernel install --user --name py36 --display-name 'Python 3.6'" \ + && conda clean -ay \ + && rm -rf ~/.cache/pip +################ Kernels / Conda envs / requirements PART ENDS ################# + +########################## NOTEBOOKS DIR ########################## +USER root +# Create default workdir (useful if no volume mounted) +RUN mkdir /notebooks-dir && chown 1000:100 /notebooks-dir +# Define default workdir +WORKDIR /notebooks-dir +########################## NOTEBOOKS DIR END ########################## + +#Add entrypoint.sh +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +# Should run as $NB_USER +USER $NB_USER + +# Default: run without authentication +CMD ["/entrypoint.sh"] diff --git a/minimal/build.sh b/minimal/build.sh new file mode 100755 index 0000000..d46fd2e --- /dev/null +++ b/minimal/build.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -euxo pipefail + +NO_CACHE="" +export DOCKER_BUILDKIT=0 + +while (( $# )); do + case $1 in + --no-cache) NO_CACHE="--no-cache" + ;; + --buildkit) export DOCKER_BUILDKIT=1 + ;; + --*) echo "Bad Option $1" + ;; + *) TYPE=$1 + ;; + *) break + ;; + esac + shift +done + +docker build $NO_CACHE \ + -t $TYPE \ + . 
diff --git a/entrypoint.sh b/minimal/entrypoint.sh similarity index 100% rename from entrypoint.sh rename to minimal/entrypoint.sh diff --git a/Dockerfile b/scipy/Dockerfile similarity index 88% rename from Dockerfile rename to scipy/Dockerfile index 93668da..f1faaca 100644 --- a/Dockerfile +++ b/scipy/Dockerfile @@ -2,7 +2,7 @@ ARG PYTHON2_IMG="saagie/python:2.7.202005.84" ARG PYTHON3_IMG="saagie/python:3.6.202005.84" # FIXME should use a minimal image and add libs after + update to latest available -ARG BASE_CONTAINER="jupyter/scipy-notebook:c7fb6660d096" +ARG BASE_CONTAINER="jupyter/scipy-notebook:76402a27fd13" FROM $PYTHON2_IMG AS PYTHON2 FROM $PYTHON3_IMG AS PYTHON3 @@ -25,8 +25,8 @@ USER root RUN apt-get update && apt-get install -y --no-install-recommends \ libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr \ flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig redis-server libpulse-dev \ - libpng3 libfreetype6-dev libatlas-base-dev gfortran \ - libgdal1-dev sasl2-bin libsasl2-2 libsasl2-dev \ + libpng16-16 libfreetype6-dev libatlas-base-dev gfortran \ + libgdal-dev sasl2-bin libsasl2-2 libsasl2-dev \ libsasl2-modules unixodbc-dev python3-tk \ qt5-default \ libqt5webkit5-dev \ @@ -121,15 +121,11 @@ ENV CUDNN_VERSION 7.6.0.64 LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" RUN apt-get update && apt-get install -y --no-install-recommends \ - ca-certificates apt-transport-https gnupg-curl && \ + ca-certificates apt-transport-https gnupg2 curl && \ rm -rf /var/lib/apt/lists/* && \ - NVIDIA_GPGKEY_SUM=d1be581509378368edeec8c1eb2958702feedf3bc3d17011adbf24efacce4ab5 && \ - NVIDIA_GPGKEY_FPR=ae09fe4bbd223a84b2ccfce3f60f4b3d7fa2af80 && \ - apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub && \ - apt-key adv --export --no-emit-version -a $NVIDIA_GPGKEY_FPR | tail -n +5 > cudasign.pub && \ - echo "$NVIDIA_GPGKEY_SUM cudasign.pub" | sha256sum -c --strict - && rm cudasign.pub && \ - echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ - echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \ + curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \ + echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \ + echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \ # For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a apt-get update && apt-get install -y --no-install-recommends \ cuda-cudart-$CUDA_PKG_VERSION \ cuda-libraries-$CUDA_PKG_VERSION \ cuda-nvtx-$CUDA_PKG_VERSION \ diff --git a/scipy/build.sh b/scipy/build.sh new file mode 100755 index 0000000..d46fd2e --- /dev/null +++ b/scipy/build.sh @@ -0,0 +1,24 @@ +#!/bin/bash +set -euxo pipefail + +NO_CACHE="" +export DOCKER_BUILDKIT=0 + +while (( $# )); do + case $1 in + --no-cache) NO_CACHE="--no-cache" + ;; + --buildkit) export DOCKER_BUILDKIT=1 + ;; + --*) echo "Bad Option $1" + ;; + # the image tag is the single positional argument + *) TYPE=$1 + ;; + esac + shift +done + +docker build $NO_CACHE \ + -t $TYPE \ + .
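To sanity-check the CUDA layer baked into the base and scipy images, one option is a throwaway run on a host with the NVIDIA container runtime (the tag below is a placeholder for a locally built image):

    # the driver and GPUs should be visible inside the container
    docker run --rm --gpus all saagie/jupyter-python-nbk:v2-scipy_local nvidia-smi
    # run the repository's import smoke test inside the py36 kernel env
    docker run --rm --gpus all -v $PWD/scipy/python3_lib_test.py:/tmp/python3_lib_test.py \
      saagie/jupyter-python-nbk:v2-scipy_local \
      bash -c "source activate py36 && python /tmp/python3_lib_test.py"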
diff --git a/scipy/entrypoint.sh b/scipy/entrypoint.sh new file mode 100755 index 0000000..682c6eb --- /dev/null +++ b/scipy/entrypoint.sh @@ -0,0 +1,4 @@ +#!/bin/bash +chown -R jovyan /notebooks-dir + +start-notebook.sh --KernelSpecManager.ensure_native_kernel=False --NotebookApp.token='' --NotebookApp.password='' --NotebookApp.base_url=$SAAGIE_BASE_PATH diff --git a/python2_lib_test.py b/scipy/python2_lib_test.py similarity index 100% rename from python2_lib_test.py rename to scipy/python2_lib_test.py diff --git a/python3_lib_test.py b/scipy/python3_lib_test.py similarity index 100% rename from python3_lib_test.py rename to scipy/python3_lib_test.py diff --git a/requirements_conda2.txt b/scipy/requirements_conda2.txt similarity index 100% rename from requirements_conda2.txt rename to scipy/requirements_conda2.txt diff --git a/requirements_conda3.txt b/scipy/requirements_conda3.txt similarity index 100% rename from requirements_conda3.txt rename to scipy/requirements_conda3.txt diff --git a/requirements_pip2.txt b/scipy/requirements_pip2.txt similarity index 100% rename from requirements_pip2.txt rename to scipy/requirements_pip2.txt diff --git a/requirements_pip3.txt b/scipy/requirements_pip3.txt similarity index 100% rename from requirements_pip3.txt rename to scipy/requirements_pip3.txt
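For completeness, a local run that mimics the Saagie settings described in the README could look like this (the host directory and the /my-notebook base path are placeholders):

    mkdir -p /path/to/data/notebooks/dir
    chown -R 1000:100 /path/to/data/notebooks/dir
    docker run -p 8888:8888 \
      -e SAAGIE_BASE_PATH=/my-notebook \
      -v /path/to/data/notebooks/dir:/notebooks-dir \
      saagie/jupyter-python-nbk:v2
    # the notebook is then served, without authentication, at http://localhost:8888/my-notebook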