diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..42afabf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/build
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..540e1a7
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.6)
+
+set(ProjectName "InOneWeekend")
+
+project(${ProjectName})
+
+# Add source and header files
+file(GLOB_RECURSE sources ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp ${CMAKE_CURRENT_SOURCE_DIR}/*.h ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cc)
+# Exclude the build directory
+list(FILTER sources EXCLUDE REGEX "build/")
+# Create the VS filters based on the directory tree layout
+source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${sources})
+
+# Add the project
+add_executable (${ProjectName} ${sources})
+
+# Add openmp
+# https://stackoverflow.com/questions/56202041/compiling-and-linking-against-openmp-with-appleclang-on-mac-os-x-mojave
+find_package(OpenMP) # Find the package
+target_link_libraries(${PROJECT_NAME} ${OpenMP_CXX_LIBRARIES}) # Link against it for C++
+
+# Set this project as startup project (MSVC)
+set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT ${ProjectName})
\ No newline at end of file
diff --git a/src/camera.h b/src/camera.h
index 58d793e..7499f95 100644
--- a/src/camera.h
+++ b/src/camera.h
@@ -12,11 +12,13 @@
 //==================================================================================================
 
 #include "ray.h"
+#include "constants.hpp"		// pi<float>(), issues: #65, #67
+#include "rng.hpp"				// random(), issue: #65
 
 vec3 random_in_unit_disk() {
     vec3 p;
     do {
-        p = 2.0*vec3(drand48(),drand48(),0) - vec3(1,1,0);
+        p = 2.0*vec3(random(),random(),0) - vec3(1,1,0);
     } while (dot(p,p) >= 1.0);
     return p;
 }
@@ -26,7 +28,7 @@ class camera {
         camera(vec3 lookfrom, vec3 lookat, vec3 vup, float vfov, float aspect, float aperture, float focus_dist) {
             // vfov is top to bottom in degrees
             lens_radius = aperture / 2;
-            float theta = vfov*M_PI/180;
+            float theta = vfov*pi<float>()/180;
             float half_height = tan(theta/2);
             float half_width = aspect * half_height;
             origin = lookfrom;
diff --git a/src/constants.hpp b/src/constants.hpp
new file mode 100644
index 0000000..4e09194
--- /dev/null
+++ b/src/constants.hpp
@@ -0,0 +1,11 @@
+#pragma once
+/*
+	Addresses issues: #65, #69
+*/
+
+
+template<typename T>
+constexpr T pi() { return static_cast<T>(3.141592653589793238); }
+
+// there's no ideal value, it is scene dependent
+constexpr const float OFFSET_EPSILON = 0.0001f;
\ No newline at end of file
diff --git a/src/hitable_list.h b/src/hitable_list.h
index 16fcce2..177795f 100644
--- a/src/hitable_list.h
+++ b/src/hitable_list.h
@@ -25,7 +25,7 @@ class hitable_list: public hitable  {
 bool hitable_list::hit(const ray& r, float t_min, float t_max, hit_record& rec) const {
     hit_record temp_rec;
     bool hit_anything = false;
-    double closest_so_far = t_max;
+    float closest_so_far = t_max;
     for (int i = 0; i < list_size; i++) {
         if (list[i]->hit(r, t_min, closest_so_far, temp_rec)) {
             hit_anything = true;
diff --git a/src/main.cc b/src/main.cc
index 3118f98..311c681 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -10,17 +10,21 @@
 //==================================================================================================
 
 #include <iostream>
+#include <limits>				// std::numeric_limits<float>::infinity()
+#include <fstream>				// doesn't require output redirection
+#include <chrono>				// Output elapsed time
 #include "sphere.h"
 #include "hitable_list.h"
 #include "float.h"
 #include "camera.h"
 #include "material.h"
-#include "random.h"
+#include "rng.hpp"				// random()
+
 
 
 vec3 color(const ray& r, hitable *world, int depth) {
     hit_record rec;
-    if (world->hit(r, 0.001, MAXFLOAT, rec)) {
+    if (world->hit(r, 0.001f, std::numeric_limits<float>::infinity(), rec)) {
         ray scattered;
         vec3 attenuation;
         if (depth < 50 && rec.mat_ptr->scatter(r, rec, attenuation, scattered)) {
@@ -32,8 +36,8 @@ vec3 color(const ray& r, hitable *world, int depth) {
     }
     else {
         vec3 unit_direction = unit_vector(r.direction());
-        float t = 0.5*(unit_direction.y() + 1.0);
-        return (1.0-t)*vec3(1.0, 1.0, 1.0) + t*vec3(0.5, 0.7, 1.0);
+        float t = 0.5f*(unit_direction.y() + 1.0f);
+        return (1.0f-t)*vec3(1.0f, 1.0f, 1.0f) + t*vec3(0.5f, 0.7f, 1.0f);
     }
 }
 
@@ -45,70 +49,133 @@ hitable *random_scene() {
     int i = 1;
     for (int a = -11; a < 11; a++) {
         for (int b = -11; b < 11; b++) {
-            float choose_mat = random_double();
-            vec3 center(a+0.9*random_double(),0.2,b+0.9*random_double());
-            if ((center-vec3(4,0.2,0)).length() > 0.9) {
-                if (choose_mat < 0.8) {  // diffuse
+            float choose_mat = random();
+            vec3 center(a+0.9f*random(),0.2f,b+0.9f*random());
+            if ((center-vec3(4,0.2f,0)).length() > 0.9f) {
+                if (choose_mat < 0.8f) {  // diffuse
                     list[i++] = new sphere(
-                        center, 0.2,
-                        new lambertian(vec3(random_double()*random_double(),
-                                            random_double()*random_double(),
-                                            random_double()*random_double()))
+                        center, 0.2f,
+                        new lambertian(vec3(random()*random(),
+                                            random()*random(),
+                                            random()*random()))
                     );
                 }
-                else if (choose_mat < 0.95) { // metal
+                else if (choose_mat < 0.95f) { // metal
                     list[i++] = new sphere(
-                        center, 0.2,
-                        new metal(vec3(0.5*(1 + random_double()),
-                                       0.5*(1 + random_double()),
-                                       0.5*(1 + random_double())),
-                                  0.5*random_double())
+                        center, 0.2f,
+                        new metal(vec3(0.5f*(1 + random()),
+                                       0.5f*(1 + random()),
+                                       0.5f*(1 + random())),
+                                  0.5f*random())
                     );
                 }
                 else {  // glass
-                    list[i++] = new sphere(center, 0.2, new dielectric(1.5));
+                    list[i++] = new sphere(center, 0.2f, new dielectric(1.5f));
                 }
             }
         }
     }
 
-    list[i++] = new sphere(vec3(0, 1, 0), 1.0, new dielectric(1.5));
-    list[i++] = new sphere(vec3(-4, 1, 0), 1.0, new lambertian(vec3(0.4, 0.2, 0.1)));
-    list[i++] = new sphere(vec3(4, 1, 0), 1.0, new metal(vec3(0.7, 0.6, 0.5), 0.0));
+    list[i++] = new sphere(vec3(0, 1, 0), 1.0f, new dielectric(1.5f));
+    list[i++] = new sphere(vec3(-4, 1, 0), 1.0f, new lambertian(vec3(0.4f, 0.2f, 0.1f)));
+    list[i++] = new sphere(vec3(4, 1, 0), 1.0f, new metal(vec3(0.7f, 0.6f, 0.5f), 0.0f));
 
     return new hitable_list(list,i);
 }
 
 
 int main() {
-    int nx = 1200;
-    int ny = 800;
+
+	// faster preview
+	/*
+		int nx = 1200;
+		int ny = 800;
+	*/
+	int nx = 320;
+	int ny = 240;
     int ns = 10;
-    std::cout << "P3\n" << nx << " " << ny << "\n255\n";
+
+
+	
+
+
     hitable *world = random_scene();
 
     vec3 lookfrom(13,2,3);
     vec3 lookat(0,0,0);
-    float dist_to_focus = 10.0;
-    float aperture = 0.1;
+    float dist_to_focus = 10.0f;
+    float aperture = 0.1f;
 
     camera cam(lookfrom, lookat, vec3(0,1,0), 20, float(nx)/float(ny), aperture, dist_to_focus);
 
-    for (int j = ny-1; j >= 0; j--) {
+	/*
+		Addresses issue: #58
+
+		We write the code to memory before saving to disk, this may prove beneficial not only 
+		with regards to parallelization.
+
+		To use OMP with MSVC add /openmp to the command line options
+	*/
+	vec3* image = new vec3[nx * ny];
+
+
+	/*
+		Extra feature: output rendering time (useful to see the speedup from multi-threading)
+
+		On my machine rendering with 1 core takes: ~8s, and with 4 cores: ~2s
+	*/
+	std::chrono::high_resolution_clock clock;
+	auto timeStamp = clock.now();
+
+
+#pragma omp parallel for
+    for (int j = 0; j < ny ; j++) {
         for (int i = 0; i < nx; i++) {
             vec3 col(0, 0, 0);
             for (int s=0; s < ns; s++) {
-                float u = float(i + random_double()) / float(nx);
-                float v = float(j + random_double()) / float(ny);
+                float u = float(i + random()) / float(nx);
+                float v = float(j + random()) / float(ny);
                 ray r = cam.get_ray(u, v);
                 col += color(r, world,0);
             }
             col /= float(ns);
-            col = vec3( sqrt(col[0]), sqrt(col[1]), sqrt(col[2]) );
-            int ir = int(255.99*col[0]);
-            int ig = int(255.99*col[1]);
-            int ib = int(255.99*col[2]);
-            std::cout << ir << " " << ig << " " << ib << "\n";
+
+			// flat array indexing, perform the flip here j' = ny-1-j
+			image[i + nx * (ny-1-j)] = col; // store in memory
         }
     }
+
+	std::chrono::duration<float> elapsedTime = clock.now() - timeStamp;
+	std::cout << "Rendering took: " << elapsedTime.count() << "s.\n";
+
+	/*
+		Addresses issue: #18
+		std::ofstream rather than std::cout, so one doesn't need to redirect the output
+	*/
+	std::ofstream file;
+	const char* outputFilename = "output_image.ppm";
+	file.open(outputFilename);
+	if (!file.good()) std::cerr << "Failed to open file " << outputFilename << std::endl;
+	file << "P3\n" << nx << " " << ny << "\n255\n";
+	for (int i = 0; i < nx * ny; ++i)
+	{
+		vec3 col = image[i];
+
+		// gamma correction
+		col = vec3(sqrt(col[0]), sqrt(col[1]), sqrt(col[2]));
+
+		// map from [0,1] to {0,1,...,255} (quantization)
+		int ir = int(255.99 * col.r());
+		int ig = int(255.99 * col.g());
+		int ib = int(255.99 * col.b());
+
+		// save pixel to disk
+		file << ir << " " << ig << " " << ib << "\n";
+	}
+	file.close();
+	delete[] image;
+
+	
+
+	return 0;
 }
diff --git a/src/material.h b/src/material.h
index 14580ad..6340341 100644
--- a/src/material.h
+++ b/src/material.h
@@ -13,7 +13,8 @@
 
 #include "ray.h"
 #include "hitable.h"
-#include "random.h"
+//#include "random.h"
+#include "rng.hpp" // random() provides better random numbers than rand() % (RAND_MAX + 1.0);
 
 struct hit_record;
 
@@ -28,7 +29,7 @@ float schlick(float cosine, float ref_idx) {
 bool refract(const vec3& v, const vec3& n, float ni_over_nt, vec3& refracted) {
     vec3 uv = unit_vector(v);
     float dt = dot(uv, n);
-    float discriminant = 1.0 - ni_over_nt*ni_over_nt*(1-dt*dt);
+    float discriminant = 1.0f - ni_over_nt*ni_over_nt*(1-dt*dt);
     if (discriminant > 0) {
         refracted = ni_over_nt*(uv - n*dt) - n*sqrt(discriminant);
         return true;
@@ -46,11 +47,29 @@ vec3 reflect(const vec3& v, const vec3& n) {
 vec3 random_in_unit_sphere() {
     vec3 p;
     do {
-        p = 2.0*vec3(random_double(),random_double(),random_double()) - vec3(1,1,1);
+        p = 2.0f*vec3(random(),random(),random()) - vec3(1,1,1);
     } while (p.squared_length() >= 1.0);
     return p;
 }
 
+/*
+	Addresses issue: #26
+
+	Using random_on_unit_sphere() in the lambertian material doesn't generate cosine weighted hemisphere samples, 
+	but rather cos^3 weighted hemisphere samples, resulting in a cos^2 brdf and not a diffuse one (the two differ 
+	only slightly visually though).
+
+	Note that this has a singularity at (0,0,0), theoretically the probability of 
+	picking (0,0,0) is 0 (since a point is a set of measure 0 with respect to the volume Lebesgue measure, 
+	however in practice with finite precision this is not so).
+*/
+vec3 random_on_unit_sphere()
+{
+	vec3 res = random_in_unit_sphere();
+	res.make_unit_vector();
+	return res;
+}
+
 
 class material  {
     public:
@@ -62,8 +81,21 @@ class lambertian : public material {
     public:
         lambertian(const vec3& a) : albedo(a) {}
         virtual bool scatter(const ray& r_in, const hit_record& rec, vec3& attenuation, ray& scattered) const  {
-             vec3 target = rec.p + rec.normal + random_in_unit_sphere();
-             scattered = ray(rec.p, target-rec.p);
+			/*
+				Addresses issue: #9
+
+				Fixes scattering on the outside, by picking the correct normal. 
+				And adds an offset along the normal to avoid self-intersection caused by numerical errors.
+			*/
+
+			// always pick the correct facing normal (in case of scattering inside a sphere)
+			vec3 normal = dot(rec.normal, r_in.direction()) < 0.0f ? rec.normal : -rec.normal;
+
+			// use correct normal
+             vec3 target = rec.p + normal + random_on_unit_sphere(); // random_in_unite_sphere() -> random_on_unit_sphere(), Addresses issue: #26
+             
+			 // offset the origin to avoid self-intersection
+			 scattered = ray(rec.p + normal * OFFSET_EPSILON, target-rec.p);
              attenuation = albedo;
              return true;
         }
@@ -76,10 +108,23 @@ class metal : public material {
     public:
         metal(const vec3& a, float f) : albedo(a) { if (f < 1) fuzz = f; else fuzz = 1; }
         virtual bool scatter(const ray& r_in, const hit_record& rec, vec3& attenuation, ray& scattered) const  {
-            vec3 reflected = reflect(unit_vector(r_in.direction()), rec.normal);
-            scattered = ray(rec.p, reflected + fuzz*random_in_unit_sphere());
+			/*
+				Addresses issue: #9
+
+				Fixes scattering on the outside, by picking the correct normal.
+				And adds an offset along the normal to avoid self-intersection caused by numerical errors.
+			*/
+
+			// always pick the correct facing normal (in case of scattering inside a sphere)
+			vec3 normal = dot(rec.normal, r_in.direction()) < 0.0f ? rec.normal : -rec.normal;
+			// use correct normal
+            vec3 reflected = reflect(unit_vector(r_in.direction()), normal);
+			// offset the origin to avoid self-intersection
+            scattered = ray(rec.p + normal * OFFSET_EPSILON, reflected + fuzz*random_on_unit_sphere()); // on_unit_sphere for lambertian behaviour
             attenuation = albedo;
-            return (dot(scattered.direction(), rec.normal) > 0);
+
+			// use the correct normal
+            return (dot(scattered.direction(), normal) > 0);
         }
         vec3 albedo;
         float fuzz;
@@ -90,6 +135,12 @@ class dielectric : public material {
     public:
         dielectric(float ri) : ref_idx(ri) {}
         virtual bool scatter(const ray& r_in, const hit_record& rec, vec3& attenuation, ray& scattered) const  {
+			/*
+				Addresses issue: #9
+
+				Adds an offset along the normal to avoid self-intersection caused by numerical errors.
+			*/
+
              vec3 outward_normal;
              vec3 reflected = reflect(r_in.direction(), rec.normal);
              float ni_over_nt;
@@ -106,17 +157,28 @@ class dielectric : public material {
              }
              else {
                   outward_normal = rec.normal;
-                  ni_over_nt = 1.0 / ref_idx;
+                  ni_over_nt = 1.0f / ref_idx;
                   cosine = -dot(r_in.direction(), rec.normal) / r_in.direction().length();
              }
-             if (refract(r_in.direction(), outward_normal, ni_over_nt, refracted))
-                reflect_prob = schlick(cosine, ref_idx);
+			 if (refract(r_in.direction(), outward_normal, ni_over_nt, refracted))
+			 {
+				 /*
+					Addresses issue: #9
+
+					Comment: schlick(cosine, ref_idx) may be confusing, since one would expect schlick(cosine, ni_over_nt)
+					But as it happens those two are equal.
+				 */
+				 reflect_prob = schlick(cosine, ref_idx);
+			 }
              else
                 reflect_prob = 1.0;
-             if (random_double() < reflect_prob)
-                scattered = ray(rec.p, reflected);
+
+			 // offset the origin to avoid self-intersection
+             if (random() < reflect_prob)
+                scattered = ray(rec.p + outward_normal * OFFSET_EPSILON, reflected);
              else
-                scattered = ray(rec.p, refracted);
+                scattered = ray(rec.p - outward_normal * OFFSET_EPSILON, refracted); // negative offset, since the origin must continue on the other side of the surface
+
              return true;
         }
 
diff --git a/src/random.h b/src/random.h
deleted file mode 100644
index 539f6b7..0000000
--- a/src/random.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef RANDOMH
-#define RANDOMH
-
-#include <cstdlib>
-
-inline double random_double() {
-    return rand() / (RAND_MAX + 1.0);
-}
-
-#endif
diff --git a/src/rng.hpp b/src/rng.hpp
new file mode 100644
index 0000000..702ae12
--- /dev/null
+++ b/src/rng.hpp
@@ -0,0 +1,42 @@
+#pragma once
+/*
+	@author: Vassillen Chizhov, August 2019
+
+	random() can serve as a substitute for drand48()
+
+	Addresses Issue #65
+*/
+
+
+#include <random>
+#include <array>
+#include <cstdint>
+
+class RNG64
+{
+protected:
+	std::mt19937_64 gen;
+	std::uniform_real_distribution<float> uniDistF32;
+public:
+	RNG64() : uniDistF32(0.0f, 1.0f)
+	{
+		std::random_device randomDevice;
+		// initialize the engine with all 312 random numbers (they are twice less than for the 32-bit version since they are 64-bit).
+		auto randomVals = std::array<std::mt19937_64::result_type, std::mt19937_64::state_size>{};
+		// stitch together 2 random 32-bit values from 
+		for (auto& val : randomVals) val = ((static_cast<uint64_t>(randomDevice()) << 32) | static_cast<uint64_t>(randomDevice()));
+		std::seed_seq seq(begin(randomVals), end(randomVals));
+		gen = std::mt19937_64{ seq };
+	}
+
+	float uniform()
+	{
+		return uniDistF32(gen);
+	}
+};
+
+float random()
+{
+	thread_local static RNG64 rng;
+	return rng.uniform();
+}
\ No newline at end of file
diff --git a/src/sphere.h b/src/sphere.h
index 4c715bc..e8e1b8b 100644
--- a/src/sphere.h
+++ b/src/sphere.h
@@ -26,6 +26,50 @@ class sphere: public hitable  {
 
 
 bool sphere::hit(const ray& r, float t_min, float t_max, hit_record& rec) const {
+
+	/*
+		@author: Vassillen Chizhov, August 2019
+
+		Addresses issues: #17, #54
+
+
+		Sphere intersection derivation:
+
+		Canonic sphere equation:
+
+		||p-center||^2 = radius^2
+
+		Parameteric ray equation:
+
+		r(t) = r.origin() + t * r.direction()
+
+		Plug in p = r(t) and solve for t:
+
+		dot(r.origin() + t * r.direction() - center, r.origin() + t * r.direction() - center) = radius * radius
+		Let oc = r.origin() - center
+
+		Use the linearity, distributivity and commutativity of the dot product to get:
+
+		dot(oc,oc) - radius * radius + 2 * dot(r.direction(),oc) * t + dot(r.direction(),r.direction()) * t * t = 0
+
+		Let A = dot(r.direction(),r.direction()), B = dot(r.direction(),oc), C = dot(oc,oc) - radius * radius, then:
+
+		A * t^2 + 2 * B * t + C = 0
+
+		D' = B^2 - A * C
+		D = 4 * B^2 - 4 * A * C = 4 * (B^2 - A * C) = 4 * D'
+
+		t_1 = (-2*B - sqrt(D))/(2*A) = (-2*B - sqrt(4*D'))/(2*A) = 2 * (-B - sqrt(D'))/(2*A) = (-B - sqrt(D'))/A
+
+		Analogously:
+
+		t_2 = (-2*B + sqrt(D))/(2*A) = (-2*B + 2*sqrt(D'))/(2*A) = (-B + sqrt(D'))/A
+
+		(Additionally the minus in front of B can be gotten rid of if one uses oc = center-r.origin(), and the division 
+		if the rays are normalized, since then length(r.direction()) = 1, however I did not dare change those since 
+		it will cause a discrepancy with the book).
+	*/
+
     vec3 oc = r.origin() - center;
     float a = dot(r.direction(), r.direction());
     float b = dot(oc, r.direction());
@@ -40,6 +84,14 @@ bool sphere::hit(const ray& r, float t_min, float t_max, hit_record& rec) const
             rec.mat_ptr = mat_ptr;
             return true;
         }
+
+		/*
+			Adresses issue: #15
+
+			The t_2 = (-b + sqrt(discriminant)) / a; 
+			is necessary if the ray starts inside the sphere, or if the first intersection 
+			got culled due to temp <= t_min
+		*/
         temp = (-b + sqrt(discriminant)) / a;
         if (temp < t_max && temp > t_min) {
             rec.t = temp;
diff --git a/src/vec3.h b/src/vec3.h
index 5d690a2..a6356b0 100644
--- a/src/vec3.h
+++ b/src/vec3.h
@@ -58,7 +58,7 @@ inline std::ostream& operator<<(std::ostream &os, const vec3 &t) {
 }
 
 inline void vec3::make_unit_vector() {
-    float k = 1.0 / sqrt(e[0]*e[0] + e[1]*e[1] + e[2]*e[2]);
+    float k = 1.0f / sqrt(e[0]*e[0] + e[1]*e[1] + e[2]*e[2]);
     e[0] *= k; e[1] *= k; e[2] *= k;
 }