Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lib/google_crawler/errors.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
defmodule GoogleCrawler.Errors.FileNotSupportedError do
  @moduledoc """
  Exception describing a file that is not supported.

  The default message is `"File is not supported"`; callers may override it
  by passing `message:` to `raise/2`.
  """

  defexception [message: "File is not supported"]
end
73 changes: 73 additions & 0 deletions lib/google_crawler/search.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
defmodule GoogleCrawler.Search do
  @moduledoc """
  The Search context.
  """

  import Ecto.Query, warn: false
  alias GoogleCrawler.Repo

  alias GoogleCrawler.Search.Keyword
  alias GoogleCrawler.Search.KeywordFile

  @doc """
  Returns the list of keywords.

  ## Examples

      iex> list_keywords()
      [%Keyword{}, ...]

  """
  def list_keywords do
    Repo.all(Keyword)
  end

  @doc """
  Gets a single keyword.

  Returns `nil` if the Keyword does not exist.

  ## Examples

      iex> get_keyword(123)
      %Keyword{}

      iex> get_keyword(456)
      nil

  """
  def get_keyword(id), do: Repo.get(Keyword, id)

  @doc """
  Creates a keyword.

  ## Examples

      iex> create_keyword(%{field: value})
      {:ok, %Keyword{}}

      iex> create_keyword(%{field: bad_value})
      {:error, %Ecto.Changeset{}}

  """
  def create_keyword(attrs \\ %{}) do
    %Keyword{}
    |> Keyword.changeset(attrs)
    |> Repo.insert()
  end

  @doc """
  Parses the keywords from the given file.

  Returns a stream that yields each row of the csv file as a list of fields.

  Raises `GoogleCrawler.Errors.FileNotSupportedError` if the file mime type is
  not supported, or a CSV parsing error if the file content is malformed.

  ## Examples

      iex> parse_keywords_from_file!("var/folder/abcdef", "text/csv") |> Enum.to_list()
      [["hotels"], ["restaurants"]]

  """
  def parse_keywords_from_file!(file_path, mime_type) do
    KeywordFile.parse!(file_path, mime_type)
  end
end
16 changes: 16 additions & 0 deletions lib/google_crawler/search/keyword.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
defmodule GoogleCrawler.Search.Keyword do
  @moduledoc """
  Ecto schema backed by the `keywords` table, holding a single `:keyword` string.
  """

  use Ecto.Schema
  import Ecto.Changeset

  schema "keywords" do
    field(:keyword, :string)

    timestamps()
  end

  @doc """
  Casts `attrs` onto `keyword` and requires the `:keyword` field to be present.
  """
  def changeset(keyword, attrs \\ %{}) do
    casted = cast(keyword, attrs, [:keyword])

    validate_required(casted, [:keyword])
  end
end
38 changes: 38 additions & 0 deletions lib/google_crawler/search/keyword_file.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
defmodule GoogleCrawler.Search.KeywordFile do
use Ecto.Schema
import Ecto.Changeset

embedded_schema do

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a great idea to use embedded_schema. I just read about it last weekend. It's the closest thing to implementing some kind of form object 💪

field :file, :map
end

@accept_file_ext ~w(.csv)

# Casts the uploaded `:file`, requires it, and then checks its extension
# through `validate_file_ext/1`.
def changeset(keyword_file, attrs \\ %{}) do
  casted = cast(keyword_file, attrs, [:file])

  casted
  |> validate_required([:file])
  |> validate_file_ext()
end

# Streams the file at `file_path` through the CSV decoder when the mime type
# is "text/csv".
def parse!(file_path, "text/csv"), do: CSV.decode!(File.stream!(file_path))

# Any other mime type is rejected with FileNotSupportedError.
def parse!(_file_path, _unexpected_mime_type) do
  raise(
    GoogleCrawler.Errors.FileNotSupportedError,
    message: "File with this extension is not supported"
  )
end

defp validate_file_ext(changeset) do
validate_change(changeset, :file, fn :file, file ->
if Enum.member?(@accept_file_ext, Path.extname(file.filename)) do
[]
else
[file: "is not supported"]
end

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about:

defp validate_file_ext(changeset) do
  if Enum.member?(@accept_file_ext, Path.extname(file.filename)) do
    changeset
  else
    add_error(changeset, :file, "is not supported")
  end
end

Returning an empty list [] is kind of strange, while returning the changeset seems more expected.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I rechecked this one. I found something interesting 📝

When using validate_change, it seems the validator only runs when the value is not nil - https://hexdocs.pm/ecto/Ecto.Changeset.html#validate_change/3. So after removing it, there is one case to consider: if the file is nil, the validation still runs and an error occurs at file.filename because the file is nil. To handle this, we might need to add another condition or function clause 👍

end)
end
end
11 changes: 11 additions & 0 deletions lib/google_crawler_web/controllers/dashboard_controller.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
defmodule GoogleCrawlerWeb.DashboardController do
  use GoogleCrawlerWeb, :controller

  alias GoogleCrawler.Search.KeywordFile

  # Renders "index.html" with a changeset built from an empty KeywordFile.
  def index(conn, _params) do
    render(conn, "index.html", changeset: KeywordFile.changeset(%KeywordFile{}))
  end
end
7 changes: 0 additions & 7 deletions lib/google_crawler_web/controllers/page_controller.ex

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ defmodule GoogleCrawlerWeb.RegistrationController do
conn
|> put_flash(:info, gettext("You have signed up successfully!"))
# TODO: Change to login path
|> redirect(to: Routes.page_path(conn, :index))
|> redirect(to: Routes.dashboard_path(conn, :index))

{:error, changeset} ->
render(conn, "new.html", changeset: changeset)
Expand Down
2 changes: 1 addition & 1 deletion lib/google_crawler_web/controllers/session_controller.ex
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ defmodule GoogleCrawlerWeb.SessionController do
conn
|> put_flash(:info, gettext("Welcome back"))
|> put_session(:current_user_id, user.id)
|> redirect(to: Routes.page_path(conn, :index))
|> redirect(to: Routes.dashboard_path(conn, :index))

{:error, _reason} ->
conn
Expand Down
23 changes: 23 additions & 0 deletions lib/google_crawler_web/controllers/upload_controller.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
defmodule GoogleCrawlerWeb.UploadController do
  use GoogleCrawlerWeb, :controller
  import Ecto.Changeset, only: [get_change: 3]

  alias GoogleCrawler.Errors.FileNotSupportedError
  alias GoogleCrawler.Search
  alias GoogleCrawler.Search.KeywordFile

  # Handles the keyword file upload.
  #
  # A valid upload is parsed and the first field of every row is echoed back as
  # a comma-separated text response. An invalid or unsupported upload redirects
  # to the dashboard with an error flash.
  def create(conn, %{"keyword_file" => keyword_file}) do
    changeset = KeywordFile.changeset(%KeywordFile{}, keyword_file)

    if changeset.valid? do
      changeset
      |> get_change(:file, nil)
      |> respond_with_keywords(conn)
    else
      reject_file(conn)
    end
  end

  # Parses the uploaded file and renders its keywords.
  #
  # The changeset only validates the file extension, while parsing dispatches
  # on the content type sent by the client, so a ".csv" upload can still be
  # rejected here. That case is reported to the user instead of crashing the
  # request.
  defp respond_with_keywords(file, conn) do
    keywords =
      file.path
      |> Search.parse_keywords_from_file!(file.content_type)
      |> Enum.map_join(", ", &List.first/1)

    # TODO: Save these keywords and triggers the task to google search for each keyword
    text(conn, keywords)
  rescue
    FileNotSupportedError -> reject_file(conn)
  end

  # Sends the user back to the dashboard with the invalid-file flash message.
  defp reject_file(conn) do
    conn
    |> put_flash(:error, gettext("Invalid file, please select again."))
    |> redirect(to: Routes.dashboard_path(conn, :index))
  end
end
2 changes: 1 addition & 1 deletion lib/google_crawler_web/plugs/skip_after_auth.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ defmodule GoogleCrawlerWeb.Plugs.SkipAfterAuth do
if conn.assigns.user_signed_in? do
conn
|> put_flash(:info, gettext("You are already signed in."))
|> redirect(to: Routes.page_path(conn, :index))
|> redirect(to: Routes.dashboard_path(conn, :index))
|> halt()
else
conn
Expand Down
4 changes: 2 additions & 2 deletions lib/google_crawler_web/router.ex
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ defmodule GoogleCrawlerWeb.Router do
pipe_through [:browser, GoogleCrawlerWeb.Plugs.EnsureAuth]

resources "/sessions", SessionController, only: [:delete]
resources "/upload", UploadController, only: [:create]

# TODO: Cleanup this default route
get "/", PageController, :index
get "/", DashboardController, :index
end

# Other scopes may use custom stacks.
Expand Down
6 changes: 6 additions & 0 deletions lib/google_crawler_web/templates/dashboard/index.html.eex
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<%= render GoogleCrawlerWeb.KeywordView, "_form.html", assigns %>
<hr>
<section>
<h3><%= gettext("Keywords") %></h3>
<p><%= gettext("You don't have any keywords.") %></p>
</section>
11 changes: 11 additions & 0 deletions lib/google_crawler_web/templates/keyword/_form.html.eex
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<section>
<h3><%= gettext("Upload your keyword file (.csv)") %></h3>
<p><%= gettext("📝 Please put one keyword per line") %></p>
<%= form_for @changeset, Routes.upload_path(@conn, :create), [multipart: true], fn f -> %>
<%= label f, :file %>
<%= file_input f, :file, required: true %>
<%= error_tag f, :file %>

<%= submit gettext("Upload") %>
<% end %>
</section>
35 changes: 0 additions & 35 deletions lib/google_crawler_web/templates/page/index.html.eex

This file was deleted.

3 changes: 3 additions & 0 deletions lib/google_crawler_web/views/dashboard_view.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
defmodule GoogleCrawlerWeb.DashboardView do
  @moduledoc """
  View module for the dashboard templates.
  """

  use GoogleCrawlerWeb, :view
end
3 changes: 3 additions & 0 deletions lib/google_crawler_web/views/keyword_view.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
defmodule GoogleCrawlerWeb.KeywordView do
  @moduledoc """
  View module for the keyword templates.
  """

  use GoogleCrawlerWeb, :view
end
3 changes: 0 additions & 3 deletions lib/google_crawler_web/views/page_view.ex

This file was deleted.

3 changes: 2 additions & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ defmodule GoogleCrawler.MixProject do
{:jason, "~> 1.0"},
{:plug_cowboy, "~> 2.0"},
{:bcrypt_elixir, "~> 2.0"},
{:faker_elixir_octopus, "~> 1.0.0", only: [:dev, :test]}
{:faker_elixir_octopus, "~> 1.0.0", only: [:dev, :test]},
{:csv, "~> 2.3"}
]
end

Expand Down
2 changes: 2 additions & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"connection": {:hex, :connection, "1.0.4", "a1cae72211f0eef17705aaededacac3eb30e6625b04a6117c1b2db6ace7d5976", [:mix], [], "hexpm", "4a0850c9be22a43af9920a71ab17c051f5f7d45c209e40269a1938832510e4d9"},
"cowboy": {:hex, :cowboy, "2.7.0", "91ed100138a764355f43316b1d23d7ff6bdb0de4ea618cb5d8677c93a7a2f115", [:rebar3], [{:cowlib, "~> 2.8.0", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "~> 1.7.1", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "04fd8c6a39edc6aaa9c26123009200fc61f92a3a94f3178c527b70b767c6e605"},
"cowlib": {:hex, :cowlib, "2.8.0", "fd0ff1787db84ac415b8211573e9a30a3ebe71b5cbff7f720089972b2319c8a4", [:rebar3], [], "hexpm", "79f954a7021b302186a950a32869dbc185523d99d3e44ce430cd1f3289f41ed4"},
"csv": {:hex, :csv, "2.3.1", "9ce11eff5a74a07baf3787b2b19dd798724d29a9c3a492a41df39f6af686da0e", [:mix], [{:parallel_stream, "~> 1.0.4", [hex: :parallel_stream, repo: "hexpm", optional: false]}], "hexpm", "86626e1c89a4ad9a96d0d9c638f9e88c2346b89b4ba1611988594ebe72b5d5ee"},
"db_connection": {:hex, :db_connection, "2.2.1", "caee17725495f5129cb7faebde001dc4406796f12a62b8949f4ac69315080566", [:mix], [{:connection, "~> 1.0.2", [hex: :connection, repo: "hexpm", optional: false]}], "hexpm", "2b02ece62d9f983fcd40954e443b7d9e6589664380e5546b2b9b523cd0fb59e1"},
"decimal": {:hex, :decimal, "1.8.1", "a4ef3f5f3428bdbc0d35374029ffcf4ede8533536fa79896dd450168d9acdf3c", [:mix], [], "hexpm", "3cb154b00225ac687f6cbd4acc4b7960027c757a5152b369923ead9ddbca7aec"},
"ecto": {:hex, :ecto, "3.3.4", "95b05c82ae91361475e5491c9f3ac47632f940b3f92ae3988ac1aad04989c5bb", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "9b96cbb83a94713731461ea48521b178b0e3863d310a39a3948c807266eebd69"},
Expand All @@ -15,6 +16,7 @@
"gettext": {:hex, :gettext, "0.17.4", "f13088e1ec10ce01665cf25f5ff779e7df3f2dc71b37084976cf89d1aa124d5c", [:mix], [], "hexpm", "3c75b5ea8288e2ee7ea503ff9e30dfe4d07ad3c054576a6e60040e79a801e14d"},
"jason": {:hex, :jason, "1.2.0", "10043418c42d2493d0ee212d3fddd25d7ffe484380afad769a0a38795938e448", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "116747dbe057794c3a3e4e143b7c8390b29f634e16c78a7f59ba75bfa6852e7f"},
"mime": {:hex, :mime, "1.3.1", "30ce04ab3175b6ad0bdce0035cba77bba68b813d523d1aac73d9781b4d193cf8", [:mix], [], "hexpm", "6cbe761d6a0ca5a31a0931bf4c63204bceb64538e664a8ecf784a9a6f3b875f1"},
"parallel_stream": {:hex, :parallel_stream, "1.0.6", "b967be2b23f0f6787fab7ed681b4c45a215a81481fb62b01a5b750fa8f30f76c", [:mix], [], "hexpm", "639b2e8749e11b87b9eb42f2ad325d161c170b39b288ac8d04c4f31f8f0823eb"},
"phoenix": {:hex, :phoenix, "1.4.16", "2cbbe0c81e6601567c44cc380c33aa42a1372ac1426e3de3d93ac448a7ec4308", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 1.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:plug, "~> 1.8.1 or ~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 1.0 or ~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "856cc1a032fa53822737413cf51aa60e750525d7ece7d1c0576d90d7c0f05c24"},
"phoenix_ecto": {:hex, :phoenix_ecto, "4.1.0", "a044d0756d0464c5a541b4a0bf4bcaf89bffcaf92468862408290682c73ae50d", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.9", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "c5e666a341ff104d0399d8f0e4ff094559b2fde13a5985d4cb5023b2c2ac558b"},
"phoenix_html": {:hex, :phoenix_html, "2.14.1", "7dabafadedb552db142aacbd1f11de1c0bbaa247f90c449ca549d5e30bbc66b4", [:mix], [{:plug, "~> 1.5", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "536d5200ad37fecfe55b3241d90b7a8c3a2ca60cd012fc065f776324fa9ab0a9"},
Expand Down
11 changes: 11 additions & 0 deletions priv/repo/migrations/20200329085134_create_keywords.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
defmodule GoogleCrawler.Repo.Migrations.CreateKeywords do
  use Ecto.Migration

  # Creates the `keywords` table with a string `keyword` column and timestamps.
  def change do
    create table(:keywords) do
      add(:keyword, :string)

      timestamps()
    end
  end
end
21 changes: 21 additions & 0 deletions test/factories/keyword_factory.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
defmodule GoogleCrawler.KeywordFactory do
  alias GoogleCrawler.Search

  # Default attributes: a generated lorem word as the keyword.
  def default_attrs do
    %{keyword: FakerElixir.Lorem.word()}
  end

  # Puts the given attrs on top of the defaults; given values win on conflicts.
  def build_attrs(attrs \\ %{}) do
    defaults = default_attrs()

    Enum.into(attrs, defaults)
  end

  # Inserts a keyword through the Search context and returns the record.
  # Fails with a match error if the insert does not return `{:ok, keyword}`.
  def create(attrs \\ %{}) do
    {:ok, keyword} =
      attrs
      |> build_attrs()
      |> Search.create_keyword()

    keyword
  end
end
13 changes: 13 additions & 0 deletions test/factories/keyword_file_factory.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
defmodule GoogleCrawler.KeywordFileFactory do
  import GoogleCrawler.FixtureHelper

  # NOTE(review): the default upload points at the "invalid_keyword.csv"
  # fixture — confirm this is the intended default rather than the valid one.
  def default_attrs do
    %{file: upload_file_fixture("keyword_files/invalid_keyword.csv")}
  end

  # Puts the given attrs on top of the defaults; given values win on conflicts.
  def build_attrs(attrs \\ %{}) do
    defaults = default_attrs()

    Enum.into(attrs, defaults)
  end
end
2 changes: 2 additions & 0 deletions test/fixtures/keyword_files/invalid_keyword.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
elixir, ruby
javascript
1 change: 1 addition & 0 deletions test/fixtures/keyword_files/unsupported_keyword.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
elixir ruby javascript
3 changes: 3 additions & 0 deletions test/fixtures/keyword_files/valid_keyword.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
elixir
ruby
javascript
3 changes: 1 addition & 2 deletions test/google_crawler/accounts_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@ defmodule GoogleCrawler.AccountsTest do
use GoogleCrawler.DataCase

alias GoogleCrawler.Accounts
alias GoogleCrawler.Accounts.User
alias GoogleCrawler.UserFactory

describe "users" do
alias GoogleCrawler.Accounts.User

test "get_user/1 returns the user with given id" do
user = UserFactory.create()

Expand Down
Loading