diff --git a/assets/css/screens/keyword.scss b/assets/css/screens/keyword.scss index ea55079..74abcd6 100644 --- a/assets/css/screens/keyword.scss +++ b/assets/css/screens/keyword.scss @@ -1,4 +1,29 @@ body.keyword.show { + nav ul { + list-style: none; + } + + .badge { + border: { + width: 1px; + style: solid; + radius: 5px; + } + font-size: x-small; + padding: 3px; + text-transform: uppercase; + } + + .badge-ads { + border-color: green; + color: green; + } + + .badge-ads-position { + border-color: grey; + color: grey; + } + iframe { width: 100%; height: 400px; diff --git a/lib/google_crawler/accounts.ex b/lib/google_crawler/accounts.ex index 874d58f..4b71e29 100644 --- a/lib/google_crawler/accounts.ex +++ b/lib/google_crawler/accounts.ex @@ -64,7 +64,7 @@ defmodule GoogleCrawler.Accounts do ## Examples iex> auth_user("bob@email.com", "valid_password") - {:ok, $User{}} + {:ok, %User{}} iex> auth_user("bob@email.com", "invalid_password") {:error, "invalid password"} diff --git a/lib/google_crawler/google/api_client.ex b/lib/google_crawler/google/api_client.ex index 245013f..982a2af 100644 --- a/lib/google_crawler/google/api_client.ex +++ b/lib/google_crawler/google/api_client.ex @@ -5,10 +5,11 @@ end defmodule GoogleCrawler.Google.ApiClient do @behaviour GoogleCrawler.Google.ApiClientBehaviour - @url "https://www.google.com/search?q=" + @url "https://www.google.com/search?hl=en&q=" + @user_agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36" def search(keyword) do - case HTTPoison.get(@url <> URI.encode(keyword)) do + case HTTPoison.get(@url <> URI.encode(keyword), request_headers()) do {:ok, %HTTPoison.Response{status_code: 200, body: body}} -> {:ok, body} @@ -19,4 +20,10 @@ defmodule GoogleCrawler.Google.ApiClient do {:error, reason} end end + + def request_headers do + [ + {"User-Agent", @user_agent} + ] + end end diff --git a/lib/google_crawler/google/scraper.ex b/lib/google_crawler/google/scraper.ex new file mode 100644 index 0000000..a8ad2cc --- /dev/null +++ b/lib/google_crawler/google/scraper.ex @@ -0,0 +1,74 @@ +defmodule GoogleCrawler.Google.Scraper do + alias GoogleCrawler.Google.ScraperResult + + @selectors %{ + total_results: "#result-stats", + non_ads_links: "div.r > a", + top_ads_links: "#tads .ads-ad > .ad_cclk > a.V0MxL", + bottom_ads_links: "#bottomads .ads-ad > .ad_cclk > a.V0MxL" + } + + def scrap(html) do + result = %ScraperResult{} + + {:ok, document} = Floki.parse_document(html) + + parse_raw_html_result(result, html) + |> parse_total_results(document) + |> parse_non_ads_links(document) + |> parse_top_ads_links(document) + |> parse_bottom_ads_links(document) + end + + defp parse_total_results(result, document) do + total_results_text = Floki.find(document, @selectors.total_results) |> Floki.text() + + total_results = + Regex.named_captures(~r/About (?.*) results/, total_results_text) + |> Map.get("total_result") + |> String.replace(",", "") + |> Integer.parse() + |> elem(0) + + %{result | total_results: total_results} + end + + defp parse_non_ads_links(result, document) do + non_ads_links = parse_links(document, @selectors.non_ads_links) + + %{result | links: non_ads_links, total_links: length(non_ads_links)} + end + + defp parse_top_ads_links(result, document) do + top_ads_links = parse_links(document, @selectors.top_ads_links) + + %{result | top_ads_links: top_ads_links, total_top_ads_links: length(top_ads_links)} + end + + defp parse_bottom_ads_links(result, document) do + bottom_ads_links = parse_links(document, @selectors.bottom_ads_links) + + %{ + result + | bottom_ads_links: bottom_ads_links, + total_bottom_ads_links: length(bottom_ads_links) + } + end + + defp parse_raw_html_result(result, html) do + %{result | raw_html_result: cleanup_html(html)} + end + + defp parse_links(document, selector) do + document + |> Floki.find(selector) + |> Floki.attribute("href") + end + + defp cleanup_html(html) do + html + |> String.chunk(:printable) + |> Enum.filter(&String.printable?/1) + |> Enum.join() + end +end diff --git a/lib/google_crawler/google/scraper_result.ex b/lib/google_crawler/google/scraper_result.ex new file mode 100644 index 0000000..652dfb5 --- /dev/null +++ b/lib/google_crawler/google/scraper_result.ex @@ -0,0 +1,10 @@ +defmodule GoogleCrawler.Google.ScraperResult do + defstruct raw_html_result: nil, + total_results: 0, + links: [], + total_links: 0, + top_ads_links: [], + total_top_ads_links: 0, + bottom_ads_links: [], + total_bottom_ads_links: 0 +end diff --git a/lib/google_crawler/google/scrapper.ex b/lib/google_crawler/google/scrapper.ex deleted file mode 100644 index 2940793..0000000 --- a/lib/google_crawler/google/scrapper.ex +++ /dev/null @@ -1,15 +0,0 @@ -defmodule GoogleCrawler.Google.Scrapper do - def scrap(html) do - # TODO: Scrap the page content - %{ - raw_html_result: cleanup_html(html) - } - end - - def cleanup_html(html) do - html - |> String.chunk(:printable) - |> Enum.filter(&String.printable?/1) - |> Enum.join() - end -end diff --git a/lib/google_crawler/search.ex b/lib/google_crawler/search.ex index 4e88b88..27e5c1f 100644 --- a/lib/google_crawler/search.ex +++ b/lib/google_crawler/search.ex @@ -4,10 +4,13 @@ defmodule GoogleCrawler.Search do """ import Ecto.Query, warn: false + alias Ecto.Multi alias GoogleCrawler.Repo alias GoogleCrawler.Search.Keyword alias GoogleCrawler.Search.KeywordFile + alias GoogleCrawler.Search.Link + alias GoogleCrawler.Google.ScraperResult @doc """ Returns the list of keywords belongs to the given user. @@ -21,6 +24,7 @@ defmodule GoogleCrawler.Search do def list_user_keywords(user) do Keyword |> where(user_id: ^user.id) + |> order_by(desc: :inserted_at) |> Repo.all() end @@ -38,7 +42,10 @@ defmodule GoogleCrawler.Search do ** (Ecto.NoResultsError) """ - def get_keyword(id), do: Repo.get(Keyword, id) + def get_keyword(id) do + Repo.get(Keyword, id) + |> Repo.preload(:links) + end @doc """ Creates a keyword. @@ -98,6 +105,53 @@ defmodule GoogleCrawler.Search do |> Repo.update() end + @doc """ + Update the search result for a keyword. + + ## Examples + + iex> update_keyword_result(keyword, %{field: new_value}) + {:ok, %Keyword{}} + + iex> update_keyword_result(keyword, %{field: bad_value}) + {:error, %Ecto.Changeset{}} + + """ + def update_keyword_result(%Keyword{} = keyword, attrs) do + keyword + |> Keyword.update_result_changeset(attrs) + |> Repo.update() + end + + @doc """ + Update the keyword result from the scraper result and mark the keyword as completed. + """ + def update_keyword_result_from_scraper(%Keyword{} = keyword, %ScraperResult{} = result) do + keyword_changeset = + Keyword.update_result_changeset(keyword, %{ + status: :completed, + raw_html_result: result.raw_html_result, + total_results: result.total_results, + total_ads_links: result.total_top_ads_links + result.total_bottom_ads_links, + total_links: result.total_links + }) + + Multi.new() + |> create_keyword_link_multi(keyword, result.top_ads_links, %{ + is_ads: true, + ads_position: :top + }) + |> create_keyword_link_multi(keyword, result.bottom_ads_links, %{ + is_ads: true, + ads_position: :bottom + }) + |> create_keyword_link_multi(keyword, result.links, %{ + is_ads: false + }) + |> Multi.update(:keyword, keyword_changeset) + |> Repo.transaction() + end + @doc """ Parses the keyword from the given file. Returns the stream for each line in the csv file as [line_result]. @@ -112,4 +166,29 @@ defmodule GoogleCrawler.Search do def parse_keywords_from_file!(file_path, mime_type) do KeywordFile.parse!(file_path, mime_type) end + + @doc """ + List result links of the given keyword. + """ + def list_keyword_links(keyword, query \\ []) do + Link + |> where(keyword_id: ^keyword.id) + |> where(^query) + |> order_by(desc: :is_ads) + |> Repo.all() + end + + # Create the multi to insert the links. + # Other attributes of the link except the link itself must be specified + defp create_keyword_link_multi(multi, _keyword, [], _attrs), do: multi + + defp create_keyword_link_multi(multi, keyword, [link | rest_of_links], attrs) do + changeset = + Ecto.build_assoc(keyword, :links) + |> Link.changeset(Map.put(attrs, :url, link)) + + multi + |> Multi.insert("link_#{length(multi.operations)}", changeset) + |> create_keyword_link_multi(keyword, rest_of_links, attrs) + end end diff --git a/lib/google_crawler/search/keyword.ex b/lib/google_crawler/search/keyword.ex index 8872947..a775ad9 100644 --- a/lib/google_crawler/search/keyword.ex +++ b/lib/google_crawler/search/keyword.ex @@ -10,17 +10,30 @@ defmodule GoogleCrawler.Search.Keyword do field :keyword, :string field :status, GoogleCrawler.Search.Keyword.Status, default: :in_queue field :raw_html_result, :string + field :total_results, :integer + field :total_ads_links, :integer + field :total_links, :integer belongs_to :user, GoogleCrawler.Accounts.User + has_many :links, GoogleCrawler.Search.Link timestamps() end - @fields ~w(keyword user_id status raw_html_result)a + @fields ~w(keyword user_id status raw_html_result total_results total_ads_links total_links)a def changeset(keyword, attrs \\ %{}) do keyword |> cast(attrs, @fields) |> validate_required([:keyword, :user_id, :status]) end + + def update_result_changeset(keyword, attrs \\ %{}) do + keyword + |> changeset(attrs) + |> validate_required([:raw_html_result, :total_results, :total_ads_links, :total_links]) + |> validate_number(:total_results, greater_than_or_equal_to: 0) + |> validate_number(:total_ads_links, greater_than_or_equal_to: 0) + |> validate_number(:total_links, greater_than_or_equal_to: 0) + end end diff --git a/lib/google_crawler/search/link.ex b/lib/google_crawler/search/link.ex new file mode 100644 index 0000000..5ea4924 --- /dev/null +++ b/lib/google_crawler/search/link.ex @@ -0,0 +1,34 @@ +import EctoEnum + +defenum(GoogleCrawler.Search.Link.AdsPosition, top: 0, bottom: 1) + +defmodule GoogleCrawler.Search.Link do + use Ecto.Schema + import Ecto.Changeset + + schema "links" do + field :url, :string + field :is_ads, :boolean, default: false + field :ads_position, GoogleCrawler.Search.Link.AdsPosition + + belongs_to :keyword, GoogleCrawler.Search.Keyword + + timestamps() + end + + @fields ~w(url is_ads ads_position)a + + def changeset(link, attrs \\ %{}) do + link + |> cast(attrs, @fields) + |> validate_required([:url, :is_ads]) + |> validate_ads_position() + end + + def validate_ads_position(%Ecto.Changeset{changes: %{is_ads: true}} = changeset) do + changeset + |> validate_required(:ads_position) + end + + def validate_ads_position(changeset), do: changeset +end diff --git a/lib/google_crawler/search/search_keyword_task.ex b/lib/google_crawler/search/search_keyword_task.ex index bd14ffa..ab820b7 100644 --- a/lib/google_crawler/search/search_keyword_task.ex +++ b/lib/google_crawler/search/search_keyword_task.ex @@ -1,11 +1,11 @@ defmodule GoogleCrawler.Search.SearchKeywordTask do alias GoogleCrawler.Search.Keyword - alias GoogleCrawler.Google.Scrapper + alias GoogleCrawler.Google.Scraper def perform(%Keyword{} = keyword) do case google_api_client().search(keyword.keyword) do {:ok, body} -> - Scrapper.scrap(body) + Scraper.scrap(body) {:error, reason} -> raise "Keyword search failed: #{reason}" diff --git a/lib/google_crawler/search/search_keyword_worker.ex b/lib/google_crawler/search/search_keyword_worker.ex index f2ed80d..8bdf9fe 100644 --- a/lib/google_crawler/search/search_keyword_worker.ex +++ b/lib/google_crawler/search/search_keyword_worker.ex @@ -1,4 +1,9 @@ defmodule GoogleCrawler.SearchKeywordWorker do + @moduledoc """ + Perform the keyword search and scrap in background. + Update the result of the keyword after it is successfully scraped. + The retry mechanism is implemented. So the task will be retried if it is failed. + """ use GenServer alias GoogleCrawler.Search @@ -43,38 +48,46 @@ defmodule GoogleCrawler.SearchKeywordWorker do end def handle_info({ref, result}, state) do - {keyword, _retry_count} = Map.get(state, ref) - - Search.update_keyword(keyword, %{ - status: :completed, - raw_html_result: result.raw_html_result - }) + {keyword, retry_count} = Map.get(state, ref) - # Demonitor the task and remove from the state - Process.demonitor(ref, [:flush]) - new_state = Map.delete(state, ref) + new_state = + case Search.update_keyword_result_from_scraper(keyword, result) do + {:ok, _result} -> + # Demonitor the task and remove from the state + Process.demonitor(ref, [:flush]) + Map.delete(state, ref) + + {:error, _reason} -> + maybe_retry(state, ref, keyword, retry_count) + end {:noreply, new_state} end + def handle_info({:DOWN, _ref, :process, _pid, :normal}, state) do + {:noreply, state} + end + def handle_info({:DOWN, ref, :process, _pid, _reason}, state) do {keyword, retry_count} = Map.get(state, ref) - - new_state = - if retry_count < @max_retry_count do - task = start_task(keyword) - - state - |> Map.delete(ref) - |> Map.put(task.ref, {keyword, retry_count + 1}) - else - Search.update_keyword(keyword, %{status: :failed}) - Map.delete(state, ref) - end + new_state = maybe_retry(state, ref, keyword, retry_count) {:noreply, new_state} end + defp maybe_retry(state, ref, keyword, retry_count) do + if retry_count < @max_retry_count do + task = start_task(keyword) + + state + |> Map.delete(ref) + |> Map.put(task.ref, {keyword, retry_count + 1}) + else + Search.update_keyword(keyword, %{status: :failed}) + Map.delete(state, ref) + end + end + defp start_task(%Keyword{} = keyword) do Task.Supervisor.async_nolink(GoogleCrawler.TaskSupervisor, fn -> GoogleCrawler.Search.SearchKeywordTask.perform(keyword) diff --git a/lib/google_crawler_web/controllers/registration_controller.ex b/lib/google_crawler_web/controllers/registration_controller.ex index e7f8d2e..cd33b0a 100644 --- a/lib/google_crawler_web/controllers/registration_controller.ex +++ b/lib/google_crawler_web/controllers/registration_controller.ex @@ -15,7 +15,6 @@ defmodule GoogleCrawlerWeb.RegistrationController do {:ok, _user} -> conn |> put_flash(:info, gettext("You have signed up successfully!")) - # TODO: Change to login path |> redirect(to: Routes.dashboard_path(conn, :index)) {:error, changeset} -> diff --git a/lib/google_crawler_web/controllers/upload_controller.ex b/lib/google_crawler_web/controllers/upload_controller.ex index d1a6fb2..38256cb 100644 --- a/lib/google_crawler_web/controllers/upload_controller.ex +++ b/lib/google_crawler_web/controllers/upload_controller.ex @@ -22,7 +22,6 @@ defmodule GoogleCrawlerWeb.UploadController do end end - # TODO: Trigger the scrapper background worker defp create_and_trigger_google_search(csv_result, conn) do csv_result |> Stream.map(fn keyword_row -> List.first(keyword_row) end) diff --git a/lib/google_crawler_web/templates/keyword/show.html.eex b/lib/google_crawler_web/templates/keyword/show.html.eex index f297b04..cdfec27 100644 --- a/lib/google_crawler_web/templates/keyword/show.html.eex +++ b/lib/google_crawler_web/templates/keyword/show.html.eex @@ -1,6 +1,41 @@ +
+
    +
  • <%= link(gettext("⬅️ Back"), to: Routes.dashboard_path(@conn, :index)) %>
  • +
+
+

<%= gettext("Keyword: %{keyword}", keyword: @keyword.keyword) %>

-

Result:

+
    +
  • + <%= gettext("Total Results: %{total_results}", total_results: @keyword.total_results)%> +
  • +
  • + <%= gettext("Total Ads: %{total_ads}", total_ads: @keyword.total_ads_links)%> +
  • +
  • + <%= gettext("Total Non-Ads: %{total_non_ads}", total_non_ads: @keyword.total_links)%> +
  • +
+
+ +
+

<%= gettext("Results:") %>

+
    + <%= for link <- @keyword.links do %> +
  • + <%= if link.is_ads do %> + <%= gettext("Ads") %> + <%= link.ads_position %> + <% end %> + <%= link(link.url, to: link.url) %> +
  • + <% end %> +
+
+ +
+

<%= gettext("Raw Html Result:") %>

diff --git a/mix.exs b/mix.exs index 84cd248..d8a21d0 100644 --- a/mix.exs +++ b/mix.exs @@ -47,7 +47,8 @@ defmodule GoogleCrawler.MixProject do {:faker_elixir_octopus, "~> 1.0.0", only: [:dev, :test]}, {:csv, "~> 2.3"}, {:httpoison, "~> 1.6"}, - {:ecto_enum, "~> 1.4"} + {:ecto_enum, "~> 1.4"}, + {:floki, "~> 0.26.0"} ] end diff --git a/priv/repo/migrations/20200414103012_create_links_and_add_links_count_to_keywords.exs b/priv/repo/migrations/20200414103012_create_links_and_add_links_count_to_keywords.exs new file mode 100644 index 0000000..63c8e1a --- /dev/null +++ b/priv/repo/migrations/20200414103012_create_links_and_add_links_count_to_keywords.exs @@ -0,0 +1,23 @@ +defmodule GoogleCrawler.Repo.Migrations.CreateLinksAndAddLinksCountToKeyword do + use Ecto.Migration + + def change do + alter table(:keywords) do + add :total_results, :bigint + add :total_ads_links, :integer + add :total_links, :integer + end + + create table(:links) do + add :url, :text + add :is_ads, :boolean + add :ads_position, :integer + + add :keyword_id, references(:keywords, on_delete: :delete_all), null: false + + timestamps() + end + + create index(:links, [:keyword_id]) + end +end diff --git a/test/factories/link_factory.ex b/test/factories/link_factory.ex new file mode 100644 index 0000000..6007309 --- /dev/null +++ b/test/factories/link_factory.ex @@ -0,0 +1,27 @@ +defmodule GoogleCrawler.LinkFactory do + alias GoogleCrawler.Repo + alias GoogleCrawler.Search.Link + alias GoogleCrawler.KeywordFactory + + def default_attrs do + %{ + url: FakerElixir.Internet.url(), + is_ads: false + } + end + + def build_attrs(attrs \\ %{}) do + Enum.into(attrs, default_attrs()) + end + + def create(attrs \\ %{}, keyword \\ KeywordFactory.create()) do + link_attrs = build_attrs(attrs) + + {:ok, link} = + Ecto.build_assoc(keyword, :links) + |> Link.changeset(link_attrs) + |> Repo.insert() + + link + end +end diff --git a/test/factories/scraper_result_factory.ex b/test/factories/scraper_result_factory.ex new file mode 100644 index 0000000..c28bee6 --- /dev/null +++ b/test/factories/scraper_result_factory.ex @@ -0,0 +1,33 @@ +defmodule GoogleCrawler.ScraperResultFactory do + def default_attrs(total_links, total_top_ads_links, total_bottom_ads_links) do + total_links = total_links || Enum.random(0..10) + total_top_ads_links = total_top_ads_links || Enum.random(0..5) + total_bottom_ads_links = total_bottom_ads_links || Enum.random(0..5) + + %{ + raw_html_result: FakerElixir.Lorem.sentences(10..20), + total_results: Enum.random(100_000..200_000), + links: build_link(total_links), + total_links: total_links, + top_ads_links: build_link(total_top_ads_links), + total_top_ads_links: total_top_ads_links, + bottom_ads_links: build_link(total_bottom_ads_links), + total_bottom_ads_links: total_bottom_ads_links + } + end + + def build_attrs(attrs \\ %{}) do + Enum.into( + attrs, + default_attrs( + attrs[:total_links], + attrs[:total_top_ads_links], + attrs[:total_bottom_ads_links] + ) + ) + end + + defp build_link(count) do + for _ <- 0..(count - 1), do: FakerElixir.Internet.url() + end +end diff --git a/test/fixtures/search_results/hotels.html b/test/fixtures/search_results/hotels.html new file mode 100644 index 0000000..44c3bb8 --- /dev/null +++ b/test/fixtures/search_results/hotels.html @@ -0,0 +1,245 @@ + + +hotels - Google Search

Accessibility links

Skip to main contentAccessibility help
Accessibility feedback
Sign in
To all public transportation workers, thank you
  • Remove
  • Report inappropriate predictions

    Search modes

    All
    Maps
    Images
    News
    Videos
    More
    ShoppingBooksFlightsFinance
    Settings
    Search settingsLanguages
    Turn on SafeSearch
    Advanced searchSearch activityYour data in SearchSearch help
    Tools
      Any time
      • Any time
      • Past hour
      • Past 24 hours
      • Past week
      • Past month
      • Past year
      • Custom range...
        Customised date range
      All results
      • All results
      • Verbatim
      About 5,970,000,000 results (0.79 seconds) 

      Ads


      1. Hotels: Booking.com‎

        Ad·www.booking.com/Hotels‎
        Ad·www.booking.com/Hotels‎
        Why this ad?
        Lowest Price Guarantee! Book at over 2,590,000 hotels online. Best Price Guarantee.
        • Book Now
        • Book for Tonight
        • Book for Tomorrow
        • No Booking Fees
        • Secure Booking
        bangkok hotels 5-star
        cheap hotels in bangkok
        hotels near me
        3-star hotels in bangkok
        bangkok hotels near airport
        cheapest hotel in bangkok price
        4-star hotels in bangkok
        hotel booking

        People also search for

      Search Results

      Hotels | Bang Chak, Phra Khanong, Bangkok
      About these results

      Some of these hotel search results may be personalised based on your browsing activity and recent searches on Google, as well as travel confirmation sent to your Gmail. Manage your search and app activity and personalised search results.

      Hotel prices come from Google's partners.

      Mon, May 11
      Tue, May 12
      1 night
      Done
      2
      1 guest
      2 guests
      3 guests
      4 guests
      Top-rated
      Budget options
      Luxury stays
      For tonight
      Photo
      THB 415
      Resort M Bangkok - On Nut
      3.6 (456)
      Indoor pool
      Free Wi-Fi
      Photo
      THB 272
      Hide Bangkok
      4.5 (304)
      Spa
      Free Wi-Fi
      Photo
      THB 1,139
      X2 Vibe Bangkok Sukhumvit Hotel โรงแรมครอสทูไวบ์กรุงเทพสุขุมวิท
      4.1 (1,631)
      Outdoor pool
      Spa
      Photo
      THB 823
      Chateau de Sukhumvit 56
      4.1 (335)
      Free Wi-Fi
      THB 415
      THB 272
      THB 1,139
      THB 823
      THB 4,260
      THB 823
      THB 1,039
      THB 286
      THB 1,081
      THB 1,271
      THB 272
      THB 380
      THB 1,139
      THB 340
      THB 1,170
      THB 465
      THB 805
      THB 415
      THB 1,087
      THB 454
      THB 501
      THB 500
      map image
      View larger map
      View 112 hotels

      Web results


      Hotels.com - Deals & Discounts for Hotel Reservations from ...

      www.hotels.com
      www.hotels.com
      1. Cached
      2. Similar
      Hotels.com | Find cheap hotels and discounts when you book on Hotels.com. Compare hotel deals, offers and read unbiased reviews on hotels.
      ‎Deals · ‎Hotels.com · ‎Hotels in Singapore
      discount hotels
      sign in hotel
      how does hotel.com work
      hotels.com account
      hotels.com coupon
      how to get a free night at a hotel

      People also search for


      10 Best Bangkok Hotels, Thailand (From $8) - Booking.com

      www.booking.com › Hotels in Thailand › Hotels in Bangkok Province
      www.booking.com › Hotels in Thailand › Hotels in Bangkok Province
      1. Cached
      Great savings on hotels in Bangkok, Thailand online. Good availability and great rates. Read hotel reviews and choose the best hotel deal for your stay.

      How much does it cost to stay in a hotel in Bangkok?
      +

      On average, 3-star hotels in Bangkok cost $37 per night, and 4-star hotels in Bangkok are $67 per night. If you're looking for something really special, a 5-star hotel in Bangkok can be found for $141 per night, on average (based on Booking.com prices).

      +
      Which hotels in Bangkok offer an especially good breakfast?
      +

      For hotels in Bangkok that serve highly-rated breakfasts, try Mandarin Oriental Bangkok, Kasayapi Hotel, and Old Capital Bike Inn.

      +
      Which hotels in Bangkok are good for families?
      +

      Many families visiting Bangkok loved staying at Baan Vajra, The Choice Residence - Adults Only, and Mandarin Oriental Bangkok.

      +
      How much is a hotel in Bangkok for this weekend?
      +

      The average price per night for a 3-star hotel in Bangkok this weekend is $44 or, for a 4-star hotel, $60. Looking for something even fancier? 5-star hotels in Bangkok for this weekend cost around $136 per night, on average (based on Booking.com prices).

      +
      Which hotels in Bangkok are good for couples?
      +

      These hotels in Bangkok are highly rated by couples: Kessara Hotel, Mandarin Oriental Bangkok, and Kasayapi Hotel.

      +
      What are the best hotels in Bangkok near Suvarnabhumi Airport?
      +

      Travelers who stayed in Bangkok near Suvarnabhumi Airport (BKK) have said good things about Novotel Bangkok Suvarnabhumi Airport, OYO 872 Saen Sabai Hostel, and WJ Residence at Suvarnaphumi.

      +
      Which hotels in Bangkok have nice views?
      +

      Avani+ Riverside Bangkok Hotel, SO/ BANGKOK, and Eastin Grand Hotel Sathorn got great room view-related reviews from travelers in Bangkok.

      +
      How much is a hotel in Bangkok for tonight?
      +

      On average, it costs $43 per night to book a 3-star hotel in Bangkok for tonight. You'll pay around $59 if you choose to stay in a 4-star hotel tonight, while a 5-star hotel in Bangkok will cost around $138, on average (based on Booking.com prices).

      +
      Which neighborhood is the best one to stay at in Bangkok?
      +

      Sukhumvit, Downtown Bangkok, and Wattana are popular with other travelers visiting Bangkok.

      +
      What are the best hotels in Bangkok near Victory Monument?
      +

      Some of the best hotels in Bangkok near Victory Monument include Victory Park Hostel, Royal View Resort - Rang Nam and YELLO ROOMS Victory Monument.

      +
      View more on booking.com

      Show more
      Show less

      THE 10 BEST Hotels in Bangkok for 2020 (from $12 ...

      www.tripadvisor.com › Asia › Thailand › Bangkok
      www.tripadvisor.com › Asia › Thailand › Bangkok
      1. Similar
      Free parking. Special offer. Hotel website. Chatrium Hotel Riverside Bangkok. Show Prices. 166777 reviews. #3 Best Value of 4107 places to stay in Bangkok.

      What are the best hotels near Wat Phra Chetuphon?
      sala rattanakosin Bangkok, Aurum The River Place, and OYO 434 Boonsiri Place Hotel are some of the most popular hotels for travelers looking to stay near Wat Phra Chetuphon. See the full list: Hotels near Wat Phra Chetuphon.
      What are the best resorts in Bangkok?
      The Siam, Ariyasomvilla, and Anantara Riverside Bangkok Resort are all popular resorts for travelers staying in Bangkok. See the full list: Bangkok Resorts.
      What are the best hotels near BTS Skytrain?
      Popular hotels close to BTS Skytrain include Novotel Bangkok on Siam Square, Amari Watergate Bangkok, and The St. Regis Bangkok. See the full list: Hotels near BTS Skytrain.
      What are the best luxury hotels in Bangkok?
      Popular luxury hotels in Bangkok include The Athenee Hotel, A Luxury Collection Hotel, Bangkok, Bangkok Marriott Hotel The Surawongse, and Mandarin Oriental, Bangkok. See the full list: Bangkok Luxury Hotels.
      Which hotels are closest to Bangkok Intl Airport?
      Popular hotels close to Bangkok Intl Airport include Novotel Bangkok Suvarnabhumi Airport, Siam Mandarina Hotel, and Miracle Suvarnabhumi Airport Hotel. See the full list: Hotels near (BKK) Suvarnabhumi Intl.
      What are the best hotels near The Grand Palace?
      A few of the most popular hotels near The Grand Palace are sala rattanakosin Bangkok, Ibrik Resort by the River, and Aurum The River Place. See the full list: Hotels near The Grand Palace.
      What are the best pet-friendly hotels in Bangkok?
      Some of the most popular pet-friendly hotels in Bangkok are W Bangkok, InterContinental Bangkok, and Sathorn Vista, Bangkok - Marriott Executive Apartments. See the full list: Pet Friendly Hotels in Bangkok.
      What are the best cheap hotels in Bangkok?
      Popular cheap hotels in Bangkok include Phranakorn-Nornlen Hotel, Villa Phra Sumen Bangkok, and ibis Styles Bangkok Khaosan Viengtai. See the full list: Cheap Hotels in Bangkok.
      What are the best hotels with a spa in Bangkok?
      The Athenee Hotel, A Luxury Collection Hotel, Bangkok, Bangkok Marriott Hotel The Surawongse, and Mandarin Oriental, Bangkok have a spa and received excellent reviews from travelers in Bangkok. See the full list: Bangkok Spa Resorts.
      Which neighborhoods in Bangkok are popular to stay in?
      Popular Bangkok neighborhoods with hotels include Sukhumvit, Downtown Bangkok, and Pathum Wan.
      View more on tripadvisor.com

      Show more
      Show less

      30 Best Bangkok Hotels in 2020 | Great Savings & Reviews of ...

      www.agoda.com › World › Thailand Hotels › Bangkok Province Hotels
      www.agoda.com › World › Thailand Hotels › Bangkok Province Hotels
      1. Cached
      2. Similar
      Super cheap hotels in Bangkok tend be of lower quality. (I've lived in Thailand for 15+ years). This hotel was very solid. Robert, United States.

      What's the average price of a hotel in Bangkok?

      The average price for a 3-star hotel in Bangkok is 37 USD, the average price for a 4-star hotel in Bangkok is 60 USD, and the average price for a 5-star hotel in Bangkok is 77 USD.

      What's the average price of a hotel in Bangkok this weekend?

      The average price for a 3-star hotel in Bangkok this weekend is 37 USD, the average price for a 4-star hotel in Bangkok is 55 USD, and the average price for a 5-star hotel in Bangkok is 65 USD.

      What's the average price of a hotel in Bangkok tonight?

      The average price for a 3-star hotel in Bangkok tonight is 40 USD, the average price for a 4-star hotel in Bangkok is 56 USD, and the average price for a 5-star hotel in Bangkok is 67 USD.

      What are the best hotels in Bangkok near Suvarnabhumi Airport?

      Verified travelers to Bangkok who stayed near Suvarnabhumi Airport have given top reviews to Novotel Bangkok Suvarnabhumi Airport, Amaranth Suvarnabhumi Airport, BW Premier Collection by Best Western, and Mariya Boutique Residence.

      What are the best hotels in Bangkok near Grand Palace?

      Verified travelers to Bangkok who stayed near Grand Palace have given top reviews to Chillax Resort, Nouvo City Hotel, and The Warehouse Bangkok.

      What are the best hotels in Bangkok near Central World?

      Verified travelers to Bangkok who stayed near Central World have given top reviews to Grande Centre Point Hotel Ratchadamri, Centara Grand at Central World Hotel, and Novotel Bangkok On Siam Square Hotel.

      What are the most popular hotels in Bangkok?

      Some of our most popular hotels in Bangkok are The Berkeley Hotel Pratunam, Amari Don Muang Airport Bangkok Hotel, and Grande Centre Point Hotel Terminal 21

      What are the best hotels in Bangkok for couples?

      Hotels in Bangkok that are highly rated by couples include Amari Don Muang Airport Bangkok Hotel, Grande Centre Point Hotel Terminal 21, and Grande Centre Point Hotel Ratchadamri.

      What are the best hotels in Bangkok for families?

      Hotels in Bangkok that are highly rated by families include The Berkeley Hotel Pratunam, Amari Don Muang Airport Bangkok Hotel, and Grande Centre Point Hotel Terminal 21.

      What hotels in Bangkok have the best views?

      Verified travelers to Bangkok have given top reviews for the views at AETAS Lumpini, Somerset Ekamai Bangkok, and Grande Centre Point Hotel Terminal 21.

      View more on agoda.com

      Show more
      Show less
      Popular hotel‎: ‎The Berkeley Hotel Pratunam
      Nightly rates from‎: ‎$5
      Accommodations‎: ‎11,488 properties
      Popular area‎: ‎Sukhumvit

      Thailand Hotels 2020 - Where to Stay and the Best Hotels in ...

      www.agoda.com › country › thailand
      www.agoda.com › country › thailand
      1. Cached
      2. Similar
      Thailand Hotels. Agoda offers 76813 hotels in Thailand including Bangkok, Pattaya, Chiang Mai, Phuket, Hua Hin / Cha-am and more. LOW RATES ...
      Accommodations‎: ‎166,707 properties
      Popular City‎: ‎Bangkok
      Nightly rates from‎: ‎$5
      Reasons to visit‎: ‎Beaches, Nature, Sightseeing
      anantara agoda
      agoda bangkok
      agoda thailand office
      budget hotel in bangkok
      famous hotel in bangkok
      agoda wiki

      People also search for


      The Best Hotels in Bangkok (FREE cancellation on select ...

      www.expedia.co.th › Thailand
      www.expedia.co.th › Thailand
      1. Similar
      Book now your hotel in Bangkok and pay later with Expedia. Enjoy free cancellation on most hotels. Browse Expedia's selection of 4479 hotels and places to ...
      expedia bangkok flights
      expedia bangkok office
      hotel bangkok sukhumvit
      grand hyatt erawan
      expedia thailand
      traveloka

      People also search for


      Bangkok Hotels - Where to Stay in Bangkok

      www.bangkok.com › hotels
      www.bangkok.com › hotels
      1. Similar
      Rating: 4.1 - ‎94 votes
      Business travellers will feel right at home at one of the high-end hotels in Ploenchit-Chidlom and Sukhumvit areas. Visitors who come to shop might want to stay in ...

      Bangkok Hotels | Find & compare great deals on trivago

      www.trivago.com › Thailand › Central Region
      www.trivago.com › Thailand › Central Region
      1. Cached
      2. Similar
      Compare the prices of 18764 hotels in Bangkok, Thailand. Find your ideal accommodation from hundreds of great deals and save with trivago.com.
      cheapest hotel in bangkok price
      trivago bangkok flights
      bangkok hotels 5-star
      hotel in bangkok near pratunam
      3-star hotels in bangkok
      trivago pattaya

      People also search for


      Bangkok Hotels: 6,250 Cheap Bangkok Hotel Deals, Thailand

      www.hotelscombined.com › Thailand › Central Thailand
      www.hotelscombined.com › Thailand › Central Thailand
      1. Cached
      2. Similar
      HotelsCombined™ compares all Bangkok hotel deals from the best accommodation sites at once. Read Guest Reviews on 6250 hotels in Bangkok, Thailand.
      cheap hotels in bangkok sukhumvit
      bangkok hotels pratunam
      hotels bangkok agoda
      cheapest hotel in bangkok price
      bangkok hotels 5-star
      3-star hotels in bangkok

      People also search for

      Images for hotels

      Guided Search Filters

      mercia hotels
      lebua hotels
      pgs hotels
      thailand
      holiday inn
      state tower
      bangkok
      inn express
      hotels near
      athenee hotel
      hotel deals
      ihg
      resort
      thai
      rooms
      waterfront
      hotel room
      phuket
      Image result for hotels
      Image result for hotels
      Image result for hotels
      Image result for hotels
      Image result for hotels
      Image result for hotels
      Image result for hotels
      Image result for hotels
      Image result for hotels
      Image result for hotels
      View all
      More images for hotelsReportedReport imagesThank you for the feedback. Report another imagePlease report the offensive image. CancelDone

      Searches related to hotels

      bangkok hotels 5-star

      cheap hotels in bangkok

      hotels near me

      3-star hotels in bangkok

      bangkok hotels near airport

      cheapest hotel in bangkok price

      4-star hotels in bangkok

      hotel booking

      Page navigation

      12345678910Next

      Footer links

      Thailand
      Bang Chak, Phra Khanong, Bangkok - From your Internet address - Use precise location - Learn more
      HelpSend feedbackPrivacyTerms
      Google apps
      \ No newline at end of file diff --git a/test/fixtures/search_result.html b/test/fixtures/search_results/search_result.html similarity index 100% rename from test/fixtures/search_result.html rename to test/fixtures/search_results/search_result.html diff --git a/test/google_crawler/google/scraper_test.exs b/test/google_crawler/google/scraper_test.exs new file mode 100644 index 0000000..c9ff989 --- /dev/null +++ b/test/google_crawler/google/scraper_test.exs @@ -0,0 +1,48 @@ +defmodule GoogleCrawler.Google.ScraperTest do + use ExUnit.Case + + alias GoogleCrawler.Google.Scraper + alias GoogleCrawler.Google.ScraperResult + + test "scrap/1" do + html = response_fixtures("hotels.html") + + result = Scraper.scrap(html) + raw_html = cleanup_html(html) + + assert %ScraperResult{ + raw_html_result: ^raw_html, + total_results: 5_970_000_000, + links: [ + "https://www.hotels.com/", + "https://www.booking.com/city/th/bangkok.html", + "https://www.tripadvisor.com/Hotels-g293916-Bangkok-Hotels.html", + "https://www.agoda.com/city/bangkok-th.html", + "https://www.agoda.com/country/thailand.html", + "https://www.expedia.co.th/en/Bangkok-Hotels.d178236.Travel-Guide-Hotels", + "http://www.bangkok.com/hotels/", + "https://www.trivago.com/bangkok-519/hotel", + "https://www.hotelscombined.com/Place/Bangkok.htm" + ], + total_links: 9, + top_ads_links: [ + "https://www.booking.com/" + ], + total_top_ads_links: 1, + bottom_ads_links: [], + total_bottom_ads_links: 0 + } = result + end + + defp response_fixtures(path) do + Path.join(["test/fixtures/search_results", path]) + |> File.read!() + end + + defp cleanup_html(html) do + html + |> String.chunk(:printable) + |> Enum.filter(&String.printable?/1) + |> Enum.join() + end +end diff --git a/test/google_crawler/search/keyword_test.exs b/test/google_crawler/search/keyword_test.exs index 7e4b4b9..b691b0a 100644 --- a/test/google_crawler/search/keyword_test.exs +++ b/test/google_crawler/search/keyword_test.exs @@ -40,4 +40,86 @@ defmodule Googlecrawler.Search.KeywordTest do assert %{status: ["is invalid"]} = errors_on(changeset) end end + + describe "update result changeset" do + test "raw_html_result is required" do + attrs = KeywordFactory.build_attrs(%{raw_html_result: ""}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{raw_html_result: ["can't be blank"]} = errors_on(changeset) + end + + test "total_results is required" do + attrs = KeywordFactory.build_attrs(%{total_results: ""}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{total_results: ["can't be blank"]} = errors_on(changeset) + end + + test "total_results is a number" do + attrs = KeywordFactory.build_attrs(%{total_results: "invalid"}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{total_results: ["is invalid"]} = errors_on(changeset) + end + + test "total_results is greater than or equal to 0" do + attrs = KeywordFactory.build_attrs(%{total_results: -1}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{total_results: ["must be greater than or equal to 0"]} = errors_on(changeset) + end + + test "total_ads_links is required" do + attrs = KeywordFactory.build_attrs(%{total_ads_links: ""}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{total_ads_links: ["can't be blank"]} = errors_on(changeset) + end + + test "total_ads_links is a number" do + attrs = KeywordFactory.build_attrs(%{total_ads_links: "invalid"}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{total_ads_links: ["is invalid"]} = errors_on(changeset) + end + + test "total_ads_links is greater than or equal to 0" do + attrs = KeywordFactory.build_attrs(%{total_ads_links: -1}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{total_ads_links: ["must be greater than or equal to 0"]} = errors_on(changeset) + end + + test "total_links is required" do + attrs = KeywordFactory.build_attrs(%{total_links: ""}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{total_links: ["can't be blank"]} = errors_on(changeset) + end + + test "total_links is a number" do + attrs = KeywordFactory.build_attrs(%{total_links: "invalid"}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{total_links: ["is invalid"]} = errors_on(changeset) + end + + test "total_links is greater than or equal to 0" do + attrs = KeywordFactory.build_attrs(%{total_links: -1}) + changeset = Keyword.update_result_changeset(%Keyword{}, attrs) + + refute changeset.valid? + assert %{total_links: ["must be greater than or equal to 0"]} = errors_on(changeset) + end + end end diff --git a/test/google_crawler/search/link_test.exs b/test/google_crawler/search/link_test.exs new file mode 100644 index 0000000..aa4458e --- /dev/null +++ b/test/google_crawler/search/link_test.exs @@ -0,0 +1,41 @@ +defmodule Googlecrawler.Search.LinkTest do + use GoogleCrawler.DataCase + + alias GoogleCrawler.LinkFactory + alias GoogleCrawler.Search.Link + + describe "changeset" do + test "url is required" do + attrs = LinkFactory.build_attrs(%{url: ""}) + changeset = Link.changeset(%Link{}, attrs) + + refute changeset.valid? + assert %{url: ["can't be blank"]} = errors_on(changeset) + end + + test "is_ads is required" do + attrs = LinkFactory.build_attrs(%{is_ads: nil}) + changeset = Link.changeset(%Link{}, attrs) + + refute changeset.valid? + assert %{is_ads: ["can't be blank"]} = errors_on(changeset) + end + + test "ads position is required if the link is ads" do + attrs = LinkFactory.build_attrs(%{is_ads: true, ads_position: nil}) + changeset = Link.changeset(%Link{}, attrs) + + refute changeset.valid? + assert %{ads_position: ["can't be blank"]} = errors_on(changeset) + end + + test "ads position is valid" do + attrs = LinkFactory.build_attrs(%{is_ads: true, ads_position: :left}) + changeset = Link.changeset(%Link{}, attrs) + + refute changeset.valid? + # TODO: Recheck the validations + assert %{ads_position: ["is invalid"]} = errors_on(changeset) + end + end +end diff --git a/test/google_crawler/search/search_keyword_worker_test.exs b/test/google_crawler/search/search_keyword_worker_test.exs index db0fcd8..18ccbaa 100644 --- a/test/google_crawler/search/search_keyword_worker_test.exs +++ b/test/google_crawler/search/search_keyword_worker_test.exs @@ -15,7 +15,9 @@ defmodule GoogleCrawler.Search.SearchKeywordWorkerTest do assert %{^task_ref => {keyword, 0}} = SearchKeywordWorker.get_state() assert Search.get_keyword(keyword.id).status == :in_progress - :timer.sleep(1000) + # Find a way to test without sleep 😔 + :timer.sleep(1500) + assert SearchKeywordWorker.get_state() == %{} end @@ -25,7 +27,7 @@ defmodule GoogleCrawler.Search.SearchKeywordWorkerTest do SearchKeywordWorker.search(keyword.id) # Find a way to test without sleep 😔 - :timer.sleep(1000) + :timer.sleep(1500) keyword = Search.get_keyword(keyword.id) assert keyword.status == :completed @@ -38,10 +40,9 @@ defmodule GoogleCrawler.Search.SearchKeywordWorkerTest do keyword = KeywordFactory.create(%{keyword: "error"}) task = SearchKeywordWorker.search(keyword.id) - task_ref = task.ref # Find a way to test without sleep 😔 - :timer.sleep(1000) + :timer.sleep(1500) assert Search.get_keyword(keyword.id).status == :failed assert SearchKeywordWorker.get_state() == %{} diff --git a/test/google_crawler/search_test.exs b/test/google_crawler/search_test.exs index 52c9b49..cc337ff 100644 --- a/test/google_crawler/search_test.exs +++ b/test/google_crawler/search_test.exs @@ -3,7 +3,9 @@ defmodule GoogleCrawler.SearchTest do alias GoogleCrawler.Search alias GoogleCrawler.Search.Keyword + alias GoogleCrawler.Google.ScraperResult alias GoogleCrawler.KeywordFactory + alias GoogleCrawler.ScraperResultFactory alias GoogleCrawler.UserFactory describe "keywords" do @@ -72,6 +74,48 @@ defmodule GoogleCrawler.SearchTest do assert {:error, %Ecto.Changeset{}} = Search.update_keyword(keyword, keyword_attrs) assert Repo.get_by(Keyword, keyword: keyword.keyword) != nil end + + test "update_keyword_result_from_scraper/2 updates the keyword results and associates the keyword links" do + keyword = KeywordFactory.create() + + scraper_result = + struct( + ScraperResult, + ScraperResultFactory.build_attrs( + total_results: 50_000, + total_links: 10, + total_top_ads_links: 3, + total_bottom_ads_links: 1 + ) + ) + + Search.update_keyword_result_from_scraper(keyword, scraper_result) + + # Keyword result summary is updated + assert %{ + total_results: 50_000, + total_links: 10, + total_ads_links: 4 + } = Search.get_keyword(keyword.id) + + # Top Ads links is inserted + top_ads_query = [is_ads: true, ads_position: :top] + top_ads_links = get_link_urls(Search.list_keyword_links(keyword, top_ads_query)) + assert 3 = length(top_ads_links) + assert scraper_result.top_ads_links == top_ads_links + + # Bottom Ads links is inserted + bottom_ads_query = [is_ads: true, ads_position: :bottom] + bottom_ads_links = get_link_urls(Search.list_keyword_links(keyword, bottom_ads_query)) + assert 1 = length(bottom_ads_links) + assert scraper_result.bottom_ads_links == bottom_ads_links + + # Non-Ads links is inserted + non_ads_query = [is_ads: false] + links = get_link_urls(Search.list_keyword_links(keyword, non_ads_query)) + assert 10 = length(links) + assert scraper_result.links == links + end end describe "keyword file" do @@ -94,4 +138,8 @@ defmodule GoogleCrawler.SearchTest do end end end + + defp get_link_urls(links) do + Enum.map(links, &Map.get(&1, :url)) + end end diff --git a/test/support/mocks/google_api_client.ex b/test/support/mocks/google_api_client.ex index bf50d94..ef0285a 100644 --- a/test/support/mocks/google_api_client.ex +++ b/test/support/mocks/google_api_client.ex @@ -6,11 +6,11 @@ defmodule GoogleCrawler.Google.MockApiClient do end def search(_keyword) do - {:ok, response_fixtures('search_result.html')} + {:ok, response_fixtures('hotels.html')} end defp response_fixtures(path) do - Path.join(["test/fixtures", path]) + Path.join(["test/fixtures/search_results", path]) |> File.read!() end end