115 lines
3.0 KiB
Elixir
115 lines
3.0 KiB
Elixir
|
defmodule AmazonHistory.Scraper do
|
||
|
# Root of the Amazon UK site; the login flow starts here and the order URL is built on it.
@base_url "https://www.amazon.co.uk"

# Order-history listing URL. It deliberately ends in "year-" so that the year
# (and then the paging offset) can be appended to it.
@order_url @base_url <>
             "/gp/your-account/order-history?opt=ab&digitalOrders=1&unifiedOrders=1&returnTo=&orderFilter=year-"

# NOTE: module attributes are evaluated when the module is compiled, so this is
# the UTC year of the build, not of the moment the scraper runs.
@end_year DateTime.utc_now().year

# Step added to the paging offset when moving to the next page of orders.
@orders_per_page 10
|
||
|
|
||
|
require Logger
|
||
|
use Hound.Helpers
|
||
|
|
||
|
@doc """
Logs in to Amazon and scrapes the order history year by year.

Starts a Hound browser session, signs in with `email` and `password`, then
walks every year from `start_year` up to the current UTC year and collects
the rows produced for each order page.

Returns a flat list of rows, each shaped
`[order_number, order_placed, name, total, url]`.

The browser session is always ended, even when login or scraping raises; the
exception is then re-raised to the caller.
"""
@spec fetch(String.t(), String.t(), integer()) :: [list()]
def fetch(email, password, start_year) do
  Hound.start_session()

  try do
    login(email, password)

    Logger.info("Logged in successfully. Scraping with start_year: #{start_year}")

    # Resolved at call time: a compile-time constant would freeze the last
    # scraped year to the year the module was built.
    end_year = DateTime.utc_now().year

    # flat_map concatenates each year's rows once instead of re-copying the
    # accumulator with `++` on every iteration.
    Enum.flat_map(start_year..end_year, fn year -> process_year(year, 0, []) end)
  after
    # Release the WebDriver session no matter how the scrape ended.
    Hound.end_session()
  end
end
|
||
|
|
||
|
# Signs in through the two-step Amazon form: open the account link from the
# home page, submit the e-mail address, then submit the password.
# Returns whatever the final click on the sign-in button returns.
defp login(email, password) do
  navigate_to(@base_url)

  account_link = find_element(:id, "nav-link-yourAccount")
  click(account_link)

  # Step 1: e-mail address, confirmed with the "continue" button.
  email_field = find_element(:id, "ap_email")
  fill_field(email_field, email)

  continue_button = find_element(:id, "continue")
  click(continue_button)

  # Step 2: password, confirmed with the sign-in button.
  password_field = find_element(:id, "ap_password")
  fill_field(password_field, password)

  submit_button = find_element(:id, "signInSubmit")
  click(submit_button)
end
|
||
|
|
||
|
# Loads the order-history page for `year` at paging offset `start_index`,
# collects the order elements on it and hands them to
# process_order_elements/4, which either recurses onto the next page or
# returns the accumulated `items`.
defp process_year(year, start_index, items) do
  page_url = order_url(year, start_index)
  navigate_to(page_url)

  orders_container = find_element(:id, "ordersContainer")
  order_elements = find_all_within_element(orders_container, :class, "order")

  process_order_elements(order_elements, year, start_index, items)
end
|
||
|
|
||
|
# Builds the order-history URL for `year`, starting at paging offset `start_index`.
defp order_url(year, start_index) do
  "#{@order_url}#{year}&startIndex=#{start_index}"
end
|
||
|
|
||
|
# Base case: a page with no order elements ends the paging for this year, so
# the rows gathered so far are returned.
defp process_order_elements([], _year, _start_index, items), do: items

# Converts every order element on the current page into rows, appends them to
# `items`, and continues with the next page of the same year.
defp process_order_elements(order_elements, year, start_index, items) do
  # flat_map builds the page's rows in one pass; appending to an accumulator
  # with `++` inside a reduce re-copies the accumulator for every order.
  new_items = Enum.flat_map(order_elements, &process_order_element/1)

  process_year(year, start_index + @orders_per_page, items ++ new_items)
end
|
||
|
|
||
|
# Turns one order element into a list of rows, one per link with class
# "a-link-normal" inside the order. The order-level fields are read once and
# repeated on every row; rows rejected by is_valid_row?/1 are dropped.
defp process_order_element(order_element) do
  order_placed = extract_order_placed(order_element)
  order_number = extract_order_number(order_element)
  total = extract_total(order_element)

  links = find_all_within_element(order_element, :class, "a-link-normal")

  rows =
    Enum.map(links, fn link ->
      to_row(link, order_placed, order_number, total)
    end)

  Enum.filter(rows, fn row -> is_valid_row?(row) end)
end
|
||
|
|
||
|
# Returns the text of the first span with class "a-color-secondary value"
# inside `order_element`, which this module treats as the order-placed value.
defp extract_order_placed(order_element) do
  # The leading "." anchors the XPath at `order_element`. An expression that
  # starts with "//" is evaluated from the document root even in an
  # element-scoped search, so it would return the first order's value for
  # every order on the page.
  order_element
  |> find_within_element(:xpath, ".//span[@class='a-color-secondary value']")
  |> inner_text()
end
|
||
|
|
||
|
# Returns the text of the third span with class "a-color-secondary value"
# inside `order_element`, which this module treats as the order number.
defp extract_order_number(order_element) do
  # The leading "." anchors the XPath at `order_element`. An expression that
  # starts with "//" is evaluated from the document root even in an
  # element-scoped search, so it would return the same span for every order
  # on the page.
  order_element
  |> find_within_element(:xpath, "(.//span[@class='a-color-secondary value'])[3]")
  |> inner_text()
end
|
||
|
|
||
|
# Looks up the element with class "a-color-price" inside the order (passing 1
# as the retry argument) and returns its text, or "" when the search reports
# an error.
defp extract_total(order_element) do
  search_result = search_within_element(order_element, :class, "a-color-price", 1)

  total_or_empty_string(search_result)
end
|
||
|
|
||
|
# Maps an element search result to text: the element's inner text when the
# search succeeded, the empty string when it failed.
defp total_or_empty_string({:ok, element}), do: inner_text(element)
defp total_or_empty_string({:error, _}), do: ""
|
||
|
|
||
|
# Builds one output row for a link element: the order-level fields combined
# with the link's text (item name) and its href (item URL).
defp to_row(item_element, order_placed, order_number, total) do
  [
    order_number,
    order_placed,
    inner_text(item_element),
    total,
    attribute_value(item_element, :href)
  ]
end
|
||
|
|
||
|
# A row is kept only when its name is not blank and its URL contains
# "gp/product"; any other link found inside an order is discarded.
defp is_valid_row?([_order_number, _order_placed, name, _total, url])
     when is_binary(name) and is_binary(url) do
  String.trim(name) != "" and String.contains?(url, "gp/product")
end

# A link without text or without an href (attribute_value/2 may return nil)
# cannot be a product row; reject it instead of crashing in the String functions.
defp is_valid_row?([_order_number, _order_placed, _name, _total, _url]), do: false
|
||
|
end
|