# oriflame_parser.rb
require 'rubygems'
require 'nokogiri'
require 'open-uri'
require 'fileutils'
# Scrapes Oriflame product pages and stores each product's text and images on disk.
#
# Product codes are read from +codes_file+; every line may hold any number of
# whitespace-separated codes. For each code the product page is downloaded
# once, its text fields are written to "<data_dir>/<title>/data.txt", and the
# images referenced by the page (colour swatches, main figure, variants) are
# saved into the same directory. Missing directories are created as needed.
#
# @param codes_file [String] path of the file listing product codes
# @param data_dir [String] directory under which one folder per product is created
# @return [Array<Array<String>>] the parsed codes, one inner array per input line
# @raise [Errno::ENOENT] if +codes_file+ does not exist
# @raise [OpenURI::HTTPError] if a page or image cannot be downloaded
def oriflame_parser(codes_file: 'codes.txt', data_dir: 'products')
  codes = File.foreach(codes_file).map(&:split)
  codes.flatten.each { |product_code| scrape_oriflame_product(product_code, data_dir) }
  codes
end

# Downloads one product page and saves its details plus every referenced image.
def scrape_oriflame_product(product_code, data_dir)
  url_page = "http://ru.oriflame.com/products/product?code=#{product_code}"
  puts "Fetching #{url_page}..."
  # URI.open (not Kernel#open) is required on Ruby 3+; the block form closes
  # the response body as soon as the document has been parsed.
  page = URI.open(url_page) { |io| Nokogiri::HTML(io) }

  title = page.css('h1.name').text
  if title.strip.empty?
    # Without a title there is no product folder to write to; writing anyway
    # would drop data.txt straight into data_dir and overwrite it per code.
    warn "No product title found at #{url_page}, skipping"
    return
  end

  product_dir = "#{data_dir}/#{title}"
  FileUtils.mkdir_p(product_dir) # also creates data_dir itself on first use
  write_oriflame_details(page, title, product_dir)

  page.css('li.ui-color-box').xpath('.//img/@src').each do |src|
    save_oriflame_image(url_page, src, product_dir)
  end
  page.xpath("//img[@class='image figure']/@src").each do |src|
    save_oriflame_image(url_page, src, product_dir)
  end
  # NOTE(review): data-srcset is treated as a single URL; if the site lists
  # several candidates in one attribute URI.join will raise — confirm.
  page.css('ul.variants').xpath('.//li/@data-srcset').each do |src|
    puts "Fetching images..."
    save_oriflame_image(url_page, src, product_dir)
    puts "File saved to #{product_dir}"
  end
end

# Writes the textual product fields of +page+ to "<product_dir>/data.txt".
def write_oriflame_details(page, title, product_dir)
  volume = page.css('dl.size').text
  bb = page.css('dl.points').text
  code = page.css('dl.code').text
  description = page.css('div.description').text
  ingredients = page.css('div.ingredients').text
  htu = page.css('div.how-to-use').text
  price = page.css('span.mainCurrency').text

  # Block form guarantees the file is flushed and closed.
  File.open("#{product_dir}/data.txt", 'w') do |file|
    file.puts(title, volume, bb, code, "\n", description, "\n", ingredients, "\n", htu, "\n", price)
  end
end

# Downloads the image at +src+ (resolved against +page_url+) into +product_dir+.
def save_oriflame_image(page_url, src, product_dir)
  uri = URI.join(page_url, src.to_s).to_s # make absolute uri
  # Read the whole body first so a failed download leaves no empty file behind.
  data = URI.open(uri, &:read)
  File.binwrite("#{product_dir}/#{File.basename(uri)}", data)
end
# Script entry point: runs the scraper whenever this file is executed or loaded.
oriflame_parser