@@ -8,7 +8,7 @@ def self.require_deps
8
8
rubygems
9
9
fileutils
10
10
safe_yaml
11
- hpricot
11
+ nokogiri
12
12
time
13
13
open-uri
14
14
open_uri_redirections
@@ -22,16 +22,16 @@ def self.specify_options(c)
22
22
end
23
23
24
24
# Will modify post DOM tree
25
- def self . download_images ( title , post_hpricot , assets_folder )
26
- images = ( post_hpricot / "img" )
25
+ def self . download_images ( title , post_doc , assets_folder )
26
+ images = post_doc . css ( "img" )
27
27
return if images . empty?
28
28
29
- Jekyll . logger . info "Downloading images for " , title
29
+ Jekyll . logger . info "Downloading:" , " images for #{ title } "
30
30
images . each do |i |
31
31
uri = URI ::DEFAULT_PARSER . escape ( i [ "src" ] )
32
32
33
33
dst = File . join ( assets_folder , File . basename ( uri ) )
34
- i [ "src" ] = File . join ( "{{ site.baseurl }}" , dst )
34
+ i [ "src" ] = File . join ( "{{site.baseurl}}" , dst )
35
35
Jekyll . logger . info uri
36
36
if File . exist? ( dst )
37
37
Jekyll . logger . info "Already in cache. Clean assets folder if you want a redownload."
@@ -54,15 +54,18 @@ def self.download_images(title, post_hpricot, assets_folder)
54
54
55
55
class Item
56
56
def initialize ( node )
57
+ raise "Node is nil" if node . nil?
58
+
57
59
@node = node
58
60
end
59
61
60
62
def text_for ( path )
61
- @node . at ( path ) . inner_text
63
+ subnode = @node . at_xpath ( "./#{ path } " ) || @node . at ( path ) || @node . children . find { |child | child . name == path }
64
+ subnode . text
62
65
end
63
66
64
67
def title
65
- @title ||= text_for ( : title) . strip
68
+ @title ||= text_for ( " title" ) . strip
66
69
end
67
70
68
71
def permalink_title
@@ -76,12 +79,10 @@ def permalink_title
76
79
end
77
80
78
81
def permalink
79
- # Hpricot thinks "link" is a self closing tag so it puts the text of the link after the tag
80
- # but sometimes it works right! I think it's the xml declaration
81
82
@permalink ||= begin
82
83
uri = text_for ( "link" )
83
- uri = @node . at ( "link" ) . following [ 0 ] if uri . empty?
84
- URI ( uri . to_s ) . path
84
+ uri = @node . at ( "link" ) . next_sibling . text if uri . empty?
85
+ URI ( uri . to_s . strip ) . path
85
86
end
86
87
end
87
88
@@ -127,12 +128,8 @@ def published?
127
128
128
129
def excerpt
129
130
@excerpt ||= begin
130
- text = Hpricot ( text_for ( "excerpt:encoded" ) ) . inner_text
131
- if text . empty?
132
- nil
133
- else
134
- text
135
- end
131
+ text = Nokogiri ::HTML ( text_for ( "excerpt:encoded" ) ) . text
132
+ text . empty? ? nil : text
136
133
end
137
134
end
138
135
end
@@ -144,29 +141,32 @@ def self.process(options)
144
141
FileUtils . mkdir_p ( assets_folder )
145
142
146
143
import_count = Hash . new ( 0 )
147
- doc = Hpricot ::XML ( File . read ( source ) )
144
+ doc = Nokogiri ::XML ( File . read ( source ) )
148
145
# Fetch authors data from header
149
146
authors = Hash [
150
- ( doc / :channel / "wp:author" ) . map do |author |
151
- [ author . at ( "wp:author_login" ) . inner_text . strip , {
152
- "login" => author . at ( "wp:author_login" ) . inner_text . strip ,
153
- "email" => author . at ( "wp:author_email" ) . inner_text ,
154
- "display_name" => author . at ( "wp:author_display_name" ) . inner_text ,
155
- "first_name" => author . at ( "wp:author_first_name" ) . inner_text ,
156
- "last_name" => author . at ( "wp:author_last_name" ) . inner_text ,
157
- } , ]
147
+ doc . xpath ( "//channel/wp:author" ) . map do |author |
148
+ [
149
+ author . xpath ( "./wp:author_login" ) . text . strip ,
150
+ {
151
+ "login" => author . xpath ( "./wp:author_login" ) . text . strip ,
152
+ "email" => author . xpath ( "./wp:author_email" ) . text ,
153
+ "display_name" => author . xpath ( "./wp:author_display_name" ) . text ,
154
+ "first_name" => author . xpath ( "./wp:author_first_name" ) . text ,
155
+ "last_name" => author . xpath ( "./wp:author_last_name" ) . text ,
156
+ } ,
157
+ ]
158
158
end
159
159
] rescue { }
160
160
161
- ( doc / : channel / : item) . each do |node |
161
+ doc . css ( " channel > item" ) . each do |node |
162
162
item = Item . new ( node )
163
- categories = node . search ( 'category[@ domain="category"]' ) . map ( &:inner_text ) . reject { |c | c == "Uncategorized" } . uniq
164
- tags = node . search ( 'category[@ domain="post_tag"]' ) . map ( &:inner_text ) . uniq
163
+ categories = node . css ( 'category[domain="category"]' ) . map ( &:text ) . reject { |c | c == "Uncategorized" } . uniq
164
+ tags = node . css ( 'category[domain="post_tag"]' ) . map ( &:text ) . uniq
165
165
166
166
metas = { }
167
- node . search ( " wp:postmeta") . each do |meta |
168
- key = meta . at ( " wp:meta_key") . inner_text
169
- value = meta . at ( " wp:meta_value") . inner_text
167
+ node . xpath ( "./ wp:postmeta") . each do |meta |
168
+ key = meta . at_xpath ( "./ wp:meta_key") . text
169
+ value = meta . at_xpath ( "./ wp:meta_value") . text
170
170
metas [ key ] = value
171
171
end
172
172
@@ -189,7 +189,7 @@ def self.process(options)
189
189
}
190
190
191
191
begin
192
- content = Hpricot ( item . text_for ( "content:encoded" ) )
192
+ content = Nokogiri :: HTML ( item . text_for ( "content:encoded" ) )
193
193
header [ "excerpt" ] = item . excerpt if item . excerpt
194
194
195
195
if fetch
@@ -221,7 +221,7 @@ def self.process(options)
221
221
end
222
222
223
223
import_count . each do |key , value |
224
- Jekyll . logger . info "Imported #{ value } #{ key } s "
224
+ Jekyll . logger . info "Imported" , " #{ value } #{ Util . pluralize ( key , value ) } "
225
225
end
226
226
end
227
227
0 commit comments