@@ -7,69 +7,71 @@ module PlainTextConversion
7
7
extend self
8
8
9
9
# Converts +node+ to plain text.
#
# Builds the plain-text content tree for +node+ via
# +node_to_plain_text_content_tree+, reads that tree's +content+, and passes
# it through +remove_trailing_newlines+.
def node_to_plain_text(node)
  remove_trailing_newlines(node_to_plain_text_content_tree(node).content)
end
12
12
13
13
private
14
- def plain_text_for_node ( node , index = 0 )
14
# Returns a copy of +node+ whose content has been rewritten to plain text.
#
# The copy (+node.dup+) is handed to BottomUpReplacer.replace_content, which
# assigns each visited node the value of +plain_text_for_node+ for that node.
# Working on a duplicate keeps the caller's +node+ from being reassigned.
def node_to_plain_text_content_tree(node)
  BottomUpReplacer.replace_content(node.dup) { |n| plain_text_for_node(n) }
end
17
+
18
# Returns the plain text for a single +node+.
#
# Looks up the handler name with +plain_text_method_for_node+. When this
# object responds to it (private methods included), the handler is called
# with +node+; otherwise the node falls back to +plain_text_for_node_children+.
def plain_text_for_node(node)
  # Computed once and reused for both the lookup and the dispatch.
  handler = plain_text_method_for_node(node)

  if respond_to?(handler, true)
    send(handler, node)
  else
    plain_text_for_node_children(node)
  end
end
21
25
22
26
# Joins the +content+ of every direct child of +node+ into one string.
#
# Returns an empty string when +node+ has no children.
def plain_text_for_node_children(node)
  node.children.map(&:content).join
end
26
29
27
- texts << plain_text_for_node ( child , index )
28
- end
29
- texts . join
30
# Builds the name of the handler method for +node+ from +node.name+,
# e.g. a node named "br" maps to +:plain_text_for_br_node+.
def plain_text_method_for_node(node)
  :"plain_text_for_#{node.name}_node"
end
31
33
32
- def skippable? ( node )
33
- node . name == "script" || node . name == "style "
34
# Handler for elements that contribute no text to the output.
#
# +node+ is accepted only so the method matches the handler signature.
# NOTE(review): returns an empty string so an unsupported element adds
# nothing when its parent joins child content — confirm this is the intended
# literal.
def plain_text_for_unsupported_node(node)
  ""
end
35
37
36
- def plain_text_method_for_node ( node )
37
- :"plain_text_for_#{ node . name } _node"
38
# Register +plain_text_for_script_node+ and +plain_text_for_style_node+ as
# aliases of +plain_text_for_unsupported_node+.
%i[script style].each do |element|
  alias_method :"plain_text_for_#{element}_node", :plain_text_for_unsupported_node
end
39
41
40
# Renders +node+ as a block: the joined content of its children with
# trailing newlines removed, followed by a blank line ("\n\n").
def plain_text_for_block(node)
  "#{remove_trailing_newlines(plain_text_for_node_children(node))}\n\n"
end
43
45
44
46
# Register +plain_text_for_h1_node+ and +plain_text_for_p_node+ as aliases of
# +plain_text_for_block+.
%i[h1 p].each do |element|
  alias_method :"plain_text_for_#{element}_node", :plain_text_for_block
end
47
49
48
# Renders a list +node+: its block text (+plain_text_for_block+) passed
# through +break_if_nested_list+, interpolated into a new string.
def plain_text_for_list(node)
  "#{break_if_nested_list(node, plain_text_for_block(node))}"
end
51
53
52
54
# Register +plain_text_for_ul_node+ and +plain_text_for_ol_node+ as aliases of
# +plain_text_for_list+.
%i[ul ol].each do |element|
  alias_method :"plain_text_for_#{element}_node", :plain_text_for_list
end
55
57
56
# Handler for +br+ nodes: always a single newline.
#
# +node+ is accepted only so the method matches the handler signature.
def plain_text_for_br_node(node)
  "\n"
end
59
61
60
# Handler for text nodes: +node.text+ passed through
# +remove_trailing_newlines+.
def plain_text_for_text_node(node)
  remove_trailing_newlines(node.text)
end
63
65
64
# Handler for +div+ nodes: the joined content of the children with trailing
# newlines removed, followed by exactly one newline.
def plain_text_for_div_node(node)
  "#{remove_trailing_newlines(plain_text_for_node_children(node))}\n"
end
67
69
68
# Handler for +figcaption+ nodes: the joined content of the children with
# trailing newlines removed, wrapped in square brackets.
def plain_text_for_figcaption_node(node)
  "[#{remove_trailing_newlines(plain_text_for_node_children(node))}]"
end
71
73
72
- def plain_text_for_blockquote_node ( node , index )
74
+ def plain_text_for_blockquote_node ( node )
73
75
text = plain_text_for_block ( node )
74
76
return "“”" if text . blank?
75
77
@@ -79,8 +81,8 @@ def plain_text_for_blockquote_node(node, index)
79
81
text
80
82
end
81
83
82
- def plain_text_for_li_node ( node , index )
83
- bullet = bullet_for_li_node ( node , index )
84
+ def plain_text_for_li_node ( node )
85
+ bullet = bullet_for_li_node ( node )
84
86
text = remove_trailing_newlines ( plain_text_for_node_children ( node ) )
85
87
indentation = indentation_for_li_node ( node )
86
88
@@ -91,8 +93,9 @@ def remove_trailing_newlines(text)
91
93
text . chomp ( "" )
92
94
end
93
95
94
- def bullet_for_li_node ( node , index )
96
+ def bullet_for_li_node ( node )
95
97
if list_node_name_for_li_node ( node ) == "ol"
98
+ index = node . parent . children . index ( node )
96
99
"#{ index + 1 } ."
97
100
else
98
101
"•"
@@ -121,5 +124,38 @@ def break_if_nested_list(node, text)
121
124
text
122
125
end
123
126
end
127
+
128
# Rewrites the +content+ of every node in a tree, visiting descendants
# before the nodes that contain them.
#
# Nodes only need to respond to +children+ and +content=+.
class BottomUpReplacer
  # Convenience wrapper: builds a replacer for +node+ and runs
  # #replace_content with the given block. Returns +node+.
  def self.replace_content(node, &block)
    new(node).replace_content(&block)
  end

  # +node+ is the root of the tree to rewrite; it is modified in place.
  def initialize(node)
    @node = node
  end

  # Calls the block once per node and assigns the result to that node's
  # +content+. Every descendant is handled before its ancestors, so the block
  # can read the already-replaced content of a node's children.
  #
  # Returns the root node. Raises ArgumentError when no block is given.
  def replace_content(&block)
    raise ArgumentError, "a block is required" unless block

    traverse_bottom_up(@node) { |current| current.content = block.call(current) }
    @node
  end

  private

  # Yields every node under +root+ (including +root+) so that each node comes
  # after all of its descendants. Uses explicit stacks rather than recursion.
  def traverse_bottom_up(root)
    pending = [root]
    visited = []

    # Record each node before its children; reversing this order afterwards
    # puts children ahead of their parents.
    until pending.empty?
      current = pending.pop
      visited.push(current)
      pending.concat(current.children)
    end

    visited.reverse_each { |current| yield current }
  end
end
124
160
end
125
161
end
0 commit comments