@@ -9,17 +9,40 @@ const FuzzyPartialUrlPatterns_1 = require("../pattern/FuzzyPartialUrlPatterns");
9
9
const BasePatterns_1 = require ( "../pattern/BasePatterns" ) ;
10
10
const ProtocolPatterns_1 = require ( "../pattern/ProtocolPatterns" ) ;
11
11
const DomainPatterns_1 = require ( "../pattern/DomainPatterns" ) ;
12
+ const valid_1 = __importDefault ( require ( "../valid" ) ) ;
12
13
exports . UrlNormalizer = {
13
- modifiedUrl : null ,
14
+ sacrificedUrl : null ,
15
+ currentStep : 0 ,
16
+ /**
17
+ * Initializes the UrlNormalizer with a given URL.
18
+ * @param url - The URL to normalize.
19
+ */
20
+ initializeSacrificedUrl ( url ) {
21
+ this . sacrificedUrl = util_1 . default . Text . removeAllSpaces ( valid_1 . default . validateAndTrimString ( url ) ) ;
22
+ if ( ! this . sacrificedUrl ) {
23
+ throw new Error ( "modifiedUrl cannot be null or empty" ) ;
24
+ }
25
+ this . currentStep = 1 ;
26
+ } ,
27
+ /**
28
+ * Check if the required previous step is completed.
29
+ * @param requiredStep - The step that should have been completed.
30
+ */
31
+ ensureStepCompleted ( requiredStep ) {
32
+ if ( this . currentStep != requiredStep ) {
33
+ throw new Error ( `Step ${ requiredStep } must be completed before this step ${ this . currentStep } ` ) ;
34
+ }
35
+ } ,
14
36
extractAndNormalizeProtocolFromSpacesRemovedUrl ( ) {
15
- if ( this . modifiedUrl == undefined ) {
16
- throw new Error ( "modifiedUrl cannot be null" ) ;
37
+ this . ensureStepCompleted ( 1 ) ;
38
+ if ( ! this . sacrificedUrl ) {
39
+ throw new Error ( "modifiedUrl cannot be null or empty" ) ;
17
40
}
18
41
let protocol = null ;
19
42
let rx = new RegExp ( '^(' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . getFuzzyProtocolsRxStr + '|' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . fuzzierProtocol + ')' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . fuzzierProtocolDomainDelimiter ) ;
20
43
let match ;
21
44
let isMatched = false ;
22
- while ( ( match = rx . exec ( this . modifiedUrl ) ) !== null ) {
45
+ while ( ( match = rx . exec ( this . sacrificedUrl ) ) !== null ) {
23
46
if ( match && match [ 1 ] ) {
24
47
isMatched = true ;
25
48
if ( match [ 1 ] === 'localhost' ) {
@@ -37,11 +60,13 @@ exports.UrlNormalizer = {
37
60
break ;
38
61
}
39
62
}
40
- this . modifiedUrl = this . modifiedUrl . replace ( rx , '' ) ;
63
+ this . sacrificedUrl = this . sacrificedUrl . replace ( rx , '' ) ;
64
+ this . currentStep = 2 ;
41
65
return protocol ;
42
66
} ,
43
67
extractAndNormalizeDomainFromProtocolRemovedUrl ( ) {
44
- if ( this . modifiedUrl == undefined ) {
68
+ this . ensureStepCompleted ( 2 ) ;
69
+ if ( this . sacrificedUrl == undefined ) {
45
70
throw new Error ( "modifiedUrl cannot be null" ) ;
46
71
}
47
72
let result = {
@@ -51,7 +76,7 @@ exports.UrlNormalizer = {
51
76
let rx1 = new RegExp ( '(' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . getFuzzyDomainBody + '.*?)(' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . optionalFuzzyPort +
52
77
FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . optionalFuzzyUrlParams + ')$' , 'gi' ) ;
53
78
let match1 ;
54
- while ( ( match1 = rx1 . exec ( this . modifiedUrl ) ) !== null ) {
79
+ while ( ( match1 = rx1 . exec ( this . sacrificedUrl ) ) !== null ) {
55
80
// remaining full url
56
81
let domain_temp = match1 [ 0 ] ;
57
82
// domain
@@ -141,46 +166,49 @@ exports.UrlNormalizer = {
141
166
else {
142
167
result . domain = domain_temp2 ;
143
168
}
144
- this . modifiedUrl = domain_temp3 ;
169
+ this . sacrificedUrl = domain_temp3 ;
145
170
}
146
171
//console.log("before : " + this.modifiedUrl)
147
172
// This sort of characters should NOT be located at the start.
148
- this . modifiedUrl = this . modifiedUrl . replace ( new RegExp ( '^(?:' + BasePatterns_1 . BasePatterns . twoBytesNum + '|' + BasePatterns_1 . BasePatterns . langChar + ')+' , 'i' ) , '' ) ;
149
- //console.log("after : " + this.modifiedUrl)
173
+ this . sacrificedUrl = this . sacrificedUrl . replace ( new RegExp ( '^(?:' + BasePatterns_1 . BasePatterns . twoBytesNum + '|' + BasePatterns_1 . BasePatterns . langChar + ')+' , 'i' ) , '' ) ;
174
+ this . currentStep = 3 ;
150
175
return result ;
151
176
} ,
152
177
extractAndNormalizePortFromDomainRemovedUrl ( ) {
178
+ this . ensureStepCompleted ( 3 ) ;
153
179
let port = null ;
154
180
let rx = new RegExp ( '^' + FuzzyPartialUrlPatterns_1 . FuzzyPartialUrlPatterns . mandatoryFuzzyPort , 'gi' ) ;
155
181
let match ;
156
- if ( this . modifiedUrl == undefined ) {
182
+ if ( this . sacrificedUrl == undefined ) {
157
183
throw new Error ( "modifiedUrl cannot be null" ) ;
158
184
}
159
- while ( ( match = rx . exec ( this . modifiedUrl ) ) !== null ) {
185
+ while ( ( match = rx . exec ( this . sacrificedUrl ) ) !== null ) {
160
186
port = match [ 0 ] . replace ( / ^ \D + / g, '' ) ;
161
- if ( this . modifiedUrl != undefined ) {
162
- this . modifiedUrl = this . modifiedUrl . replace ( rx , '' ) ;
187
+ if ( this . sacrificedUrl != undefined ) {
188
+ this . sacrificedUrl = this . sacrificedUrl . replace ( rx , '' ) ;
163
189
}
164
190
}
191
+ this . currentStep = 4 ;
165
192
return port ;
166
193
} ,
167
- finalizeNormalization ( protocol , port , domain ) {
168
- if ( this . modifiedUrl == undefined ) {
194
+ extractNormalizedUrl ( protocol , port , domain ) {
195
+ this . ensureStepCompleted ( 4 ) ;
196
+ if ( this . sacrificedUrl == undefined ) {
169
197
throw new Error ( "modifiedUrl cannot be null" ) ;
170
198
}
171
199
/* Now, only the end part of a domain is left */
172
200
/* Consecutive param delimiters should be replaced into one */
173
- this . modifiedUrl = this . modifiedUrl . replace ( / [ # ] { 2 , } / gi, '#' ) ;
174
- this . modifiedUrl = this . modifiedUrl . replace ( / [ / ] { 2 , } / gi, '/' ) ;
175
- this . modifiedUrl = this . modifiedUrl . replace ( / ( .* ?) [ ? ] { 2 , } ( [ ^ / ] * ?(?: = | $ ) ) ( .* ) / i, function ( match , $1 , $2 , $3 ) {
201
+ this . sacrificedUrl = this . sacrificedUrl . replace ( / [ # ] { 2 , } / gi, '#' ) ;
202
+ this . sacrificedUrl = this . sacrificedUrl . replace ( / [ / ] { 2 , } / gi, '/' ) ;
203
+ this . sacrificedUrl = this . sacrificedUrl . replace ( / ( .* ?) [ ? ] { 2 , } ( [ ^ / ] * ?(?: = | $ ) ) ( .* ) / i, function ( match , $1 , $2 , $3 ) {
176
204
//console.log(modified_url + ' a :' + $1 + '?' + $2 + $3);
177
205
return $1 + '?' + $2 + $3 ;
178
206
} ) ;
179
207
/* 'modified_url' must start with '/,?,#' */
180
208
let rx_modified_url = new RegExp ( '(?:\\/|\\?|\\#)' , 'i' ) ;
181
209
let match_modified_url ;
182
- if ( ( match_modified_url = rx_modified_url . exec ( this . modifiedUrl ) ) !== null ) {
183
- this . modifiedUrl = this . modifiedUrl . replace ( new RegExp ( '^.*?(' + util_1 . default . Text . escapeRegex ( match_modified_url [ 0 ] ) + '.*)$' , 'i' ) , function ( match , $1 ) {
210
+ if ( ( match_modified_url = rx_modified_url . exec ( this . sacrificedUrl ) ) !== null ) {
211
+ this . sacrificedUrl = this . sacrificedUrl . replace ( new RegExp ( '^.*?(' + util_1 . default . Text . escapeRegex ( match_modified_url [ 0 ] ) + '.*)$' , 'i' ) , function ( match , $1 ) {
184
212
return $1 ;
185
213
} ) ;
186
214
}
@@ -202,42 +230,45 @@ exports.UrlNormalizer = {
202
230
if ( ! onlyDomain_str ) {
203
231
onlyDomain_str = '' ;
204
232
}
205
- return protocol_str + onlyDomain_str + port_str + this . modifiedUrl ;
233
+ this . currentStep = 5 ;
234
+ return protocol_str + onlyDomain_str + port_str + this . sacrificedUrl ;
206
235
} ,
207
236
extractAndNormalizeUriParamsFromPortRemovedUrl ( ) {
208
- if ( this . modifiedUrl == undefined ) {
237
+ this . ensureStepCompleted ( 5 ) ;
238
+ if ( this . sacrificedUrl == undefined ) {
209
239
throw new Error ( "modifiedUrl cannot be null" ) ;
210
240
}
211
241
let result = {
212
242
uri : null ,
213
243
params : null
214
244
} ;
215
- if ( ! this . modifiedUrl || this . modifiedUrl . trim ( ) === '' ) {
245
+ if ( ! this . sacrificedUrl || this . sacrificedUrl . trim ( ) === '' ) {
216
246
result . params = null ;
217
247
result . uri = null ;
218
248
}
219
249
else {
220
250
// PARAMS
221
251
let rx3 = new RegExp ( '\\?(?:.)*$' , 'gi' ) ;
222
252
let match3 ;
223
- while ( ( match3 = rx3 . exec ( this . modifiedUrl ) ) !== null ) {
253
+ while ( ( match3 = rx3 . exec ( this . sacrificedUrl ) ) !== null ) {
224
254
result . params = match3 [ 0 ] ;
225
255
}
226
- this . modifiedUrl = this . modifiedUrl . replace ( rx3 , '' ) ;
256
+ this . sacrificedUrl = this . sacrificedUrl . replace ( rx3 , '' ) ;
227
257
if ( result . params === "?" ) {
228
258
result . params = null ;
229
259
}
230
260
// URI
231
261
let rx4 = new RegExp ( '[#/](?:.)*$' , 'gi' ) ;
232
262
let match4 ;
233
- while ( ( match4 = rx4 . exec ( this . modifiedUrl ) ) !== null ) {
263
+ while ( ( match4 = rx4 . exec ( this . sacrificedUrl ) ) !== null ) {
234
264
result . uri = match4 [ 0 ] ;
235
265
}
236
- this . modifiedUrl = this . modifiedUrl . replace ( rx4 , '' ) ;
266
+ this . sacrificedUrl = this . sacrificedUrl . replace ( rx4 , '' ) ;
237
267
if ( result . uri === "/" ) {
238
268
result . uri = null ;
239
269
}
240
270
}
271
+ this . currentStep = 6 ;
241
272
return result ;
242
273
}
243
274
} ;
0 commit comments