Skip to content

Commit 6684812

Browse files
Create findBrokenLinks.js
Broken Links in articles
1 parent fbab8cb commit 6684812

File tree

1 file changed

+157
-0
lines changed

1 file changed

+157
-0
lines changed

Find Broken Links/findBrokenLinks.js

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
// Find broken links in published knowledge articles.
//
// Reviews one fixed-size "window" of published kb_knowledge records (ordered
// by number, descending), extracts every http(s) href from the article body
// and validates it:
//   - links containing "sys_kb_id" are treated as links to other knowledge
//     articles and are flagged when the referenced article is not published;
//   - every other link is requested with a GET and flagged when the response
//     status is not 200 or the request itself throws.
// Each flagged link is stored through addBrokenLinkRecord(); links already
// stored for the same article (checkLinkAlreadyLogged()) are skipped.

// Choose start and end indexes
var indexOffset = 16;
var windowSize = 500;

var startIndex = windowSize * indexOffset;
// Inclusive index of the last article in this window (used in the log line).
var endIndex = startIndex + windowSize - 1;

// Loop-invariant helpers, created once instead of once per article.
var arrayUtil = new ArrayUtil();
var linkRegex = /href=(["'])http(.*?)\1/g;
// Captures the 32-character sys_id that follows "sys_kb_id=" or its
// URL-encoded form "sys_kb_id%3d". Deliberately not global so exec() is stateless.
var kbSysIdRegex = /sys_kb_id(?:=|%3d)([0-9a-f]{32})/i;

// Define a new GlideRecord object for the Knowledge Article table
var article = new GlideRecord('kb_knowledge');
// Add a query to find all published knowledge articles
article.addQuery('workflow_state', 'published');
article.orderByDesc("number");

// Apply indexes. The upper bound is passed as endIndex + 1 because
// chooseWindow() is understood to exclude its last row; passing the inclusive
// endIndex would silently drop one article per window. If the bound were
// inclusive after all, consecutive windows would merely overlap by one
// article, which the "already logged" check below makes harmless.
article.chooseWindow(startIndex, endIndex + 1);
// Execute the query to find the knowledge articles
article.query();

// Iterate through the knowledge articles
var invalidArticles = [];
while (article.next()) {

    // Get the article body. If empty, continue
    var body = article.getValue('text');
    if (!body) {
        continue;
    }

    // Obtain a list of all unique links found in the article
    var links = body.match(linkRegex);
    if (!links) {
        continue;
    }
    links = arrayUtil.unique(links);

    var articleNum = article.getValue('number');
    var articleSys = article.getUniqueValue();
    var articleOwnerSys = article.getValue("u_knowledge_owner");
    var articleOwner = article.getDisplayValue('u_knowledge_owner');
    var invalidLinks = [];

    // Validate each link
    links.forEach(function(rawMatch) {
        if (!rawMatch) {
            return;
        }

        // Strip the leading `href="` (6 characters) and the closing quote.
        var link = rawMatch.substring(6, rawMatch.length - 1);

        // Check if we've already recorded errors for this article. If so, continue
        if (checkLinkAlreadyLogged(articleSys, link)) {
            return;
        }

        var failure = (link.indexOf('sys_kb_id') !== -1) ?
            checkKnowledgeLink(link) :
            checkExternalLink(link);
        if (!failure) {
            return;
        }

        invalidLinks.push({
            "link": link,
            "reason": failure.reason
        });
        addBrokenLinkRecord(articleSys, articleOwnerSys, link, failure.reason, failure.httpStatus);
    });

    if (invalidLinks.length) {
        invalidArticles.push({
            number: articleNum,
            owner: articleOwner,
            links: invalidLinks
        });
    }
}

gs.info("Completed reviewing articles " + startIndex + " - " + endIndex);

/**
 * Checks a link that points at another knowledge article.
 *
 * @param {string} link - URL containing "sys_kb_id".
 * @returns {?{reason: string, httpStatus: null}} Failure details when the
 *     referenced article exists and is not published; null when the link is
 *     fine or no sys_id could be extracted from it.
 */
function checkKnowledgeLink(link) {
    var match = kbSysIdRegex.exec(link);
    if (!match) {
        // "sys_kb_id" is present but not followed by a sys_id: nothing to look up.
        return null;
    }

    // Check if the referenced knowledge article is unpublished
    var unpublished = new GlideRecord("kb_knowledge");
    unpublished.addQuery("sys_id", match[1]);
    unpublished.addQuery("workflow_state", "!=", "published");
    unpublished.query();
    if (!unpublished.next()) {
        return null;
    }

    var reason = "Contains unpublished knowledge article link";
    if (link.indexOf('sysparm_article') === -1) {
        reason += " (without KB Article Number)";
    }
    return {
        reason: reason,
        httpStatus: null
    };
}

/**
 * Checks a link to an external site by sending a GET request to it.
 *
 * @param {string} link - URL to request.
 * @returns {?{reason: string, httpStatus: ?number}} Failure details when the
 *     response status is not 200 or the request throws; null otherwise.
 */
function checkExternalLink(link) {
    try {
        var request = new sn_ws.RESTMessageV2();
        request.setEndpoint(link);
        request.setHttpMethod('GET');
        var httpStatus = request.execute().getStatusCode();

        // Loose comparison kept on purpose so a numeric-string status still matches.
        if (httpStatus != 200) {
            return {
                reason: "External link returns status code " + httpStatus,
                httpStatus: httpStatus
            };
        }
        return null;
    } catch (e) {
        // The request could not be sent at all; report the error text as the
        // reason so the link is recorded like any other failure.
        return {
            reason: String(e),
            httpStatus: null
        };
    }
}
121+
122+
// if (invalidArticles.length) {
123+
// var str = "Articles with invalid links: " + invalidArticles.length + "\n";
124+
125+
// for (var i = 0; i < invalidArticles.length; i++) {
126+
// str += "\nArticle: " + invalidArticles[i].number;
127+
// str += "\nOwner: " + invalidArticles[i].owner;
128+
129+
// for (var j = 0; j < invalidArticles[i].links.length; j++) {
130+
// str += "\n\tInvalid link " + (j + 1) + ":";
131+
// str += "\n\t\tLink: " + invalidArticles[i].links[j].link;
132+
// str += "\n\t\tReason: " + invalidArticles[i].links[j].reason;
133+
// }
134+
// }
135+
136+
// gs.info(str);
137+
// }
138+
139+
/**
 * Tells whether a broken-link record already exists for an article/link pair.
 *
 * @param {string} article - Value matched against the u_article field.
 * @param {string} link - Value matched against the u_link field.
 * @returns {boolean} True when at least one matching record exists in
 *     u_broken_knowledge_links.
 */
function checkLinkAlreadyLogged(article, link) {
    var gr = new GlideRecord("u_broken_knowledge_links");
    gr.addQuery("u_article", article);
    gr.addQuery("u_link", link);
    // Only existence matters, so one row is enough.
    gr.setLimit(1);
    gr.query();

    return gr.hasNext();
}
147+
148+
/**
 * Inserts one record into u_broken_knowledge_links describing a broken link.
 *
 * @param {string} article - Stored in u_article.
 * @param {string} owner - Stored in u_owner.
 * @param {string} link - Stored in u_link.
 * @param {*} reason - Stored in u_reason. Callers may pass a caught exception,
 *     so the value is converted to text; null/undefined become an empty string.
 * @param {?number} httpError - Stored in u_http_error_code; null or omitted
 *     when no HTTP status applies.
 * @returns {*} Whatever insert() returns for the new record.
 */
function addBrokenLinkRecord(article, owner, link, reason, httpError) {
    var gr = new GlideRecord("u_broken_knowledge_links");
    gr.initialize();
    gr.u_article = article;
    gr.u_owner = owner;
    gr.u_link = link;
    gr.u_reason = (reason === null || reason === undefined) ? "" : String(reason);
    gr.u_http_error_code = (httpError === undefined) ? null : httpError;
    return gr.insert();
}

0 commit comments

Comments
 (0)