Skip to content

Commit 112846f

Browse files
authored
chore: Inline test fixtures (#683)
Not to be confused with extractor fixtures, which are snapshots of a webpage. This change removes the pattern of separate JS files that provide "fixtures" for tests, i.e. the input and expected strings that the tests use. Those files were inconsistent and disorganized, and generally just served to add indirection to test files. All of those strings are now defined inline, where they are used in their respective tests.
1 parent 0d2bad5 commit 112846f

File tree

64 files changed

+1260
-2995
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+1260
-2995
lines changed

src/cleaners/fixtures/html.js

-15
This file was deleted.

src/cleaners/lead-image-url.test.js

+1-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@ describe('clean(leadImageUrl)', () => {
99
});
1010

1111
it('returns null if the url is not valid', () => {
12-
const url = 'this is not a valid url';
13-
assert.equal(clean(url), null);
12+
assert.equal(clean('this is not a valid url'), null);
1413
});
1514

1615
it('trims whitespace', () => {

src/cleaners/title.test.js

+13-5
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,35 @@
11
import assert from 'assert';
22
import cheerio from 'cheerio';
33

4-
import HTML from './fixtures/html';
54
import { cleanTitle } from './index';
65

76
describe('cleanTitle(title, { url, $ })', () => {
87
it('only uses h1 if there is only one on the page', () => {
98
const title = 'Too Short';
10-
const $ = cheerio.load(HTML.docWith2H1s);
9+
const $ = cheerio.load(`
10+
<div>
11+
<h1>This Is the Real Title</h1>
12+
<h1>This Is the Real Title</h1>
13+
</div>
14+
`);
1115

1216
assert.equal(cleanTitle(title, { url: '', $ }), title);
1317
});
1418

1519
it('removes HTML tags from titles', () => {
16-
const $ = cheerio.load(HTML.docWithTagsInH1.before);
20+
const $ = cheerio.load(
21+
'<div><h1>This Is the <em>Real</em> Title</h1></div>'
22+
);
1723
const title = $('h1').html();
1824

19-
assert.equal(cleanTitle(title, { url: '', $ }), HTML.docWithTagsInH1.after);
25+
assert.equal(cleanTitle(title, { url: '', $ }), 'This Is the Real Title');
2026
});
2127

2228
it('trims extraneous spaces', () => {
2329
const title = " This Is a Great Title That You'll Love ";
24-
const $ = cheerio.load(HTML.docWithTagsInH1.before);
30+
const $ = cheerio.load(
31+
'<div><h1>This Is the <em>Real</em> Title</h1></div>'
32+
);
2533

2634
assert.equal(cleanTitle(title, { url: '', $ }), title.trim());
2735
});

src/extractors/detect-by-html.test.js

+4-6
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,15 @@ import detectByHtml from './detect-by-html';
55

66
describe('detectByHtml', () => {
77
it('detects a medium post from the html', () => {
8-
const html = '<head><meta name="al:ios:app_name" value="Medium" /></head>';
9-
10-
const $ = cheerio.load(html);
8+
const $ = cheerio.load(
9+
'<head><meta name="al:ios:app_name" value="Medium" /></head>'
10+
);
1111

1212
assert.equal(detectByHtml($).domain, 'medium.com');
1313
});
1414

1515
it('returns nothing if no match is found', () => {
16-
const html = '<div></div>';
17-
18-
const $ = cheerio.load(html);
16+
const $ = cheerio.load('<div></div>');
1917

2018
assert.equal(detectByHtml($), null);
2119
});

src/extractors/generic/author/extractor.test.js

+22-7
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,54 @@
11
import assert from 'assert';
22
import cheerio from 'cheerio';
33

4-
import HTML from './fixtures/html';
54
import GenericAuthorExtractor from './extractor';
65

76
describe('GenericAuthorExtractor', () => {
87
describe('extract($, cachedMeta)', () => {
98
it('extracts author from meta tags', () => {
10-
const $ = cheerio.load(HTML.authorMeta.test);
9+
const $ = cheerio.load(`
10+
<html>
11+
<meta name="dc.author" value="Adam" />
12+
</html>
13+
`);
1114
const result = GenericAuthorExtractor.extract({
1215
$,
1316
metaCache: ['dc.author', 'something-else'],
1417
});
1518

16-
assert.equal(result, HTML.authorMeta.result);
19+
assert.equal(result, 'Adam');
1720
});
1821

1922
it('extracts author from author selectors', () => {
20-
const $ = cheerio.load(HTML.authorSelectors.test);
23+
const $ = cheerio.load(`
24+
<div>
25+
<div class="byline">
26+
<a href="/author/adam">Adam</a>
27+
</div>
28+
</div>
29+
`);
2130
const result = GenericAuthorExtractor.extract({
2231
$,
2332
metaCache: ['dc.author', 'something-else'],
2433
});
2534

26-
assert.equal(result, HTML.authorSelectors.result);
35+
assert.equal(result, 'Adam');
2736
});
2837

2938
it('extracts author with regex selectors', () => {
30-
const $ = cheerio.load(HTML.authorRegSelectors.test);
39+
const $ = cheerio.load(`
40+
<div>
41+
<div class="byline">
42+
<span>By Adam</span>
43+
</div>
44+
</div>
45+
`);
3146
const result = GenericAuthorExtractor.extract({
3247
$,
3348
metaCache: ['dc.author', 'something-else'],
3449
});
3550

36-
assert.equal(result, HTML.authorRegSelectors.result);
51+
assert.equal(result, 'Adam');
3752
});
3853

3954
it('returns null if no author found', () => {

src/extractors/generic/author/fixtures/html.js

-32
This file was deleted.

src/extractors/generic/content/extract-best-node.test.js

+4-6
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,12 @@ const fs = require('fs');
77
describe('extractBestNode($, flags)', () => {
88
it('scores the dom nodes and returns the best option', () => {
99
const html = fs.readFileSync('./fixtures/latimes.html', 'utf-8');
10-
const opts = {
11-
stripUnlikelyCandidates: true,
12-
weightNodes: true,
13-
};
14-
1510
const $ = cheerio.load(html);
1611

17-
const bestNode = extractBestNode($, opts);
12+
const bestNode = extractBestNode($, {
13+
stripUnlikelyCandidates: true,
14+
weightNodes: true,
15+
});
1816

1917
assert(typeof bestNode, 'object');
2018
});

src/extractors/generic/content/scoring/add-score.test.js

+4-6
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,15 @@ describe('Scoring utils', () => {
77
describe('addScore(node, $, amount)', () => {
88
it("adds the specified amount to a node's score", () => {
99
const $ = cheerio.load('<p score="25">Foo</p>');
10-
let $node = $('p').first();
11-
12-
$node = addScore($node, $, 25);
10+
const $node = $('p').first();
11+
addScore($node, $, 25);
1312
assert.equal(getScore($node), 50);
1413
});
1514

1615
it('adds score if score not yet set (assumes score is 0)', () => {
1716
const $ = cheerio.load('<p>Foo</p>');
18-
let $node = $('p').first();
19-
20-
$node = addScore($node, $, 25);
17+
const $node = $('p').first();
18+
addScore($node, $, 25);
2119
assert.equal(getScore($node), 25);
2220
});
2321
});

src/extractors/generic/content/scoring/add-to-parent.test.js

+2-5
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,8 @@ import { addToParent, getScore } from './index';
66
describe('Scoring utils', () => {
77
describe('addToParent(node, $, amount)', () => {
88
it("adds 1/4 of a node's score it its parent", () => {
9-
const html = '<div score="25"><p score="40">Foo</p></div>';
10-
const $ = cheerio.load(html);
11-
let $node = $('p').first();
12-
13-
$node = addToParent($node, $, 40);
9+
const $ = cheerio.load('<div score="25"><p score="40">Foo</p></div>');
10+
const $node = addToParent($('p').first(), $, 40);
1411

1512
assert.equal(getScore($node.parent()), 35);
1613
assert.equal(getScore($node), 40);

src/extractors/generic/content/scoring/find-top-candidate.test.js

+29-11
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,65 @@
11
import assert from 'assert';
22
import cheerio from 'cheerio';
33

4-
import HTML from './fixtures/html';
5-
64
import { getScore, findTopCandidate, scoreContent } from './index';
75

86
const fs = require('fs');
97

108
describe('findTopCandidate($)', () => {
119
it('finds the top candidate from simple case', () => {
12-
const $ = cheerio.load(HTML.findDom1);
10+
const $ = cheerio.load(`
11+
<div score="100">
12+
<p score="1">Lorem ipsum etc</p>
13+
</div>
14+
`);
1315

1416
const $$topCandidate = findTopCandidate($);
1517

1618
assert.equal(getScore($$topCandidate), 100);
1719
});
1820

1921
it('finds the top candidate from a nested case', () => {
20-
const $ = cheerio.load(HTML.findDom2);
22+
const $ = cheerio.load(`
23+
<div score="10">
24+
<article score="50">
25+
<p score="1">Lorem ipsum etc</p>
26+
</article>
27+
</div>
28+
`);
2129

2230
const $$topCandidate = findTopCandidate($);
2331

24-
// this is wrapped in a div so checking
25-
// the score of the first child
32+
// this is wrapped in a div so checking the score of the first child
2633
assert.equal(getScore($$topCandidate.first()), 50);
2734
});
2835

2936
it('ignores tags like BR', () => {
30-
const $ = cheerio.load(HTML.findDom3);
37+
const $ = cheerio.load(`
38+
<article score="50">
39+
<p score="1">Lorem ipsum br</p>
40+
<br score="1000" />
41+
</article>
42+
`);
3143

3244
const $topCandidate = findTopCandidate($);
3345

3446
assert.equal(getScore($topCandidate), 50);
3547
});
3648

3749
it('returns BODY if no candidates found', () => {
38-
const $ = cheerio.load(HTML.topBody);
50+
const $ = cheerio.load(`
51+
<body>
52+
<article>
53+
<p>Lorem ipsum etc</p>
54+
<br />
55+
</article>
56+
<body>
57+
`);
3958

4059
const $topCandidate = findTopCandidate($);
4160

42-
// browser won't allow body tag to be placed
43-
// arbitrarily/loaded on the page, so we tranform
44-
// it in cheerio-query, so this test would fail.
61+
// browser won't allow body tag to be placed arbitrarily/loaded on the page,
62+
// so we transform it in cheerio-query, so this test would fail.
4563
if (!$.browser) {
4664
assert.equal($topCandidate.get(0).tagName, 'body');
4765
}

0 commit comments

Comments
 (0)