Skip to content

Commit 728b895

Browse files
committed
Deal in scalar values
Closes mathiasbynens#3.
1 parent 4800cd9 commit 728b895

9 files changed

+112
-73
lines changed

.gitignore

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Generated test data file (> 100 MB)
22
tests/data.json
33

4-
# JSON version of coverage report
5-
coverage/coverage.json
4+
# Coverage report
5+
coverage
66

77
# Installed npm modules
88
node_modules

.travis.yml

+5-3
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@ before_script:
88
- "sudo apt-get install -qq openjdk-6-jre"
99
- "PACKAGE=rhino1_7R3; wget http://ftp.mozilla.org/pub/mozilla.org/js/$PACKAGE.zip && sudo unzip $PACKAGE -d /opt/ && rm $PACKAGE.zip"
1010
- "PACKAGE=rhino1_7R3; echo -e '#!/bin/sh\\njava -jar /opt/'$PACKAGE'/js.jar $@' | sudo tee /usr/local/bin/rhino && sudo chmod +x /usr/local/bin/rhino"
11-
- "PACKAGE=ringojs-0.9; wget http://ringojs.org/downloads/$PACKAGE.zip && sudo unzip $PACKAGE -d /opt/ && rm $PACKAGE.zip"
12-
- "PACKAGE=ringojs-0.9; sudo ln -s /opt/$PACKAGE/bin/ringo /usr/local/bin/ringo && sudo chmod +x /usr/local/bin/ringo"
11+
- "PACKAGE=ringojs-0.11; wget https://github.com/ringo/ringojs/releases/download/v0.11.0/$PACKAGE.zip && sudo unzip $PACKAGE -d /opt/ && rm $PACKAGE.zip"
12+
- "PACKAGE=ringojs-0.11; sudo ln -s /opt/$PACKAGE/bin/ringo /usr/local/bin/ringo && sudo chmod +x /usr/local/bin/ringo"
1313
- "PACKAGE=v0.3.2; wget https://github.com/280north/narwhal/archive/$PACKAGE.zip && sudo unzip $PACKAGE -d /opt/ && rm $PACKAGE.zip"
1414
- "PACKAGE=narwhal-0.3.2; sudo ln -s /opt/$PACKAGE/bin/narwhal /usr/local/bin/narwhal && sudo chmod +x /usr/local/bin/narwhal"
1515
# If the environment stores rt.jar in a different directory, find it and symlink the directory
1616
- "PREFIX=/usr/lib/jvm; if [ ! -d $PREFIX/java-6-openjdk ]; then for d in $PREFIX/java-6-openjdk-*; do if [ -e $d/jre/lib/rt.jar ]; then sudo ln -s $d $PREFIX/java-6-openjdk; break; fi; done; fi"
1717
- "sudo apt-get install -qq python; python --version"
1818
script:
19-
"grunt ci"
19+
- "grunt ci"
20+
after_script:
21+
- "grunt shell:cover-coveralls"

Gruntfile.js

+5-2
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@ module.exports = function(grunt) {
1515
}
1616
}
1717
},
18-
'cover': {
18+
'cover-html': {
1919
'command': 'istanbul cover --report "html" --verbose --dir "coverage" "tests/tests.js"; istanbul report --root "coverage" --format "html"'
2020
},
21+
'cover-coveralls': {
22+
'command': 'istanbul cover --verbose --dir "coverage" "tests/tests.js" && cat coverage/lcov.info | coveralls; rm -rf coverage/lcov*'
23+
},
2124
'test-narwhal': {
2225
'command': 'echo "Testing in Narwhal..."; export NARWHAL_OPTIMIZATION=-1; narwhal "tests/tests.js"'
2326
},
@@ -49,7 +52,7 @@ module.exports = function(grunt) {
4952

5053
grunt.loadNpmTasks('grunt-shell');
5154

52-
grunt.registerTask('cover', 'shell:cover');
55+
grunt.registerTask('cover', 'shell:cover-html');
5356
grunt.registerTask('ci', [
5457
'shell:generate-test-data',
5558
'shell:test-narwhal',

LICENSE-MIT.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Copyright Mathias Bynens <http://mathiasbynens.be/>
1+
Copyright Mathias Bynens <https://mathiasbynens.be/>
22

33
Permission is hereby granted, free of charge, to any person obtaining
44
a copy of this software and associated documentation files (the

README.md

+10-10
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
# utf8.js [![Build status](https://travis-ci.org/mathiasbynens/utf8.js.svg?branch=master)](https://travis-ci.org/mathiasbynens/utf8.js) [![Dependency status](https://gemnasium.com/mathiasbynens/utf8.js.svg)](https://gemnasium.com/mathiasbynens/utf8.js)
1+
# utf8.js [![Build status](https://travis-ci.org/mathiasbynens/utf8.js.svg?branch=master)](https://travis-ci.org/mathiasbynens/utf8.js) [![Code coverage status](http://img.shields.io/coveralls/mathiasbynens/utf8.js/master.svg)](https://coveralls.io/r/mathiasbynens/utf8.js) [![Dependency status](https://gemnasium.com/mathiasbynens/utf8.js.svg)](https://gemnasium.com/mathiasbynens/utf8.js)
22

3-
_utf8.js_ is a well-tested UTF-8 encoder/decoder written in JavaScript. Unlike many other JavaScript solutions, it is designed to be a _proper_ UTF-8 encoder/decoder: it can encode/decode any given Unicode code point, including astral symbols and unpaired surrogates. [Here’s an online demo.](http://mothereff.in/utf-8)
3+
_utf8.js_ is a well-tested UTF-8 encoder/decoder written in JavaScript. Unlike many other JavaScript solutions, it is designed to be a _proper_ UTF-8 encoder/decoder: it can encode/decode any Unicode scalar value, as per [the Encoding Standard](https://encoding.spec.whatwg.org/#utf-8). [Here’s an online demo.](https://mothereff.in/utf-8)
44

55
Feel free to fork if you see possible improvements!
66

77
## Installation
88

9-
Via [npm](http://npmjs.org/):
9+
Via [npm](https://www.npmjs.org/):
1010

1111
```bash
1212
npm install utf8
@@ -30,7 +30,7 @@ In a browser:
3030
<script src="utf8.js"></script>
3131
```
3232

33-
In [Narwhal](http://narwhaljs.org/), [Node.js](http://nodejs.org/), and [RingoJS ≥ v0.8.0](http://ringojs.org/):
33+
In [Narwhal](http://narwhaljs.org/), [Node.js](https://nodejs.org/), and [RingoJS ≥ v0.8.0](http://ringojs.org/):
3434

3535
```js
3636
var utf8 = require('utf8');
@@ -62,7 +62,7 @@ require(
6262

6363
### `utf8.encode(string)`
6464

65-
Encodes any given JavaScript string (`string`) as UTF-8, and returns the UTF-8-encoded version of the string.
65+
Encodes any given JavaScript string (`string`) as UTF-8, and returns the UTF-8-encoded version of the string. It throws an error if the input string contains a non-scalar value, i.e. a lone surrogate. (If you need to be able to encode non-scalar values as well, use [WTF-8](https://mths.be/wtf8) instead.)
6666

6767
```js
6868
// U+00A9 COPYRIGHT SIGN; see http://codepoints.net/U+00A9
@@ -75,7 +75,7 @@ utf8.encode('\uD800\uDC01');
7575

7676
### `utf8.decode(byteString)`
7777

78-
Decodes any given UTF-8-encoded string (`byteString`) as UTF-8, and returns the UTF-8-decoded version of the string. It throws an error when malformed UTF-8 is detected.
78+
Decodes any given UTF-8-encoded string (`byteString`) as UTF-8, and returns the UTF-8-decoded version of the string. It throws an error when malformed UTF-8 is detected. (If you need to be able to decode encoded non-scalar values as well, use [WTF-8](https://mths.be/wtf8) instead.)
7979

8080
```js
8181
utf8.decode('\xC2\xA9');
@@ -92,15 +92,15 @@ A string representing the semantic version number.
9292

9393
## Support
9494

95-
utf8.js has been tested in at least Chrome 27-29, Firefox 3-22, Safari 4-6, Opera 10-12, IE 6-10, Node.js v0.10.0, Narwhal 0.3.2, RingoJS 0.8-0.9, PhantomJS 1.9.0, and Rhino 1.7RC4.
95+
utf8.js has been tested in at least Chrome 27-39, Firefox 3-34, Safari 4-8, Opera 10-28, IE 6-11, Node.js v0.10.0, Narwhal 0.3.2, RingoJS 0.8-0.11, PhantomJS 1.9.0, and Rhino 1.7RC4.
9696

9797
## Unit tests & code coverage
9898

9999
After cloning this repository, run `npm install` to install the dependencies needed for development and testing. You may want to install Istanbul _globally_ using `npm install istanbul -g`.
100100

101101
Once that’s done, you can run the unit tests in Node using `npm test` or `node tests/tests.js`. To run the tests in Rhino, Ringo, Narwhal, PhantomJS, and web browsers as well, use `grunt test`.
102102

103-
To generate [the code coverage report](http://rawgithub.com/mathiasbynens/utf8.js/master/coverage/utf8.js/utf8.js.html), use `grunt cover`.
103+
To generate the code coverage report, use `grunt cover`.
104104

105105
## FAQ
106106

@@ -112,8 +112,8 @@ Long before utf8.js was created, the `utf8` module on npm was registered and use
112112

113113
| [![twitter/mathias](https://gravatar.com/avatar/24e08a9ea84deb17ae121074d0f17125?s=70)](https://twitter.com/mathias "Follow @mathias on Twitter") |
114114
|---|
115-
| [Mathias Bynens](http://mathiasbynens.be/) |
115+
| [Mathias Bynens](https://mathiasbynens.be/) |
116116

117117
## License
118118

119-
utf8.js is available under the [MIT](http://mths.be/mit) license.
119+
utf8.js is available under the [MIT](https://mths.be/mit) license.

package.json

+10-23
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,34 @@
22
"name": "utf8",
33
"version": "2.0.0",
44
"description": "A well-tested UTF-8 encoder/decoder written in JavaScript.",
5-
"homepage": "http://mths.be/utf8js",
5+
"homepage": "https://mths.be/utf8js",
66
"main": "utf8.js",
77
"keywords": [
88
"charset",
99
"encoding",
1010
"unicode",
1111
"utf8"
1212
],
13-
"licenses": [
14-
{
15-
"type": "MIT",
16-
"url": "http://mths.be/mit"
17-
},
18-
{
19-
"type": "GPL",
20-
"url": "http://mths.be/gpl"
21-
}
22-
],
13+
"license": "MIT",
2314
"author": {
2415
"name": "Mathias Bynens",
25-
"url": "http://mathiasbynens.be/"
16+
"url": "https://mathiasbynens.be/"
2617
},
2718
"repository": {
2819
"type": "git",
2920
"url": "https://github.com/mathiasbynens/utf8.js.git"
3021
},
31-
"bugs": {
32-
"url": "https://github.com/mathiasbynens/utf8.js/issues"
33-
},
34-
"directories": {
35-
"test": "tests"
36-
},
22+
"bugs": "https://github.com/mathiasbynens/utf8.js/issues",
3723
"scripts": {
3824
"test": "node tests/tests.js"
3925
},
4026
"devDependencies": {
41-
"grunt": "~0.4.4",
42-
"grunt-shell": "~0.6.4",
43-
"istanbul": "~0.2.6",
44-
"qunit-clib": "~1.3.0",
27+
"coveralls": "^2.11.1",
28+
"grunt": "^0.4.5",
29+
"grunt-shell": "^1.1.1",
30+
"istanbul": "^0.3.5",
31+
"qunit-extras": "^1.4.0",
4532
"qunitjs": "~1.11.0",
46-
"requirejs": "~2.1.11"
33+
"requirejs": "^2.1.11"
4734
}
4835
}

tests/generate-test-data.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import re
44
import json
55

6-
# http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
6+
# https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
77
# http://stackoverflow.com/a/13436167/96656
88
def unisymbol(codePoint):
99
if codePoint >= 0x0000 and codePoint <= 0xFFFF:
@@ -25,6 +25,9 @@ def writeFile(filename, contents):
2525

2626
data = []
2727
for codePoint in range(0x000000, 0x10FFFF + 1):
28+
# Skip non-scalar values.
29+
if codePoint >= 0xD800 and codePoint <= 0xDFFF:
30+
continue
2831
symbol = unisymbol(codePoint)
2932
# http://stackoverflow.com/a/17199950/96656
3033
bytes = symbol.encode('utf8').decode('latin1')

tests/tests.js

+62-28
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,27 @@
1-
;(function(root) {
1+
(function(root) {
22
'use strict';
33

4-
/** Use a single `load` function */
5-
var load = typeof require == 'function' ? require : root.load;
4+
var noop = Function.prototype;
5+
6+
var load = (typeof require == 'function' && !(root.define && define.amd)) ?
7+
require :
8+
(!root.document && root.java && root.load) || noop;
69

7-
/** The unit testing framework */
810
var QUnit = (function() {
9-
var noop = Function.prototype;
1011
return root.QUnit || (
1112
root.addEventListener || (root.addEventListener = noop),
1213
root.setTimeout || (root.setTimeout = noop),
1314
root.QUnit = load('../node_modules/qunitjs/qunit/qunit.js') || root.QUnit,
14-
(load('../node_modules/qunit-clib/qunit-clib.js') || { 'runInContext': noop }).runInContext(root),
1515
addEventListener === noop && delete root.addEventListener,
1616
root.QUnit
1717
);
1818
}());
1919

20+
var qe = load('../node_modules/qunit-extras/qunit-extras.js');
21+
if (qe) {
22+
qe.runInContext(root);
23+
}
24+
2025
/** The `utf8` object to test */
2126
var utf8 = root.utf8 || (root.utf8 = (
2227
utf8 = load('../utf8.js') || root.utf8,
@@ -96,63 +101,75 @@
96101
{
97102
'codePoint': 0xD800,
98103
'decoded': '\uD800',
99-
'encoded': '\xED\xA0\x80'
104+
'encoded': '\xED\xA0\x80',
105+
'error': true
100106
},
101107
{
102108
'description': 'High surrogate followed by another high surrogate',
103109
'decoded': '\uD800\uD800',
104-
'encoded': '\xED\xA0\x80\xED\xA0\x80'
110+
'encoded': '\xED\xA0\x80\xED\xA0\x80',
111+
'error': true
105112
},
106113
{
107114
'description': 'High surrogate followed by a symbol that is not a surrogate',
108115
'decoded': '\uD800A',
109-
'encoded': '\xED\xA0\x80A'
116+
'encoded': '\xED\xA0\x80A',
117+
'error': true
110118
},
111119
{
112120
'description': 'Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate',
113121
'decoded': '\uD800\uD834\uDF06\uD800',
114-
'encoded': '\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80'
122+
'encoded': '\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80',
123+
'error': true
115124
},
116125
{
117126
'codePoint': 0xD9AF,
118127
'decoded': '\uD9AF',
119-
'encoded': '\xED\xA6\xAF'
128+
'encoded': '\xED\xA6\xAF',
129+
'error': true
120130
},
121131
{
122132
'codePoint': 0xDBFF,
123133
'decoded': '\uDBFF',
124-
'encoded': '\xED\xAF\xBF'
134+
'encoded': '\xED\xAF\xBF',
135+
'error': true
125136
},
126137
// low surrogates: 0xDC00 to 0xDFFF
127138
{
128139
'codePoint': 0xDC00,
129140
'decoded': '\uDC00',
130-
'encoded': '\xED\xB0\x80'
141+
'encoded': '\xED\xB0\x80',
142+
'error': true
131143
},
132144
{
133145
'description': 'Low surrogate followed by another low surrogate',
134146
'decoded': '\uDC00\uDC00',
135-
'encoded': '\xED\xB0\x80\xED\xB0\x80'
147+
'encoded': '\xED\xB0\x80\xED\xB0\x80',
148+
'error': true
136149
},
137150
{
138151
'description': 'Low surrogate followed by a symbol that is not a surrogate',
139152
'decoded': '\uDC00A',
140-
'encoded': '\xED\xB0\x80A'
153+
'encoded': '\xED\xB0\x80A',
154+
'error': true
141155
},
142156
{
143157
'description': 'Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate',
144158
'decoded': '\uDC00\uD834\uDF06\uDC00',
145-
'encoded': '\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80'
159+
'encoded': '\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80',
160+
'error': true
146161
},
147162
{
148163
'codePoint': 0xDEEE,
149164
'decoded': '\uDEEE',
150-
'encoded': '\xED\xBB\xAE'
165+
'encoded': '\xED\xBB\xAE',
166+
'error': true
151167
},
152168
{
153169
'codePoint': 0xDFFF,
154170
'decoded': '\uDFFF',
155-
'encoded': '\xED\xBF\xBF'
171+
'encoded': '\xED\xBF\xBF',
172+
'error': true
156173
},
157174

158175
// 4-byte
@@ -188,16 +205,33 @@
188205
forEach(data, function(object) {
189206
var description = object.description || 'U+' + object.codePoint.toString(16).toUpperCase();
190207
;
191-
equal(
192-
object.encoded,
193-
utf8.encode(object.decoded),
194-
'Encoding: ' + description
195-
);
196-
equal(
197-
object.decoded,
198-
utf8.decode(object.encoded),
199-
'Decoding: ' + description
200-
);
208+
if (object.error) {
209+
raises(
210+
function() {
211+
utf8.decode(object.encoded);
212+
},
213+
Error,
214+
'Error: non-scalar value detected'
215+
);
216+
raises(
217+
function() {
218+
utf8.encode(object.decoded);
219+
},
220+
Error,
221+
'Error: non-scalar value detected'
222+
);
223+
} else {
224+
equal(
225+
object.encoded,
226+
utf8.encode(object.decoded),
227+
'Encoding: ' + description
228+
);
229+
equal(
230+
object.decoded,
231+
utf8.decode(object.encoded),
232+
'Decoding: ' + description
233+
);
234+
}
201235
});
202236

203237
// Error handling

0 commit comments

Comments
 (0)