Skip to content

Commit d0a5dc1

Browse files
committed
use gzip for mnist
1 parent 5161a47 commit d0a5dc1

File tree

2 files changed

+85
-24
lines changed

2 files changed

+85
-24
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@ docs/equations/*.aux
22
docs/equations/*.log
33
docs/equations/*.out
44
docs/equations/*.synctex.gz
5+
code/ch12/mnist
6+
code/datasets/mnist/t10k-images-idx3-ubyte
7+
code/datasets/mnist/t10k-labels-idx1-ubyte
8+
code/datasets/mnist/train-images-idx3-ubyte
9+
code/datasets/mnist/train-labels-idx1-ubyte
510

611
.ipynb_checkpoints
712
.DS_Store

code/ch12/ch12.ipynb

Lines changed: 80 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,14 @@
4242
"output_type": "stream",
4343
"text": [
4444
"Sebastian Raschka \n",
45-
"last updated: 2016-09-29 \n",
45+
"last updated: 2017-07-29 \n",
4646
"\n",
47-
"CPython 3.5.2\n",
48-
"IPython 5.1.0\n",
47+
"CPython 3.6.1\n",
48+
"IPython 6.0.0\n",
4949
"\n",
50-
"numpy 1.11.1\n",
51-
"scipy 0.18.1\n",
52-
"matplotlib 1.5.1\n"
50+
"numpy 1.13.1\n",
51+
"scipy 0.19.1\n",
52+
"matplotlib 2.0.2\n"
5353
]
5454
}
5555
],
@@ -108,8 +108,10 @@
108108
},
109109
{
110110
"cell_type": "code",
111-
"execution_count": 3,
112-
"metadata": {},
111+
"execution_count": 2,
112+
"metadata": {
113+
"collapsed": true
114+
},
113115
"outputs": [],
114116
"source": [
115117
"from IPython.display import Image\n",
@@ -139,7 +141,7 @@
139141
},
140142
{
141143
"cell_type": "code",
142-
"execution_count": 4,
144+
"execution_count": 3,
143145
"metadata": {},
144146
"outputs": [
145147
{
@@ -149,7 +151,7 @@
149151
"<IPython.core.display.Image object>"
150152
]
151153
},
152-
"execution_count": 4,
154+
"execution_count": 3,
153155
"metadata": {
154156
"image/png": {
155157
"width": 600
@@ -179,7 +181,7 @@
179181
},
180182
{
181183
"cell_type": "code",
182-
"execution_count": 5,
184+
"execution_count": 4,
183185
"metadata": {},
184186
"outputs": [
185187
{
@@ -189,7 +191,7 @@
189191
"<IPython.core.display.Image object>"
190192
]
191193
},
192-
"execution_count": 5,
194+
"execution_count": 4,
193195
"metadata": {
194196
"image/png": {
195197
"width": 400
@@ -204,7 +206,7 @@
204206
},
205207
{
206208
"cell_type": "code",
207-
"execution_count": 6,
209+
"execution_count": 5,
208210
"metadata": {},
209211
"outputs": [
210212
{
@@ -214,7 +216,7 @@
214216
"<IPython.core.display.Image object>"
215217
]
216218
},
217-
"execution_count": 6,
219+
"execution_count": 5,
218220
"metadata": {
219221
"image/png": {
220222
"width": 500
@@ -244,7 +246,7 @@
244246
},
245247
{
246248
"cell_type": "code",
247-
"execution_count": 7,
249+
"execution_count": 6,
248250
"metadata": {},
249251
"outputs": [
250252
{
@@ -254,7 +256,7 @@
254256
"<IPython.core.display.Image object>"
255257
]
256258
},
257-
"execution_count": 7,
259+
"execution_count": 6,
258260
"metadata": {
259261
"image/png": {
260262
"width": 500
@@ -316,7 +318,7 @@
316318
},
317319
{
318320
"cell_type": "code",
319-
"execution_count": 8,
321+
"execution_count": 7,
320322
"metadata": {
321323
"collapsed": true
322324
},
@@ -348,6 +350,50 @@
348350
" return images, labels"
349351
]
350352
},
353+
{
354+
"cell_type": "markdown",
355+
"metadata": {},
356+
"source": [
357+
"**Important Note**\n",
358+
"\n",
359+
"Some readers experienced issues with the `load_mnist` function above as certain decompression tools renamed the files from *-labels-idx1-ubyte* to *-labels.idx1-ubyte*. To avoid this problem altogether, the modified function below loads the dataset directly from the `gz` archives using Python's `gzip` module."
360+
]
361+
},
362+
{
363+
"cell_type": "code",
364+
"execution_count": 8,
365+
"metadata": {
366+
"collapsed": true
367+
},
368+
"outputs": [],
369+
"source": [
370+
"import os\n",
371+
"import struct\n",
372+
"import numpy as np\n",
373+
"import gzip\n",
374+
" \n",
375+
"def load_mnist(path, kind='train'):\n",
376+
" \"\"\"Load MNIST data from `path`\"\"\"\n",
377+
" labels_path = os.path.join(path, \n",
378+
" '%s-labels-idx1-ubyte.gz' % kind)\n",
379+
" images_path = os.path.join(path, \n",
380+
" '%s-images-idx3-ubyte.gz' % kind)\n",
381+
" \n",
382+
" with gzip.open(labels_path, 'rb') as lbpath:\n",
383+
" lbpath.read(8)\n",
384+
" buffer = lbpath.read()\n",
385+
" labels = np.frombuffer(buffer, dtype=np.uint8)\n",
386+
"\n",
387+
" with gzip.open(images_path, 'rb') as imgpath:\n",
388+
" imgpath.read(16)\n",
389+
" buffer = imgpath.read()\n",
390+
" images = np.frombuffer(buffer, \n",
391+
" dtype=np.uint8).reshape(\n",
392+
" len(labels), 784).astype(np.float64)\n",
393+
" \n",
394+
" return images, labels"
395+
]
396+
},
351397
{
352398
"cell_type": "code",
353399
"execution_count": 9,
@@ -476,7 +522,9 @@
476522
{
477523
"cell_type": "code",
478524
"execution_count": 13,
479-
"metadata": {},
525+
"metadata": {
526+
"collapsed": true
527+
},
480528
"outputs": [],
481529
"source": [
482530
"# np.savetxt('train_img.csv', X_train, fmt='%i', delimiter=',')\n",
@@ -510,7 +558,9 @@
510558
{
511559
"cell_type": "code",
512560
"execution_count": 8,
513-
"metadata": {},
561+
"metadata": {
562+
"collapsed": true
563+
},
514564
"outputs": [],
515565
"source": [
516566
"import numpy as np\n",
@@ -921,7 +971,9 @@
921971
{
922972
"cell_type": "code",
923973
"execution_count": 15,
924-
"metadata": {},
974+
"metadata": {
975+
"collapsed": true
976+
},
925977
"outputs": [],
926978
"source": [
927979
"nn = NeuralNetMLP(n_output=10, \n",
@@ -996,7 +1048,9 @@
9961048
{
9971049
"cell_type": "code",
9981050
"execution_count": 18,
999-
"metadata": {},
1051+
"metadata": {
1052+
"collapsed": true
1053+
},
10001054
"outputs": [],
10011055
"source": [
10021056
"batches = np.array_split(range(len(nn.cost_)), 1000)\n",
@@ -1753,7 +1807,9 @@
17531807
{
17541808
"cell_type": "code",
17551809
"execution_count": 29,
1756-
"metadata": {},
1810+
"metadata": {
1811+
"collapsed": true
1812+
},
17571813
"outputs": [],
17581814
"source": [
17591815
"nn_check = MLPGradientCheck(n_output=10, \n",
@@ -2011,7 +2067,7 @@
20112067
"metadata": {
20122068
"anaconda-cloud": {},
20132069
"kernelspec": {
2014-
"display_name": "Python [default]",
2070+
"display_name": "Python 3",
20152071
"language": "python",
20162072
"name": "python3"
20172073
},
@@ -2025,7 +2081,7 @@
20252081
"name": "python",
20262082
"nbconvert_exporter": "python",
20272083
"pygments_lexer": "ipython3",
2028-
"version": "3.5.2"
2084+
"version": "3.6.1"
20292085
}
20302086
},
20312087
"nbformat": 4,

0 commit comments

Comments
 (0)