Commit 3ff33bb

Fix trailing spaces
nielsbasjes committed Oct 29, 2020
1 parent 1d3a999 commit 3ff33bb
Showing 12 changed files with 150 additions and 150 deletions.
10 changes: 5 additions & 5 deletions Benchmark/GetStatistisc.make
@@ -6,7 +6,7 @@ LastJob = 0267

JobTargets = $(shell for I in `seq -w $(FirstJob) $(LastJob)` ; do echo Results-$$I.txt ; done)

Results.txt:: $(JobTargets)
@echo "Merge results"
@cat Results-*.txt > $@

@@ -17,18 +17,18 @@ Results-%.txt: Extract-%.txt
@( ID=$$(echo $@ | cut -d'-' -f 2 | cut -d'.' -f1) ; echo "Calculate run $${ID}" )
@cat $< | sed 's@\([0-9]*\)hrs, @(\1*3600)+@g;s@\([0-9]*\)mins, @(\1*60)+@g;s@\([0-9]*\)sec@(\1)@g;s@ @@g;s@|@ @g' |\
while read name splits elapsed avgmap ; do echo "$${name} | $${splits} | $$((elapsed)) | $$((avgmap)) | $$(( (avgmap) * (splits) )) " ; done >$@

clean::
rm -f $(JobTargets)

#.PRECIOUS: Extract-%.txt
Extract-%.txt: Output-%.txt
@cat $< | tr -d '\n' | sed 's@ *<@<@g;s@> *@>@g;s@.*>Job Name:<td>\([^<]*\).*>Elapsed:<td>\([^<]*\).*>Average Map Time<td>\([^<]*\).*>Map</a><td>\([^<]*\).*@\1 | \4 | \2 | \3 @;s@Wordcount-@@g;s@Wordcount@GzipCodec@g;' > $@ ;
@echo >> $@

clean::
rm -f Extract-*.txt

.PRECIOUS: Output-%.txt
Output-%.txt:
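
A note on the `Results-%.txt` rule above: the sed script rewrites scraped durations such as `1hrs, 2mins, 3sec` into a shell arithmetic expression, which the `$(( ... ))` in the `while read` loop then evaluates to plain seconds. A minimal standalone sketch in bash (the sample input is made up):

```bash
#!/usr/bin/env bash
# "1hrs, 2mins, 3sec"  ->  "(1*3600)+(2*60)+(3)"  ->  3723
echo "1hrs, 2mins, 3sec" |
  sed 's@\([0-9]*\)hrs, @(\1*3600)+@g;s@\([0-9]*\)mins, @(\1*60)+@g;s@\([0-9]*\)sec@(\1)@g' |
  while read expr ; do echo $(( expr )) ; done   # prints 3723
```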
6 changes: 3 additions & 3 deletions Benchmark/Makefile
@@ -3,10 +3,10 @@
IoBytesPerChecksum = 512
InputFile = words-20K.txt.gz

all:
@echo "Don't use the makefile directly. Run the ./run.sh instead."

clean::
-rm -f wordcount-normal.txt

wordcount-normal.txt: $(InputFile) target/wordcount-1.0-job.jar
@@ -24,7 +24,7 @@ wordcount-%.txt: $(InputFile) target/wordcount-1.0-job.jar
hdfs dfs -text wordcount-$${SPLITS}/part-r-* | sort > wordcount-$${SPLITS}.txt ; \
)

clean::
-rm -f wordcount-*.txt

words.txt.gz:
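
The hidden body of the `wordcount-%.txt` pattern rule presumably turns the requested number of splits into a byte-level split size; the Pig runner later in this commit shows the arithmetic explicitly. A sketch under that assumption:

```bash
# Derive a split size that yields the desired number of map tasks:
INPUT=words-20K.txt.gz                     # InputFile from the Makefile above
SPLITS=12                                  # hypothetical target split count
FILESIZE=$(stat -c%s "${INPUT}")           # input size in bytes
SPLITSIZE=$(( (FILESIZE / SPLITS) + 1 ))   # +1 so the division remainder doesn't add an extra split
echo "${SPLITS} splits -> ${SPLITSIZE} bytes per split"
```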
114 changes: 57 additions & 57 deletions Benchmark/README.md
@@ -34,8 +34,8 @@ After the maximal parallelization point (12) we see that the processing time jumps

From the ganglia graphs of the cluster you can see that beyond the point of `no more free mappers` the only thing that really changes is that the cluster is moving more and more data around over the network.

![CPU Usage](cpu-graph.gif)
![Load](load-graph.gif)
![Network traffic](network-graph.gif)

Splitsize (bytes) | Splits | Elapsed (sec) | Avg. Map Time (sec) | Total Map Time (sec)
@@ -101,57 +101,57 @@ On a single machine simply running a `gzip -t <file>` takes about 45 seconds. So
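
`gzip -t` verifies a file by decompressing the entire stream and discarding the output, so it is a fair single-threaded baseline for how long one mapper must spend just reading the gzip. Reproducing the shape of that measurement (filename assumed, any large gzip file will do):

```bash
time gzip -t words-1.1G.txt.gz   # hypothetical ~1.1 GB input; decompresses everything, writes nothing
```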

Splitsize (bytes) | Splits | Elapsed (sec) | Avg. Map Time (sec) | Total Map Time (sec)
--:|--:|--:|--:|--:|
GzipCodec | 1 | 1401 | 1393 | 1393
1101900001 | 1 | 1363 | 1355 | 1355
550950001 | 2 | 764 | 757 | 1514
367300001 | 3 | 559 | 528 | 1584
275475001 | 4 | 442 | 415 | 1660
220380001 | 5 | 365 | 315 | 1575
183650001 | 6 | 281 | 259 | 1554
157414286 | 7 | 246 | 224 | 1568
137737501 | 8 | 234 | 205 | 1640
122433334 | 9 | 208 | 182 | 1638
110190001 | 10 | 191 | 170 | 1700
100172728 | 11 | 174 | 151 | 1661
91825001 | 12 | 174 | 144 | 1728
84761539 | 13 | 247 | 132 | 1716
78707143 | 14 | 247 | 124 | 1736
73460001 | 15 | 238 | 117 | 1755
68868751 | 16 | 229 | 112 | 1792
64817648 | 17 | 248 | 106 | 1802
61216667 | 18 | 231 | 102 | 1836
57994737 | 19 | 217 | 98 | 1862
55095001 | 20 | 220 | 94 | 1880
52471429 | 21 | 195 | 90 | 1890
50086364 | 22 | 205 | 89 | 1958
47908696 | 23 | 210 | 85 | 1955
45912501 | 24 | 236 | 80 | 1920
44076001 | 25 | 226 | 80 | 2000
42380770 | 26 | 242 | 78 | 2028
40811112 | 27 | 234 | 75 | 2025
39353572 | 28 | 232 | 73 | 2044
37996552 | 29 | 228 | 71 | 2059
36730001 | 30 | 238 | 71 | 2130
35545162 | 31 | 234 | 70 | 2170
34434376 | 32 | 229 | 67 | 2144
33390910 | 33 | 223 | 65 | 2145
32408824 | 34 | 241 | 64 | 2176
31482858 | 35 | 247 | 62 | 2170
30608334 | 36 | 248 | 61 | 2196
29781082 | 37 | 248 | 60 | 2220
28997369 | 38 | 255 | 60 | 2280
28253847 | 39 | 238 | 58 | 2262
27547501 | 40 | 237 | 56 | 2240
26875610 | 41 | 252 | 56 | 2296
26235715 | 42 | 245 | 56 | 2352
25625582 | 43 | 245 | 55 | 2365
25043182 | 44 | 256 | 54 | 2376
24486667 | 45 | 238 | 52 | 2340
23954348 | 46 | 265 | 52 | 2392
23444681 | 47 | 258 | 51 | 2397
22956251 | 48 | 258 | 50 | 2400
22487756 | 49 | 270 | 51 | 2499
22038001 | 50 | 258 | 48 | 2400
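
The Total Map Time column is simply Splits × Avg. Map Time. For example, at the maximal parallelization point of 12 splits the job finishes in 174 seconds of wall-clock time while spending 12 × 144 = 1728 seconds of aggregate mapper time: roughly 24% more total work than the 1393 seconds of the unsplit GzipCodec run, in exchange for an elapsed time that is about 8 times shorter.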

## A 220 MB input file doing a wordcount.

@@ -218,7 +218,7 @@ Splitsize | Splits | Elapsed | Avg. Map Time | Total Map Time
70000000 | 2 | 90 sec | 83 sec | 166 sec |
50000000 | 3 | 66 sec | 58 sec | 174 sec |
35000000 | 4 | 55 sec | 48 sec | 192 sec |
28000000 | 5 | 49 sec | 42 sec | 210 sec |
25000000 | 6 | 39 sec | 32 sec | 192 sec |
20000000 | 7 | 38 sec | 30 sec | 210 sec |
17500000 | 8 | 32 sec | 26 sec | 208 sec |
@@ -227,15 +227,15 @@
11500000 | 13 | 44 sec | 19 sec | 247 sec |
10000000 | 14 | 39 sec | 18 sec | 252 sec |
5000000 | 28 | 42 sec | 11 sec | 308 sec |
2500000 | 56 | 59 sec | 8 sec | 448 sec |

Splitsize | Splits | Elapsed | Avg. Map Time | Total Map Time
--:|--:|--:|--:|--:|
139796017 | 1 | 167 sec | 159 sec | 159 sec |
70000000 | 2 | 90 sec | 82 sec | 164 sec |
50000000 | 3 | 65 sec | 57 sec | 171 sec |
35000000 | 4 | 57 sec | 48 sec | 192 sec |
28000000 | 5 | 48 sec | 41 sec | 205 sec |
25000000 | 6 | 44 sec | 35 sec | 210 sec |
20000000 | 7 | 38 sec | 29 sec | 203 sec |
17500000 | 8 | 34 sec | 28 sec | 224 sec |
@@ -244,6 +244,6 @@
11500000 | 13 | 43 sec | 19 sec | 247 sec |
10000000 | 14 | 40 sec | 18 sec | 252 sec |
5000000 | 28 | 43 sec | 12 sec | 336 sec |
2500000 | 56 | 62 sec | 9 sec | 504 sec |


16 changes: 8 additions & 8 deletions Benchmark/javamr/src/main/assembly/job.xml
@@ -1,5 +1,5 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
<id>job</id>
@@ -20,12 +20,12 @@
<unpack>false</unpack>
<scope>runtime</scope>
</dependencySet>
<dependencySet>
<useProjectArtifact>false</useProjectArtifact>
<outputDirectory>lib</outputDirectory>
<unpack>false</unpack>
<scope>provided</scope>
</dependencySet>
-->
</dependencySets>
<fileSets>
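
This assembly descriptor produces the self-contained job jar that the Benchmark Makefile picks up as `target/wordcount-1.0-job.jar`. Assuming the usual setup, where the assembly plugin is bound to the `package` phase in the project's pom, building it is just:

```bash
mvn clean package                 # runs the `job` assembly as part of package
ls target/wordcount-1.0-job.jar   # runtime dependencies end up under lib/ inside the jar
```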
4 changes: 2 additions & 2 deletions Benchmark/pig/src/main/bash/run.sh
@@ -9,12 +9,12 @@ for SPLITS in `seq 1 100`;
do
hdfs dfs -rm -R wordcount-${SPLITS}.txt
FILESIZE=$(stat -c%s "${INPUT}")
SPLITSIZE=$(((FILESIZE/SPLITS)+1))

exec 3>&1 4>&2
TIME=$(TIMEFORMAT="%R"; { time pig -param file=${INPUT} -param splits=${SPLITS} -param splitsize=${SPLITSIZE} pig/wordcount.pig 1>&3 2>&4; } 2>&1)
exec 3>&- 4>&-
echo $(date),${SPLITS},${FILESIZE},${SPLITSIZE},${TIME} >> output.txt
done

cat output.txt
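
The `exec 3>&1 4>&2` dance above captures only what the `time` builtin prints while Pig's own stdout and stderr still reach the console. A minimal standalone sketch of the same trick in bash, with `sleep 2` standing in for the Pig run:

```bash
#!/usr/bin/env bash
exec 3>&1 4>&2                                             # stash the real stdout/stderr on fds 3 and 4
TIME=$(TIMEFORMAT="%R"; { time sleep 2 1>&3 2>&4; } 2>&1)  # command output -> real fds; time's report -> captured
exec 3>&- 4>&-                                             # close the stashed fds
echo "elapsed: ${TIME} seconds"                            # e.g. "elapsed: 2.004 seconds"
```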
2 changes: 1 addition & 1 deletion Benchmark/pig/src/main/resources/log4j.properties
@@ -1,6 +1,6 @@
# Root logger option
log4j.rootLogger=WARN, stdout

# Direct log messages to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
4 changes: 2 additions & 2 deletions Benchmark/run.sh
@@ -2,7 +2,7 @@

make wordcount-normal.txt

for I in `seq 1 50` ;
do
make wordcount-${I}.txt
done
2 changes: 1 addition & 1 deletion README-Spark.md
@@ -4,7 +4,7 @@
Thanks to [Nicholas Chammas](https://github.com/nchammas) for contributing this documentation.

# Common problem for Spark users
Apparently the fact that GZipped files are not splittable is a recurring problem in the Spark arena as well, as you can see
in this [Spark Jira ticket](https://issues.apache.org/jira/browse/SPARK-29102?focusedCommentId=16932921&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-16932921) and
these two questions on StackOverflow [Question 1](https://stackoverflow.com/q/28127119/877069) [Question 2](https://stackoverflow.com/q/27531816/877069).
