|
24 | 24 | import java.util.HashMap;
|
25 | 25 | import java.util.List;
|
26 | 26 | import java.util.Map;
|
| 27 | +import java.util.Map.Entry; |
27 | 28 | import java.util.Properties;
|
28 | 29 | import java.util.SortedMap;
|
29 | 30 |
|
30 | 31 | import org.apache.avro.Schema;
|
31 | 32 | import org.apache.hadoop.fs.FileSystem;
|
| 33 | +import org.apache.hadoop.fs.Path; |
32 | 34 | import org.apache.log4j.Logger;
|
33 | 35 |
|
34 | 36 |
|
@@ -80,7 +82,7 @@ public class PartitionCollapsingExecutionPlanner extends ExecutionPlanner
|
80 | 82 | private List<DatePath> _inputsToProcess = new ArrayList<DatePath>();
|
81 | 83 | private List<DatePath> _newInputsToProcess = new ArrayList<DatePath>();
|
82 | 84 | private List<DatePath> _oldInputsToProcess = new ArrayList<DatePath>();
|
83 |
| - private Map<Date,List<DatePath>> _inputsToProcessByDate = new HashMap<Date,List<DatePath>>(); |
| 85 | + private Map<String,String> _latestInputByPath = new HashMap<String,String>(); |
84 | 86 | private DatePath _previousOutputToProcess;
|
85 | 87 | private List<Schema> _inputSchemas = new ArrayList<Schema>();
|
86 | 88 | private Map<String,Schema> _inputSchemasByPath = new HashMap<String,Schema>();
|
@@ -282,17 +284,17 @@ private void determineNumReducers() throws IOException
|
282 | 284 | */
|
283 | 285 | private void determineInputSchemas() throws IOException
|
284 | 286 | {
|
285 |
| - List<Date> dates = new ArrayList<Date>(_inputsToProcessByDate.keySet()); |
286 |
| - if (dates.size() > 0) |
| 287 | + if (_latestInputByPath.size() > 0) |
287 | 288 | {
|
288 |
| - Collections.sort(dates); |
289 |
| - Date lastDate = dates.get(dates.size()-1); |
290 |
| - List<DatePath> lastInputs = _inputsToProcessByDate.get(lastDate); |
291 |
| - for (DatePath input : lastInputs) |
| 289 | + _log.info("Determining input schemas"); |
| 290 | + for (Entry<String,String> entry : _latestInputByPath.entrySet()) |
292 | 291 | {
|
293 |
| - Schema schema = PathUtils.getSchemaFromPath(getFileSystem(),input.getPath()); |
| 292 | + String root = entry.getKey(); |
| 293 | + String input = entry.getValue(); |
| 294 | + _log.info("Loading schema for " + input); |
| 295 | + Schema schema = PathUtils.getSchemaFromPath(getFileSystem(),new Path(input)); |
294 | 296 | _inputSchemas.add(schema);
|
295 |
| - _inputSchemasByPath.put(input.getPath().toString(), schema); |
| 297 | + _inputSchemasByPath.put(root, schema); |
296 | 298 | }
|
297 | 299 | }
|
298 | 300 | }
|
@@ -331,7 +333,7 @@ private void determineInputsToProcess() throws IOException
|
331 | 333 | Calendar cal = Calendar.getInstance(PathUtils.timeZone);
|
332 | 334 |
|
333 | 335 | _inputsToProcess.clear();
|
334 |
| - _inputsToProcessByDate.clear(); |
| 336 | + _latestInputByPath.clear(); |
335 | 337 | _previousOutputToProcess = null;
|
336 | 338 |
|
337 | 339 | DateRange outputDateRange = null;
|
@@ -359,10 +361,11 @@ private void determineInputsToProcess() throws IOException
|
359 | 361 | _log.info(String.format("Input: %s",input.getPath()));
|
360 | 362 | _inputsToProcess.add(input);
|
361 | 363 | _oldInputsToProcess.add(input);
|
| 364 | + |
| 365 | + Path root = PathUtils.getNestedPathRoot(input.getPath()); |
| 366 | + _latestInputByPath.put(root.toString(), input.getPath().toString()); |
362 | 367 | }
|
363 |
| - |
364 |
| - _inputsToProcessByDate.put(currentDate, inputs); |
365 |
| - |
| 368 | + |
366 | 369 | cal.setTime(currentDate);
|
367 | 370 | cal.add(Calendar.DAY_OF_MONTH, 1);
|
368 | 371 | currentDate = cal.getTime();
|
@@ -414,10 +417,11 @@ private void determineInputsToProcess() throws IOException
|
414 | 417 | _log.info(String.format("Input: %s",input.getPath()));
|
415 | 418 | _inputsToProcess.add(input);
|
416 | 419 | _newInputsToProcess.add(input);
|
| 420 | + |
| 421 | + Path root = PathUtils.getNestedPathRoot(input.getPath()); |
| 422 | + _latestInputByPath.put(root.toString(), input.getPath().toString()); |
417 | 423 | }
|
418 |
| - |
419 |
| - _inputsToProcessByDate.put(currentDate, inputs); |
420 |
| - |
| 424 | + |
421 | 425 | newDataCount++;
|
422 | 426 | }
|
423 | 427 | }
|
|
0 commit comments