Skip to content

Commit

Permalink
Merge pull request #58 from ben-xo/feature/respect-changes-other-than…
Browse files Browse the repository at this point in the history
…-most-recently-modified

Feature/respect changes other than most recently modified
  • Loading branch information
ben-xo authored May 25, 2022
2 parents 22d6115 + 4584ca0 commit 7f7d169
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 28 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@ Changelog
=========

1.33 unreleased * README fixed by @denilsonsa - thanks!
* Caching algorithm changed so that it is no longer dependent
on added media files having a newer file date (GitHub #51),
and will look for any changes in filesize or date, as well
as addition or removal of files. This means that dir2cast is
now much more friendly to use cases for local media collections.

1.32 2022-04-18 * Upgrade getID3 (includes important security fix for PHP 8+)

Expand Down
85 changes: 77 additions & 8 deletions dir2cast.php
Original file line number Diff line number Diff line change
Expand Up @@ -771,6 +771,29 @@ public function getModificationTime()
}
return max($mtimes);
}

/**
 * Combined size, in bytes, of this item's media file and any companion files.
 *
 * Starts from getFileSize() and adds the size of each of the following
 * files that exists on disk: the jpg and png images named by
 * getImageFilename(), "<name>.txt" and "<name>_subtitle.txt", where <name>
 * is the media file's path with its extension removed.
 *
 * @return int|float sum of the sizes, as produced by array_sum()
 */
public function getTotalFileSize()
{
    $byte_counts = array(
        $this->getFileSize(),
    );

    // Path of the media file without its extension; the text companions hang off this.
    $media_path = $this->getFilename();
    $sidecar_base = dirname($media_path) . '/' . basename($media_path, '.' . $this->getExtension());

    $candidates = array(
        $this->getImageFilename('jpg'),
        $this->getImageFilename('png'),
        $sidecar_base . '.txt',
        $sidecar_base . '_subtitle.txt',
    );

    foreach ($candidates as $candidate) {
        if (!file_exists($candidate)) {
            // companion files are optional
            continue;
        }
        $byte_counts[] = filesize($candidate);
    }

    return array_sum($byte_counts);
}
}

class Media_RSS_Item extends RSS_File_Item implements Serializable {
Expand Down Expand Up @@ -1095,6 +1118,10 @@ class Dir_Podcast extends Podcast
protected $scanned = false;
protected $unsorted_items = array();
protected $max_mtime = 0;
protected $item_hash_list = array();
protected $item_hash;

protected $clock_offset = 0;

/**
* Constructor
Expand Down Expand Up @@ -1145,6 +1172,8 @@ protected function scan()
if(self::$EMPTY_PODCAST_IS_ERROR && 0 == $item_count)
throw new Exception("No Items found in {$this->source_dir}");

$this->calculateItemHash();

$this->scanned = true;
$this->post_scan();
$this->sort();
Expand Down Expand Up @@ -1199,6 +1228,22 @@ public function getMaxMtime()
return $this->max_mtime;
}

/**
 * Computes the item hash from the collected item hash list and stores it
 * in $this->item_hash.
 *
 * The list is sorted in place first, so the resulting hash does not depend
 * on the order in which entries were appended.
 */
public function calculateItemHash()
{
    sort($this->item_hash_list);

    // One entry per line, then fingerprint the whole listing.
    $listing = implode("\n", $this->item_hash_list);
    $this->item_hash = md5($listing);
}

/**
 * Returns the item hash stored by calculateItemHash().
 *
 * @return string|null md5 hex digest, or null if calculateItemHash() has not been called yet
 */
public function getItemHash()
{
return $this->item_hash;
}

/**
 * Sets the offset that addRssFileItem() adds to each item's modification
 * time when it builds the item hash list. Per the comment there, the clock
 * offset is only used in testing.
 *
 * @param int $offset amount added to the modification time
 *                    (NOTE(review): presumably seconds, to match file mtimes - confirm)
 */
public function setClockOffset($offset)
{
$this->clock_offset = $offset;
}

/**
* Adds file to ->unsorted_items, and updates ->max_mtime
*
Expand All @@ -1210,17 +1255,20 @@ protected function addRssFileItem(RSS_File_Item $the_item)
if($the_item->getFileSize())
{
$filemtime_media_only = $the_item->getFileTimestamp();
$filemtime_inclusive = $the_item->getModificationTime();

if((self::$MIN_FILE_AGE > 0) && $filemtime_media_only > (time() - self::$MIN_FILE_AGE))
{
// don't add files which are so new that they may still be being uploaded
return;
}

$filemtime_inclusive = $the_item->getModificationTime();

// one array per mtime, just in case several MP3s share the same mtime.
$this->unsorted_items[$filemtime_media_only][] = $the_item;
$this->updateMaxMtime($filemtime_inclusive, $the_item->getFilename());
$this->unsorted_items[$filemtime_media_only][] = $the_item;
$hashlist_mtime = $filemtime_inclusive + $this->clock_offset; // clock offset is just used in testing.
$this->item_hash_list[] = "{$hashlist_mtime}:{$the_item->getTotalFileSize()}";
}
}

Expand Down Expand Up @@ -1250,7 +1298,7 @@ protected function sort() {
}
}

unset($this->unsorted_items);
unset($this->unsorted_items);
}

protected function pre_scan() { }
Expand All @@ -1267,6 +1315,7 @@ class Cached_Dir_Podcast extends Dir_Podcast
{
protected $temp_dir;
protected $temp_file;
protected $item_hash_file;
protected $cache_date;
protected $serve_from_cache;

Expand All @@ -1283,10 +1332,11 @@ class Cached_Dir_Podcast extends Dir_Podcast
public function __construct($source_dir, $temp_dir)
{
$this->temp_dir = $temp_dir;
$safe_source_dir = str_replace(array('/', '\\'), '_', $source_dir);
$safe_source_name = preg_replace('/[^\w]/', '_', dirname($source_dir) . '/' . basename($source_dir) );

// something unique, safe, stable and easily identifiable
$this->temp_file = rtrim($temp_dir, '/') . '/' . md5($source_dir) . '_' . $safe_source_dir . '.xml';
$this->temp_file = rtrim($temp_dir, '/') . '/' . md5($source_dir) . '_' . $safe_source_name . '.xml';
$this->item_hash_file = rtrim($temp_dir, '/') . '/' . md5($source_dir) . '_' . $safe_source_name . '__item_hash.txt';

parent::__construct($source_dir);
}
Expand All @@ -1308,15 +1358,24 @@ public function init()
{
self::$DEBUG && print("Cache file is older than " . self::$MIN_CACHE_TIME . " seconds\n");

$this->scan(); // sets $this->max_mtime
$previous_item_hash = "";
if(file_exists($this->item_hash_file))
$previous_item_hash = file_get_contents($this->item_hash_file);

$this->scan(); // sets $this->max_mtime and $this->item_hash
if( $this->cache_is_stale($cache_date, $this->max_mtime) )
{
self::$DEBUG && print("Cache is stale (cache file mtime: $cache_date, max mtime: {$this->max_mtime}). Uncaching\n");
$this->uncache();
}
elseif( $previous_item_hash != $this->item_hash )
{
self::$DEBUG && print("Cache has changed (before: $previous_item_hash, after: {$this->item_hash}). Uncaching\n");
$this->uncache();
}
else
{
self::$DEBUG && print("Cache is not stale (cache file mtime: $cache_date, max mtime: {$this->max_mtime}). Renewing\n");
self::$DEBUG && print("Cache is not stale (cache file mtime: $cache_date, max mtime: {$this->max_mtime} and previous hash {$previous_item_hash} and hash {$this->item_hash}). Renewing\n");
$this->renew();
}
}
Expand Down Expand Up @@ -1384,6 +1443,7 @@ public function generate()
{
$output = parent::generate();
file_put_contents($this->temp_file, $output); // save cached copy
file_put_contents($this->item_hash_file, $this->item_hash);
$this->serve_from_cache = true;
}

Expand Down Expand Up @@ -1586,7 +1646,7 @@ public static function bootstrap(array $SERVER, array $GET, array $argv)
define('INI_FILE', $ini_file_name);
}

$cli_options = getopt('', array('help', 'media-dir::', 'media-url::', 'output::', 'dont-uncache', 'min-file-age::', 'debug', 'ignore-dir2cast-mtime'));
$cli_options = getopt('', array('help', 'media-dir::', 'media-url::', 'output::', 'dont-uncache', 'min-file-age::', 'debug', 'ignore-dir2cast-mtime', 'clock-offset::'));
if($cli_options) {
if(isset($cli_options['help'])) {
print "Usage: php dir2cast.php [--help] [--media-dir=MP3_DIR] [--media-url=MP3_URL] [--output=OUTPUT_FILE]\n";
Expand All @@ -1596,6 +1656,7 @@ public static function bootstrap(array $SERVER, array $GET, array $argv)
// [--min-file-age=MIN_FILE_AGE]
// [--debug]
// [--ignore-dir2cast-mtime]
// [--clock-offset=CLOCK_OFFSET]

exit;
}
Expand Down Expand Up @@ -1627,6 +1688,10 @@ public static function bootstrap(array $SERVER, array $GET, array $argv)
{
define('IGNORE_DIR2CAST_MTIME', true);
}
if(!defined('CLOCK_OFFSET') && isset($cli_options['clock-offset']))
{
define('CLOCK_OFFSET', (int)$cli_options['clock-offset']);
}
}

if(!defined('MIN_CACHE_TIME'))
Expand Down Expand Up @@ -1833,6 +1898,9 @@ public static function defaults(array $SERVER)
if(!defined('DEBUG'))
define('DEBUG', false);

if(!defined('CLOCK_OFFSET'))
define('CLOCK_OFFSET', 0);

// Set up factory settings for Podcast subclasses
Dir_Podcast::$EMPTY_PODCAST_IS_ERROR = !defined('CLI_ONLY') || !CLI_ONLY;
Dir_Podcast::$RECURSIVE_DIRECTORY_ITERATOR = RECURSIVE_DIRECTORY_ITERATOR;
Expand Down Expand Up @@ -2069,6 +2137,7 @@ function main($args)
);

$podcast = new Locking_Cached_Dir_Podcast(MP3_DIR, TMP_DIR);
$podcast->setClockOffset(CLOCK_OFFSET);
$dispatcher = new Dispatcher($podcast);

$dispatcher->uncache_if_forced(FORCE_PASSWORD, $_GET);
Expand Down
37 changes: 30 additions & 7 deletions test/Cached_Dir_PodcastTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ public static function setUpBeforeClass(): void
{
Dir_PodcastTest::setUpBeforeClass();
Cached_Dir_Podcast::$MIN_CACHE_TIME = 5;
Cached_Dir_Podcast::$DEBUG = false;
}

public function setUp(): void
Expand All @@ -17,9 +18,10 @@ public function setUp(): void
mkdir('temp');
}

public function newPodcast()
public function newPodcast($offset=0)
{
$podcast = new Cached_Dir_Podcast('.', './temp');
$podcast->setClockOffset($offset);
$podcast->init();
return $podcast;
}
Expand Down Expand Up @@ -49,7 +51,7 @@ public function test_uses_generated_cache_file_if_min_time_not_elapsed_yet()
// this should be ignored
file_put_contents('extra.mp3', 'new data');

$mp2 = $this->newPodcast();
$mp2 = $this->newPodcast(2);
$content2 = $mp2->generate();

// should not pick up extra.mp3 as the cache file isn't old enough
Expand All @@ -71,14 +73,36 @@ public function test_does_not_use_generated_cache_file_if_min_time_has_elapsed_a
// this should be considered
file_put_contents('extra.mp3', 'new data');

$mp2 = $this->newPodcast();
$mp2 = $this->newPodcast(10);
$content2 = $mp2->generate();

// should pick up extra.mp3 as the cache file is older than the min, and there's new content
$this->assertNotEquals($content, $content2);
$this->assertEquals(1, preg_match('/extra\.mp3/', $content2));
}

/**
 * A media file added after the cache was generated must appear in the
 * regenerated feed even when its mtime is older than the cache file.
 */
public function test_does_not_use_generated_cache_file_if_min_time_has_elapsed_and_theres_additional_old_content()
{
$this->createTestItems();
age_dir_by('.', 3600);

// first run: generates the feed that later runs may serve from cache
$mp = $this->newPodcast();
$content = $mp->generate();
unset($mp); // release lock, in sub tests

// NOTE(review): presumably pushes the cache file past MIN_CACHE_TIME (set to 5 in setUpBeforeClass) - confirm against age_dir_by()
age_dir_by('.', 10);

// this should be considered, even though it is back-dated below
file_put_contents('extra.mp3', 'new data');
touch('extra.mp3', time() - 86400); // give the added file an mtime one day in the past

$mp2 = $this->newPodcast(10);
$content2 = $mp2->generate();

// should pick up extra.mp3 even though its mtime is a day old: the set of items has changed
$this->assertNotEquals($content, $content2);
$this->assertEquals(1, preg_match('/extra\.mp3/', $content2));
}

public function test_renews_cache_if_old_but_not_stale()
{
Expand All @@ -91,8 +115,7 @@ public function test_renews_cache_if_old_but_not_stale()

age_dir_by('.', 3600);

$mp2 = $this->newPodcast();

$mp2 = $this->newPodcast(3600);
$content2 = $mp2->generate();

// should have used cache file anyway
Expand Down Expand Up @@ -126,7 +149,7 @@ public function test_lastBuildDate_is_valid_whether_served_from_cache_or_not()

age_dir_by('.', 3600);

$mp2 = $this->newPodcast();
$mp2 = $this->newPodcast(3600);
$this->assertTrue($mp2->isCached());
$mp2->generate();
clearstatcache();
Expand All @@ -138,7 +161,7 @@ public function test_lastBuildDate_is_valid_whether_served_from_cache_or_not()
clearstatcache();
age_dir_by('.', 3600);
sleep(1); // not much choice here!
$mp3 = $this->newPodcast();
$mp3 = $this->newPodcast(3600+3600);
$mp3->generate();

clearstatcache();
Expand Down
18 changes: 9 additions & 9 deletions test/CachingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public function test_default_empty_podcast_doesnt_regenerate_in_first_MIN_CACHE_
clearstatcache();
$cached_mtime_before = filemtime($cached_output_files[0]);

exec('php dir2cast.php --output=out.xml --dont-uncache --ignore-dir2cast-mtime', $new_output, $this->returncode);
exec('php dir2cast.php --output=out.xml --dont-uncache --ignore-dir2cast-mtime --clock-offset=7', $new_output, $this->returncode);

clearstatcache();
$cached_output_files = glob(temp_xml_glob());
Expand Down Expand Up @@ -71,7 +71,7 @@ public function test_default_empty_podcast_renews_cache_file_mtime_after_MIN_CAC
clearstatcache();
$cached_mtime_before = filemtime($cached_output_files[0]);

exec('php dir2cast.php --output=out.xml --dont-uncache --ignore-dir2cast-mtime', $new_output, $this->returncode);
exec('php dir2cast.php --output=out.xml --dont-uncache --ignore-dir2cast-mtime --clock-offset=7', $new_output, $this->returncode);

clearstatcache();
$cached_output_files = glob(temp_xml_glob());
Expand Down Expand Up @@ -104,7 +104,7 @@ public function test_default_empty_podcast_doesnt_regenerate_before_MIN_CACHE_TI
clearstatcache();
$cached_mtime_before = filemtime($cached_output_files[0]);

exec('php dir2cast.php --output=out.xml --dont-uncache --ignore-dir2cast-mtime', $new_output, $this->returncode);
exec('php dir2cast.php --output=out.xml --dont-uncache --ignore-dir2cast-mtime --clock-offset=7', $new_output, $this->returncode);

clearstatcache();
$cached_output_files = glob(temp_xml_glob());
Expand Down Expand Up @@ -138,7 +138,7 @@ public function test_default_empty_podcast_regenerates_after_MIN_CACHE_TIME_with
clearstatcache();
$cached_mtime_before = filemtime($cached_output_files[0]);

exec('php dir2cast.php --output=out.xml --dont-uncache --ignore-dir2cast-mtime', $new_output, $this->returncode);
exec('php dir2cast.php --output=out.xml --dont-uncache --ignore-dir2cast-mtime --clock-offset=90', $new_output, $this->returncode);

clearstatcache();
$cached_output_files = glob(temp_xml_glob());
Expand Down Expand Up @@ -168,7 +168,7 @@ public function test_default_empty_podcast_obeys_minimum_cache_time_not_elapsed(

// --dont-uncache: tells dir2cast to use the default caching rules, rather than ignore them because it is running from the CLI
// --min-file-age=0 : tells dir2cast to include files that are brand new
exec('php dir2cast.php --output=out.xml --dont-uncache --min-file-age=0 --ignore-dir2cast-mtime', $this->output, $this->returncode);
exec('php dir2cast.php --output=out.xml --dont-uncache --min-file-age=0 --ignore-dir2cast-mtime --clock-offset=2', $this->output, $this->returncode);

$new_content = file_get_contents($this->file);
$this->assertEquals($this->content, $new_content);
Expand All @@ -184,7 +184,7 @@ public function test_default_empty_podcast_obeys_minimum_cache_time_elapsed(): v

// --dont-uncache: tells dir2cast to use the default caching rules, rather than ignore them because it is running from the CLI
// --min-file-age=0 : tells dir2cast to include files that are brand new
exec('php dir2cast.php --output=out.xml --dont-uncache --min-file-age=0 --ignore-dir2cast-mtime', $this->output, $this->returncode);
exec('php dir2cast.php --output=out.xml --dont-uncache --min-file-age=0 --ignore-dir2cast-mtime --clock-offset=3600', $this->output, $this->returncode);

$new_content = file_get_contents($this->file);
$this->assertNotEquals($this->content, $new_content);
Expand All @@ -198,7 +198,7 @@ public function test_default_empty_podcast_uncaches_without_dont_uncache_even_if
// too new to bust the cache, but cli runner uncaches anyway
file_put_contents('empty.mp3', 'test');

exec('php dir2cast.php --output=out.xml --min-file-age=0 --ignore-dir2cast-mtime', $this->output, $this->returncode);
exec('php dir2cast.php --output=out.xml --min-file-age=0 --ignore-dir2cast-mtime --clock-offset=2', $this->output, $this->returncode);

$new_content = file_get_contents($this->file);
$this->assertNotEquals($this->content, $new_content);
Expand All @@ -217,7 +217,7 @@ public function test_expired_podcast_is_regenerated(): void
file_put_contents('empty.mp3', 'test');
touch('empty.mp3', time()-3600); // busts cache as older than min-file-age

exec('php dir2cast.php --output=out.xml --dont-uncache --min-file-age=30 --ignore-dir2cast-mtime');
exec('php dir2cast.php --output=out.xml --dont-uncache --min-file-age=30 --ignore-dir2cast-mtime --clock-offset=86400');
$new_content = file_get_contents($this->file); // should have empty.mp3
$this->assertNotEquals($this->content, $new_content);
$this->assertEquals(1, preg_match('/empty\.mp3/', $new_content));
Expand Down Expand Up @@ -246,7 +246,7 @@ public function test_too_new_file_not_included_in_podcast(): void
// (which it shouldn't be!)
sleep(1);

exec('php dir2cast.php --output=out.xml --dont-uncache --min-file-age=30 --ignore-dir2cast-mtime');
exec('php dir2cast.php --output=out.xml --dont-uncache --min-file-age=30 --ignore-dir2cast-mtime --clock-offset=86400');

$new_content = file_get_contents($this->file); // should not have empty.mp3

Expand Down
Loading

0 comments on commit 7f7d169

Please sign in to comment.