diff --git a/ChangeLog.txt b/ChangeLog.txt new file mode 100644 index 0000000..91eef4a --- /dev/null +++ b/ChangeLog.txt @@ -0,0 +1,13 @@ +videosort-2.0: + - new options "TvCategories", "OtherTvDir" and "OtherTvFormat" for TV shows, whose file names look like movies (neither series nor dated shows); + - new format specifiers "%y", "%decade", "%0decade" for seasoned TV shows; + - added support for multi episode file names (example: My.Show.S01E02-03.mkv); + - new option "EpisodeSeparator" to adjust formatting of multi episode file names; + - added support for DNZB-Header "X-DNZB-UseNZBName"; + - added printing info-message when skipping small files; + - added using of command "[NZB] FINALDIR" to inform NZBGet about new files location; this path is then shown in the history dialog as "Destination"; + - destination directories (options "MoviesDir", etc.) can be left empty to use global "DestDir" or "CategoryX.DestDir" as destination; + - updated guessit-library to the newest release - this fixes several issues. + +videosort-1.0: + - initial release. \ No newline at end of file diff --git a/VideoSort.py b/VideoSort.py index ac842a0..6be9233 100755 --- a/VideoSort.py +++ b/VideoSort.py @@ -36,22 +36,48 @@ # Author: Andrey Prygunkov (nzbget@gmail.com). # Web-site: http://nzbget.sourceforge.net/VideoSort. # License: GPLv3 (http://www.gnu.org/licenses/gpl.html). -# PP-Script Version: 1.0. +# PP-Script Version: 2.0. # -# NOTE: This script requires Python to be installed on your system. +# NOTE: This script requires Python 2.x to be installed on your system. ############################################################################## ### OPTIONS ### # Destination directory for movies. +# +# The option can be left empty to use global DestDir or CategoryX.DestDir +# as destination. #MoviesDir=${DestDir}/movies # Destination directory for seasoned TV shows. +# +# The option can be left empty to use global DestDir or CategoryX.DestDir +# as destination. 
#SeriesDir=${DestDir}/series # Destination directory for dated TV shows. +# +# The option can be left empty to use global DestDir or CategoryX.DestDir +# as destination. #DatedDir=${DestDir}/tv +# Destination directory for other TV shows. +# +# The option can be left empty to use global DestDir or CategoryX.DestDir +# as destination. +#OtherTvDir=${DestDir}/tv + +# List of TV categories. +# +# Comma separated list of categories for TV. VideoSort automatically +# distinguishes movies from series and dated TV shows. But it needs help +# to distinguish movies from other TV shows because they are named +# using the same conventions. If a download has an associated category listed in +# this option, VideoSort uses this information. +# +# Category names must match categories defined in NZBGet. +#TvCategories=tv + # File extensions for video files. # # Only files with these extensions are processed. Extensions must @@ -93,7 +119,7 @@ # %qah - audio channels (5.1); # %qrg - release group; # {{text}} - uppercase the text; -# {TEXT} - lowercase the text; +# {TEXT} - lowercase the text. #MoviesFormat=%t (%y) # Formatting rules for seasoned TV shows. @@ -108,10 +134,23 @@ # %0e - two-digits episode number (01, 02); # %en, %e.n, %e_n - episode name (case-adjusted); # %eN, %e.N, %e_N - episode name (original letter case); +# %y - year; +# %decade - two-digits decade (90, 00, 10); +# %0decade - four-digits decade (1990, 2000, 2010). # # For a list of common specifiers see option . #SeriesFormat=%sn/Season %s/%sn - S%0sE%0e - %en +# Separator for multi episodes. +# +# The option is used for seasoned TV shows when a video file includes +# multiple episodes, e.g. "My.Show.S01E02-03.mkv". The option defines +# a character (or a string) which must be inserted between episode +# numbers. For example, if "EpisodeSeparator=E", the specifier "%0e" +# will expand to "02E03". Given the formatting string "%sN - S%0sE%0e" the +# resulting filename will be "My Show - S01E02E03.mkv". 
+#EpisodeSeparator=E + # Formatting rules for dated TV shows. # # Specifiers: @@ -124,11 +163,16 @@ # %m - month (1-12); # %0m - two-digits month (01-12); # %d - day (1-31); -# %0d - two-digits day (01-31); +# %0d - two-digits day (01-31). # # For a list of common specifiers see option . #DatedFormat=%sn/%sn - %y-%0m-%0d +# Formatting rules for other TV shows. +# +# All specifiers are same as in option . +#OtherTvFormat=%t + # List of words to keep in lower case. # # This option has effect on "case-adjusted"-specifiers. @@ -192,7 +236,7 @@ # Check if directory still exist (for post-process again) if not os.path.exists(os.environ['NZBPP_DIRECTORY']): - print('[INFO] Destination directory doesn\'t exist, exiting') + print('[INFO] Destination directory %s doesn\'t exist, exiting' % os.environ['NZBPP_DIRECTORY']) sys.exit(POSTPROCESS_NONE) # Check par and unpack status for errors @@ -201,23 +245,27 @@ sys.exit(POSTPROCESS_NONE) # Check if all required script config options are present in config file -required_options = ('NZBPO_MOVIESDIR', 'NZBPO_SERIESDIR', 'NZBPO_DATEDDIR', 'NZBPO_VIDEOEXTENSIONS', - 'NZBPO_SATELLITEEXTENSIONS', 'NZBPO_MINSIZE', 'NZBPO_MOVIESFORMAT', 'NZBPO_SERIESFORMAT', - 'NZBPO_DATEDFORMAT', 'NZBPO_OVERWRITE', 'NZBPO_CLEANUP', 'NZBPO_LOWERWORDS', 'NZBPO_UPPERWORDS', - 'NZBPO_PREVIEW', 'NZBPO_VERBOSE') +required_options = ('NZBPO_MoviesDir', 'NZBPO_SeriesDir', 'NZBPO_DatedDir', + 'NZBPO_OtherTvDir', 'NZBPO_VideoExtensions', 'NZBPO_SatelliteExtensions', 'NZBPO_MinSize', + 'NZBPO_MoviesFormat', 'NZBPO_SeriesFormat', 'NZBPO_OtherTvFormat', 'NZBPO_DatedFormat', + 'NZBPO_EpisodeSeparator', 'NZBPO_Overwrite', 'NZBPO_Cleanup', 'NZBPO_LowerWords', 'NZBPO_UpperWords', + 'NZBPO_TvCategories', 'NZBPO_Preview', 'NZBPO_Verbose') for optname in required_options: - if (not optname in os.environ): + if (not optname.upper() in os.environ): print('[ERROR] Option %s is missing in configuration file. 
Please check script settings' % optname[6:]) sys.exit(POSTPROCESS_ERROR) # Init script config options download_dir=os.environ['NZBPP_DIRECTORY'] +movies_format=os.environ['NZBPO_MOVIESFORMAT'] series_format=os.environ['NZBPO_SERIESFORMAT'] dated_format=os.environ['NZBPO_DATEDFORMAT'] -movies_format=os.environ['NZBPO_MOVIESFORMAT'] +othertv_format=os.environ['NZBPO_OTHERTVFORMAT'] +episode_separator=os.environ['NZBPO_EPISODESEPARATOR'] movies_dir=os.environ['NZBPO_MOVIESDIR'] series_dir=os.environ['NZBPO_SERIESDIR'] dated_dir=os.environ['NZBPO_DATEDDIR'] +othertv_dir=os.environ['NZBPO_OTHERTVDIR'] video_extensions=os.environ['NZBPO_VIDEOEXTENSIONS'].split(',') satellite_extensions=os.environ['NZBPO_SATELLITEEXTENSIONS'].split(',') min_size=int(os.environ['NZBPO_MINSIZE']) @@ -230,9 +278,21 @@ lower_words=os.environ['NZBPO_LOWERWORDS'].replace(' ', '').split(',') upper_words=os.environ['NZBPO_UPPERWORDS'].replace(' ', '').split(',') +tv_categories=os.environ['NZBPO_TVCATEGORIES'].lower().split(',') +category=os.environ.get('NZBPP_CATEGORY', ''); +force_tv=category.lower() in tv_categories + +force_nzbname=os.environ.get('NZBPR__DNZB_USENZBNAME', '').lower() == 'yes' + if preview: print('[WARNING] *** PREVIEW MODE ON - NO CHANGES TO FILE SYSTEM ***') +if verbose and force_nzbname: + print('[INFO] Forcing use of nzb-name (X-DNZB-UseNZBName)') + +if verbose and force_tv: + print('[INFO] Forcing TV sorting (category: %s)' % category) + # List of moved files (source path) moved_src_files = [] @@ -518,16 +578,6 @@ def strip_all(x): # END * From SABnzbd+ * END -def guess_hacks(filename, guess): - """ fix some strange guessit guessing: - if guessit doesn't find a year in the file name it thinks it is episode, - but we prefer it to be handled as movie instead - """ - if guess.get('type') == 'episode' and not guess.get('episodeNumber'): - guess['type'] = 'movie' - guess['title'] = guess.get('series') - guess['year'] = '1900' - def add_common_mapping(old_filename, guess, 
mapping): # Original dir name, file name and extension @@ -585,9 +635,31 @@ def add_series_mapping(guess, mapping): mapping.append(('%e_N', '')) # episode number - episode_num = str(guess.get('episodeNumber', '')) - mapping.append(('%e', episode_num)) - mapping.append(('%0e', episode_num.rjust(2,'0'))) + if guess.get('episodeList') == None: + episode_num = str(guess.get('episodeNumber', '')) + mapping.append(('%e', episode_num)) + mapping.append(('%0e', episode_num.rjust(2,'0'))) + else: + # multi episodes + episode_num_all = '' + episode_num_just = '' + for episode_num in guess.get('episodeList'): + ep_prefix = episode_separator if episode_num_all <> '' else '' + episode_num_all += ep_prefix + str(episode_num) + episode_num_just += ep_prefix + str(episode_num).rjust(2,'0') + + mapping.append(('%e', episode_num_all)) + mapping.append(('%0e', episode_num_just)) + + # year + year = str(guess.get('year', '')) + mapping.append(('%y', year)) + + # decades + decade, decade_two = get_decades(year) + mapping.append(('%decade', decade)) + mapping.append(('%0decade', decade_two)) + def add_movies_mapping(guess, mapping): @@ -672,45 +744,81 @@ def add_dated_mapping(guess, mapping): mapping.append(('%d', day)) mapping.append(('%0d', day.rjust(2, '0'))) -def construct_path(filename): - """ Parses the filename and generates new name for renaming """ +def guess_info(filename): + """ Parses the filename using guessit-library """ - if verbose: - print("filename: %s" % filename) + if force_nzbname: + guessfilename = os.path.join(os.path.dirname(filename), os.path.basename(download_dir)) + os.path.splitext(filename)[1] + else: + guessfilename = filename - guess = guessit.guess_file_info(filename, filetype = 'autodetect', info = ['filename']) + guess = guessit.guess_file_info(guessfilename, filetype = 'autodetect', info = ['filename']) if verbose: + print('Guessing: %s' % guessfilename) print(guess.nice_string()) - type = guess.get('type') + # fix some strange guessit guessing: + # if 
guessit doesn't find a year in the file name it thinks it is episode, + # but we prefer it to be handled as movie instead + if guess.get('type') == 'episode' and guess.get('episodeNumber', '') == '': + guess['type'] = 'movie' + guess['title'] = guess.get('series') + guess['year'] = '1900' + if verbose: + print(guess.nice_string()) - mapping = [] + if guess['type'] == 'movie': + date = guess.get('date') + if date: + guess['vtype'] = 'dated' + elif force_tv: + guess['vtype'] = 'othertv' + else: + guess['vtype'] = 'movie' + elif guess['type'] == 'episode': + guess['vtype'] = 'series' + + if verbose: + print('Type: %s' % guess['vtype']) - # fix some strange guessit guessing: - guess_hacks(filename, guess) + return guess + +def construct_path(filename): + """ Parses the filename and generates new name for renaming """ + + if verbose: + print("filename: %s" % filename) + guess = guess_info(filename); + type = guess.get('vtype') + mapping = [] add_common_mapping(filename, guess, mapping) if type == 'movie': - date = guess.get('date') - if date: - dest_dir = dated_dir - format = dated_format - add_dated_mapping(guess, mapping) - else: - dest_dir = movies_dir - format = movies_format - add_movies_mapping(guess, mapping) - elif type == 'episode': + dest_dir = movies_dir + format = movies_format + add_movies_mapping(guess, mapping) + elif type == 'series': dest_dir = series_dir format = series_format add_series_mapping(guess, mapping) + elif type == 'dated': + dest_dir = dated_dir + format = dated_format + add_dated_mapping(guess, mapping) + elif type == 'othertv': + dest_dir = othertv_dir + format = othertv_format + add_movies_mapping(guess, mapping) else: if verbose: print('Could not determine video type for %s' % filename) return None + if dest_dir == '': + dest_dir = os.path.dirname(download_dir) + # Find out a char most suitable as dupe_separator guess_dupe_separator(format) @@ -776,7 +884,9 @@ def construct_path(filename): if ext not in video_extensions: continue # 
Check minimum file size - if os.path.getsize(old_path) < min_size: continue + if os.path.getsize(old_path) < min_size: + print('[INFO] Skipping small: %s' % old_filename) + continue # This is our video file, we should process it new_path = construct_path(old_path) @@ -794,8 +904,20 @@ def construct_path(filename): errors = True print('[ERROR] Failed: %s' % old_filename) print('[ERROR] %s' % e) - if verbose: - traceback.print_exc() + traceback.print_exc() + +# Inform NZBGet about new destination path +finaldir = '' +uniquedirs = [] +for filename in moved_dst_files: + dir = os.path.dirname(filename) + if dir not in uniquedirs: + uniquedirs.append(dir) + finaldir += '|' if finaldir != '' else '' + finaldir += dir + +if finaldir != '': + print('[NZB] FINALDIR=%s' % finaldir) # Cleanup if: # 1) files were moved AND diff --git a/lib/guessit/ISO-3166-1_utf8.txt b/lib/guessit/ISO-3166-1_utf8.txt old mode 100755 new mode 100644 diff --git a/lib/guessit/ISO-639-2_utf-8.txt b/lib/guessit/ISO-639-2_utf-8.txt old mode 100755 new mode 100644 diff --git a/lib/guessit/NEWS.rst b/lib/guessit/NEWS.rst deleted file mode 100755 index 9af3e51..0000000 --- a/lib/guessit/NEWS.rst +++ /dev/null @@ -1,158 +0,0 @@ -.. This is your project NEWS file which will contain the release notes. -.. Example: http://www.python.org/download/releases/2.6/NEWS.txt -.. The content of this file, along with README.rst, will appear in your -.. project's PyPI page. 
- -News -==== - -0.5.4 ------ - -*Release date: 11-Feb-2013* - -* guessit can be installed as a system wide script (thanks @dplarson) -* Enhanced logging facilities -* Fixes for episode number and country detection - - -0.5.3 ------ - -*Release date: 1-Nov-2012* - -* GuessIt can now optionally act as a wrapper around the 'guess-language' python - module, and thus provide detection of the natural language in which a body of - text is written - -* Lots of fixes everywhere, mostly for properties and release group detection - - -0.5.2 ------ - -*Release date: 2-Oct-2012* - -* Much improved auto-detection of filetype -* Fixed some issues with the detection of release groups - - -0.5.1 ------ - -*Release date: 23-Sep-2012* - -* now detects 'country' property; also detect 'year' property for series -* more patterns and bugfixes - - -0.5 ---- - -*Release date: 29-Jul-2012* - -* Python3 compatibility -* the usual assortment of bugfixes - - -0.4.2 ------ - -*Release date: 19-May-2012* - -* added Language.tmdb language code property for TheMovieDB -* added ability to recognize list of episodes -* bugfixes for Language.__nonzero__ and episode regexps - - -0.4.1 ------ - -*Release date: 12-May-2012* - -* bugfixes for unicode, paths on Windows, autodetection, and language issues - - -0.4 ---- - -*Release date: 28-Apr-2012* - -* much improved language detection, now also detect language variants -* supports more video filetypes (thanks to Rob McMullen) - - -0.3.1 ------ - -*Release date: 15-Mar-2012* - -* fixed package installation from PyPI -* better imports for the transformations (thanks Diaoul!) 
-* some small language fixes - -0.3 ---- - -*Release date: 12-Mar-2012* - -* fix to recognize 1080p format (thanks to Jonathan Lauwers) - -0.3b2 ------ - -*Release date: 2-Mar-2012* - -* fixed the package installation - -0.3b1 ------ - -*Release date: 1-Mar-2012* - -* refactored quite a bit, code is much cleaner now -* fixed quite a few tests -* re-vamped the documentation, wrote some more - -0.2 ---- - -*Release date: 27-May-2011* - -* new parser/matcher completely replaced the old one -* quite a few more unittests and fixes - - -0.2b1 ------ - -*Release date: 20-May-2011* - -* brand new parser/matcher that is much more flexible and powerful -* lots of cleaning and a bunch of unittests - - -0.1 ---- - -*Release date: 10-May-2011* - -* fixed a few minor issues & heuristics - - -0.1b2 ------ - -*Release date: 12-Mar-2011* - -* Added PyPI trove classifiers -* fixed version number in setup.py - - -0.1b1 ------ - -*Release date: 12-Mar-2011* - -* first pre-release version; imported from Smewt with a few enhancements already - in there. diff --git a/lib/guessit/README.rst b/lib/guessit/README.rst deleted file mode 100755 index c959415..0000000 --- a/lib/guessit/README.rst +++ /dev/null @@ -1,102 +0,0 @@ -Guessit -======= - -.. image:: https://secure.travis-ci.org/wackou/guessit.png?branch=master - -GuessIt is a python library that tries to extract as much information as -possible from a video file. - -It has a very powerful filename matcher that allows to guess a lot of -metadata from a video using only its filename. This matcher works with -both movies and tv shows episodes. 
- -For example, GuessIt can do the following:: - - $ python guessit.py "Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi" - For: Treme.1x03.Right.Place,.Wrong.Time.HDTV.XviD-NoTV.avi - GuessIt found: { - [1.00] "mimetype": "video/x-msvideo", - [0.80] "episodeNumber": 3, - [0.80] "videoCodec": "XviD", - [1.00] "container": "avi", - [1.00] "format": "HDTV", - [0.70] "series": "Treme", - [0.50] "title": "Right Place, Wrong Time", - [0.80] "releaseGroup": "NoTV", - [0.80] "season": 1, - [1.00] "type": "episode" - } - - - -Features --------- - -At the moment, the filename matcher is able to recognize the following -property types:: - - [ title, # for movies and episodes - series, season, episodeNumber, # for episodes only - date, year, # 'date' instance of datetime.date - language, subtitleLanguage, # instances of guessit.Language - container, format, - videoCodec, audioCodec, - audioChannels, screenSize, - releaseGroup, website, - cdNumber, cdNumberTotal, - filmNumber, filmSeries, - bonusNumber, edition, other - ] - - -Guessit also allows you to compute a whole lof of hashes from a file, -namely all the ones you can find in the hashlib python module (md5, -sha1, ...), but also the Media Player Classic hash that is used (amongst -others) by OpenSubtitles and SMPlayer, as well as the ed2k hash. - - -Install -------- - -Installing GuessIt is simple with `pip `_:: - - $ pip install guessit - -or, with `easy_install `_:: - - $ easy_install guessit - -But, you really `shouldn't do that `_. - - - -Support -------- - -The project website for GuessIt is hosted at `ReadTheDocs `_. -There you will also find the User guide and Developer documentation. - -This project is hosted on GitHub: ``_ - -Please report issues via the `bug tracker `_. - - -Contribute ----------- - -GuessIt is under active development, and contributions are more than welcome! - -#. Check for open issues or open a fresh issue to start a discussion around a feature idea or a bug. 
- There is a Contributor Friendly tag for issues that should be ideal for people who are not very - familiar with the codebase yet. -#. Fork `the repository`_ on Github to start making your changes to the **master** - branch (or branch off of it). -#. Write a test which shows that the bug was fixed or that the feature works as expected. -#. Send a pull request and bug the maintainer until it gets merged and published. :) - -.. _the repository: https://github.com/wackou/guessit - -License -------- - -GuessIt is licensed under the `LGPLv3 license `_. diff --git a/lib/guessit/__init__.py b/lib/guessit/__init__.py old mode 100755 new mode 100644 index 386aa7f..ce14024 --- a/lib/guessit/__init__.py +++ b/lib/guessit/__init__.py @@ -20,7 +20,7 @@ from __future__ import unicode_literals -__version__ = '0.6-dev' +__version__ = '0.7-dev' __all__ = ['Guess', 'Language', 'guess_file_info', 'guess_video_info', 'guess_movie_info', 'guess_episode_info'] @@ -91,7 +91,28 @@ def emit(self, record): def _guess_filename(filename, filetype): + def find_nodes(tree, props): + """Yields all nodes containing any of the given props.""" + if isinstance(props, base_text_type): + props = [props] + for node in tree.nodes(): + if any(prop in node.guess for prop in props): + yield node + + def warning(title): + log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string())) + return m + mtree = IterativeMatcher(filename, filetype=filetype) + + # if there are multiple possible years found, we assume the first one is + # part of the title, reparse the tree taking this into account + years = set(n.value for n in find_nodes(mtree.match_tree, 'year')) + if len(years) >= 2: + mtree = IterativeMatcher(filename, filetype=filetype, + opts=['skip_first_year']) + + m = mtree.matched() if 'language' not in m and 'subtitleLanguage' not in m: @@ -102,20 +123,10 @@ def _guess_filename(filename, filetype): opts=['nolanguage', 'nocountry']) m2 = mtree2.matched() - def find_nodes(tree, props): - 
"""Yields all nodes containing any of the given props.""" - if isinstance(props, base_text_type): - props = [props] - for node in tree.nodes(): - if any(prop in node.guess for prop in props): - yield node - - def warning(title): - log.warning('%s, guesses: %s - %s' % (title, m.nice_string(), m2.nice_string())) + if m.get('title') is None: return m - if m.get('title') != m2.get('title'): title = next(find_nodes(mtree.match_tree, 'title')) title2 = next(find_nodes(mtree2.match_tree, 'title')) diff --git a/lib/guessit/__main__.py b/lib/guessit/__main__.py old mode 100755 new mode 100644 diff --git a/lib/guessit/country.py b/lib/guessit/country.py old mode 100755 new mode 100644 diff --git a/lib/guessit/date.py b/lib/guessit/date.py old mode 100755 new mode 100644 diff --git a/lib/guessit/fileutils.py b/lib/guessit/fileutils.py old mode 100755 new mode 100644 diff --git a/lib/guessit/guess.py b/lib/guessit/guess.py old mode 100755 new mode 100644 index 62385e8..33d3651 --- a/lib/guessit/guess.py +++ b/lib/guessit/guess.py @@ -295,7 +295,7 @@ def merge_all(guesses, append=None): # then merge the remaining ones dups = set(result) & set(g) if dups: - log.warning('duplicate properties %s in merged result...' % dups) + log.warning('duplicate properties %s in merged result...' 
% [ (result[p], g[p]) for p in dups] ) result.update_highest_confidence(g) diff --git a/lib/guessit/hash_ed2k.py b/lib/guessit/hash_ed2k.py old mode 100755 new mode 100644 diff --git a/lib/guessit/hash_mpc.py b/lib/guessit/hash_mpc.py old mode 100755 new mode 100644 diff --git a/lib/guessit/language.py b/lib/guessit/language.py old mode 100755 new mode 100644 index 3b3a86a..2714c6e --- a/lib/guessit/language.py +++ b/lib/guessit/language.py @@ -326,7 +326,7 @@ def search_language(string, lang_filter=None): 'la', 'el', 'del', 'por', 'mar', # other 'ind', 'arw', 'ts', 'ii', 'bin', 'chan', 'ss', 'san', 'oss', 'iii', - 'vi', 'ben', 'da' + 'vi', 'ben', 'da', 'lt' ]) sep = r'[](){} \._-+' diff --git a/lib/guessit/matcher.py b/lib/guessit/matcher.py old mode 100755 new mode 100644 index cc77b81..4337819 --- a/lib/guessit/matcher.py +++ b/lib/guessit/matcher.py @@ -128,12 +128,14 @@ def apply_transfo(transfo_name, *args, **kwargs): apply_transfo(name) # more guessers for both movies and episodes - for name in ['guess_bonus_features', 'guess_year']: - apply_transfo(name) + apply_transfo('guess_bonus_features') + apply_transfo('guess_year', skip_first_year=('skip_first_year' in opts)) if 'nocountry' not in opts: apply_transfo('guess_country') + apply_transfo('guess_idnumber') + # split into '-' separated subgroups (with required separator chars # around the dash) diff --git a/lib/guessit/matchtree.py b/lib/guessit/matchtree.py old mode 100755 new mode 100644 index 2853c3a..0725e83 --- a/lib/guessit/matchtree.py +++ b/lib/guessit/matchtree.py @@ -275,7 +275,7 @@ def matched(self): for string_part in ('title', 'series', 'container', 'format', 'releaseGroup', 'website', 'audioCodec', 'videoCodec', 'screenSize', 'episodeFormat', - 'audioChannels'): + 'audioChannels', 'idNumber'): merge_similar_guesses(parts, string_part, choose_string) # 2- merge the rest, potentially discarding information not properly diff --git a/lib/guessit/patterns.py b/lib/guessit/patterns.py old mode 
100755 new mode 100644 index a8a0607..ed3982b --- a/lib/guessit/patterns.py +++ b/lib/guessit/patterns.py @@ -43,13 +43,13 @@ (r'saison (?P[0-9]+)', 1.0, (0, 0)), # ... s02e13 ... - (r'[Ss](?P[0-9]{1,2}).?(?P(?:[Ee-][0-9]{1,2})+)[^0-9]', 1.0, (0, -1)), + (r'[Ss](?P[0-9]{1,3})[^0-9]?(?P(?:-?[eE-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)), - # ... s03-x02 ... - (r'[Ss](?P[0-9]{1,2}).?(?P(?:[Xx][0-9]{1,2})+)[^0-9]', 1.0, (0, -1)), + # ... s03-x02 ... # FIXME: redundant? remove it? + #(r'[Ss](?P[0-9]{1,3})[^0-9]?(?P(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)), # ... 2x13 ... - (r'[^0-9](?P[0-9]{1,2}).?(?P(?:[xX][0-9]{1,2})+)[^0-9]', 0.8, (1, -1)), + (r'[^0-9](?P[0-9]{1,2})[^0-9]?(?P(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)), # ... s02 ... #(sep + r's(?P[0-9]{1,2})' + sep, 0.6, (1, -1)), @@ -122,20 +122,25 @@ 'VHS': [ 'VHS' ], 'WEB-DL': [ 'WEB-DL' ] }, - 'screenSize': { '480p': [ '480p?' ], - '720p': [ '720p?' ], - '1080p': [ '1080p?' ] }, + 'screenSize': { '480p': [ '480[pi]?' ], + '720p': [ '720[pi]?' ], + '1080p': [ '1080[pi]?' ] }, 'videoCodec': { 'XviD': [ 'Xvid' ], 'DivX': [ 'DVDivX', 'DivX' ], 'h264': [ '[hx]-264' ], - 'Rv10': [ 'Rv10' ] }, + 'Rv10': [ 'Rv10' ], + 'Mpeg2': [ 'Mpeg2' ] }, + + # has nothing to do here (or on filenames for that matter), but some + # releases use it and it helps to identify release groups, so we adapt + 'videoApi': { 'DXVA': [ 'DXVA' ] }, 'audioCodec': { 'AC3': [ 'AC3' ], 'DTS': [ 'DTS' ], 'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] }, - 'audioChannels': { '5.1': [ r'5\.1', 'DD5\.1', '5ch' ] }, + 'audioChannels': { '5.1': [ r'5\.1', 'DD5[\._ ]1', '5ch' ] }, 'episodeFormat': { 'Minisode': [ 'Minisodes?' 
] } @@ -143,14 +148,21 @@ # prop_single dict of { property_name: [ canonical_form ] } prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'PUKKA', - 'CHD', 'ViTE', 'TLF', 'DEiTY', 'FLAiTE', - 'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS', 'FiNaLe', - 'UnSeeN', 'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL', - 'SiNNERS', 'DiRTY', 'REWARD', 'ECI', 'KiNGS', 'CLUE', - 'CtrlHD', 'POD', 'WiKi', 'DIMENSION', 'IMMERSE', 'FQM', - '2HD', 'REPTiLE', 'CTU', 'HALCYON', 'EbP', 'SiTV', - 'SAiNTS', 'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV', - 'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3' ], + 'CHD', 'ViTE', 'TLF', 'FLAiTE', + 'MDX', 'GM4F', 'DVL', 'SVD', 'iLUMiNADOS', + 'aXXo', 'KLAXXON', 'NoTV', 'ZeaL', 'LOL', + 'CtrlHD', 'POD', 'WiKi','IMMERSE', 'FQM', + '2HD', 'CTU', 'HALCYON', 'EbP', 'SiTV', + 'HDBRiSe', 'AlFleNi-TeaM', 'EVOLVE', '0TV', + 'TLA', 'NTB', 'ASAP', 'MOMENTUM', 'FoV', 'D-Z0N3', + 'TrollHD', 'ECI' + ], + + # potentially confusing release group names (they are words) + 'weakReleaseGroup': [ 'DEiTY', 'FiNaLe', 'UnSeeN', 'KiNGS', 'CLUE', 'DIMENSION', + 'SAiNTS', 'ARROW', 'EuReKA', 'SiNNERS', 'DiRTY', 'REWARD', + 'REPTiLE', + ], 'other': [ 'PROPER', 'REPACK', 'LIMITED', 'DualAudio', 'Audiofixed', 'R5', 'complete', 'classic', # not so sure about these ones, could appear in a title @@ -179,6 +191,10 @@ def _to_rexp(prop): def find_properties(string): result = [] for property_name, props in properties_rexps.items(): + # FIXME: this should be done in a more flexible way... 
+ if property_name in ['weakReleaseGroup']: + continue + for canonical_form, rexps in props.items(): for value_rexp in rexps: match = value_rexp.search(string) diff --git a/lib/guessit/slogging.py b/lib/guessit/slogging.py old mode 100755 new mode 100644 diff --git a/lib/guessit/textutils.py b/lib/guessit/textutils.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/__init__.py b/lib/guessit/transfo/__init__.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_bonus_features.py b/lib/guessit/transfo/guess_bonus_features.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_country.py b/lib/guessit/transfo/guess_country.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_date.py b/lib/guessit/transfo/guess_date.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_episode_info_from_position.py b/lib/guessit/transfo/guess_episode_info_from_position.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_episodes_rexps.py b/lib/guessit/transfo/guess_episodes_rexps.py old mode 100755 new mode 100644 index 4ebfb54..29562be --- a/lib/guessit/transfo/guess_episodes_rexps.py +++ b/lib/guessit/transfo/guess_episodes_rexps.py @@ -28,7 +28,13 @@ log = logging.getLogger(__name__) def number_list(s): - return list(re.sub('[^0-9]+', ' ', s).split()) + l = [ int(n) for n in re.sub('[^0-9]+', ' ', s).split() ] + + if len(l) == 2: + # it is an episode interval, return all numbers in between + return range(l[0], l[1]+1) + + return l def guess_episodes_rexps(string): for rexp, confidence, span_adjust in episode_rexps: @@ -38,23 +44,23 @@ def guess_episodes_rexps(string): span = (match.start() + span_adjust[0], match.end() + span_adjust[1]) - # episodes which have a season > 25 are most likely errors + # episodes which have a season > 30 are most likely errors # (Simpsons is at 24!) 
- if int(guess.get('season', 0)) > 25: + if int(guess.get('season', 0)) > 30: continue # decide whether we have only a single episode number or an # episode list if guess.get('episodeNumber'): eplist = number_list(guess['episodeNumber']) - guess.set('episodeNumber', int(eplist[0]), confidence=confidence) + guess.set('episodeNumber', eplist[0], confidence=confidence) if len(eplist) > 1: - guess.set('episodeList', list(map(int, eplist)), confidence=confidence) + guess.set('episodeList', eplist, confidence=confidence) if guess.get('bonusNumber'): eplist = number_list(guess['bonusNumber']) - guess.set('bonusNumber', int(eplist[0]), confidence=confidence) + guess.set('bonusNumber', eplist[0], confidence=confidence) return guess, span diff --git a/lib/guessit/transfo/guess_filetype.py b/lib/guessit/transfo/guess_filetype.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_idnumber.py b/lib/guessit/transfo/guess_idnumber.py new file mode 100644 index 0000000..0e15af5 --- /dev/null +++ b/lib/guessit/transfo/guess_idnumber.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# GuessIt - A library for guessing information from filenames +# Copyright (c) 2013 Nicolas Wack +# +# GuessIt is free software; you can redistribute it and/or modify it under +# the terms of the Lesser GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# GuessIt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# Lesser GNU General Public License for more details. +# +# You should have received a copy of the Lesser GNU General Public License +# along with this program. If not, see . 
+# + +from __future__ import unicode_literals +from guessit.transfo import SingleNodeGuesser +from guessit.patterns import find_properties +import re +import logging + +log = logging.getLogger(__name__) + + +def guess_properties(string): + try: + prop, value, pos, end = find_properties(string)[0] + return { prop: value }, (pos, end) + except IndexError: + return None, None + +_idnum = re.compile(r'(?P[a-zA-Z0-9-]{10,})') # 1.0, (0, 0)) + +def guess_idnumber(string): + match = _idnum.search(string) + if match is not None: + result = match.groupdict() + switch_count = 0 + DIGIT = 0 + LETTER = 1 + OTHER = 2 + last = LETTER + for c in result['idNumber']: + if c in '0123456789': + ci = DIGIT + elif c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ': + ci = LETTER + else: + ci = OTHER + + if ci != last: + switch_count += 1 + + last = ci + + switch_ratio = float(switch_count) / len(result['idNumber']) + + # only return the result as probable if we alternate often between + # char type (more likely for hash values than for common words) + if switch_ratio > 0.4: + return result, match.span() + + return None, None + +def process(mtree): + SingleNodeGuesser(guess_idnumber, 0.4, log).process(mtree) diff --git a/lib/guessit/transfo/guess_language.py b/lib/guessit/transfo/guess_language.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_movie_title_from_position.py b/lib/guessit/transfo/guess_movie_title_from_position.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_properties.py b/lib/guessit/transfo/guess_properties.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_release_group.py b/lib/guessit/transfo/guess_release_group.py old mode 100755 new mode 100644 index 2ff237d..b72c736 --- a/lib/guessit/transfo/guess_release_group.py +++ b/lib/guessit/transfo/guess_release_group.py @@ -31,16 +31,22 @@ def get_patterns(property_name): CODECS = get_patterns('videoCodec') FORMATS = get_patterns('format') 
+VAPIS = get_patterns('videoApi') -GROUP_NAMES = [ r'(?P' + codec + r')-?(?P.*?)[ \.]' +# RG names following a codec or format, with a potential space or dash inside the name +GROUP_NAMES = [ r'(?P' + codec + r')[ \.-](?P.+?([- \.].*?)??)[ \.]' for codec in CODECS ] -GROUP_NAMES += [ r'(?P' + fmt + r')-?(?P.*?)[ \.]' +GROUP_NAMES += [ r'(?P' + fmt + r')[ \.-](?P.+?([- \.].*?)??)[ \.]' for fmt in FORMATS ] +GROUP_NAMES += [ r'(?P' + api + r')[ \.-](?P.+?([- \.].*?)??)[ \.]' + for api in VAPIS ] GROUP_NAMES2 = [ r'\.(?P' + codec + r')-(?P.*?)(-(.*?))?[ \.]' for codec in CODECS ] -GROUP_NAMES2 += [ r'\.(?P' + fmt + r')-(?P.*?)(-(.*?))?[ \.]' +GROUP_NAMES2 += [ r'\.(?P' + fmt + r')-(?P.*?)(-(.*?))?[ \.]' for fmt in FORMATS ] +GROUP_NAMES2 += [ r'\.(?P' + vapi + r')-(?P.*?)(-(.*?))?[ \.]' + for vapi in VAPIS ] GROUP_NAMES = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES ] GROUP_NAMES2 = [ re.compile(r, re.IGNORECASE) for r in GROUP_NAMES2 ] @@ -54,12 +60,17 @@ def guess_release_group(string): # first try to see whether we have both a known codec and a known release group for rexp in GROUP_NAMES: match = rexp.search(string) - if match: + while match: metadata = match.groupdict() - release_group = compute_canonical_form('releaseGroup', metadata['releaseGroup']) + # make sure this is an actual release group we caught + release_group = (compute_canonical_form('releaseGroup', metadata['releaseGroup']) or + compute_canonical_form('weakReleaseGroup', metadata['releaseGroup'])) if release_group: return adjust_metadata(metadata), (match.start(1), match.end(2)) + # we didn't find anything conclusive, keep searching + match = rexp.search(string, match.span()[0]+1) + # pick anything as releaseGroup as long as we have a codec in front # this doesn't include a potential dash ('-') ending the release group # eg: [...].X264-HiS@SiLUHD-English.[...] 
diff --git a/lib/guessit/transfo/guess_video_rexps.py b/lib/guessit/transfo/guess_video_rexps.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_weak_episodes_rexps.py b/lib/guessit/transfo/guess_weak_episodes_rexps.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_website.py b/lib/guessit/transfo/guess_website.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/guess_year.py b/lib/guessit/transfo/guess_year.py old mode 100755 new mode 100644 index 4bc9b86..c193af7 --- a/lib/guessit/transfo/guess_year.py +++ b/lib/guessit/transfo/guess_year.py @@ -33,6 +33,18 @@ def guess_year(string): else: return None, None +def guess_year_skip_first(string): + year, span = search_year(string) + if year: + year2, span2 = guess_year(string[span[1]:]) + if year2: + return year2, (span2[0]+span[1], span2[1]+span[1]) + + return None, None -def process(mtree): - SingleNodeGuesser(guess_year, 1.0, log).process(mtree) + +def process(mtree, skip_first_year=False): + if skip_first_year: + SingleNodeGuesser(guess_year_skip_first, 1.0, log).process(mtree) + else: + SingleNodeGuesser(guess_year, 1.0, log).process(mtree) diff --git a/lib/guessit/transfo/post_process.py b/lib/guessit/transfo/post_process.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/split_explicit_groups.py b/lib/guessit/transfo/split_explicit_groups.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/split_on_dash.py b/lib/guessit/transfo/split_on_dash.py old mode 100755 new mode 100644 diff --git a/lib/guessit/transfo/split_path_components.py b/lib/guessit/transfo/split_path_components.py old mode 100755 new mode 100644