Skip to content

Commit

Permalink
Release 0.2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
ratoaq2 committed Feb 28, 2016
1 parent 4cbc4fa commit 5facb0e
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 5 deletions.
5 changes: 5 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
Changelog
---------

0.2.1
^^^^^
**release date:** 2016-02-28
* Added encoding guessing back, without the python-magic dependency.

0.2
^^^^^
**release date:** 2016-02-27
Expand Down
2 changes: 1 addition & 1 deletion cleanit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__title__ = 'cleanit'
__author__ = 'Rato'
__version__ = '0.2'
__version__ = '0.2.1'
33 changes: 30 additions & 3 deletions cleanit/subtitle.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# -*- coding: utf-8 -*-
import logging
import pysrt

import chardet
import pysrt

logger = logging.getLogger(__name__)


class Subtitle(object):

def __init__(self, path, encoding=None):
self.path = path
self.encoding = encoding
self.encoding = encoding if encoding else self.guess_encoding(path)
self.subtitle = None

def __repr__(self):
Expand All @@ -22,6 +22,33 @@ def load(self):
def save(self, path=None, encoding=None):
    """Write the subtitle out through the underlying ``self.subtitle`` object.

    :param path: destination path; when falsy, ``self.path`` is used.
    :param encoding: output encoding; when falsy, ``self.encoding`` is used.
    """
    # Resolve both fallbacks up front so the delegating call stays short.
    # NOTE(review): assumes self.subtitle has been populated before save()
    # is called (it is set to None in __init__) -- confirm against callers.
    target_path = path if path else self.path
    target_encoding = encoding if encoding else self.encoding
    self.subtitle.save(path=target_path, encoding=target_encoding)

def guess_encoding(self, path):
    """Guess the text encoding of the file at ``path``.

    The raw bytes of the file are decoded with each candidate encoding in
    turn, and the first candidate that decodes without error is returned.
    If no candidate succeeds, the encoding reported by ``chardet.detect``
    is returned instead.

    :param path: path of the file to inspect.
    :return: name of the guessed encoding; in the fallback case, whatever
        ``chardet.detect`` reports under its ``'encoding'`` key.
    """
    # always try utf-8 first
    # NOTE(review): latin-1 maps every possible byte value, so decoding with
    # it should never raise; the candidates listed after it and the chardet
    # fallback below look unreachable. The order is kept unchanged here to
    # preserve existing results -- confirm the intended priority.
    encodings = ['utf-8', 'latin-1', 'windows-1251', 'windows-1250', 'iso-8859-9', 'windows-1254', 'windows-1255',
                 'windows-1256', 'shift-jis', 'gb18030', 'big5']

    # try to decode
    logger.debug('Trying encodings %r', encodings)

    # Read raw bytes: text mode yields ``str`` on Python 3 (which has no
    # ``.decode``) and may translate newlines on some platforms, so the
    # content must be read in binary mode for decoding to be meaningful.
    with open(path, 'rb') as f:
        content = f.read()

    for encoding in encodings:
        try:
            content.decode(encoding)
        except UnicodeDecodeError:
            continue
        logger.info('Guessed encoding %s', encoding)
        return encoding

    logger.warning('Could not guess encoding from language')

    # fallback on chardet
    encoding = chardet.detect(content)['encoding']
    logger.info('Chardet found encoding %s', encoding)

    return encoding

def clean(self, rules, clean_indexes=True):
self.load()

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def run_tests(self):
sys.exit(errno)

# requirements
install_requirements = ['appdirs>=1.4.0', 'click>=4.0', 'jsonschema>=2.5.1', 'pysrt>=1.0.1', 'pyyaml>=3.11']
install_requirements = ['appdirs>=1.4.0', 'chardet>=2.3.0', 'click>=4.0', 'jsonschema>=2.5.1', 'pysrt>=1.0.1',
'pyyaml>=3.11']

test_requirements = ['pytest', 'pytest-pep8', 'pytest-flakes', 'pytest-cov']

Expand Down

0 comments on commit 5facb0e

Please sign in to comment.