forked from ahupp/python-magic
-
Notifications
You must be signed in to change notification settings - Fork 1
/
magic.py
302 lines (223 loc) · 8.75 KB
/
magic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
"""
magic is a wrapper around the libmagic file identification library.
See README for more information.
Usage:
>>> import magic
>>> magic.from_file("testdata/test.pdf")
'PDF document, version 1.2'
>>> magic.from_file("testdata/test.pdf", mime=True)
'application/pdf'
>>> magic.from_buffer(open("testdata/test.pdf", "rb").read(1024))
'PDF document, version 1.2'
>>>
"""
import sys
import glob
import os.path
import ctypes
import ctypes.util
import threading
from ctypes import c_char_p, c_int, c_size_t, c_void_p
class MagicException(Exception):
    """Raised when a libmagic call reports a failure.

    The first positional argument (normally the text returned by
    magic_error(), or None when libmagic supplied no text) is exposed as
    ``self.message``.  Python 3 removed the implicit ``message`` attribute
    from exceptions, so it is set explicitly for code that reads it.
    """

    def __init__(self, *args):
        super(MagicException, self).__init__(*args)
        # Mirror args[0]; None when the exception was built without arguments.
        self.message = args[0] if args else None
class Magic:
    """
    Magic is a wrapper around the libmagic C library.

    Each instance owns one libmagic cookie and remembers the thread that
    created it; using the instance from any other thread raises an
    Exception (see _thread_check).
    """

    def __init__(self, mime=False, magic_file=None, mime_encoding=False,
                 keep_going=False):
        """
        Create a new libmagic wrapper.

        mime - if True, mimetypes are returned instead of textual descriptions
        mime_encoding - if True, codec is returned (only consulted when
                        mime is False)
        magic_file - use a mime database other than the system default
        keep_going - don't stop at the first match, keep going
        """
        self.flags = MAGIC_NONE
        if mime:
            self.flags |= MAGIC_MIME
        elif mime_encoding:
            self.flags |= MAGIC_MIME_ENCODING
        if keep_going:
            self.flags |= MAGIC_CONTINUE

        self.cookie = magic_open(self.flags)
        magic_load(self.cookie, magic_file)

        # Remember the creating thread so _thread_check can reject use
        # from any other thread.
        self.thread = threading.current_thread()

    def from_buffer(self, buf):
        """
        Identify the contents of `buf`

        raises MagicException if libmagic reports an error
        """
        self._thread_check()
        try:
            return magic_buffer(self.cookie, buf)
        except MagicException as e:
            return self._handle509Bug(e)

    def from_file(self, filename):
        """
        Identify the contents of file `filename`

        raises IOError if the file does not exist
        raises MagicException if libmagic reports an error
        """
        self._thread_check()
        if not os.path.exists(filename):
            raise IOError("File does not exist: " + filename)
        try:
            return magic_file(self.cookie, filename)
        except MagicException as e:
            return self._handle509Bug(e)

    def _handle509Bug(self, e):
        """
        Translate the libmagic 5.09 "null result, no error" case.

        Returns "application/octet-stream" when `e` carries no message and
        this instance was opened with MAGIC_MIME; any other error is
        re-raised instead of being swallowed.
        """
        # libmagic 5.09 has a bug where it might fail to identify the
        # mimetype of a file and returns null from magic_file (and
        # likely _buffer), but also does not return an error message.
        #
        # Read the message from args rather than e.message: exceptions
        # have no implicit `message` attribute on Python 3.
        message = e.args[0] if e.args else None
        if message is None and (self.flags & MAGIC_MIME):
            return "application/octet-stream"
        raise e

    def _thread_check(self):
        """Raise if called from a thread other than the creating one."""
        if self.thread != threading.current_thread():
            raise Exception('attempting to use libmagic on multiple threads will '
                            'end in SEGV. Prefer to use the module functions '
                            'from_file or from_buffer, or carefully manage direct '
                            'use of the Magic class')

    def __del__(self):
        # no _thread_check here because there can be no other
        # references to this object at this point.

        # during shutdown magic_close may have been cleared already so
        # make sure it exists before using it.

        # the cookie check should be unnecessary and was an
        # incorrect fix for a threading problem, however I'm leaving
        # it in because it's harmless and I'm slightly afraid to
        # remove it.  getattr covers an instance whose __init__ raised
        # before the cookie attribute was assigned.
        cookie = getattr(self, 'cookie', None)
        if cookie and magic_close:
            magic_close(cookie)
            self.cookie = None
# Thread-local storage: every thread gets its own set of cached Magic
# objects, keyed by the `mime` argument.
instances = threading.local()


def _get_magic_type(mime):
    """Return the calling thread's Magic object for `mime`, creating it on first use."""
    cached = instances.__dict__.get(mime)
    if cached is not None:
        return cached
    cached = Magic(mime=mime)
    instances.__dict__[mime] = cached
    return cached
def from_file(filename, mime=False):
    """
    Identify the file at `filename` using this thread's shared Magic object.

    The result is the mimetype when mime=True, otherwise a human readable
    description.

    >>> magic.from_file("testdata/test.pdf", mime=True)
    'application/pdf'
    """
    return _get_magic_type(mime).from_file(filename)
def from_buffer(buffer, mime=False):
    """
    Identify the binary string `buffer` using this thread's shared Magic
    object.

    The result is the mimetype when mime=True, otherwise a human readable
    description.

    >>> magic.from_buffer(open("testdata/test.pdf", "rb").read(1024))
    'PDF document, version 1.2'
    """
    return _get_magic_type(mime).from_buffer(buffer)
libmagic = None
# Let's try to find magic or magic1
dll = ctypes.util.find_library('magic') or ctypes.util.find_library('magic1')

# This is necessary because find_library returns None if it doesn't find the library
if dll:
    try:
        libmagic = ctypes.CDLL(dll)
    except OSError:
        # The library was located but could not be loaded; keep going so
        # the platform-specific fallbacks below still get their chance.
        libmagic = None

if not libmagic or not libmagic._name:
    platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib',
                                  '/usr/local/lib/libmagic.dylib',
                                  # Homebrew prefix on Apple Silicon
                                  '/opt/homebrew/lib/libmagic.dylib'] +
                                 # Assumes there will only be one version installed
                                 glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'),
                       'win32': ['magic1.dll', 'cygmagic-1.dll']}
    # Use the first candidate that loads; unknown platforms have no
    # candidates and fall straight through to the ImportError.
    for dll in platform_to_lib.get(sys.platform, []):
        try:
            libmagic = ctypes.CDLL(dll)
            break
        except OSError:
            pass

if not libmagic or not libmagic._name:
    # It is better to raise an ImportError since we are importing magic module
    raise ImportError('failed to find libmagic. Check your installation')
# ctypes type used for the libmagic cookie in the prototypes below.
magic_t = c_void_p
def errorcheck_null(result, func, args):
    """
    ctypes errcheck hook: pass `result` through unless it is None.

    On None, raises MagicException carrying magic_error() for args[0],
    the cookie the failed call was made with.
    """
    if result is not None:
        return result
    raise MagicException(magic_error(args[0]))
def errorcheck_negative_one(result, func, args):
    """
    ctypes errcheck hook: pass `result` through unless it equals -1.

    On -1, raises MagicException carrying magic_error() for args[0],
    the cookie the failed call was made with.
    """
    # Compare by value.  `result is -1` is an identity test against a
    # literal: it only works when the interpreter happens to share small
    # int objects, and newer Pythons emit a SyntaxWarning for it.
    if result == -1:
        err = magic_error(args[0])
        raise MagicException(err)
    return result
def coerce_filename(filename):
    """
    Convert `filename` to the byte string handed to libmagic.

    None and byte strings are returned unchanged; text strings are
    encoded with the filesystem encoding.
    """
    if filename is None:
        return None

    # ctypes will implicitly convert unicode strings to bytes with
    # .encode('ascii').  A more useful default here is the filesystem
    # encoding.  We need to leave byte-str unchanged.
    if sys.version_info[0] >= 3:
        if isinstance(filename, str):
            # os.fsencode applies the filesystem encoding together with
            # its error handler, so names containing surrogate-escaped
            # bytes (as produced by os.listdir) encode instead of raising
            # UnicodeEncodeError.
            return os.fsencode(filename)
        return filename

    if isinstance(filename, unicode):
        return filename.encode(sys.getfilesystemencoding())
    return filename
# ctypes prototypes for the libmagic entry points that are called directly,
# without a Python wrapper.  None of these has an errcheck hook installed.

# magic_open(flags: int) -> cookie
magic_open = libmagic.magic_open
magic_open.restype = magic_t
magic_open.argtypes = [c_int]

# magic_close(cookie) -> no return value
magic_close = libmagic.magic_close
magic_close.restype = None
magic_close.argtypes = [magic_t]

# magic_error(cookie) -> C string; used by the errcheck hooks to build
# the MagicException they raise
magic_error = libmagic.magic_error
magic_error.restype = c_char_p
magic_error.argtypes = [magic_t]

# magic_errno(cookie) -> int
magic_errno = libmagic.magic_errno
magic_errno.restype = c_int
magic_errno.argtypes = [magic_t]
# Raw binding for libmagic's magic_file; errorcheck_null turns a None
# result into a MagicException.
_magic_file = libmagic.magic_file
_magic_file.restype = c_char_p
_magic_file.argtypes = [magic_t, c_char_p]
_magic_file.errcheck = errorcheck_null


def magic_file(cookie, filename):
    """Call libmagic's magic_file on `filename` after coerce_filename() has converted it."""
    encoded_name = coerce_filename(filename)
    return _magic_file(cookie, encoded_name)
# Raw binding for libmagic's magic_buffer; errorcheck_null turns a None
# result into a MagicException.
_magic_buffer = libmagic.magic_buffer
_magic_buffer.restype = c_char_p
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_magic_buffer.errcheck = errorcheck_null


def magic_buffer(cookie, buf):
    """
    Call libmagic's magic_buffer on `buf`, passing len(buf) as the size.

    NOTE(review): len(buf) is only a byte count when `buf` is a byte
    string -- confirm callers never pass text here.
    """
    size = len(buf)
    return _magic_buffer(cookie, buf, size)
# Raw binding for libmagic's magic_load; errorcheck_negative_one turns a
# -1 result into a MagicException.
_magic_load = libmagic.magic_load
_magic_load.restype = c_int
_magic_load.argtypes = [magic_t, c_char_p]
_magic_load.errcheck = errorcheck_negative_one


def magic_load(cookie, filename):
    """
    Call libmagic's magic_load with `filename` after coerce_filename() has
    converted it; None is passed through unchanged.
    """
    database = coerce_filename(filename)
    return _magic_load(cookie, database)
# The remaining bindings are exposed raw: no errcheck hook is installed, so
# callers must inspect the int result themselves, and c_char_p arguments are
# not passed through coerce_filename().

# magic_setflags(cookie, flags: int) -> int
magic_setflags = libmagic.magic_setflags
magic_setflags.restype = c_int
magic_setflags.argtypes = [magic_t, c_int]

# magic_check(cookie, filename: bytes or None) -> int
magic_check = libmagic.magic_check
magic_check.restype = c_int
magic_check.argtypes = [magic_t, c_char_p]

# magic_compile(cookie, filename: bytes or None) -> int
magic_compile = libmagic.magic_compile
magic_compile.restype = c_int
magic_compile.argtypes = [magic_t, c_char_p]
# Flag bits for libmagic.  Magic.__init__ ORs a subset of these together and
# passes the result to magic_open().
# NOTE(review): libmagic's magic.h appears to name the 0x10 bit
# MAGIC_MIME_TYPE and to define MAGIC_MIME as type|encoding -- confirm
# against the installed header before relying on the C naming.
MAGIC_NONE = 0x000000 # No flags
MAGIC_DEBUG = 0x000001 # Turn on debugging
MAGIC_SYMLINK = 0x000002 # Follow symlinks
MAGIC_COMPRESS = 0x000004 # Check inside compressed files
MAGIC_DEVICES = 0x000008 # Look at the contents of devices
MAGIC_MIME = 0x000010 # Return a mime string
# (listed out of numeric order, next to MAGIC_MIME)
MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding
MAGIC_CONTINUE = 0x000020 # Return all matches
MAGIC_CHECK = 0x000040 # Print warnings to stderr
MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit
MAGIC_RAW = 0x000100 # Don't translate unprintable chars
MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors
MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files
MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files
MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries
MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type
MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details
MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files
MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff
MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran
MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens