# Unzip archives whose entry names are UTF-8 or GBK encoded, so that
# extracted file names are not garbled.
# Adapted from https://zhuanlan.zhihu.com/p/625300363
def _unpack_zipfile(filename, extract_dir):
    """Unpack zip `filename` to `extract_dir`.

    Entry names are fixed up so that archives created on Windows with
    GBK-encoded names do not extract with garbled file names:

    * when general-purpose flag bit 0x800 (UTF-8 names) is set, the name
      reported by ``zipfile`` is used unchanged;
    * otherwise the name is re-encoded as cp437 (the encoding ``zipfile``
      assumes by default) and decoded as GBK, falling back to the reported
      name if that conversion is not possible.

    Entries whose name starts with '/' or contains '..' are skipped.

    Raises:
        shutil.ReadError: if `filename` is not a zip file.
    """
    import zipfile  # late import for breaking circular dependency

    if not zipfile.is_zipfile(filename):
        raise shutil.ReadError("%s is not a zip file" % filename)

    with zipfile.ZipFile(filename) as archive:
        for info in archive.infolist():
            name = info.filename
            if not info.flag_bits & 0x800:
                # No UTF-8 flag: undo the cp437 decoding and retry as GBK
                # (GBK is ASCII-compatible, so plain ASCII names survive).
                try:
                    name = info.filename.encode('cp437').decode('gbk')
                except UnicodeError:
                    # Covers both directions: the name may not be
                    # representable in cp437, or the bytes may not be
                    # valid GBK. Keep the name zipfile reported.
                    name = info.filename

            # don't extract absolute paths or ones with .. in them
            if name.startswith('/') or '..' in name:
                continue

            target = os.path.join(extract_dir, *name.split('/'))
            if not target:
                continue

            # NOTE(review): ensure_dir is defined outside this block;
            # presumably it creates the directory that will hold `target`.
            ensure_dir(target)

            if name.endswith('/'):
                # directory entry: nothing to write
                continue

            # Open by ZipInfo (not by name) so the entry being iterated is
            # the one read, and stream it instead of loading it into memory.
            with archive.open(info) as source, open(target, 'wb') as dest:
                shutil.copyfileobj(source, dest)
# Replace shutil's registered 'zip' unpack format with _unpack_zipfile so
# that '.zip' archives are unpacked by it.
shutil.unregister_unpack_format('zip')
shutil.register_unpack_format(
    name='zip',
    extensions=['.zip'],
    function=_unpack_zipfile,
    extra_args=[],
    description="ZIP file",
)