Commit 1f5616d1 authored by Patrik Dufresne's avatar Patrik Dufresne

Fix the Content-Type when restoring file. TASK-972

When restoring a file or an archive, guess the Content-Type from the
file extension using the mimetypes library. Default to
"application/octet-stream" if we can't guess the proper type.
parent 09216807
Pipeline #145 canceled with stages
in 65 minutes and 28 seconds
......@@ -22,7 +22,7 @@ from __future__ import unicode_literals
from builtins import bytes
from builtins import str
import cherrypy
from cherrypy.lib.static import _serve_fileobj
from cherrypy.lib.static import _serve_fileobj, mimetypes
import logging
from rdiffweb import page_main
......@@ -36,32 +36,44 @@ from rdiffweb.rdw_helpers import quote_url
logger = logging.getLogger(__name__)
def _content_disposition(filename):
    """
    Build the best Content-Disposition header value to support most browsers.

    Follows the recommendations at http://greenbytes.de/tech/tc2231/:
    the plain quoted ``filename`` parameter is only used when the name is
    pure ASCII without any special character; otherwise the name is sent
    through the ``filename*`` parameter, encoded in UTF-8.

    :param filename: the name to suggest to the user agent (unicode string).
    :return: the header value, always starting with ``attachment;``.
    """
    assert isinstance(filename, str)
    # 1. Use the quoted filename for a plain ascii filename.
    try:
        filename.encode('ascii')
    except UnicodeError:
        # Not representable in ascii: fall through to filename*.
        is_ascii = False
    else:
        is_ascii = True
    # Some chars are not decoded properly by user agents, and a double
    # quote would end the quoted-string early, so such names also use
    # filename*.
    if is_ascii and not any(c in filename for c in (';', '%', '\\', '"')):
        return 'attachment; filename="%s"' % filename
    # 2. Otherwise define filename* as encoded UTF-8 (replace invalid chars).
    filename_utf8 = filename.encode('utf-8', 'replace')
    return 'attachment; filename*=UTF-8\'\'%s' % quote_url(filename_utf8, safe='?')
def _content_type(filename):
    """
    Guess the Content-Type of a file from the extension of its name.

    Everything from the last dot onward is lower-cased and looked up in
    ``mimetypes.types_map``; "application/octet-stream" is returned when
    the lookup finds nothing.
    """
    _, dot, suffix = filename.rpartition('.')
    # Without any dot in the name, the lookup key is the empty string.
    extension = (dot + suffix).lower() if dot else ''
    return mimetypes.types_map.get(extension, "application/octet-stream")  # @UndefinedVariable
@rdiffweb.dispatch.poppath()
class RestorePage(page_main.MainPage):
_cp_config = {"response.stream": True, "response.timeout": 3000}
def _content_disposition(self, filename):
    """
    Build the best Content-Disposition header value to support most browsers.

    Follows the recommendations at http://greenbytes.de/tech/tc2231/:
    the plain quoted ``filename`` parameter is only used when the name is
    pure ASCII without any special character; otherwise the name is sent
    through the ``filename*`` parameter, encoded in UTF-8.

    :param filename: the name to suggest to the user agent (unicode string).
    :return: the header value, always starting with ``attachment;``.
    """
    assert isinstance(filename, str)
    # 1. Use the quoted filename for a plain ascii filename.
    try:
        filename.encode('ascii')
    except UnicodeError:
        # Not representable in ascii: fall through to filename*.
        is_ascii = False
    else:
        is_ascii = True
    # Some chars are not decoded properly by user agents, and a double
    # quote would end the quoted-string early, so such names also use
    # filename*.
    if is_ascii and not any(c in filename for c in (';', '%', '\\', '"')):
        return 'attachment; filename="%s"' % filename
    # 2. Otherwise define filename* as encoded UTF-8 (replace invalid chars).
    filename_utf8 = filename.encode('utf-8', 'replace')
    return 'attachment; filename*=UTF-8\'\'%s' % quote_url(filename_utf8, safe='?')
@cherrypy.expose
@cherrypy.tools.gzip(on=False)
def default(self, path=b"", date=None, kind=None, usetar=None):
......@@ -96,7 +108,13 @@ class RestorePage(page_main.MainPage):
filename, fileobj = path_obj.restore(int(date), kind=kind)
# Define content-disposition.
cherrypy.response.headers["Content-Disposition"] = self._content_disposition(filename)
cherrypy.response.headers["Content-Disposition"] = _content_disposition(filename)
# Set content-type based on filename extension
content_type = _content_type(filename)
cherrypy.response.headers['Content-Type'] = content_type
# Stream the data.
return _serve_fileobj(fileobj, content_type=None, content_length=None)
# Make use of _serve_fileobj() because the fstat() function on a pipe
# returns a size of 0 for Content-Length. This behavior breaks the whole flow.
return _serve_fileobj(fileobj, content_type=content_type, content_length=None)
......@@ -30,6 +30,7 @@ import tarfile
import unittest
import zipfile
from rdiffweb.page_restore import _content_disposition
from rdiffweb.test import WebCase, AppTestCase
......@@ -47,15 +48,15 @@ class RestorePageTest(AppTestCase):
Check value generated for different content-disposition.
"""
# Simple ascii
self.assertEqual('attachment; filename="foo.bar"', self.page._content_disposition("foo.bar"))
self.assertEqual('attachment; filename="foo.bar"', _content_disposition("foo.bar"))
# ISO-8859-1 > UTF-8
self.assertEqual("attachment; filename*=UTF-8''foo-%C3%A4.html", self.page._content_disposition("foo-ä.html"))
self.assertEqual("attachment; filename*=UTF-8''foo-%C3%A4.html", _content_disposition("foo-ä.html"))
# Ascii filename with %
self.assertEqual("attachment; filename*=UTF-8''foo-%2541.html", self.page._content_disposition("foo-%41.html"))
self.assertEqual("attachment; filename*=UTF-8''foo-%2541.html", _content_disposition("foo-%41.html"))
# Ascii filename with ;
self.assertEqual("attachment; filename*=UTF-8''foo-%3B41.html", self.page._content_disposition("foo-;41.html"))
self.assertEqual("attachment; filename*=UTF-8''foo-%3B41.html", _content_disposition("foo-;41.html"))
# Ascii filename with \
self.assertEqual("attachment; filename*=UTF-8''foo-%5C41.html", self.page._content_disposition("foo-\\41.html"))
self.assertEqual("attachment; filename*=UTF-8''foo-%5C41.html", _content_disposition("foo-\\41.html"))
class RestoreTest(WebCase):
......@@ -82,10 +83,12 @@ class RestoreTest(WebCase):
self._restore(self.REPO, "Fichier%20avec%20non%20asci%20char%20%C9velyne%20M%E8re.txt/", "1415221507", True)
self.assertBody("Centers the value\n")
self.assertHeader('Content-Disposition', 'attachment; filename*=UTF-8\'\'Fichier%20avec%20non%20asci%20char%20%EF%BF%BDvelyne%20M%EF%BF%BDre.txt')
self.assertHeader('Content-Type', 'text/plain;charset=utf-8')
self._restore(self.REPO, "DIR%EF%BF%BD/Data/", "1415059497", True)
self.assertBody("My Data !\n")
self.assertHeader('Content-Disposition', 'attachment; filename="Data"')
self.assertHeader('Content-Type', 'application/octet-stream')
def test_quoted(self):
"""
......@@ -93,6 +96,7 @@ class RestoreTest(WebCase):
"""
self._restore(self.REPO, "Char%20%3B059090%20to%20quote/", "1415221507", True)
self.assertHeader('Content-Disposition', 'attachment; filename*=UTF-8\'\'Char%20%3B090%20to%20quote.tar.gz')
self.assertHeader('Content-Type', 'application/x-gzip')
def test_file(self):
"""
......@@ -100,17 +104,20 @@ class RestoreTest(WebCase):
"""
self._restore(self.REPO, "Fichier%20%40%20%3Croot%3E/", "1414921853", True)
self.assertInBody("Ajout d'info")
self.assertHeader('Content-Type', 'application/octet-stream')
def test_with_quoted_path(self):
    """
    Restore a file whose path contains a quoted character.
    """
    quoted_path = "Char%20%3B090%20to%20quote/Data/"
    restore_date = "1414921853"
    self._restore(self.REPO, quoted_path, restore_date, True)
    self.assertBody("Bring me some Data !\n")
    self.assertHeader('Content-Type', 'application/octet-stream')
def test_root_as_tar_gz(self):
self._restore(self.REPO, "", "1414871387", True)
self.assertStatus(200)
self.assertHeader('Content-Type', 'application/x-gzip')
# Build expected files list
expected = {}
expected["Répertoire Supprimé"] = 0
......@@ -138,6 +145,7 @@ class RestoreTest(WebCase):
def test_root_as_tar_gz_recent(self):
self._restore(self.REPO, "", "1415221507", True)
self.assertStatus(200)
self.assertHeader('Content-Type', 'application/x-gzip')
# Read the content as tar.gz with UTF8 encoding.
expected = {}
if PY3:
......@@ -176,6 +184,7 @@ class RestoreTest(WebCase):
def test_root_as_zip(self):
self._restore(self.REPO, "", "1414871387", False)
self.assertStatus(200)
self.assertHeader('Content-Type', 'application/zip')
# Read the content as zip with UTF8 encoding.
expected = {}
expected["Répertoire Supprimé/"] = 0
......@@ -202,6 +211,7 @@ class RestoreTest(WebCase):
def test_root_as_zip_recent(self):
self._restore(self.REPO, "", "1415221507", False)
self.assertStatus(200)
self.assertHeader('Content-Type', 'application/zip')
# Read the content as zip with UTF8 encoding.
expected = {}
expected["Fichier avec non asci char �velyne M�re.txt"] = 18
......@@ -237,6 +247,7 @@ class RestoreTest(WebCase):
def test_root_as_tar_bz2(self):
self._restore(self.REPO, "", '1415221507', False, 'tar.bz2')
self.assertStatus(200)
self.assertHeader('Content-Type', 'application/x-bzip2')
# Read content as tar.bz2.
actual = {}
t = tarfile.open(mode='r:bz2', fileobj=io.BytesIO(self.body))
......@@ -252,6 +263,7 @@ class RestoreTest(WebCase):
def test_root_as_tar(self):
self._restore(self.REPO, "", '1415221507', False, 'tar')
self.assertStatus(200)
self.assertHeader('Content-Type', 'application/x-tar')
# Read content as an uncompressed tar.
actual = {}
t = tarfile.open(mode='r', fileobj=io.BytesIO(self.body))
......@@ -266,6 +278,7 @@ class RestoreTest(WebCase):
def test_subdirectory(self):
self._restore(self.REPO, "R%C3%A9pertoire%20Existant/", "1414871475", True)
self.assertHeader('Content-Type', 'application/x-gzip')
# Read the content as tar.gz with UTF8 encoding.
expected = {}
expected["Untitled Empty Text File"] = 0
......@@ -285,6 +298,7 @@ class RestoreTest(WebCase):
def test_subdirectory_deleted(self):
self._restore(self.REPO, "R%C3%A9pertoire%20Supprim%C3%A9/", "1414871475", True)
self.assertHeader('Content-Type', 'application/x-gzip')
# Read the content as tar.gz with UTF8 encoding.
expected = {}
expected["Untitled Empty Text File"] = 21
......@@ -305,10 +319,13 @@ class RestoreTest(WebCase):
def test_with_revisions(self):
    """
    Restore the same file at three dates and check the body and
    Content-Type returned for each one.
    """
    revisions = (
        ("1415221470", "Version1\n"),
        ("1415221495", "Version2\n"),
        ("1415221507", "Version3\n"),
    )
    for restore_date, expected_body in revisions:
        self._restore(self.REPO, "Revisions/Data/", restore_date, True)
        self.assertBody(expected_body)
        self.assertHeader('Content-Type', 'application/octet-stream')
def test_invalid_date(self):
self._restore(self.REPO, "Revisions/Data/", "1415221a470", True)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment