Merge pull request #1321 from SaptakS/range-requests-revisited

Reviving the old range request PR
This commit is contained in:
Micah Lee 2021-04-25 18:07:35 -04:00 committed by GitHub
commit 470fb2bda3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 563 additions and 88 deletions

View file

@ -18,18 +18,100 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import binascii
import hashlib
import os
import sys
import tempfile
import zipfile
import mimetypes
from flask import Response, request, render_template, make_response
from datetime import datetime
from flask import Response, request, render_template, make_response, abort
from unidecode import unidecode
from werkzeug.http import parse_date, http_date
from werkzeug.urls import url_quote
from .send_base_mode import SendBaseModeWeb
def make_etag(data, chunk_size=4096):
    """
    Build an ETag value from the SHA-256 digest of a binary stream.

    The stream is read from its current position until a read returns no
    data, ``chunk_size`` bytes at a time, so the whole content never has to
    be held in memory at once.

    :param data: file-like object opened in binary mode (needs ``read(size)``)
    :param chunk_size: number of bytes requested per read; must be positive
    :return: the quoted string ``"sha256:<hex digest>"``
    :raises ValueError: if ``chunk_size`` is not a positive integer
    """
    if chunk_size <= 0:
        # read(0) would return b"" immediately and silently hash nothing
        raise ValueError("chunk_size must be a positive integer")

    hasher = hashlib.sha256()
    while True:
        read_bytes = data.read(chunk_size)
        if not read_bytes:
            # an empty read marks the end of the stream
            break
        hasher.update(read_bytes)

    return '"sha256:{}"'.format(hasher.hexdigest())
def parse_range_header(range_header: str, target_size: int) -> list:
    """
    Parse the value of an HTTP ``Range`` header into inclusive byte ranges.

    Accepted forms for each comma-separated element after ``bytes=``:

    * ``N``    - from offset N to the end of the target
    * ``N-``   - from offset N to the end of the target
    * ``N-M``  - from offset N to offset M (M is clamped to the last byte)
    * ``-K``   - the last K bytes (the whole target if K exceeds its size)

    Overlapping and adjacent ranges are merged, and the result is sorted by
    start offset.

    :param range_header: raw header value, or None when no header was sent
    :param target_size: size in bytes of the content being served
    :return: list of ``(start, end)`` tuples, both offsets inclusive; when
        ``range_header`` is None the single range ``(0, target_size - 1)``
    :raises: calls ``abort(416)`` when the unit is not ``bytes``, an element
        cannot be parsed, ``end < start``, a suffix length is not positive,
        or a range starts beyond the last byte of the target
    """
    end_index = target_size - 1
    if range_header is None:
        return [(0, end_index)]

    bytes_ = 'bytes='
    if not range_header.startswith(bytes_):
        abort(416)

    ranges = []
    for range_ in range_header[len(bytes_):].split(','):
        split = range_.split('-')
        if len(split) == 1:
            # parse ranges of the form "bytes=100" (offset 100 to the end)
            try:
                start = int(split[0])
            except ValueError:
                abort(416)
            end = end_index
        elif len(split) == 2:
            start, end = split
            if not start:
                # parse ranges of the form "bytes=-100" (i.e., last 100 bytes)
                try:
                    suffix_length = int(end)
                except ValueError:
                    abort(416)
                if suffix_length <= 0:
                    # "last 0 bytes" can never be satisfied
                    abort(416)
                end = end_index
                # a suffix longer than the target selects the whole target
                # rather than producing a negative start offset
                start = max(0, end - suffix_length + 1)
            else:
                # parse ranges of the form "bytes=100-200" or "bytes=100-"
                try:
                    start = int(start)
                    end = int(end) if end else end_index
                except ValueError:
                    abort(416)

                if end < start:
                    abort(416)

                end = min(end, end_index)
        else:
            abort(416)

        if start > end_index:
            # the range begins past the last byte, so nothing can be served;
            # without this check the clamped end would be smaller than start
            abort(416)

        ranges.append((start, end))

    # merge the ranges
    merged = []
    for range_ in sorted(ranges, key=lambda x: x[0]):
        # merge ranges that are adjacent or overlapping
        if merged and range_[0] <= merged[-1][1] + 1:
            merged[-1] = (merged[-1][0], max(range_[1], merged[-1][1]))
        else:
            merged.append(range_)

    return merged
class ShareModeWeb(SendBaseModeWeb):
"""
All of the web logic for share mode
@ -43,6 +125,10 @@ class ShareModeWeb(SendBaseModeWeb):
"share", "autostop_sharing"
)
self.download_etag = None
self.gzip_etag = None
self.last_modified = datetime.utcnow()
def define_routes(self):
"""
The web app routes for sharing files
@ -92,7 +178,7 @@ class ShareModeWeb(SendBaseModeWeb):
# Prepare some variables to use inside generate() function below
# which is outside of the request context
shutdown_func = request.environ.get("werkzeug.server.shutdown")
path = request.path
request_path = request.path
# If this is a zipped file, then serve as-is. If it's not zipped, then,
# if the http client supports gzip compression, gzip the file first
@ -101,29 +187,116 @@ class ShareModeWeb(SendBaseModeWeb):
if use_gzip:
file_to_download = self.gzip_filename
self.filesize = self.gzip_filesize
etag = self.gzip_etag
else:
file_to_download = self.download_filename
self.filesize = self.download_filesize
etag = self.download_etag
# for range requests
range_, status_code = self.get_range_and_status_code(self.filesize, etag, self.last_modified)
# Tell GUI the download started
history_id = self.cur_history_id
self.cur_history_id += 1
self.web.add_request(
self.web.REQUEST_STARTED, path, {"id": history_id, "use_gzip": use_gzip}
self.web.REQUEST_STARTED, request_path, {"id": history_id, "use_gzip": use_gzip}
)
basename = os.path.basename(self.download_filename)
def generate():
if status_code == 304:
r = Response()
else:
r = Response(
self.generate(shutdown_func, range_, file_to_download, request_path,
history_id, self.filesize))
if use_gzip:
r.headers.set('Content-Encoding', 'gzip')
r.headers.set('Content-Length', range_[1] - range_[0] + 1)
filename_dict = {
"filename": unidecode(basename),
"filename*": "UTF-8''%s" % url_quote(basename),
}
r.headers.set('Content-Disposition', 'attachment', **filename_dict)
r = self.web.add_security_headers(r)
# guess content type
(content_type, _) = mimetypes.guess_type(basename, strict=False)
if content_type is not None:
r.headers.set('Content-Type', content_type)
r.headers.set('Accept-Ranges', 'bytes')
r.headers.set('ETag', etag)
r.headers.set('Last-Modified', http_date(self.last_modified))
# we need to set this for range requests
r.headers.set('Vary', 'Accept-Encoding')
if status_code == 206:
r.headers.set('Content-Range',
'bytes {}-{}/{}'.format(range_[0], range_[1], self.filesize))
r.status_code = status_code
return r
@classmethod
def get_range_and_status_code(cls, dl_size, etag, last_modified):
    """
    Work out which byte range to serve and which status code to answer with,
    based on the headers of the current Flask request.

    :param dl_size: size in bytes of the file that will be served
    :param etag: ETag of the file that will be served
    :param last_modified: datetime compared against ``If-Unmodified-Since``
    :return: tuple ``(range_, status_code)`` where ``range_`` is an inclusive
        ``(start, end)`` pair and ``status_code`` is 200, 206 or 304
    :raises: calls ``abort(416)`` for multi-range requests and ``abort(412)``
        when the ``ETag`` or ``If-Unmodified-Since`` checks below fail
    """
    range_header = request.headers.get('Range')
    whole_file = (0, dl_size - 1)

    # Start from "serve everything" and only narrow it down for GET requests
    ranges = [whole_file]
    status_code = 200

    # range requests are only allowed for get
    if request.method == 'GET':
        requested = parse_range_header(range_header, dl_size)
        partial = (
            len(requested) != 1
            or requested[0][0] != 0
            or requested[0][1] != dl_size - 1
        )

        if range_header:
            # A non-matching If-Range validator means the client must get
            # the complete file instead of the part it asked for
            if_range = request.headers.get('If-Range')
            if if_range and if_range != etag:
                partial = False

        if partial:
            ranges = requested
            status_code = 206

    if len(ranges) > 1:
        abort(416)  # We don't support multipart range requests yet
    range_ = ranges[0]

    # NOTE(review): this reads a request header literally named "ETag";
    # confirm whether "If-Match" was intended.
    etag_header = request.headers.get('ETag')
    if etag_header is not None and etag_header != etag:
        abort(412)

    if_unmod = request.headers.get('If-Unmodified-Since')
    if if_unmod:
        if_date = parse_date(if_unmod)
        if if_date and if_date > last_modified:
            abort(412)
        elif range_header is None:
            # Without a Range header the reply carries no body
            status_code = 304

    return range_, status_code
def generate(self, shutdown_func, range_, file_to_download, path, history_id, filesize):
# The user hasn't canceled the download
self.client_cancel = False
# Starting a new download
if self.web.settings.get("share", "autostop_sharing"):
self.download_in_progress = True
start, end = range_
chunk_size = 102400 # 100kb
fp = open(file_to_download, "rb")
fp.seek(start)
self.web.done = False
canceled = False
bytes_left = end - start + 1
while not self.web.done:
# The user has canceled the download, so stop serving the file
if not self.web.stop_q.empty():
@ -132,7 +305,8 @@ class ShareModeWeb(SendBaseModeWeb):
)
break
chunk = fp.read(chunk_size)
read_size = min(chunk_size, bytes_left)
chunk = fp.read(read_size)
if chunk == b"":
self.web.done = True
else:
@ -141,7 +315,8 @@ class ShareModeWeb(SendBaseModeWeb):
# tell GUI the progress
downloaded_bytes = fp.tell()
percent = (1.0 * downloaded_bytes / self.filesize) * 100
percent = (1.0 * downloaded_bytes / filesize) * 100
bytes_left -= read_size
# only output to stdout if running onionshare in CLI mode, or if using Linux (#203, #304)
if (
@ -162,7 +337,7 @@ class ShareModeWeb(SendBaseModeWeb):
self.web.add_request(
self.web.REQUEST_PROGRESS,
path,
{"id": history_id, "bytes": downloaded_bytes},
{"id": history_id, "bytes": downloaded_bytes, 'total_bytes': filesize,},
)
self.web.done = False
except:
@ -195,21 +370,6 @@ class ShareModeWeb(SendBaseModeWeb):
except:
pass
r = Response(generate())
if use_gzip:
r.headers.set("Content-Encoding", "gzip")
r.headers.set("Content-Length", self.filesize)
filename_dict = {
"filename": unidecode(basename),
"filename*": "UTF-8''%s" % url_quote(basename),
}
r.headers.set("Content-Disposition", "attachment", **filename_dict)
r = self.web.add_security_headers(r)
# guess content type
(content_type, _) = mimetypes.guess_type(basename, strict=False)
if content_type is not None:
r.headers.set("Content-Type", content_type)
return r
def directory_listing_template(
self, path, files, dirs, breadcrumbs, breadcrumbs_leaf
@ -305,6 +465,8 @@ class ShareModeWeb(SendBaseModeWeb):
if len(self.file_info["files"]) == 1 and len(self.file_info["dirs"]) == 0:
self.download_filename = self.file_info["files"][0]["filename"]
self.download_filesize = self.file_info["files"][0]["size"]
with open(self.download_filename, 'rb') as f:
self.download_etag = make_etag(f)
# Compress the file with gzip now, so we don't have to do it on each request
self.gzip_filename = tempfile.mkstemp("wb+")[1]
@ -312,6 +474,8 @@ class ShareModeWeb(SendBaseModeWeb):
self.download_filename, self.gzip_filename, 6, processed_size_callback
)
self.gzip_filesize = os.path.getsize(self.gzip_filename)
with open(self.gzip_filename, 'rb') as f:
self.gzip_etag = make_etag(f)
# Make sure the gzip file gets cleaned up when onionshare stops
self.cleanup_filenames.append(self.gzip_filename)
@ -337,6 +501,8 @@ class ShareModeWeb(SendBaseModeWeb):
self.zip_writer.close()
self.download_filesize = os.path.getsize(self.download_filename)
with open(self.download_filename, 'rb') as f:
self.download_etag = make_etag(f)
# Make sure the zip file gets cleaned up when onionshare stops
self.cleanup_filenames.append(self.zip_writer.zip_filename)

View file

@ -1,16 +1,24 @@
import os
import random
import re
import socket
import subprocess
import time
import zipfile
import tempfile
import base64
from io import BytesIO
import pytest
from contextlib import contextmanager
from multiprocessing import Process
from urllib.request import urlopen, Request
from werkzeug.datastructures import Headers
from werkzeug.exceptions import RequestedRangeNotSatisfiable
from onionshare_cli.common import Common
from onionshare_cli.web import Web
from onionshare_cli.web.share_mode import parse_range_header
from onionshare_cli.settings import Settings
from onionshare_cli.mode_settings import ModeSettings
import onionshare_cli.web.receive_mode
@ -272,3 +280,263 @@ class TestZipWriterCustom:
def test_custom_callback(self, custom_zw):
    """The ``custom_zw`` fixture's size callback returns its marker string."""
    callback_result = custom_zw.processed_size_callback(None)
    assert callback_result == "custom_callback"
def check_unsupported(cmd: str, args: list):
    """
    Build a ``pytest.mark.skipif`` marker that skips a test when running
    ``cmd`` with ``args`` fails.

    The command is executed once, when this function is called (i.e. when
    the decorated test is defined). Any exception raised while running it,
    including a non-zero exit status, marks the command as unsupported.

    :param cmd: executable to probe
    :param args: arguments passed to the executable for the probe
    :return: a ``skipif`` marker usable as a decorator
    """
    command = [cmd, *args]
    try:
        subprocess.check_call(command)
    except Exception:
        unavailable = True
    else:
        unavailable = False
    return pytest.mark.skipif(
        unavailable, reason="Command {!r} not supported".format(cmd)
    )
@contextmanager
def live_server(web):
    """
    Run ``web.app`` in a child process and yield the URL of its download route.

    A free port is picked by binding a throw-away socket to port 0, the app
    is started on ``127.0.0.1`` at that port, and the root URL is polled
    with HTTP Basic credentials (user ``onionshare``, password
    ``web.password``) until a request succeeds. Polling is tried 20 times
    with a 0.5 second pause in between; the last error is re-raised if the
    server never answers.

    The child process is always terminated when the context exits, including
    when start-up polling fails or the ``with`` body raises.

    :param web: object exposing ``app.run(host=, port=, debug=)`` and
        ``password``
    :return: context manager yielding ``http://127.0.0.1:<port>/download``
    """
    # Let the OS choose a free port; the socket is closed again right away so
    # that the server process can bind the port itself
    with socket.socket() as s:
        s.bind(("localhost", 0))
        port = s.getsockname()[1]

    def run():
        web.app.run(host="127.0.0.1", port=port, debug=False)

    proc = Process(target=run)
    proc.start()

    try:
        url = "http://127.0.0.1:{}".format(port)
        auth = base64.b64encode(b"onionshare:" + web.password.encode()).decode()
        req = Request(url, headers={"Authorization": "Basic {}".format(auth)})

        # Wait until the server accepts requests
        attempts = 20
        while True:
            try:
                urlopen(req)
                break
            except Exception:
                attempts -= 1
                if attempts > 0:
                    time.sleep(0.5)
                else:
                    raise

        yield url + "/download"
    finally:
        # Runs on every exit path so the server process is never leaked
        proc.terminate()
        # Bounded wait so a stuck child cannot hang the test run
        proc.join(timeout=5)
class TestRangeRequests:
    """
    Tests for HTTP range-request handling in share mode: the
    ``parse_range_header`` helper, the headers and status codes returned by
    the ``/download`` route, and resuming downloads with external clients.
    """

    # Each entry is (Range header value, target size, expected parsed ranges).
    # Expected ranges are inclusive (start, end) pairs after merging.
    VALID_RANGES = [
        (None, 500, [(0, 499)]),
        ("bytes=0", 500, [(0, 499)]),
        ("bytes=100", 500, [(100, 499)]),
        # NOTE(review): RFC 7233 does define the open-ended "N-" form; the
        # dash-less "bytes=N" entries above look like the non-RFC ones - confirm
        ("bytes=100-", 500, [(100, 499)]),  # not in the RFC, but how curl sends
        ("bytes=0-99", 500, [(0, 99)]),
        ("bytes=0-599", 500, [(0, 499)]),
        ("bytes=0-0", 500, [(0, 0)]),
        ("bytes=-100", 500, [(400, 499)]),
        ("bytes=0-99,100-199", 500, [(0, 199)]),
        ("bytes=0-100,100-199", 500, [(0, 199)]),
        ("bytes=0-99,101-199", 500, [(0, 99), (101, 199)]),
        ("bytes=0-199,100-299", 500, [(0, 299)]),
        ("bytes=0-99,200-299", 500, [(0, 99), (200, 299)]),
    ]

    # Header values that must be rejected; each is parsed against a target
    # size of 500 in test_parse_ranges
    INVALID_RANGES = [
        "bytes=200-100",
        "bytes=0-100,300-200",
    ]

    def test_parse_ranges(self):
        """
        Every VALID_RANGES entry parses to its expected ranges and every
        INVALID_RANGES entry raises RequestedRangeNotSatisfiable.
        """
        for case in self.VALID_RANGES:
            (header, target_size, expected) = case
            parsed = parse_range_header(header, target_size)
            # the whole case is used as the assertion message for diagnosis
            assert parsed == expected, case

        for invalid in self.INVALID_RANGES:
            with pytest.raises(RequestedRangeNotSatisfiable):
                parse_range_header(invalid, 500)

    def test_headers(self, temp_dir, common_obj):
        """
        A plain GET of /download carries ETag, Accept-Ranges, Last-Modified,
        Content-Length and a Vary header that mentions Accept-Encoding.
        """
        web = web_obj(temp_dir, common_obj, "share", 3)
        web.settings.set("share", "autostop_sharing", False)
        url = "/download"

        with web.app.test_client() as client:
            resp = client.get(url, headers=self._make_auth_headers(web.password))
            assert resp.headers["ETag"].startswith('"sha256:')
            assert resp.headers["Accept-Ranges"] == "bytes"
            assert resp.headers.get("Last-Modified") is not None
            assert resp.headers.get("Content-Length") is not None
            assert "Accept-Encoding" in resp.headers["Vary"]

    def test_basic(self, temp_dir, common_obj):
        """
        A GET without a Range header answers 200 with the complete contents
        of the download file.
        """
        web = web_obj(temp_dir, common_obj, "share", 3)
        web.settings.set("share", "autostop_sharing", False)
        url = "/download"
        with open(web.share_mode.download_filename, "rb") as f:
            contents = f.read()

        with web.app.test_client() as client:
            resp = client.get(url, headers=self._make_auth_headers(web.password))
            assert resp.status_code == 200
            assert resp.data == contents

    def test_reassemble(self, temp_dir, common_obj):
        """
        Two consecutive range requests (bytes 0-10, then 11 onwards) each
        answer 206 with the matching Content-Range, and their bodies
        concatenate to the complete file.
        """
        web = web_obj(temp_dir, common_obj, "share", 3)
        web.settings.set("share", "autostop_sharing", False)
        url = "/download"
        with open(web.share_mode.download_filename, "rb") as f:
            contents = f.read()

        with web.app.test_client() as client:
            headers = self._make_auth_headers(web.password)
            headers.extend({"Range": "bytes=0-10"})
            resp = client.get(url, headers=headers)
            assert resp.status_code == 206
            content_range = resp.headers["Content-Range"]
            assert content_range == "bytes {}-{}/{}".format(
                0, 10, web.share_mode.download_filesize
            )
            bytes_out = resp.data

            # the same Headers object is reused with a new Range value; the
            # assertion below expects the end to be reported as the file's
            # last byte
            headers.update({"Range": "bytes=11-100000"})
            resp = client.get(url, headers=headers)
            assert resp.status_code == 206
            content_range = resp.headers["Content-Range"]
            assert content_range == "bytes {}-{}/{}".format(
                11,
                web.share_mode.download_filesize - 1,
                web.share_mode.download_filesize,
            )
            bytes_out += resp.data

            assert bytes_out == contents

    def test_mismatched_etags(self, temp_dir, common_obj):
        """RFC 7233 Section 3.2
        The "If-Range" header field allows a client to "short-circuit" the second request.
        Informally, its meaning is as follows: if the representation is unchanged, send me the
        part(s) that I am requesting in Range; otherwise, send me the entire representation.
        """
        web = web_obj(temp_dir, common_obj, "share", 3)
        web.settings.set("share", "autostop_sharing", False)
        url = "/download"
        with open(web.share_mode.download_filename, "rb") as f:
            contents = f.read()

        with web.app.test_client() as client:
            headers = self._make_auth_headers(web.password)
            resp = client.get(url, headers=headers)
            assert resp.status_code == 200

            # an If-Range value that does not match the ETag must yield the
            # full file with status 200 rather than the requested part
            headers.extend({"If-Range": "mismatched etag", "Range": "bytes=10-100"})
            resp = client.get(url, headers=headers)
            assert resp.status_code == 200
            assert resp.data == contents

    def test_if_unmodified_since(self, temp_dir, common_obj):
        """
        Sending the server's own Last-Modified value back as
        If-Unmodified-Since, without a Range header, answers 304.
        """
        web = web_obj(temp_dir, common_obj, "share", 3)
        web.settings.set("share", "autostop_sharing", False)
        url = "/download"

        with web.app.test_client() as client:
            headers = self._make_auth_headers(web.password)
            resp = client.get(url, headers=headers)
            assert resp.status_code == 200
            last_mod = resp.headers["Last-Modified"]

            headers.extend({"If-Unmodified-Since": last_mod})
            resp = client.get(url, headers=headers)
            assert resp.status_code == 304

    def test_firefox_like_behavior(self, temp_dir, common_obj):
        """
        A range request that also carries If-Unmodified-Since and a matching
        If-Range validator answers 206.
        """
        web = web_obj(temp_dir, common_obj, "share", 3)
        web.settings.set("share", "autostop_sharing", False)
        url = "/download"

        with web.app.test_client() as client:
            headers = self._make_auth_headers(web.password)
            resp = client.get(url, headers=headers)
            assert resp.status_code == 200

            # Firefox sends these with all range requests
            etag = resp.headers["ETag"]
            last_mod = resp.headers["Last-Modified"]

            # make a request that uses the full header set
            headers.extend(
                {
                    "Range": "bytes=0-10",
                    "If-Unmodified-Since": last_mod,
                    "If-Range": etag,
                }
            )
            resp = client.get(url, headers=headers)
            assert resp.status_code == 206

    def _make_auth_headers(self, password):
        """
        Return a werkzeug Headers object holding an HTTP Basic Authorization
        header for user ``onionshare`` with the given password.
        """
        auth = base64.b64encode(b"onionshare:" + password.encode()).decode()
        h = Headers()
        h.add("Authorization", "Basic " + auth)
        return h

    @check_unsupported("curl", ["--version"])
    def test_curl(self, temp_dir, tmpdir, common_obj):
        """
        curl can download from a live server with ``--continue-at 10``;
        the test passes when curl exits with status 0.
        """
        web = web_obj(temp_dir, common_obj, "share", 3)
        web.settings.set("share", "autostop_sharing", False)

        download = tmpdir.join("download")

        with live_server(web) as url:
            # Debugging help from `man curl`, on error 33
            #       33     HTTP range error. The range "command" didn't work.
            auth_header = self._make_auth_headers(web.password)
            subprocess.check_call(
                [
                    "curl",
                    "-H",
                    # str() of the Headers object is stripped and passed as
                    # the header argument
                    str(auth_header).strip(),
                    "--output",
                    str(download),
                    "--continue-at",
                    "10",
                    url,
                ]
            )

    @check_unsupported("wget", ["--version"])
    def test_wget(self, temp_dir, tmpdir, common_obj):
        """
        wget can continue a download from a live server into an existing
        10-byte file; the test passes when wget exits with status 0.
        """
        web = web_obj(temp_dir, common_obj, "share", 3)
        web.settings.set("share", "autostop_sharing", False)

        # wget needs a file to exist to continue
        download = tmpdir.join("download")
        download.write("x" * 10)

        with live_server(web) as url:
            auth_header = self._make_auth_headers(web.password)
            subprocess.check_call(
                [
                    "wget",
                    "--header",
                    str(auth_header).strip(),
                    "--continue",
                    "-O",
                    str(download),
                    url,
                ]
            )

    @check_unsupported("http", ["--version"])
    def test_httpie(self, temp_dir, common_obj):
        """
        httpie can request a range from a live server; the test passes when
        the ``http`` command exits with status 0.
        """
        web = web_obj(temp_dir, common_obj, "share", 3)
        web.settings.set("share", "autostop_sharing", False)

        with live_server(web) as url:
            # NOTE(review): unlike the curl and wget tests, no Authorization
            # header is passed here - confirm this is intended
            subprocess.check_call(["http", url, "Range: bytes=10"])

View file

@ -0,0 +1,41 @@
import pytest
import subprocess
from tempfile import NamedTemporaryFile
from werkzeug.exceptions import RequestedRangeNotSatisfiable
from onionshare_cli.web.share_mode import parse_range_header
# Each entry is (Range header value, target size, expected parsed ranges);
# expected ranges are inclusive (start, end) pairs after merging.
VALID_RANGES = [
    (None, 500, [(0, 499)]),
    ('bytes=0', 500, [(0, 499)]),
    ('bytes=100', 500, [(100, 499)]),
    ('bytes=100-', 500, [(100, 499)]),  # not in the RFC, but how curl sends
    ('bytes=0-99', 500, [(0, 99)]),
    ('bytes=0-599', 500, [(0, 499)]),
    ('bytes=0-0', 500, [(0, 0)]),
    ('bytes=-100', 500, [(400, 499)]),
    ('bytes=0-99,100-199', 500, [(0, 199)]),
    ('bytes=0-100,100-199', 500, [(0, 199)]),
    ('bytes=0-99,101-199', 500, [(0, 99), (101, 199)]),
    ('bytes=0-199,100-299', 500, [(0, 299)]),
    ('bytes=0-99,200-299', 500, [(0, 99), (200, 299)]),
]


# Header values that must be rejected when parsed against a 500-byte target.
INVALID_RANGES = [
    'bytes=200-100',
    'bytes=0-100,300-200',
]


def test_parse_ranges():
    """
    Check parse_range_header against both tables: valid headers must parse
    to the expected ranges, invalid ones must raise
    RequestedRangeNotSatisfiable.
    """
    for header, target_size, expected in VALID_RANGES:
        result = parse_range_header(header, target_size)
        # report the full case tuple when a comparison fails
        assert result == expected, (header, target_size, expected)

    for bad_header in INVALID_RANGES:
        with pytest.raises(RequestedRangeNotSatisfiable):
            parse_range_header(bad_header, 500)