From 2907016d49ce09f18a34ec6331deb4b87b1598f3 Mon Sep 17 00:00:00 2001 From: Zach Steindler Date: Mon, 18 Jan 2021 10:57:03 -0500 Subject: [PATCH 1/3] Allow storing files in S3, instead of just on-disk These changes are backwards-compatible, so it will first check S3 and fall back to on-disk if it cannot find the file. There's a new config option `s3_bucket_name`. We assume you have the region and AWS credentials configured in your environment. Also update dependencies to more modern versions. --- docstore | 148 ++++++++++++++++++++++++++++++++++---------- requirements.txt | 12 ++-- settings.sample.yml | 1 + templates/view.html | 56 ++++++----------- 4 files changed, 144 insertions(+), 73 deletions(-) mode change 100755 => 100644 requirements.txt mode change 100755 => 100644 settings.sample.yml diff --git a/docstore b/docstore index 5a962ed..54b7e8b 100755 --- a/docstore +++ b/docstore @@ -2,6 +2,7 @@ import base64 from datetime import datetime +import json import logging import os import shutil @@ -9,6 +10,7 @@ import sys from urllib.parse import quote, unquote, unquote_plus, urlencode import bleach +import boto3 import markdown from sqlalchemy import Column, create_engine, desc, Date, func, Integer, or_, String from sqlalchemy.ext.declarative import declarative_base @@ -50,12 +52,14 @@ class IndexHandler(RequestHandler): class AddHandler(RequestHandler): def initialize( - self, region, google_analytics_id, SessionMaker, stored_docs_path): + self, region, google_analytics_id, SessionMaker, stored_docs_path, + s3_bucket_name=None): self.__region = region self.__google_analytics_id = google_analytics_id self.__SessionMaker = SessionMaker self.__stored_docs_path = stored_docs_path + self.__s3_bucket_name = s3_bucket_name def get(self): authorized = self.get_secure_cookie('authorized') @@ -71,7 +75,8 @@ class AddHandler(RequestHandler): self.render( 'add.html', region=self.__region, org_names=org_names, - google_analytics_id=self.__google_analytics_id, 
authorized=authorized + google_analytics_id=self.__google_analytics_id, + authorized=authorized ) def post(self): @@ -127,21 +132,35 @@ class AddHandler(RequestHandler): document_id = new_doc.id session.close() - # Make the directory - directory = os.path.join(self.__stored_docs_path, str(document_id)) - os.mkdir(directory) + # If S3 is configured, save file there + if self.__s3_bucket_name: + s3_client = boto3.client('s3') + + for each_file in file_array: + s3_key = 'file/{}/{}'.format(document_id, each_file['filename']) + s3_client.put_object( + Body=each_file['body'], + Bucket=self.__s3_bucket_name, + Key=s3_key, + ) + + # Otherwise, save to disk + else: + # Make the directory + directory = os.path.join(self.__stored_docs_path, str(document_id)) + os.mkdir(directory) - # Write out the files to disk - for each_file in file_array: - file_data = each_file['body'] - filename = each_file['filename'] + # Write out the files to disk + for each_file in file_array: + file_data = each_file['body'] + filename = each_file['filename'] - # Use id number to write to disk - file_path = os.path.join(directory, filename) + # Use id number to write to disk + file_path = os.path.join(directory, filename) - fd = open(file_path, 'wb') - fd.write(file_data) - fd.close() + fd = open(file_path, 'wb') + fd.write(file_data) + fd.close() self.set_cookie('notification', quote('Document added; thanks!')) self.redirect('/') @@ -232,12 +251,14 @@ class LegacyFileHandler(RequestHandler): class ViewHandler(RequestHandler): def initialize( - self, region, google_analytics_id, SessionMaker, stored_docs_path): + self, region, google_analytics_id, SessionMaker, stored_docs_path, + s3_bucket_name=None): self.__region = region self.__google_analytics_id = google_analytics_id self.__SessionMaker = SessionMaker self.__stored_docs_path = stored_docs_path + self.__s3_bucket_name = s3_bucket_name def get(self, document_id, filename=None): authorized = self.get_secure_cookie('authorized') @@ -248,26 +269,60 
@@ class ViewHandler(RequestHandler): allowed_tags = bleach.ALLOWED_TAGS + ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre'] - # Get file names - doc_folder = os.path.join(self.__stored_docs_path, str(document_id)) - files = os.listdir(doc_folder) + files = [] + + # If S3 is configured, check there first + if self.__s3_bucket_name: + s3_client = boto3.client('s3') + objects = s3_client.list_objects( + Bucket=self.__s3_bucket_name, + Prefix='file/{}/'.format(document_id), + ) + + for each_object in objects.get('Contents', []): + files.append(each_object['Key'].split('/')[-1]) + + # If we didn't find anything, check on-disk + if not files: + doc_folder = os.path.join(self.__stored_docs_path, str(document_id)) + files.extend(os.listdir(doc_folder)) self.render( 'view.html', region=self.__region, google_analytics_id=self.__google_analytics_id, authorized=authorized, doc=doc, filename=filename, bleach=bleach, markdown=markdown, allowed_tags=allowed_tags, files=files, - urlencode=urlencode + urlencode=urlencode, xsrf_token=self.xsrf_token, ) class DownloadHandler(RequestHandler): - def initialize(self, stored_docs_path): + def initialize(self, stored_docs_path, s3_bucket_name=None): self.__stored_docs_path = stored_docs_path + self.__s3_bucket_name = s3_bucket_name def get(self, doc_id, filename): filename = unquote_plus(filename) + + # If S3 is configured, attempt to get file from there + if self.__s3_bucket_name: + s3_key = 'file/{}/{}'.format(doc_id, filename) + s3_client = boto3.client('s3') + objects = s3_client.list_objects( + Bucket=self.__s3_bucket_name, + Prefix=s3_key, + ) + + if objects.get('Contents'): + presigned_url = s3_client.generate_presigned_url( + 'get_object', + Params={'Bucket': self.__s3_bucket_name, 'Key': s3_key}, + ) + self.redirect(presigned_url) + return + + # Object was not in S3, so try on-disk file_path = os.path.join(self.__stored_docs_path, str(doc_id), filename) if not os.path.exists(file_path): @@ -357,9 +412,10 @@ class 
EditHandler(RequestHandler): class DeleteHandler(RequestHandler): - def initialize(self, SessionMaker, stored_docs_path): + def initialize(self, SessionMaker, stored_docs_path, s3_bucket_name): self.__SessionMaker = SessionMaker self.__stored_docs_path = stored_docs_path + self.__s3_bucket_name = s3_bucket_name def post(self): # Make sure we are authorized @@ -370,15 +426,36 @@ class DeleteHandler(RequestHandler): self.write('Not authorized') return - doc_id = self.get_argument('doc_id', None) + body_dict = json.loads(self.request.body) + doc_id = body_dict.get('doc_id') if not doc_id: - self.set_stataus(401) + self.set_status(401) self.write('Bad request, no doc_id') return - # Remove file on disk - shutil.rmtree(os.path.join(self.__stored_docs_path, str(doc_id))) + # If S3 is configured, check if we should remove files + if self.__s3_bucket_name: + s3_client = boto3.client('s3') + objs = s3_client.list_objects( + Bucket=self.__s3_bucket_name, + Prefix='file/{}/'.format(doc_id), + ) + + objects = [{'Key': each['Key']} for each in objs.get('Contents', [])] + if objects: + s3_client.delete_objects( + Bucket=self.__s3_bucket_name, + Delete={'Objects': objects}, + ) + + # Check if we should remove files from on-disk + doc_folder = os.path.join(self.__stored_docs_path, str(doc_id)) + try: + os.listdir(doc_folder) + shutil.rmtree(doc_folder) + except FileNotFoundError: + pass # Remove metadata session = self.__SessionMaker() @@ -388,7 +465,7 @@ class DeleteHandler(RequestHandler): self.set_cookie( 'notification', - quote('Deleted document: {}'.format(doc.doc_title.encode('utf8'))) + quote('Deleted document: {}'.format(doc.doc_title)), ) self.write({'success': True}) @@ -465,6 +542,8 @@ class AuthHandler(RequestHandler): # If not, make sure basic auth is submitted auth_header = self.request.headers.get('Authorization') + if auth_header: + auth_header = auth_header.encode('utf8') if not auth_header: self.set_header('WWW-Authenticate', 'Basic realm=/auth/') @@ -473,7 
+552,7 @@ class AuthHandler(RequestHandler): else: # We have basic auth info; check it - auth_decoded = base64.decodestring(auth_header[6:]) + auth_decoded = base64.decodestring(auth_header[6:]).decode('utf8') username, password = auth_decoded.split(':', 2) if password == self.__password: @@ -577,7 +656,8 @@ if __name__ == '__main__': region=settings['region'], google_analytics_id=google_analytics_id, SessionMaker=SessionMaker, - stored_docs_path=stored_docs_path + stored_docs_path=stored_docs_path, + s3_bucket_name=settings.get('s3_bucket_name'), )), (r'/search', SearchHandler, dict( @@ -596,7 +676,8 @@ if __name__ == '__main__': region=settings['region'], google_analytics_id=google_analytics_id, SessionMaker=SessionMaker, - stored_docs_path=stored_docs_path + stored_docs_path=stored_docs_path, + s3_bucket_name=settings.get('s3_bucket_name'), )), (r'/view/([0-9]+)/(.*)', ViewHandler, dict( @@ -607,7 +688,8 @@ if __name__ == '__main__': )), (r'/file/([0-9]+)/(.*)', DownloadHandler, dict( - stored_docs_path=stored_docs_path + stored_docs_path=stored_docs_path, + s3_bucket_name=settings.get('s3_bucket_name'), )), (r'/edit/([0-9]+)', EditHandler, dict( @@ -618,7 +700,8 @@ if __name__ == '__main__': (r'/delete', DeleteHandler, dict( SessionMaker=SessionMaker, - stored_docs_path=stored_docs_path + stored_docs_path=stored_docs_path, + s3_bucket_name=settings.get('s3_bucket_name'), )), (r'/orgs', OrgHandler, dict( @@ -641,7 +724,8 @@ if __name__ == '__main__': template_path=template_path, cookie_secret=settings['cookie_secret'], - xsrf_cookies=True + xsrf_cookies=True, + debug=settings.get('debug', False), ) server = HTTPServer(app, max_buffer_size=max_file_size) diff --git a/requirements.txt b/requirements.txt old mode 100755 new mode 100644 index 72f2e85..a845a18 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ -bleach>=1.4.2 -Markdown>=2.6.5 -PyYAML>=3.10 -SQLAlchemy>=0.9.1 -tornado>=4.3 +bleach>=3.2.1 +boto3>=1.16.56 +botocore>=1.19.56 +Markdown>=3.3.3 
+PyYAML>=5.3.1 +SQLAlchemy>=1.3.22 +tornado>=6.1 diff --git a/settings.sample.yml b/settings.sample.yml old mode 100755 new mode 100644 index 29bc852..483de93 --- a/settings.sample.yml +++ b/settings.sample.yml @@ -3,3 +3,4 @@ password: '__make_your_own_management_password__' cookie_secret: '__this_can_be_anything_it_is_just_for_the_server__' google_analytics_id: '__optional_just_remove_this_line_if_not_needed__' max_file_size: 104857600 +s3_bucket_name: 'your_s3_bucket_name' diff --git a/templates/view.html b/templates/view.html index cc5469a..82737bb 100644 --- a/templates/view.html +++ b/templates/view.html @@ -4,41 +4,6 @@ {{ doc.doc_title }} {% end %} -{% block head %} - {% if authorized %} - - {% end %} -{% end %} - {% block body %}

Document Details

@@ -105,7 +70,7 @@

Document Details

{% end %} {{ each_file }} {% if each_file == filename %} - + {% end %}

{% end %} @@ -117,5 +82,24 @@

Management Area



+ {% end %} {% end %} From 11e0802fca6f9ccf6f136b58539f753206886b4d Mon Sep 17 00:00:00 2001 From: Zach Steindler Date: Mon, 18 Jan 2021 10:57:03 -0500 Subject: [PATCH 2/3] Allow storing files in S3, instead of just on-disk These changes are backwards-compatible, so it will first check S3 and fall back to on-disk if it cannot find the file. There's a new config option `s3_bucket_name`. We assume you have the region and AWS credentials configured in your environment. Also update dependencies to more modern versions. --- docstore | 152 ++++++++++++++++++++++++++++++++++---------- requirements.txt | 12 ++-- settings.sample.yml | 1 + templates/view.html | 56 ++++++---------- 4 files changed, 147 insertions(+), 74 deletions(-) mode change 100755 => 100644 requirements.txt mode change 100755 => 100644 settings.sample.yml diff --git a/docstore b/docstore index a297b71..c0889b3 100755 --- a/docstore +++ b/docstore @@ -2,6 +2,7 @@ import base64 from datetime import datetime, time +import json import logging import mimetypes import os @@ -10,6 +11,7 @@ import sys from urllib.parse import quote, unquote, unquote_plus, urlencode import bleach +import boto3 from dateutil import tz from feedgen.feed import FeedGenerator import markdown @@ -56,12 +58,14 @@ class IndexHandler(RequestHandler): class AddHandler(RequestHandler): def initialize( - self, region, google_analytics_id, SessionMaker, stored_docs_path): + self, region, google_analytics_id, SessionMaker, stored_docs_path, + s3_bucket_name=None): self.__region = region self.__google_analytics_id = google_analytics_id self.__SessionMaker = SessionMaker self.__stored_docs_path = stored_docs_path + self.__s3_bucket_name = s3_bucket_name def get(self): authorized = self.get_secure_cookie('authorized') @@ -77,7 +81,8 @@ class AddHandler(RequestHandler): self.render( 'add.html', region=self.__region, org_names=org_names, - google_analytics_id=self.__google_analytics_id, authorized=authorized + 
google_analytics_id=self.__google_analytics_id, + authorized=authorized ) def post(self): @@ -133,21 +138,35 @@ class AddHandler(RequestHandler): document_id = new_doc.id session.close() - # Make the directory - directory = os.path.join(self.__stored_docs_path, str(document_id)) - os.mkdir(directory) + # If S3 is configured, save file there + if self.__s3_bucket_name: + s3_client = boto3.client('s3') + + for each_file in file_array: + s3_key = 'file/{}/{}'.format(document_id, each_file['filename']) + s3_client.put_object( + Body=each_file['body'], + Bucket=self.__s3_bucket_name, + Key=s3_key, + ) - # Write out the files to disk - for each_file in file_array: - file_data = each_file['body'] - filename = each_file['filename'] + # Otherwise, save to disk + else: + # Make the directory + directory = os.path.join(self.__stored_docs_path, str(document_id)) + os.mkdir(directory) - # Use id number to write to disk - file_path = os.path.join(directory, filename) + # Write out the files to disk + for each_file in file_array: + file_data = each_file['body'] + filename = each_file['filename'] - fd = open(file_path, 'wb') - fd.write(file_data) - fd.close() + # Use id number to write to disk + file_path = os.path.join(directory, filename) + + fd = open(file_path, 'wb') + fd.write(file_data) + fd.close() self.set_cookie('notification', quote('Document added; thanks!')) self.redirect('/') @@ -238,12 +257,14 @@ class LegacyFileHandler(RequestHandler): class ViewHandler(RequestHandler): def initialize( - self, region, google_analytics_id, SessionMaker, stored_docs_path): + self, region, google_analytics_id, SessionMaker, stored_docs_path, + s3_bucket_name=None): self.__region = region self.__google_analytics_id = google_analytics_id self.__SessionMaker = SessionMaker self.__stored_docs_path = stored_docs_path + self.__s3_bucket_name = s3_bucket_name def get(self, document_id, filename=None): authorized = self.get_secure_cookie('authorized') @@ -252,26 +273,62 @@ class 
ViewHandler(RequestHandler): doc = session.query(DocModel).filter(DocModel.id == document_id).one() session.close() - # Get file names - doc_folder = os.path.join(self.__stored_docs_path, str(document_id)) - files = os.listdir(doc_folder) + allowed_tags = bleach.ALLOWED_TAGS + ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre'] + + files = [] + + # If S3 is configured, check there first + if self.__s3_bucket_name: + s3_client = boto3.client('s3') + objects = s3_client.list_objects( + Bucket=self.__s3_bucket_name, + Prefix='file/{}/'.format(document_id), + ) + + for each_object in objects.get('Contents', []): + files.append(each_object['Key'].split('/')[-1]) + + # If we didn't find anything, check on-disk + if not files: + doc_folder = os.path.join(self.__stored_docs_path, str(document_id)) + files.extend(os.listdir(doc_folder)) self.render( 'view.html', region=self.__region, google_analytics_id=self.__google_analytics_id, authorized=authorized, doc=doc, filename=filename, bleach=bleach, - markdown=markdown, allowed_tags=BLEACH_ALLOWED_TAGS, files=files, - urlencode=urlencode + markdown=markdown, allowed_tags=allowed_tags, files=files, + urlencode=urlencode, xsrf_token=self.xsrf_token, ) class DownloadHandler(RequestHandler): - def initialize(self, stored_docs_path): + def initialize(self, stored_docs_path, s3_bucket_name=None): self.__stored_docs_path = stored_docs_path + self.__s3_bucket_name = s3_bucket_name def get(self, doc_id, filename): filename = unquote_plus(filename) + + # If S3 is configured, attempt to get file from there + if self.__s3_bucket_name: + s3_key = 'file/{}/{}'.format(doc_id, filename) + s3_client = boto3.client('s3') + objects = s3_client.list_objects( + Bucket=self.__s3_bucket_name, + Prefix=s3_key, + ) + + if objects.get('Contents'): + presigned_url = s3_client.generate_presigned_url( + 'get_object', + Params={'Bucket': self.__s3_bucket_name, 'Key': s3_key}, + ) + self.redirect(presigned_url) + return + + # Object was not in S3, so try 
on-disk file_path = os.path.join(self.__stored_docs_path, str(doc_id), filename) if not os.path.exists(file_path): @@ -371,9 +428,10 @@ class EditHandler(RequestHandler): class DeleteHandler(RequestHandler): - def initialize(self, SessionMaker, stored_docs_path): + def initialize(self, SessionMaker, stored_docs_path, s3_bucket_name): self.__SessionMaker = SessionMaker self.__stored_docs_path = stored_docs_path + self.__s3_bucket_name = s3_bucket_name def post(self): # Make sure we are authorized @@ -384,15 +442,36 @@ class DeleteHandler(RequestHandler): self.write('Not authorized') return - doc_id = self.get_argument('doc_id', None) + body_dict = json.loads(self.request.body) + doc_id = body_dict.get('doc_id') if not doc_id: - self.set_stataus(401) + self.set_status(401) self.write('Bad request, no doc_id') return - # Remove file on disk - shutil.rmtree(os.path.join(self.__stored_docs_path, str(doc_id))) + # If S3 is configured, check if we should remove files + if self.__s3_bucket_name: + s3_client = boto3.client('s3') + objs = s3_client.list_objects( + Bucket=self.__s3_bucket_name, + Prefix='file/{}/'.format(doc_id), + ) + + objects = [{'Key': each['Key']} for each in objs.get('Contents', [])] + if objects: + s3_client.delete_objects( + Bucket=self.__s3_bucket_name, + Delete={'Objects': objects}, + ) + + # Check if we should remove files from on-disk + doc_folder = os.path.join(self.__stored_docs_path, str(doc_id)) + try: + os.listdir(doc_folder) + shutil.rmtree(doc_folder) + except FileNotFoundError: + pass # Remove metadata session = self.__SessionMaker() @@ -402,7 +481,7 @@ class DeleteHandler(RequestHandler): self.set_cookie( 'notification', - quote('Deleted document: {}'.format(doc.doc_title.encode('utf8'))) + quote('Deleted document: {}'.format(doc.doc_title)), ) self.write({'success': True}) @@ -479,6 +558,8 @@ class AuthHandler(RequestHandler): # If not, make sure basic auth is submitted auth_header = self.request.headers.get('Authorization') + if 
auth_header: + auth_header = auth_header.encode('utf8') if not auth_header: self.set_header('WWW-Authenticate', 'Basic realm=/auth/') @@ -487,7 +568,7 @@ class AuthHandler(RequestHandler): else: # We have basic auth info; check it - auth_decoded = base64.decodestring(auth_header[6:]) + auth_decoded = base64.decodestring(auth_header[6:]).decode('utf8') username, password = auth_decoded.split(':', 2) if password == self.__password: @@ -653,7 +734,8 @@ if __name__ == '__main__': region=settings['region'], google_analytics_id=google_analytics_id, SessionMaker=SessionMaker, - stored_docs_path=stored_docs_path + stored_docs_path=stored_docs_path, + s3_bucket_name=settings.get('s3_bucket_name'), )), (r'/search', SearchHandler, dict( @@ -672,7 +754,8 @@ if __name__ == '__main__': region=settings['region'], google_analytics_id=google_analytics_id, SessionMaker=SessionMaker, - stored_docs_path=stored_docs_path + stored_docs_path=stored_docs_path, + s3_bucket_name=settings.get('s3_bucket_name'), )), (r'/view/([0-9]+)/(.*)', ViewHandler, dict( @@ -683,7 +766,8 @@ if __name__ == '__main__': )), (r'/file/([0-9]+)/(.*)', DownloadHandler, dict( - stored_docs_path=stored_docs_path + stored_docs_path=stored_docs_path, + s3_bucket_name=settings.get('s3_bucket_name'), )), (r'/edit/([0-9]+)', EditHandler, dict( @@ -694,7 +778,8 @@ if __name__ == '__main__': (r'/delete', DeleteHandler, dict( SessionMaker=SessionMaker, - stored_docs_path=stored_docs_path + stored_docs_path=stored_docs_path, + s3_bucket_name=settings.get('s3_bucket_name'), )), (r'/orgs', OrgHandler, dict( @@ -717,7 +802,8 @@ if __name__ == '__main__': template_path=template_path, cookie_secret=settings['cookie_secret'], - xsrf_cookies=True + xsrf_cookies=True, + debug=settings.get('debug', False), ) server = HTTPServer(app, max_buffer_size=max_file_size) diff --git a/requirements.txt b/requirements.txt old mode 100755 new mode 100644 index 94361ec..6a2ed22 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,8 @@ 
-bleach>=1.4.2 +bleach>=3.2.1 +boto3>=1.16.56 +botocore>=1.19.56 feedgen>=0.8.0 -Markdown>=2.6.5 -PyYAML>=3.10 -SQLAlchemy>=0.9.1 -tornado>=4.3 +Markdown>=3.3.3 +PyYAML>=5.3.1 +SQLAlchemy>=1.3.22 +tornado>=6.1 diff --git a/settings.sample.yml b/settings.sample.yml old mode 100755 new mode 100644 index 29bc852..483de93 --- a/settings.sample.yml +++ b/settings.sample.yml @@ -3,3 +3,4 @@ password: '__make_your_own_management_password__' cookie_secret: '__this_can_be_anything_it_is_just_for_the_server__' google_analytics_id: '__optional_just_remove_this_line_if_not_needed__' max_file_size: 104857600 +s3_bucket_name: 'your_s3_bucket_name' diff --git a/templates/view.html b/templates/view.html index cc5469a..82737bb 100644 --- a/templates/view.html +++ b/templates/view.html @@ -4,41 +4,6 @@ {{ doc.doc_title }} {% end %} -{% block head %} - {% if authorized %} - - {% end %} -{% end %} - {% block body %}

Document Details

@@ -105,7 +70,7 @@

Document Details

{% end %} {{ each_file }} {% if each_file == filename %} - + {% end %}

{% end %} @@ -117,5 +82,24 @@

Management Area



+ {% end %} {% end %} From a8737cbce42edeff08bd539cbdb1b151d8d798fd Mon Sep 17 00:00:00 2001 From: Zach Steindler Date: Sat, 23 Jan 2021 11:31:08 -0500 Subject: [PATCH 3/3] Allow configuring the path files are stored on, to support S3 or other external service --- docstore | 41 +++++++++++++++-------------------------- settings.sample.yml | 1 + templates/view.html | 2 +- 3 files changed, 17 insertions(+), 27 deletions(-) diff --git a/docstore b/docstore index c0889b3..26fbce7 100755 --- a/docstore +++ b/docstore @@ -143,8 +143,9 @@ class AddHandler(RequestHandler): s3_client = boto3.client('s3') for each_file in file_array: - s3_key = 'file/{}/{}'.format(document_id, each_file['filename']) + s3_key = '{}/{}'.format(document_id, each_file['filename']) s3_client.put_object( + ACL='public-read', Body=each_file['body'], Bucket=self.__s3_bucket_name, Key=s3_key, @@ -258,12 +259,13 @@ class ViewHandler(RequestHandler): def initialize( self, region, google_analytics_id, SessionMaker, stored_docs_path, - s3_bucket_name=None): + doc_root, s3_bucket_name=None): self.__region = region self.__google_analytics_id = google_analytics_id self.__SessionMaker = SessionMaker self.__stored_docs_path = stored_docs_path + self.__doc_root = doc_root self.__s3_bucket_name = s3_bucket_name def get(self, document_id, filename=None): @@ -282,7 +284,7 @@ class ViewHandler(RequestHandler): s3_client = boto3.client('s3') objects = s3_client.list_objects( Bucket=self.__s3_bucket_name, - Prefix='file/{}/'.format(document_id), + Prefix='{}/'.format(document_id), ) for each_object in objects.get('Contents', []): @@ -297,9 +299,10 @@ class ViewHandler(RequestHandler): 'view.html', region=self.__region, google_analytics_id=self.__google_analytics_id, authorized=authorized, doc=doc, filename=filename, bleach=bleach, - markdown=markdown, allowed_tags=allowed_tags, files=files, - urlencode=urlencode, xsrf_token=self.xsrf_token, - ) + markdown=markdown, allowed_tags=allowed_tags, + 
doc_root=self.__doc_root, files=files, urlencode=urlencode, + xsrf_token=self.xsrf_token, + ) class DownloadHandler(RequestHandler): @@ -311,24 +314,7 @@ class DownloadHandler(RequestHandler): def get(self, doc_id, filename): filename = unquote_plus(filename) - # If S3 is configured, attempt to get file from there - if self.__s3_bucket_name: - s3_key = 'file/{}/{}'.format(doc_id, filename) - s3_client = boto3.client('s3') - objects = s3_client.list_objects( - Bucket=self.__s3_bucket_name, - Prefix=s3_key, - ) - - if objects.get('Contents'): - presigned_url = s3_client.generate_presigned_url( - 'get_object', - Params={'Bucket': self.__s3_bucket_name, 'Key': s3_key}, - ) - self.redirect(presigned_url) - return - - # Object was not in S3, so try on-disk + # Get object from on-disk file_path = os.path.join(self.__stored_docs_path, str(doc_id), filename) if not os.path.exists(file_path): @@ -455,7 +441,7 @@ class DeleteHandler(RequestHandler): s3_client = boto3.client('s3') objs = s3_client.list_objects( Bucket=self.__s3_bucket_name, - Prefix='file/{}/'.format(doc_id), + Prefix='{}/'.format(doc_id), ) objects = [{'Key': each['Key']} for each in objs.get('Contents', [])] @@ -755,6 +741,7 @@ if __name__ == '__main__': google_analytics_id=google_analytics_id, SessionMaker=SessionMaker, stored_docs_path=stored_docs_path, + doc_root=settings.get('doc_root', '/file/'), s3_bucket_name=settings.get('s3_bucket_name'), )), @@ -762,7 +749,9 @@ if __name__ == '__main__': region=settings['region'], google_analytics_id=google_analytics_id, SessionMaker=SessionMaker, - stored_docs_path=stored_docs_path + stored_docs_path=stored_docs_path, + doc_root=settings.get('doc_root', '/file/'), + s3_bucket_name=settings.get('s3_bucket_name'), )), (r'/file/([0-9]+)/(.*)', DownloadHandler, dict( diff --git a/settings.sample.yml b/settings.sample.yml index 483de93..0875324 100644 --- a/settings.sample.yml +++ b/settings.sample.yml @@ -3,4 +3,5 @@ password: '__make_your_own_management_password__' 
cookie_secret: '__this_can_be_anything_it_is_just_for_the_server__' google_analytics_id: '__optional_just_remove_this_line_if_not_needed__' max_file_size: 104857600 +doc_root: '/file/' # could also be 'https://cdn.example.com/' s3_bucket_name: 'your_s3_bucket_name' diff --git a/templates/view.html b/templates/view.html index 82737bb..8696596 100644 --- a/templates/view.html +++ b/templates/view.html @@ -68,7 +68,7 @@

Document Details

{% if each_file == filename %} {% end %} - {{ each_file }} + {{ each_file }} {% if each_file == filename %} {% end %}