
Debug #6

Open · wants to merge 43 commits into master
Changes from 3 commits
Commits (43)
ffadc77  adding profiling (femalves, Jan 4, 2024)
8c974e2  removing logs and adding profiling (femalves, Jan 4, 2024)
4ce2027  removing logs (femalves, Jan 4, 2024)
0e516bc  removing unnecessary logs (Jan 16, 2024)
0c32f67  removing error (Jan 16, 2024)
a517463  removing processing error (femalves, Jan 16, 2024)
19bbf80  fixing logs (femalves, Jan 17, 2024)
0968580  testing PyMuPDF (femalves, Jan 24, 2024)
c5108ce  reversing to img2pdf (femalves, Feb 5, 2024)
242c941  Adding debug log (femalves, Mar 11, 2024)
68defa3  modifying logs (femalves, Mar 11, 2024)
9cbd2ed  testing grabbing images as pdfs and merging (femalves, Mar 15, 2024)
be7150c  checking how fast downloading files is (femalves, Mar 15, 2024)
40558b1  trying pymupdf with logs (femalves, Mar 15, 2024)
1e4293b  Revert "modifying logs" (femalves, Mar 18, 2024)
5429b18  Revert "testing grabbing images as pdfs and merging" (femalves, Mar 18, 2024)
a3a0dd3  code to download pre-computed PDFs (femalves, Apr 1, 2024)
7edeba2  adding changes to image proxy (femalves, Apr 3, 2024)
5646f10  fixing bug (femalves, Apr 3, 2024)
471385c  adding s to pdf (femalves, Apr 4, 2024)
838e9d5  checking if bucket name is correct (femalves, Apr 5, 2024)
5862267  adding log (femalves, Apr 5, 2024)
0f40801  adding extra logs to capture error (femalves, Apr 8, 2024)
742dcda  changing s3_utils (femalves, Apr 8, 2024)
a323319  fixing bug (femalves, Apr 8, 2024)
762a697  cleaning code, adding tests, removing logs (femalves, Apr 10, 2024)
a378240  adding gu=ithub actions (femalves, Apr 12, 2024)
d7c619c  adjusting python version (femalves, Apr 15, 2024)
a0a8166  upgrading python (femalves, Apr 15, 2024)
e3c5fd5  solving 2to_3 bug (femalves, Apr 15, 2024)
a135daf  verbose (femalves, Apr 15, 2024)
1efabe6  changing psycopg (femalves, Apr 15, 2024)
da8badd  fixing bug (femalves, Apr 15, 2024)
9b77585  setuptools < 57 (femalves, Apr 15, 2024)
e0c0756  running without cache (femalves, Apr 15, 2024)
3d51c30  removing space (femalves, Apr 15, 2024)
677be7a  removing space (femalves, Apr 15, 2024)
decd74f  debugging (femalves, Apr 15, 2024)
4e52569  downgrading to python 3.8 (femalves, Apr 15, 2024)
66624f5  removing no cache (femalves, Apr 15, 2024)
de8e606  removing upgrade pip (femalves, Apr 15, 2024)
531dabb  adding quotes (femalves, Apr 15, 2024)
e553195  giving it another try (femalves, Apr 15, 2024)
59 changes: 26 additions & 33 deletions scan_explorer_service/views/image_proxy.py
@@ -3,14 +3,15 @@
from flask_discoverer import advertise
from urllib import parse as urlparse
import img2pdf
from io import BytesIO
import math
import sys
import requests
from scan_explorer_service.models import Collection, Page, Article
from scan_explorer_service.utils.db_utils import item_thumbnail
from scan_explorer_service.utils.utils import url_for_proxy
import time
import io
import cProfile
import pstats


bp_proxy = Blueprint('proxy', __name__, url_prefix='/image')
@@ -20,7 +21,6 @@
@bp_proxy.route('/iiif/2/<path:path>', methods=['GET'])
def image_proxy(path):
"""Proxy in between the image server and the user"""
current_app.logger.debug('######## Starting image proxy for image {} ########'.format(path))
req_url = urlparse.urljoin(f'{current_app.config.get("IMAGE_API_BASE_URL")}/', path)
req_headers = {key: value for (key, value) in request.headers if key != 'Host' and key != 'Accept'}

@@ -29,7 +29,6 @@ def image_proxy(path):

r = requests.request(request.method, req_url, params=request.args, stream=True,
headers=req_headers, allow_redirects=False, data=request.form)
current_app.logger.debug('Response = {r.text}')

excluded_headers = ['content-encoding','content-length', 'transfer-encoding', 'connection']
headers = [(name, value) for (name, value) in r.headers.items() if name.lower() not in excluded_headers]
@@ -38,29 +37,23 @@
def generate():
for chunk in r.raw.stream(decode_content=False):
yield chunk
current_app.logger.debug('######## Ending image proxy for image {path} ########')
return Response(generate(), status=r.status_code, headers=headers)


@advertise(scopes=['api'], rate_limit=[5000, 3600*24])
@bp_proxy.route('/thumbnail', methods=['GET'])
def image_proxy_thumbnail():
"""Helper to generate the correct url for a thumbnail given an ID and type"""

current_app.logger.debug('######## Starting image/thumbnail ########')
try:
id = request.args.get('id')
type = request.args.get('type')
current_app.logger.debug(f"Id {id}, Type {type}")
with current_app.session_scope() as session:
thumbnail_path = item_thumbnail(session, id, type)
current_app.logger.debug(f"Thumbnail path {thumbnail_path}")
path = urlparse.urlparse(thumbnail_path).path

remove = urlparse.urlparse(url_for_proxy('proxy.image_proxy', path='')).path
path = path.replace(remove, '')
current_app.logger.debug(f"Path {path}")
current_app.logger.debug('######## Finishing image/thumbnail ########')

return image_proxy(path)
except Exception as e:
current_app.logger.error(f'{e}')
@@ -70,7 +63,7 @@ def image_proxy_thumbnail():
@bp_proxy.route('/pdf', methods=['GET'])
def pdf_save():
"""Generate a PDF from pages"""
current_app.logger.debug('######## Starting PDF generation process ########')

try:
id = request.args.get('id')
page_start = request.args.get('page_start', 1, int)
@@ -80,6 +73,8 @@ def pdf_save():
scaling = float(dpi)/ 600
memory_limit = current_app.config.get("IMAGE_PDF_MEMORY_LIMIT")
page_limit = current_app.config.get("IMAGE_PDF_PAGE_LIMIT")

current_app.logger.debug(f'Memory limit: {memory_limit}')

@stream_with_context
def loop_images(id, page_start, page_end):
@@ -89,8 +84,6 @@ def loop_images(id, page_start, page_end):
item: Union[Article, Collection] = (
session.query(Article).filter(Article.id == id).one_or_none()
or session.query(Collection).filter(Collection.id == id).one_or_none())
current_app.logger.debug('######## Fetching article/collection {item} from the database ########')
fetch_start_time = time.time()
if isinstance(item, Article):
q = session.query(Article).filter(Article.id == item.id).one_or_none()
start_page = q.pages.first().volume_running_page_num
@@ -103,14 +96,7 @@ def loop_images(id, page_start, page_end):
Page.volume_running_page_num <= page_end).order_by(Page.volume_running_page_num).limit(page_limit)
else:
raise Exception("ID: " + id + " not found")
fetch_end_time = time.time()
fetch_time_elapsed = fetch_end_time - fetch_start_time
current_app.logger.debug(f'Fetching article/collection {item} took {fetch_time_elapsed:.2f} seconds to complete.')



current_app.logger.debug('######## Fetching images ########')
fetch_images_start_time = time.time()

for page in query.all():
n_pages += 1
if n_pages > page_limit:
@@ -126,18 +112,25 @@ def loop_images(id, page_start, page_end):
path = path.replace(remove, '')
im_data = image_proxy(path).get_data()
memory_sum += sys.getsizeof(im_data)
current_app.logger.debug(f'Page {n_pages}. Image data: {im_data}')
yield im_data
fetch_images_end_time = time.time()
fetch_images_time_elapsed = fetch_images_end_time - fetch_images_start_time
current_app.logger.debug(f'Fetching images for pages took {fetch_images_time_elapsed:.2f} seconds to complete.')

current_app.logger.debug('######## Starting image loop ########')
loop_start_time = time.time()
response = Response(img2pdf.convert([im for im in loop_images(id, page_start, page_end)]), mimetype='application/pdf')
loop_end_time = time.time()
loop_total_time = loop_end_time - loop_start_time
current_app.logger.debug(f'Image loop took {loop_total_time:.2f} seconds to complete.')


profiler = cProfile.Profile()
profiler.enable()
response = Response(img2pdf.convert([im for im in loop_images(id, page_start, page_end)]), mimetype='application/pdf')
profiler.disable()

# Log the profiling information
log_buffer = io.StringIO()
profiler_stats = pstats.Stats(profiler, stream=log_buffer)
profiler_stats.strip_dirs().sort_stats('cumulative', 'calls').print_stats(20)

formatted_stats = log_buffer.getvalue().splitlines()


current_app.logger.debug(f'==================Profiling information========================: \n')
for line in formatted_stats:
current_app.logger.debug(line)
return response
except Exception as e:
return jsonify(Message=str(e)), 400
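
For reference, the core change in this diff replaces ad-hoc time.time() debug logging with cProfile around the img2pdf.convert call. The snippet below is a minimal, standalone sketch of that pattern outside Flask, assuming a generic work() function and a local logger in place of current_app.logger; it is not part of the scan_explorer_service code, only an illustration of how the added cProfile/pstats block routes its output through a logger.

# Sketch (assumed names: work, logger): profile a call, then log the top
# cumulative-time entries line by line, as the PR does for pdf_save().
import cProfile
import io
import logging
import pstats

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("profiling-demo")

def work():
    # Placeholder for the expensive call (in the PR: img2pdf.convert over the page images).
    return sum(i * i for i in range(200_000))

profiler = cProfile.Profile()
profiler.enable()
result = work()
profiler.disable()

# Render the stats into a string buffer so each line can be sent to the logger.
buffer = io.StringIO()
stats = pstats.Stats(profiler, stream=buffer)
stats.strip_dirs().sort_stats("cumulative", "calls").print_stats(20)

logger.debug("================== Profiling information ==================")
for line in buffer.getvalue().splitlines():
    logger.debug(line)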