Debug #6

Open

wants to merge 43 commits into base: master
43 commits
ffadc77
adding profiling
femalves Jan 4, 2024
8c974e2
removing logs and adding profiling
femalves Jan 4, 2024
4ce2027
removing logs
femalves Jan 4, 2024
0e516bc
removing unnecessary logs
Jan 16, 2024
0c32f67
removing error
Jan 16, 2024
a517463
removing processing error
femalves Jan 16, 2024
19bbf80
fixing logs
femalves Jan 17, 2024
0968580
testing PyMuPDF
femalves Jan 24, 2024
c5108ce
reversing to img2pdf
femalves Feb 5, 2024
242c941
Adding debug log
femalves Mar 11, 2024
68defa3
modifying logs
femalves Mar 11, 2024
9cbd2ed
testing grabbing images as pdfs and merging
femalves Mar 15, 2024
be7150c
checking how fast downloading files is
femalves Mar 15, 2024
40558b1
trying pymupdf with logs
femalves Mar 15, 2024
1e4293b
Revert "modifying logs"
femalves Mar 18, 2024
5429b18
Revert "testing grabbing images as pdfs and merging"
femalves Mar 18, 2024
a3a0dd3
code to download pre-computed PDFs
femalves Apr 1, 2024
7edeba2
adding changes to image proxy
femalves Apr 3, 2024
5646f10
fixing bug
femalves Apr 3, 2024
471385c
adding s to pdf
femalves Apr 4, 2024
838e9d5
checking if bucket name is correct
femalves Apr 5, 2024
5862267
adding log
femalves Apr 5, 2024
0f40801
adding extra logs to capture error
femalves Apr 8, 2024
742dcda
changing s3_utils
femalves Apr 8, 2024
a323319
fixing bug
femalves Apr 8, 2024
762a697
cleaning code, adding tests, removing logs
femalves Apr 10, 2024
a378240
adding github actions
femalves Apr 12, 2024
d7c619c
adjusting python version
femalves Apr 15, 2024
a0a8166
upgrading python
femalves Apr 15, 2024
e3c5fd5
solving 2to_3 bug
femalves Apr 15, 2024
a135daf
verbose
femalves Apr 15, 2024
1efabe6
changing psycopg
femalves Apr 15, 2024
da8badd
fixing bug
femalves Apr 15, 2024
9b77585
setuptools < 57
femalves Apr 15, 2024
e0c0756
running without cache
femalves Apr 15, 2024
3d51c30
removing space
femalves Apr 15, 2024
677be7a
removing space
femalves Apr 15, 2024
decd74f
debugging
femalves Apr 15, 2024
4e52569
downgrading to python 3.8
femalves Apr 15, 2024
66624f5
removing no cache
femalves Apr 15, 2024
de8e606
removing upgrade pip
femalves Apr 15, 2024
531dabb
adding quotes
femalves Apr 15, 2024
e553195
giving it another try
femalves Apr 15, 2024
48 changes: 48 additions & 0 deletions .github/workflows/python_actions.yaml
@@ -0,0 +1,48 @@
name: GitHub Actions CI

on: [pull_request]

jobs:
  build:

    runs-on: ubuntu-latest
    env:
      PGDATABASE: scan_explorer_service
      PGPASSWORD: scan_explorer
      PGUSER: scan_explorer
      PGHOST: localhost
      PGPORT: 5432

    services:
      postgres:
        image: postgres:14.2
        env:
          POSTGRES_DB: scan_explorer_service
          POSTGRES_PASSWORD: scan_explorer
          POSTGRES_USER: scan_explorer
          POSTGRES_HOST: localhost
          POSTGRES_PORT: 5432

        ports:
          - 5432:5432
        # Set health checks to wait until postgres has started
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: '3.8'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade wheel setuptools==57 pip
          pip install -r requirements.txt
          pip install -r dev-requirements.txt

      - name: Run unittests
        run: |
          py.test
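
Note (illustrative, not part of the diff): the PG* variables above follow libpq conventions, so database clients in the test suite can connect to the Postgres service container without an explicit DSN. A minimal sketch, assuming psycopg2 is among the pinned requirements:

import psycopg2

# Empty DSN: host, port, user, password and dbname are all taken from the
# PGHOST/PGPORT/PGUSER/PGPASSWORD/PGDATABASE variables set in the workflow env.
conn = psycopg2.connect("")
with conn.cursor() as cur:
    cur.execute("SELECT 1")
    assert cur.fetchone() == (1,)
conn.close()
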
6 changes: 5 additions & 1 deletion config.py
@@ -19,4 +19,8 @@
OPEN_SEARCH_INDEX = 'scan-explorer'

ADS_SEARCH_SERVICE_URL = 'https://api.adsabs.harvard.edu/v1/search/query'
ADS_SEARCH_SERVICE_TOKEN = '<CHANGE ME>'
ADS_SEARCH_SERVICE_TOKEN = '<CHANGE ME>'

AWS_ACCESS_KEY_ID='<CHANGE ME>'
AWS_SECRET_ACCESS_KEY='<CHANGE ME>'
AWS_BUCKET_NAME='<CHANGE ME>'
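
For reference, a hedged sketch of building a boto3 resource explicitly from these new keys; note that the S3Provider introduced in this PR instead calls boto3.resource("s3") with no arguments and therefore relies on boto3's default credential chain (environment variables, shared credentials file, or an instance role).

# Illustrative only, not part of this diff.
import boto3

from config import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_BUCKET_NAME

s3 = boto3.resource(
    "s3",
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)
bucket = s3.Bucket(AWS_BUCKET_NAME)  # the same bucket S3Provider targets
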
3 changes: 2 additions & 1 deletion dev-requirements.txt
@@ -2,4 +2,5 @@ Flask-Testing==0.8.1
coverage==5.2.1
testing.postgresql==1.3.0
pytest==7.1.2
pytest-cov==3.0.0
pytest-cov==3.0.0
boto3==1.34.75
3 changes: 2 additions & 1 deletion requirements.txt
@@ -12,4 +12,5 @@ opensearch-py==2.0.0
setuptools<58
alembic==1.8.0
img2pdf==0.4.4
appmap>=1.1.0.dev0
appmap>=1.1.0.dev0
boto3==1.34.75
27 changes: 11 additions & 16 deletions scan_explorer_service/manifest_factory.py
@@ -13,36 +13,32 @@ class ManifestFactoryExtended(ManifestFactory):
    """

    def create_manifest(self, item: Union[Article, Collection]):
        current_app.logger.debug(f"Creating manifest for item: {item}")
        manifest = self.manifest(
            ident=f'{item.id}/manifest.json', label=item.id)
        manifest.description = item.id
        manifest.add_sequence(self.create_sequence(item))
        for range in self.create_range(item):
            manifest.add_range(range)
        current_app.logger.debug(f"Manifest created: {manifest}")
        current_app.logger.info(f"Manifest created: {manifest}")
        return manifest

    def create_sequence(self, item: Union[Article, Collection]):
        current_app.logger.debug(f"Creating sequence for item: {item}")
        current_app.logger.info(f"Creating sequence for item: {item}")
        sequence: Sequence = self.sequence()
        current_app.logger.debug(f"Sequence is: {sequence}. Adding canvases to sequence.")
        for page in item.pages:
            current_app.logger.debug(f"Adding canvas to sequence: {page}.")
            sequence.add_canvas(self.get_or_create_canvas(page))
        current_app.logger.debug(f"Final sequence created: {sequence}")
        current_app.logger.info(f"Final sequence created: {sequence}")
        return sequence

    def create_range(self, item: Union[Article, Collection]):
        current_app.logger.debug(f"Creating range for item: {item}")
        current_app.logger.info(f"Creating range for item: {item}")
        if isinstance(item, Collection):
            return list(chain(*[self.create_range(article) for article in item.articles]))

        range: Range = self.range(ident=item.bibcode, label=item.bibcode)
        for page in item.pages:
            current_app.logger.debug(f"Adding canvas to range: {page}.")
            range.add_canvas(self.get_or_create_canvas(page))
        current_app.logger.debug(f"Range created: {[range]}")
        current_app.logger.info(f"Range created: {[range]}")
        return [range]

    def get_canvas_dict(self) -> Dict[str, Canvas]:
@@ -51,7 +47,7 @@ def get_canvas_dict(self) -> Dict[str, Canvas]:
        return self.canvas_dict

    def get_or_create_canvas(self, page: Page):
        current_app.logger.debug(f"Getting or creating canvas for page: {page}")
        current_app.logger.info(f"Getting or creating canvas for page: {page}")
        canvas_dict = self.get_canvas_dict()
        if(page.id in canvas_dict.keys()):
            return canvas_dict[page.id]
@@ -70,28 +66,27 @@ def get_or_create_canvas(self, page: Page):
        canvas.add_annotation(annotation)
        canvas_dict[page.id] = canvas

        current_app.logger.debug(f"Canvas created: {canvas}")
        current_app.logger.info(f"Canvas created: {canvas}")
        return canvas

    def create_image_annotation(self, page: Page):
        current_app.logger.debug(f"Creating image annotation for page: {page}")
        current_app.logger.info(f"Creating image annotation for page: {page}")
        annotation: Annotation = self.annotation(ident=str(page.id))
        image: Image = annotation.image(
            ident=page.image_path, label=f'p. {page.label}', iiif=True)

        # Override default image quality and format set by prezi
        image.id = image.id.replace(f'/default.jpg', f'/{page.image_color_quality}.tif')
        current_app.logger.debug(f"Image id: {image.id}")

        image.format = page.format
        image.height = page.height
        image.width = page.width
        current_app.logger.debug(f"Image annotation created: {annotation}")
        current_app.logger.info(f"Image annotation created: {annotation}")
        return annotation

    def add_search_service(self, manifest: Manifest, search_url: str):
        current_app.logger.debug(f"Adding search services for manifest {manifest} and search url {search_url}")
        context = 'http://iiif.io/api/search/1/context.json'
        profile = 'http://iiif.io/api/search/1/search'

        manifest.add_service(ident=search_url, context=context, profile=profile)
        current_app.logger.debug(f"Adding search services for manifest {manifest} and search url {search_url}")
        current_app.logger.info(f"Adding search services for manifest {manifest} and search url {search_url}")
43 changes: 40 additions & 3 deletions scan_explorer_service/tests/test_proxy.py
@@ -1,9 +1,8 @@
import unittest
from flask_testing import TestCase
from flask import url_for
from unittest.mock import patch
from unittest.mock import MagicMock, patch
from scan_explorer_service.tests.base import TestCaseDatabase
from scan_explorer_service.views.image_proxy import image_proxy, image_proxy_thumbnail
from scan_explorer_service.views.image_proxy import image_proxy, get_item
from scan_explorer_service.models import Article, Base, Collection, Page


@@ -117,6 +116,44 @@ def test_get_thumbnail(self, mock_request):
        assert(response.is_streamed)
        assert(response.status_code == 200)

    def test_get_item(self):
        """Test retrieving an item by its ID"""
        with self.app.app_context():
            article = get_item(self.app.db.session, self.article.id)
            assert(isinstance(article, Article))

            collection = get_item(self.app.db.session, self.collection.id)
            assert(isinstance(collection, Collection))

            with self.assertRaises(Exception) as context:
                get_item(self.app.db.session, 'non-existent-id')
            assert("ID: non-existent-id not found" in str(context.exception))

    @patch('scan_explorer_service.views.image_proxy.image_proxy')
    @patch('scan_explorer_service.utils.s3_utils.S3Provider.read_object_s3')
    def test_pdf_save_success(self, mock_read_object_s3, mock_image_proxy):

        mock_read_object_s3.return_value = b'%PDF-1.4'

        mock_image_proxy_response = MagicMock()
        mock_image_proxy_response.status_code = 200
        mock_image_proxy_response.headers = {'Content-Type': 'application/pdf'}
        mock_image_proxy_response.get_data.return_value = b'%PDF-1.4'
        mock_image_proxy.return_value = mock_image_proxy_response

        data = {
            'id': self.article.id,
            'page_start': 1,
            'page_end': 100,
            'dpi': 300
        }

        response = self.client.get(url_for('proxy.pdf_save', **data))

        assert(response.status_code == 200)
        assert('application/pdf' == response.content_type)
        assert(b'%PDF-1.4' in response.data)

if __name__ == '__main__':
    unittest.main()
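
For orientation, a hypothetical sketch of the get_item helper exercised by test_get_item above; the real implementation in scan_explorer_service/views/image_proxy.py is not shown in this diff, and only the asserted behavior (return an Article or Collection by id, otherwise raise with an "ID: ... not found" message) is taken from the tests.

# Hypothetical sketch inferred from the assertions above, not the code under review.
from typing import Union

from scan_explorer_service.models import Article, Collection


def get_item(session, item_id) -> Union[Article, Collection]:
    # Look for an article first, then fall back to a collection.
    item = session.query(Article).filter(Article.id == item_id).one_or_none()
    if item is None:
        item = session.query(Collection).filter(Collection.id == item_id).one_or_none()
    if item is None:
        raise Exception(f"ID: {item_id} not found")
    return item
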
43 changes: 43 additions & 0 deletions scan_explorer_service/utils/s3_utils.py
@@ -0,0 +1,43 @@
import io
import logging
import boto3
from botocore.exceptions import ClientError, ParamValidationError


class S3Provider:
    """
    Class for interacting with a particular S3 provider
    """

    def __init__(self, config):
        """
        input:

        config: dict-like application config; AWS_BUCKET_NAME selects the bucket to read from and write to.
        """

        self.s3 = boto3.resource("s3")
        self.bucket = self.s3.Bucket(config.get("AWS_BUCKET_NAME"))

    def write_object_s3(self, file_bytes, object_name):
        """Upload file_bytes under object_name and return the resulting ETag."""
        try:
            response = self.bucket.put_object(Body=file_bytes, Key=object_name)
            logging.info(response)
        except (ClientError, ParamValidationError) as e:
            logging.exception(e)
            raise e
        return response.e_tag

    def read_object_s3(self, object_name):
        """Download object_name into memory and return its bytes."""
        try:
            with io.BytesIO() as s3_obj:
                self.bucket.download_fileobj(object_name, s3_obj)
                s3_obj.seek(0)
                s3_file = s3_obj.read()
                return s3_file
        except (ClientError, ParamValidationError) as e:
            raise e
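
A short usage sketch for the new provider (not part of the diff); it assumes the Flask application config carries the AWS_* keys added to config.py, and the object key below is made up.

from flask import current_app

from scan_explorer_service.utils.s3_utils import S3Provider

provider = S3Provider(current_app.config)

# Upload a generated PDF and read it back; the key is illustrative only.
etag = provider.write_object_s3(b'%PDF-1.4 ...', object_name='article-id/pages.pdf')
pdf_bytes = provider.read_object_s3('article-id/pages.pdf')
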
2 changes: 0 additions & 2 deletions scan_explorer_service/utils/utils.py
@@ -5,10 +5,8 @@ def url_for_proxy(endpoint: str, **values):
    values['_external'] = False

    server, prefix = proxy_url()
    current_app.logger.debug(f"Server is {server} and prefix is {prefix}.")
    path = url_for(endpoint, **values).lstrip('/')

    current_app.logger.debug(f"Url is {server}/{prefix}/{path}.")
    return f'{server}/{prefix}/{path}'

def proxy_url():