diff --git a/Dockerfile b/Dockerfile index a482e5ae9..d35bb28d0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -75,7 +75,7 @@ RUN mkdir -p /l/local/bin RUN ln -s /usr/bin/unzip /l/local/bin/unzip RUN ln -s /usr/bin/convert /l/local/bin/convert RUN ln -s /usr/bin/plackup /l/local/bin/plackup -RUN /bin/bash -c 'for cmd in pamflip jpegtopnm tifftopnm bmptopnm pngtopam ppmmake pamcomp pnmscalefixed pamscale pnmrotate pnmpad pamtotiff pnmtotiff pnmtojpeg pamrgbatopng ppmtopgm pnmtopng; do ln -s /usr/bin/$cmd /l/local/bin; done' +RUN /bin/bash -c 'for cmd in pamflip jpegtopnm tifftopnm bmptopnm pngtopam ppmmake pamcomp pnmscalefixed pamscale pnmrotate pnmpad pamtotiff pnmtotiff pnmtojpeg pamrgbatopng ppmtopgm pnmtopng pamthreshold; do ln -s /usr/bin/$cmd /l/local/bin; done' RUN mkdir /htapps/babel/cache RUN chmod 4777 /htapps/babel/cache diff --git a/imgsrv/lib/SRV/Image.pm b/imgsrv/lib/SRV/Image.pm index e7fdf98f8..7fa20dc04 100644 --- a/imgsrv/lib/SRV/Image.pm +++ b/imgsrv/lib/SRV/Image.pm @@ -87,10 +87,15 @@ sub run { $self->_validate_params($env); my $C = $$env{'psgix.context'}; + my $mdpItem = $C->get_object('MdpItem'); my $gId = $mdpItem->GetId(); my $restricted = $$env{'psgix.restricted'}; + # require an existing session + my $ses = $C->get_object('Session'); + if ( $$ses{is_new} ) { $restricted = 1; } + # now we deal with extracting my $cache_dir = SRV::Utils::get_cachedir(); my $logfile = SRV::Utils::get_logfile(); @@ -240,7 +245,8 @@ sub call_core { Plack::Util::set_io_path($fh, Cwd::realpath($$output{filename})); } - my $res = $req->new_response(200); + my $status = ( $$output{restricted} ) ? 403 : 200; + my $res = $req->new_response($status); $res->content_type($$output{mimetype}); $res->header('X-HathiTrust-ImageSize' => $$output{metadata}{width} . "x" . $$output{metadata}{height}); diff --git a/imgsrv/lib/SRV/Volume/Base.pm b/imgsrv/lib/SRV/Volume/Base.pm index f887319dc..26fb24a08 100644 --- a/imgsrv/lib/SRV/Volume/Base.pm +++ b/imgsrv/lib/SRV/Volume/Base.pm @@ -477,17 +477,27 @@ sub _authorize { unless ( defined $self->restricted ) { + my $C = $$env{'psgix.context'}; my $mdpItem = $C->get_object('MdpItem'); my $ar = $C->get_object('Access::Rights'); my $gId = $mdpItem->GetId(); + + # require a valid session + my $ses = $C->get_object('Session'); + + # limit to users in an existing session + if ( $$ses{is_new} ) { + $self->restricted(1); + } else { - my $final_access_status = $ar->assert_final_access_status($C, $gId); - my $download_access_status = $ar->get_single_page_PDF_access_status($C, $gId); + my $final_access_status = $ar->assert_final_access_status($C, $gId); + my $download_access_status = $ar->get_single_page_PDF_access_status($C, $gId); - my $restricted = ! ( ( $final_access_status eq 'allow' ) && ( $download_access_status eq 'allow' ) ); - - $self->restricted($restricted); + my $restricted = ! ( ( $final_access_status eq 'allow' ) && ( $download_access_status eq 'allow' ) ); + + $self->restricted($restricted); + } } } diff --git a/imgsrv/lib/SRV/Volume/HTML.pm b/imgsrv/lib/SRV/Volume/HTML.pm index d5140fa19..0c88a6d04 100644 --- a/imgsrv/lib/SRV/Volume/HTML.pm +++ b/imgsrv/lib/SRV/Volume/HTML.pm @@ -65,12 +65,19 @@ sub run { return { contents => "
", mimetype => 'text/html' }; } + # limit to users in a current session + my $ses = $C->get_object('Session'); + if ( $$ses{is_new} ) { + $self->restricted(1); + } + my $restricted = $self->restricted; unless ( defined $restricted ) { # $restricted = $C->get_object('Access::Rights')->assert_final_access_status($C, $gId) ne 'allow'; $restricted = $$env{'psgix.restricted'}; } + # now we deal with extracting my $cache_dir = SRV::Utils::get_cachedir(); @@ -180,7 +187,8 @@ sub call_core { my $max_age = 86400; # 1 day = 60 * 60 * 24 my $cache_control = qq{max-age=$max_age}; - my $res = $req->new_response(200); + my $status = ( $self->restricted ) ? 403 : 200; + my $res = $req->new_response($status); $res->content_type($$target{mimetype} . ";charset=utf-8"); my $contents = encode_utf8($$target{contents}); diff --git a/imgsrv/lib/SRV/Volume/Image/Bundle.pm b/imgsrv/lib/SRV/Volume/Image/Bundle.pm index fc3e202ab..294a4e38c 100644 --- a/imgsrv/lib/SRV/Volume/Image/Bundle.pm +++ b/imgsrv/lib/SRV/Volume/Image/Bundle.pm @@ -92,12 +92,11 @@ sub _authorize { $self->SUPER::_authorize($env); unless ( $self->restricted ) { - # technically the user has access but we need to - # limit resources for bundling to users in a current session - # unless you're using XYZZY=1 on the command line my $C = $$env{'psgix.context'}; + # limit to users in a current session my $ses = $C->get_object('Session'); - if ( $$ses{is_new} && ! $ENV{XYZZY} ) { $self->restricted(1); } + if ( $$ses{is_new} ) { $self->restricted(1); } + # limit TIFF bundling to 10 pages elsif ( $self->format eq 'image/tiff' && $self->total_pages > 10 ) { $self->restricted(1); } diff --git a/mdp-lib/Institutions.pm b/mdp-lib/Institutions.pm index 189da5300..bdb3fd04a 100644 --- a/mdp-lib/Institutions.pm +++ b/mdp-lib/Institutions.pm @@ -329,7 +329,7 @@ sub get_idp_list { $seen{$$hash_ref{inst_id}} = 1; my $host = $ENV{'HTTP_HOST'} || 'localhost'; - my $idp_url = $hash_ref->{template}; + my $idp_url = $hash_ref->{template} || 'https://default.invalid'; $idp_url =~ s,___HOST___,$host,; $idp_url =~ s,&,&,; diff --git a/pt/web/firebird/tests/imgsrv_download.spec.js b/pt/web/firebird/tests/imgsrv_download.spec.js index 41f7ffeab..b1c27a049 100644 --- a/pt/web/firebird/tests/imgsrv_download.spec.js +++ b/pt/web/firebird/tests/imgsrv_download.spec.js @@ -11,7 +11,7 @@ test.describe('imgsrv download', () => { test('download whole item pdf, full resolution', async ({ request, page }) => { var currentTime = new Date().getTime(); - const initialResponse = await request.get( + const initialResponse = await page.context().request.get( 'http://apache:8080/cgi/imgsrv/download/pdf?id=test.pd_open&callback=tunnelCallback&_=' + currentTime ); const initialBody = await initialResponse.text(); @@ -33,7 +33,7 @@ test.describe('imgsrv download', () => { let done = false; while (done == false) { - const callbackResponse = await request.get('http://apache:8080' + callbackUrl); + const callbackResponse = await page.context().request.get('http://apache:8080' + callbackUrl); const callbackJson = await callbackResponse.json(); if (callbackJson.status == 'DONE') { @@ -47,7 +47,7 @@ test.describe('imgsrv download', () => { } } - const downloadResponse = await request.get('http://apache:8080' + downloadUrl); + const downloadResponse = await page.context().request.get('http://apache:8080' + downloadUrl); const downloadHeaders = downloadResponse.headers(); const downloadBody = await downloadResponse.text(); @@ -57,10 +57,10 @@ test.describe('imgsrv download', () => { expect(downloadBody.length).toBeGreaterThan(512 * 1024); }); - // test('download epub', async ({ request, page }) => { +// test('download epub', async ({ request, page }) => { // var currentTime = new Date().getTime(); // -// const initialResponse = await request.get( +// const initialResponse = await page.context().request.get( // 'http://apache:8080/cgi/imgsrv/download/epub?id=test.pd_open&callback=tunnelCallback&_=' + currentTime // ); // const initialBody = await initialResponse.text(); @@ -79,7 +79,7 @@ test.describe('imgsrv download', () => { // let done = false; // // while (done == false) { -// const callbackResponse = await request.get('http://apache:8080' + callbackUrl); +// const callbackResponse = await page.context().request.get('http://apache:8080' + callbackUrl); // const callbackJson = await callbackResponse.json(); // // if (callbackJson.status == 'DONE') { @@ -93,7 +93,7 @@ test.describe('imgsrv download', () => { // } // } // -// const downloadResponse = await request.get('http://apache:8080' + downloadUrl); +// const downloadResponse = await page.context().request.get('http://apache:8080' + downloadUrl); // const downloadHeaders = downloadResponse.headers(); // const downloadBody = await downloadResponse.text(); // @@ -106,7 +106,7 @@ test.describe('imgsrv download', () => { test('download single tiff current page, full resolution', async ({ request, page }) => { // no callback tunnel on single tiff - const downloadResponse = await request.get( + const downloadResponse = await page.context().request.get( 'http://apache:8080/cgi/imgsrv/image?id=test.pd_open&attachment=1&tracker=D1&format=image/tiff&size=full&seq=1' ); const downloadHeaders = downloadResponse.headers(); @@ -121,7 +121,7 @@ test.describe('imgsrv download', () => { test('download single page jpeg, high resolution', async ({ request, page }) => { //no callback tunnel on single pages - const downloadResponse = await request.get( + const downloadResponse = await page.context().request.get( 'http://apache:8080/cgi/imgsrv/image?id=test.pd_open&attachment=1&tracker=D1&format=image/jpeg&size=ppi:300&seq=2' ); const downloadHeaders = downloadResponse.headers(); @@ -135,7 +135,7 @@ test.describe('imgsrv download', () => { test('download selected pages jpeg, full resolution', async ({ request, page }) => { //no callback tunnel on non-tiff selections <11 pages - const downloadResponse = await request.get( + const downloadResponse = await page.context().request.get( 'http://apache:8080/cgi/imgsrv/image?id=test.pd_open&attachment=1&tracker=D1&format=image/jpeg&target_ppi=0&seq=1&seq=2' ); const downloadHeaders = downloadResponse.headers(); @@ -149,7 +149,7 @@ test.describe('imgsrv download', () => { expect(downloadBody.length).toBeGreaterThan(1); }); test('download pdf with bogus seq', async ({ request, page }) => { - const initialResponse = await request.get( + const initialResponse = await page.context().request.get( 'http://apache:8080/cgi/imgsrv/download/pdf?id=test.pd_open&attachment=1&tracker=D1&seq=mashed_potatoes' ); expect(initialResponse.status()).toEqual(200); @@ -172,7 +172,7 @@ test.describe('imgsrv download', () => { expect(downloadBody.length).toBeGreaterThan(1); }); test('download single selected page txt', async ({ request, page }) => { - const downloadResponse = await request.get( + const downloadResponse = await page.context().request.get( 'http://apache:8080/cgi/imgsrv/download/plaintext?id=test.pd_open&attachment=1&tracker=D5&seq=2' ); const downloadHeaders = downloadResponse.headers(); diff --git a/pt/web/firebird/tests/imgsrv_session.spec.js b/pt/web/firebird/tests/imgsrv_session.spec.js new file mode 100644 index 000000000..f09ccf784 --- /dev/null +++ b/pt/web/firebird/tests/imgsrv_session.spec.js @@ -0,0 +1,70 @@ +import { expect, test } from '@playwright/test'; + +test.describe('imgsrv requires session', () => { + + test.describe('without session', () => { + test('download full pdf', async ({ request }) => { + var currentTime = new Date().getTime(); + + const response = await request.get( + 'http://apache:8080/cgi/imgsrv/download/pdf?id=test.pd_open&callback=tunnelCallback&_=' + currentTime + ); + + expect(response.status()).toEqual(403) + }); + + test('download single page pdf', async ({ request }) => { + const response = await request.get( + 'http://apache:8080/cgi/imgsrv/download/image?id=test.pd_open&attachment=1&tracker=D1&format=image%2Ftiff&target_ppi=0&seq=2' + ); + expect(response.status()).toEqual(403); + }); + + test('page image, full resolution', async ({ request }) => { + const response = await request.get( + 'http://apache:8080/cgi/imgsrv/image?id=test.pd_open&size=full&seq=1' + ); + + expect(response.status()).toEqual(403); + }); + + test('page image, default resolution', async ({ request }) => { + const response = await request.get( + 'http://apache:8080/cgi/imgsrv/image?id=test.pd_open&seq=1' + ); + + expect(response.status()).toEqual(403); + }); + + test('page ocr', async ({ request }) => { + const response = await request.get( + 'http://apache:8080/cgi/imgsrv/html?id=test.pd_open&seq=1' + ); + + expect(response.status()).toEqual(403); + }); + }); + + test.describe('with session', () => { + // make sure we have appropriate session cookies etc before calling imgsrv + test.beforeEach(async ({ page }) => { + await page.goto('/cgi/pt?id=test.pd_open'); + //accept the cookie banner before each test + await page.getByRole('button', { name: 'Allow all cookies' }).click(); + }); + + test('page image', async ({ request, page }) => { + const response = await page.context().request.get( + 'http://apache:8080/cgi/imgsrv/image?id=test.pd_open&seq=1' + ); + expect(response.status()).toEqual(200); + }); + + test('ocr', async ({ request, page }) => { + const response = await page.context().request.get( + 'http://apache:8080/cgi/imgsrv/html?id=test.pd_open&seq=1' + ); + expect(response.status()).toEqual(200); + }); + }); +});