@@ -356,3 +356,169 @@ def test_docs_libs_gateway_200_html_transformed(rf, tp, mock_get_file_data):
def test_calendar(rf, tp):
    """Request the ``calendar`` route through ``tp`` and check it with ``response_200``."""
    calendar_response = tp.get("calendar")
    tp.response_200(calendar_response)
359+
360+
@pytest.mark.django_db
@override_settings(
    CACHES=TEST_CACHES,
)
def test_static_content_blocks_direct_doc_paths(request_factory):
    """Test that direct access to doc paths and library paths is blocked with 404.

    Every entry of ``blocked_paths`` is handed to ``StaticContentTemplateView``
    as ``content_path``; each call is expected to raise ``Http404``.
    """

    # Test cases for paths that should be blocked (return 404)
    blocked_paths = [
        # Original doc/html paths that should be blocked
        "boost_1_53_0_beta1/doc/html/index.html",
        "1_82_0/doc/html/tutorial.html",
        "1_55_0b1/doc/html/reference/api.html",
        "boost_1_86_0/doc/html/deep/nested/path.html",
        "1_75_0/doc/html/simple.html",
        # Edge cases with different boost version formats
        "boost_1_53_0_beta1/doc/html/",  # trailing slash
        "1_82_0/doc/html/a",  # single character file
        # NEW: Library paths that should now be blocked
        "boost_1_53_0_beta1/libs/algorithm/doc/index.html",
        "1_82_0/libs/filesystem/doc/index.html",
        "boost_1_86_0/libs/test/doc/reference.html",
        "1_75_0/libs/wave/doc/tutorial.html",
        "boost_1_82_0/libs/any_library/any_file.html",
        "1_55_0b1/libs/serialization/index.html",
        # Edge cases for libs paths
        "boost_1_53_0_beta1/libs/",  # just libs with trailing slash
        "1_82_0/libs/a",  # single character lib name
    ]

    # The view callable does not depend on the path, so build it once.
    view = StaticContentTemplateView.as_view()

    for content_path in blocked_paths:
        request = request_factory.get(f"/{content_path}")

        # Intended behaviour: Http404 is raised without trying to fetch from S3.
        # NOTE(review): S3 is not patched here, so this test alone does not
        # prove the 404 happens before any S3 lookup - confirm against the view.
        with pytest.raises(Http404):
            view(request, content_path=content_path)
398+
399+
@pytest.mark.django_db
@override_settings(
    CACHES=TEST_CACHES,
)
def test_static_content_allows_non_direct_doc_paths(request_factory):
    """Test that non-direct doc paths are allowed and processed normally.

    Each path is served through ``call_view`` while
    ``core.views.get_content_from_s3`` is patched to return a small
    plain-text payload; the response status must be 200.
    """

    # Test cases for paths that should NOT be blocked (normal processing)
    allowed_paths = [
        # Tools paths - should still be allowed (not libs)
        "1_82_0/tools/build/doc/index.html",
        "boost_1_82_0/tools/cmake/doc/reference.html",
        # Paths with non-boost-version prefixes - should be allowed
        "develop/libs/filesystem/doc/index.html",  # develop prefix, not version
        "master/libs/test/doc/reference.html",  # master prefix, not version
        # Paths without version prefixes
        "doc/html/index.html",  # No boost version prefix
        "some/other/doc/html/file.html",  # Different structure
        "libs/algorithm/doc/index.html",  # No version prefix
        # Paths that don't match the exact patterns
        "boost_1_82_0/doc/other/file.html",  # not /doc/html/
        "1_82_0/doc/htmls/file.html",  # not exact /doc/html/
        "1_82_0/documentation/html/file.html",  # not /doc/html/
        "boost_1_82_0/libraries/algorithm/doc/index.html",  # libraries not libs
        "some_other_prefix/libs/algorithm/doc/index.html",  # no boost version
    ]

    for content_path in allowed_paths:
        # Mock S3 to return content so we can test the path isn't blocked.
        # The patch is re-created per path so every call sees a fresh dict.
        with patch(
            "core.views.get_content_from_s3",
            return_value={"content": b"test content", "content_type": "text/plain"},
        ):
            response = call_view(request_factory, content_path)
            # Should get 200 response, not 404 - the main thing is it's not blocked
            assert (
                response.status_code == 200
            ), f"Path should be allowed but got {response.status_code}: {content_path}"
438+
439+
def test_boost_version_regex_doc_html_pattern():
    """Test the BOOST_VERSION_REGEX doc/html pattern matches expected version formats.

    The pattern ``^<BOOST_VERSION_REGEX>/doc/html/.+$`` is checked against a
    list of paths that must match and a list of paths that must not.
    """
    import re
    from core.constants import BOOST_VERSION_REGEX

    # Test the doc/html blocking pattern used in the view.
    # Compiled once up front; no whitespace may separate the version regex
    # from "/doc/html/", otherwise the pattern would demand a literal space.
    doc_html_re = re.compile(rf"^{BOOST_VERSION_REGEX}/doc/html/.+$")
    version_re = re.compile(BOOST_VERSION_REGEX)

    # Test cases that should match the doc/html pattern
    matching_cases = [
        "boost_1_53_0_beta1/doc/html/index.html",
        "1_82_0/doc/html/tutorial.html",
        "1_55_0b1/doc/html/reference/api.html",
        "boost_1_86_0/doc/html/test.html",
        "1_75_0/doc/html/simple.html",
    ]

    for test_path in matching_cases:
        match = doc_html_re.match(test_path)
        assert match is not None, f"Doc/html pattern should match: {test_path}"
        # The bare version regex must also match at the start of the path.
        version_match = version_re.match(test_path)
        assert version_match is not None, f"Version pattern should match: {test_path}"

    # Test cases that should NOT match the doc/html pattern
    non_matching_cases = [
        "1_82_0/tools/build/doc/index.html",  # tools path
        "develop/doc/html/index.html",  # develop prefix, not version
        "doc/html/index.html",  # no version prefix
        "boost_1_82_0/doc/other/file.html",  # not /doc/html/
        "1_82_0/doc/htmls/file.html",  # not exact /doc/html/
        "some/other/doc/html/file.html",  # no boost version
        "boost_1_82_0/doc/html/",  # no file after /doc/html/
        "1_82_0/doc/html",  # no trailing slash or file
        "boost_1_53_0_beta1/libs/algorithm/doc/index.html",  # libs path
    ]

    for test_path in non_matching_cases:
        match = doc_html_re.match(test_path)
        assert match is None, f"Doc/html pattern should NOT match: {test_path}"
480+
481+
def test_boost_version_regex_libs_pattern():
    """Test the BOOST_VERSION_REGEX libs pattern matches expected version formats.

    The pattern ``^<BOOST_VERSION_REGEX>/libs/.+$`` is checked against a list
    of paths that must match and a list of paths that must not.
    """
    import re
    from core.constants import BOOST_VERSION_REGEX

    # Test the libs blocking pattern used in the view.
    # Compiled once up front; no whitespace may separate the version regex
    # from "/libs/", otherwise the pattern would demand a literal space.
    libs_re = re.compile(rf"^{BOOST_VERSION_REGEX}/libs/.+$")
    version_re = re.compile(BOOST_VERSION_REGEX)

    # Test cases that should match the libs pattern
    matching_cases = [
        "boost_1_53_0_beta1/libs/algorithm/doc/index.html",
        "1_82_0/libs/filesystem/doc/index.html",
        "boost_1_86_0/libs/test/doc/reference.html",
        "1_75_0/libs/wave/doc/tutorial.html",
        "boost_1_82_0/libs/any_library/any_file.html",
        "1_55_0b1/libs/serialization/index.html",
        "1_82_0/libs/a",  # single character lib name
        "boost_1_53_0_beta1/libs/algorithm",  # no trailing file extension
    ]

    for test_path in matching_cases:
        match = libs_re.match(test_path)
        assert match is not None, f"Libs pattern should match: {test_path}"
        # The bare version regex must also match at the start of the path.
        version_match = version_re.match(test_path)
        assert version_match is not None, f"Version pattern should match: {test_path}"

    # Test cases that should NOT match the libs pattern
    non_matching_cases = [
        "1_82_0/tools/build/doc/index.html",  # tools path
        "develop/libs/filesystem/doc/index.html",  # develop prefix, not version
        "latest/libs/algorithm/doc/index.html",  # latest prefix, not version
        "libs/algorithm/doc/index.html",  # no version prefix
        "boost_1_82_0/libraries/algorithm/doc/index.html",  # libraries not libs
        "some/other/libs/algorithm/file.html",  # no boost version
        "boost_1_82_0/libs",  # no trailing slash or file
        "boost_1_53_0_beta1/libs/",  # just libs with trailing slash (no content after)
        "1_82_0/doc/html/index.html",  # doc/html path
    ]

    for test_path in non_matching_cases:
        match = libs_re.match(test_path)
        assert match is None, f"Libs pattern should NOT match: {test_path}"
0 commit comments