@@ -738,8 +738,11 @@ def test_trsh_ess_job(self):
738738
739739 # Test Orca
740740 # Orca: test 1
741- # Test troubleshooting insufficient memory issue
742- # Automatically increase memory provided not exceeding maximum available memory
741+ # Test troubleshooting insufficient memory issue.
742+ # When merely increasing total memory has already been attempted ('memory' is already in
743+ # ess_trsh_methods), simply requesting more total memory keeps failing (and previously caused
744+ # ARC to resubmit near-identical jobs in an endless loop). Instead, ARC reduces the number of
745+ # cpu cores to raise the memory per core (Orca's MaxCore).
743746 label = 'test'
744747 level_of_theory = {'method' : 'dlpno-ccsd(T)' }
745748 server = 'server1'
@@ -759,8 +762,9 @@ def test_trsh_ess_job(self):
759762 job_type , software , fine , memory_gb ,
760763 num_heavy_atoms , cpu_cores , ess_trsh_methods )
761764 self .assertIn ('memory' , ess_trsh_methods )
762- self .assertEqual (cpu_cores , 32 )
763- self .assertAlmostEqual (memory , 327 )
765+ self .assertIn ('cpu' , ess_trsh_methods )
766+ self .assertEqual (cpu_cores , 22 )
767+ self .assertAlmostEqual (memory , 227 )
764768
765769 # Orca: test 2
766770 # Test troubleshooting insufficient memory issue
@@ -814,6 +818,36 @@ def test_trsh_ess_job(self):
814818 self .assertEqual (couldnt_trsh , True )
815819 self .assertLess (cpu_cores , 1 ) # can't really run job with less than 1 cpu ^o^
816820
821+ # Orca: test 3b
822+ # Regression test for the Orca 5.x DLPNO-CCSD(T) "out of memory in the triples" loop.
823+ # In Orca 5.x the message is "Please increase MaxCore - Skipping calculation" with no explicit
824+ # per-core requirement, so determine_ess_status returns 'Insufficient job memory.'. Increasing
825+ # total memory was already attempted (ess_trsh_methods=['memory']) and the node is NOT at its
826+ # memory ceiling (no 'max_total_job_memory' keyword). Previously ARC kept resubmitting a nearly
827+ # identical job forever; instead it must reduce the number of cpu cores so that the memory per
828+ # core (Orca's MaxCore) actually increases.
829+ label = 'test'
830+ level_of_theory = {'method' : 'dlpno-ccsd(T)' }
831+ server = 'server2'
832+ job_type = 'sp'
833+ software = 'orca'
834+ fine = False
835+ memory_gb = 37
836+ cpu_cores = 16
837+ num_heavy_atoms = 16
838+ ess_trsh_methods = ['memory' ]
839+ job_status = {'keywords' : ['MDCI' , 'Memory' ], 'error' : 'Insufficient job memory.' }
840+ mem_per_core_before = memory_gb / cpu_cores
841+ output_errors , ess_trsh_methods , remove_checkfile , level_of_theory , software , job_type , fine , trsh_keyword , \
842+ memory , shift , cpu_cores , couldnt_trsh = trsh .trsh_ess_job (label , level_of_theory , server , job_status ,
843+ job_type , software , fine , memory_gb ,
844+ num_heavy_atoms , cpu_cores , ess_trsh_methods )
845+ self .assertIn ('cpu' , ess_trsh_methods )
846+ self .assertFalse (couldnt_trsh )
847+ self .assertEqual (cpu_cores , 5 ) # cpu cores reduced (this breaks the endless retry loop)
848+ self .assertAlmostEqual (memory , 29 )
849+ self .assertGreater (memory / cpu_cores , mem_per_core_before ) # memory per core increased
850+
817851 # Orca: test 4
818852 # Test troubleshooting too many cpu cores
819853 # Automatically reduce cpu cores
0 commit comments