T3/t3/main.py at c4e082714e34578880d86cf9372f3ccc7ec0064e · ReactionMechanismGenerator/T3 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
The tandem tool (T3) for iterative kinetic model generation and refinement

Todo:
    - generate n generations of reactions (circles around the reactants) and get all thermo right
    - parse errors and warnings from the ARC output/status.yml file for failed species
    - modify tolerance interrupt simulation according to tolerance move to core
    - implement Cantera and RMS SA, including global observables
    - determine whether a species should be forbidden or not
    - scan pdep networks and the core, mark non-physical species
    - utilize the uncertainty analysis script
    - SA dict: dump and load for restart, inc. pdep SA
    - Need to instruct ARC which species to save in a thermo library. E.g., when computing a rate coefficient of
      A + OH <=> C + D, we shouldn't store thermo for OH, it's already well-known and our numbers will be inferior.
"""

import datetime
import inspect
import os
import re
import shutil
from typing import List, Optional, Tuple, Union

from arkane import Arkane
from rmgpy import settings as rmg_settings
from rmgpy.chemkin import load_chemkin_file
from rmgpy.data.kinetics import KineticsLibrary
from rmgpy.data.kinetics.library import LibraryReaction
from rmgpy.data.thermo import ThermoLibrary
from rmgpy.exceptions import (ChemicallySignificantEigenvaluesError,
                              ChemkinError,
                              ModifiedStrongCollisionError,
                              NetworkError,
                              )
from rmgpy.kinetics import Arrhenius, KineticsData
from rmgpy.reaction import Reaction
from rmgpy.rmg.pdep import PDepReaction
from rmgpy.species import Species
from rmgpy.thermo import NASAPolynomial, NASA, ThermoData, Wilhoit

from arc.common import (get_number_with_ordinal_indicator,
                        get_ordinal_indicator,
                        key_by_val,
                        read_yaml_file,
                        save_yaml_file,
                        )
from arc.exceptions import ConverterError
from arc.main import ARC
from arc.species.species import ARCSpecies, check_label
from arc.species.converter import check_xyz_dict

from t3.common import PROJECTS_BASE_PATH, VALID_CHARS, delete_root_rmg_log, get_species_by_label, time_lapse
from t3.logger import Logger
from t3.runners.rmg_runner import rmg_runner
from t3.runners.rmg_adapter import RMGAdapter
from t3.schema import InputBase
from t3.simulate.factory import simulate_factory
from t3.utils.writer import write_pdep_network_file, write_rmg_input_file

RMG_THERMO_LIB_BASE_PATH = os.path.join(rmg_settings['database.directory'], 'thermo', 'libraries')
RMG_KINETICS_LIB_BASE_PATH = os.path.join(rmg_settings['database.directory'], 'kinetics', 'libraries')


class T3(object):
    """
    The main T3 class.

    Dictionary structures::

        species = {
            <int: T3_spc_index>: {
                'RMG label': <str: RMG label>,
                'Chemkin label': <str: Chemkin label>,
                'QM label': <str: The label used for the QM calc>,
                'object': <Species: RMG Species object>,
                'reasons': <List[str]: Reasons for calculating thermodynamic data for this species>,
                'converged': <Optional[bool]: whether thermo was successfully calculated, ``None`` if pending>,
                'iteration': <int: The iteration number in which this species was originally considered>,
            },
        }

        reactions = {
            <int: T3_reaction_index>: {
                'RMG label': <str: RMG label>,
                'Chemkin label': <str: Chemkin label>,
                'QM label': <str: The label used for the QM calc>,
                'SMILES label': <str: A reaction label that consists of the reactants/products SMILES>,
                'object': <Reaction: RMG Reaction object>,
                'reactant_keys': <List[int]: Keys of species that participate in this reaction as reactants>,
                'product_keys': <List[int]: Keys of species that participate in this reaction as products>,
                'reasons': <List[str]: Reasons for calculating the rate coefficient for this reaction>,
                'converged': <Optional[bool]: whether the rate coeff was successfully calculated, ``None`` if pending>,
                'iteration': <int: The iteration number in which this reaction was originally considered>,
            },
        }

        sa_dict = {
            'thermo': {
                <str: SA observable>: {
                    <str: RMG species label>: <array: 1D array with one entry per time point. Each entry is
                                     dLn(observable_1) / dG_species_1 in mol / kcal at the respective time>,
                }
            }
            'kinetics': {
                <str: SA observable>: {
                    <int: reaction number>: <array: 1D array with one entry per time point. Each entry is
                                             dLn(observable_1) / dLn(k_1) at the respective time>,
                }
            }
            'time': <array: 1D array of time points in seconds>
        }

    Args:
        project (str): The project name.
        project_directory (str, optional): The project directory. Required through the API, optional through an input
                                           file (will be set to the directory of the input file if not specified).
        verbose (int, optional): The logging level, optional. 10 - debug, 20 - info, 30 - warning, default: 20.
        clean_dir (bool, optional): Whether to delete all existing files (other than input and submit files)
                                    and folders in the project directory prior to execution.
                                    If set to ``True``, the restart feature will not be triggered. Default: ``False``.
        t3 (dict, optional): T3 directives.
        rmg (dict): RMG directives.
        qm (dict, optional): QM directive.

    Attributes:
        project (str): The project name.
        project_directory (str): The project directory. Required through the API, optional through an input file.
        t3 (dict): T3 directives.
        rmg (dict): RMG directives.
        qm (dict, optional): QM directive.
        verbose (int): The logging level.
        t0 (datetime.datetime): Initial time when the project was spawned, stored as a datetime object.
        logger (Logger): The Logger class instance.
        rmg_exceptions_counter (int): Number of times RMG crashed.
        iteration (int): The current iteration number. Iteration 0 is reserved for pre-running ARC,
                         normal iteration numbers begin at 1.
        thermo_lib_base_path (str): The path to the thermo libraries folder in RMG-database.
        kinetics_lib_base_path (str): The path to the kinetics libraries folder in RMG-database.
        species (Dict[int, dict]: The T3 species dictionary. Keys are T3 species indices, values are dictionaries.
        paths (dict): Various directory and file paths.
        executed_networks (list): PDep networks for which SA was already executed. Entries are tuples of isomer labels.
        rmg_species (List[Species]): Entries are RMG species objects in the model core for a certain T3 iteration.
        rmg_reactions (List[Reaction]): Entries are RMG reaction objects in the model core for a certain T3 iteration.
        sa_observables (list): Entries are RMG species labels for the SA observables.
        sa_dict (dict): Dictionary with keys of `kinetics`, `thermo`, and `time`.
    """

    def __init__(self,
                 project: str,
                 rmg: dict,
                 t3: Optional[dict] = None,
                 qm: Optional[dict] = None,
                 project_directory: Optional[str] = None,
                 verbose: int = 20,
                 clean_dir: bool = False,
                 ):

        self.sa_dict = None
        self.sa_observables = list()
        self.t0 = datetime.datetime.now()  # initialize the timer as datetime object

        project_directory = project_directory or os.path.join(PROJECTS_BASE_PATH, project)

        self.schema = InputBase(project=project,
                                project_directory=project_directory,
                                t3=t3,
                                rmg=rmg,
                                qm=qm,
                                verbose=verbose,
                                ).dict()

        self.schema_exclude_unset = InputBase(project=project,
                                              project_directory=project_directory,
                                              t3=t3,
                                              rmg=rmg,
                                              qm=qm,
                                              verbose=verbose,
                                              ).dict(exclude_unset=True)

        self.project = self.schema['project']
        self.project_directory = self.schema['project_directory']
        self.t3 = self.schema['t3']
        self.rmg = self.schema['rmg']
        self.qm = self.schema['qm']
        self.verbose = self.schema['verbose']

        if clean_dir and os.path.isdir(self.project_directory):
            self.cleanup()
        if not os.path.isdir(self.project_directory):
            os.makedirs(self.project_directory)

        # initialize the logger
        self.logger = Logger(project=self.project,
                             project_directory=self.project_directory,
                             verbose=self.verbose,
                             t0=self.t0,
                             )
        self.logger.log_args(schema=self.schema_exclude_unset)

        self.rmg_exceptions_counter = 0
        self.iteration = 0
        self.thermo_lib_base_path = os.path.join(rmg_settings['database.directory'], 'thermo', 'libraries')
        self.kinetics_lib_base_path = os.path.join(rmg_settings['database.directory'], 'kinetics', 'libraries')
        self.species, self.reactions, self.paths = dict(), dict(), dict()
        self.rmg_species, self.rmg_reactions, self.executed_networks = list(), list(), list()

        if self.qm:
            # check args
            self.check_arc_args()

        if any(self.rmg['model']['core_tolerance'][i + 1] > self.rmg['model']['core_tolerance'][i]
               for i in range(len(self.rmg['model']['core_tolerance']) - 1)):
            self.logger.warning(f'The RMG tolerances are not in descending order.')
            self.logger.info(f'Got: {self.rmg["model"]["core_tolerance"]}')

    def as_dict(self) -> dict:
        """
        Generate a dictionary representation of the object's arguments.

        Returns:
            dict: The dictionary representation.
        """
        result = dict()
        result['project'] = self.project
        result['project_directory'] = self.project_directory
        result['verbose'] = self.verbose
        result['t3'] = self.t3
        result['rmg'] = self.rmg
        result['qm'] = self.qm
        return result

    def write_t3_input_file(self,
                            path: Optional[str] = None,
                            all_args: bool = False,
                            ) -> None:
        """
        Save the current **arguments** (not all attributes) as a T3 input file.
        Useful for creating an input file using the API.

        Args:
             path (str, optional): The full path for the generated input file,
                                   or to the folder where this file will be saved under a default name.
                                   If ``None``, the input file will be saved to the project directory.
             all_args (bool, optional): Whether to save all arguments in the generated input file
                                        including all default values. Default: ``False``.
        """
        if path is None:
            path = os.path.join(self.project_directory, 'T3_auto_saved_input.yml')
        if os.path.isdir(path):
            path += '/' if path[-1] != '/' else ''
            path += 'T3_auto_saved_input.yml'
        base_path = os.path.dirname(path)
        if not os.path.isdir(base_path):
            os.makedirs(base_path)
        self.logger.info(f'\n\nWriting input file to {path}')
        save_yaml_file(path=path, content=self.schema if all_args else self.schema_exclude_unset)

    def execute(self):
        """
        Execute T3.
        """
        iteration_start, run_rmg_at_start = self.restart()

        if iteration_start == 0 \
                and self.qm \
                and self.qm['adapter'] == 'ARC' \
                and (len(self.qm['species']) or len(self.qm['reactions'])):
            self.set_paths(iteration=iteration_start)
            self.run_arc(arc_kwargs=self.qm)
            self.process_arc_run()
            # don't request these species and reactions again
            iteration_start += 1
        # ARC species and reactions will be loaded again if restarting and they were already sent to ARC, set to list()
        self.qm['species'], self.qm['reactions'] = list(), list()

        additional_calcs_required = False
        iteration_start = iteration_start or 1

        # main T3 loop
        max_t3_iterations = self.t3['options']['max_T3_iterations']
        for self.iteration in range(iteration_start, max_t3_iterations + 1):

            self.logger.info(f'\n\n\nT3 iteration {self.iteration}:\n'
                             f'---------------\n')
            self.set_paths()

            # RMG
            if self.iteration > iteration_start or self.iteration == iteration_start and run_rmg_at_start:
                self.run_rmg(restart_rmg=run_rmg_at_start)

            # SA
            if self.t3['sensitivity'] is not None:
                # Determine observables to run SA for.
                if not self.sa_observables:
                    for species in self.rmg['species']:
                        if species['observable'] or species['SA_observable']:
                            self.sa_observables.append(species['label'])

                simulate_adapter = simulate_factory(simulate_method=self.t3['sensitivity']['adapter'],
                                                    t3=self.t3,
                                                    rmg=self.rmg,
                                                    paths=self.paths,
                                                    logger=self.logger,
                                                    atol=self.rmg['model']['atol'],
                                                    rtol=self.rmg['model']['rtol'],
                                                    observable_list=self.sa_observables,
                                                    sa_atol=self.t3['sensitivity']['atol'],
                                                    sa_rtol=self.t3['sensitivity']['rtol'],
                                                    global_observables=None,
                                                    )
                simulate_adapter.simulate()
                self.sa_dict = simulate_adapter.get_sa_coefficients()

            additional_calcs_required = self.determine_species_and_reactions_to_calculate()

            # ARC
            if additional_calcs_required:
                if self.qm is None:
                    self.logger.error('Could not refine the model without any QM arguments.')
                    additional_calcs_required = None
                else:
                    self.run_arc(arc_kwargs=self.qm)
                    self.process_arc_run()
            if not additional_calcs_required and self.iteration >= len(self.rmg['model']['core_tolerance']):
                # T3 iterated through all of the user requested tolerances, and there are no more calculations required
                break

            if self.check_overtime():
                self.logger.log_max_time_reached(max_time=self.t3['options']['max_T3_walltime'])
                break

        if additional_calcs_required:
            # The main T3 loop terminated, but the latest calculations were not included in the model.
            # Run RMG for the last time.
            self.iteration += 1
            self.logger.info(f'\n\n\nT3 iteration {self.iteration} (just generating a model using RMG):\n'
                             f'------------------------------------------------------\n')
            self.set_paths()
            self.run_rmg(restart_rmg=run_rmg_at_start)

        self.logger.log_species_summary(species_dict=self.species)
        self.logger.log_reactions_summary(reactions_dict=self.reactions)
        self.logger.log_footer()
        delete_root_rmg_log(project_directory=self.project_directory)

    def set_paths(self,
                  iteration: Optional[int] = None,
                  project_directory: Optional[str] = None,
                  ):
        """
        Set various file and folder paths (but don't create the folders).

        Args:
            iteration (Optional[int]): The iteration number. If ``None``, self.iteration will be used instead.
            project_directory (Optional[str]): The base folder to use, will use ``self.project_directory``
                                               if this argument is left empty.
        """
        project_directory = project_directory or self.project_directory
        iteration = iteration or self.iteration
        iteration_path = os.path.join(project_directory, f'iteration_{iteration}')
        self.paths = {
            'iteration': iteration_path,
            'RMG': os.path.join(iteration_path, 'RMG'),
            'RMG PDep': os.path.join(iteration_path, 'RMG', 'pdep'),
            'RMG input': os.path.join(iteration_path, 'RMG', 'input.py'),
            'RMG log': os.path.join(iteration_path, 'RMG', 'RMG.log'),
            'RMG job log': os.path.join(iteration_path, 'RMG', 'job.log'),
            'RMG coll vio': os.path.join(iteration_path, 'RMG', 'collision_rate_violators.log'),
            'RMS': os.path.join(iteration_path, 'RMG', 'rms'),
            'cantera annotated': os.path.join(iteration_path, 'RMG', 'cantera', 'chem_annotated.cti'),
            'chem annotated': os.path.join(iteration_path, 'RMG', 'chemkin', 'chem_annotated.inp'),
            'species dict': os.path.join(iteration_path, 'RMG', 'chemkin', 'species_dictionary.txt'),
            'SA': os.path.join(iteration_path, 'SA'),
            'SA solver': os.path.join(iteration_path, 'SA', 'solver'),
            'SA input': os.path.join(iteration_path, 'SA', 'input.py'),
            'PDep SA': os.path.join(iteration_path, 'PDep_SA'),
            'ARC': os.path.join(iteration_path, 'ARC'),
            'ARC input': os.path.join(iteration_path, 'ARC', 'input.yml'),
            'ARC restart': os.path.join(iteration_path, 'ARC', 'restart.yml'),
            'ARC log': os.path.join(iteration_path, 'ARC', 'arc.log'),
            'ARC info': os.path.join(iteration_path, 'ARC',
                                     f"{self.qm['project'] if 'project' in self.qm else self.project}_info.yml"),
            'ARC thermo lib': os.path.join(iteration_path, 'ARC', 'output', 'RMG libraries', 'thermo',
                                           f"{self.qm['project'] if 'project' in self.qm else self.project}.py"),
            'ARC kinetics lib': os.path.join(iteration_path, 'ARC', 'output', 'RMG libraries', 'kinetics'),
            'RMG T3 thermo lib': os.path.join(RMG_THERMO_LIB_BASE_PATH, f"{self.t3['options']['library_name']}.py") \
                if self.t3['options']['save_libraries_directly_in_rmgdb'] \
                else os.path.join(project_directory, 'Libraries', f"{self.t3['options']['library_name']}.py"),
            'RMG T3 kinetics lib': os.path.join(RMG_KINETICS_LIB_BASE_PATH, f"{self.t3['options']['library_name']}") \
                if self.t3['options']['save_libraries_directly_in_rmgdb'] \
                else os.path.join(project_directory, 'Libraries', f"{self.t3['options']['library_name']}"),
        }

    def restart(self) -> Tuple[int, bool]:
        """
        Restart T3 by looking for existing iteration folders.
        Restarts ARC if it ran and did not terminate.

        Returns:
            Tuple[int, bool]:
                - The current iteration number.
                - Whether to run RMG for this iteration.
        """
        # set default values
        i_max = 0
        run_rmg_i, restart_arc_i = True, False

        folders = tuple(os.walk(self.project_directory))[0][1]  # returns a 3-tuple: (dirpath, dirnames, filenames)
        iteration_folders = [folder for folder in folders if 'iteration_' in folder]

        if len(iteration_folders):
            self.load_species_and_reactions()
            i_max = max([int(folder.split('_')[1]) for folder in iteration_folders])  # get the latest iteration number
            self.set_paths(iteration=i_max)
            if i_max != 0 and os.path.isfile(self.paths['RMG log']):
                # iteration 0 is reserved for ARC only if needed
                with open(self.paths['RMG log'], 'r') as f:
                    lines = f.readlines()
                    for line in lines[::-1]:
                        if 'MODEL GENERATION COMPLETED' in line:
                            # RMG terminated, no need to regenerate the model
                            run_rmg_i = False
                            break
            if os.path.isfile(self.paths['ARC log']) and (not run_rmg_i or i_max == 0):
                # The ARC log file exists, and no need to run RMG (converged) or this is iteration 0
                with open(self.paths['ARC log'], 'r') as f:
                    lines = f.readlines()
                    for line in lines[::-1]:
                        if 'ARC execution terminated on' in line:
                            # ARC terminated as well, continue to the next iteration
                            i_max += 1
                            run_rmg_i = True
                            break
                    else:
                        # ARC did not terminate, see if the restart file was generated
                        if os.path.isfile(self.paths['ARC restart']):
                            restart_arc_i = True
            if i_max or not run_rmg_i or restart_arc_i:
                rmg_text = ', using the completed RMG run from this iteration' if not run_rmg_i \
                    else ', re-running RMG for this iteration'
                arc_text = ', restarting the previous ARC run in this iteration' if restart_arc_i else ''
                self.logger.log(f'\nRestarting T3 from iteration {i_max}{rmg_text}{arc_text}.\n')
            if restart_arc_i:
                self.run_arc(input_file_path=self.paths['ARC restart'])
                self.process_arc_run()
                i_max += 1
                run_rmg_i = True

        return i_max, run_rmg_i

    def check_arc_args(self):
        """
        Check that all arguments in the ARC input dictionary are legal.

        Returns:
            dict: Updated ARC arguments.
        """
        if self.qm and self.qm['adapter'] == 'ARC':
            allowed_non_arc_args = ['adapter']
            for key in list(self.qm.keys()):
                if key not in inspect.getfullargspec(ARC.__init__).args and key not in allowed_non_arc_args:
                    # This argument was not extracted above, and it's not an ARC argument, remove so ARC doesn't crush
                    self.logger.error(f'Argument "{key}" passed to ARC is not allowed. NOT using it.\n'
                                      f'(if this was meant to be a T3 argument, it was not recognized).')
                    del self.qm[key]

    def run_arc(self,
                input_file_path: Optional[str] = None,
                arc_kwargs: Optional[dict] = None,
                ):
        """
        Run ARC.
        Either ``input_file_path`` or ``arc_kwargs`` must be specified.

        Args:
            input_file_path (Optional[str]): A path to an ARC input file.
            arc_kwargs (Optional[dict]): ARC's arguments as a keyword argument dictionary.
                                         No need to remove the ``adapter`` key if present.

        Raises:
            ValueError: If neither or both ``input_file`` and ``kwargs`` were specified,
                        or an ARC input file was specified but could not be found.
            Various ARC exceptions: If ARC crashes.
        """
        if input_file_path is None and arc_kwargs is None:
            raise ValueError('Either input_file or kwargs must be given to run ARC, got neither.')
        if input_file_path is not None and arc_kwargs is not None:
            raise ValueError('Either input_file or kwargs must be given to run ARC, not both.')
        if input_file_path is not None and not os.path.isfile(input_file_path):
            raise ValueError(f'The ARC input file {input_file_path} could not be found.')

        self.logger.info(f'\nRunning ARC (iteration {self.iteration})...')

        if input_file_path is not None:
            arc_kwargs = read_yaml_file(input_file_path)

        self.dump_species_and_reactions()

        arc_kwargs = arc_kwargs.copy()
        if 'adapter' in arc_kwargs:
            del arc_kwargs['adapter']

        arc_kwargs['project_directory'] = self.paths['ARC']
        if not os.path.isdir(arc_kwargs['project_directory']):
            os.makedirs(arc_kwargs['project_directory'])

        if 'project' not in arc_kwargs:
            arc_kwargs['project'] = self.project
        if 'species' in arc_kwargs.keys() and arc_kwargs['species']:
            species = list()
            for spc_ in arc_kwargs['species']:
                spc = ARCSpecies(species_dict=spc_) if isinstance(spc_, dict) else spc_
                key = self.get_species_key(species=spc)
                if key is not None \
                        and all('no need to compute thermo' in reason for reason in self.species[key]['reasons']):
                    species.append(ARCSpecies(rmg_species=spc, compute_thermo=False) if isinstance(spc, Species) else spc)
                else:
                    species.append(spc)
            arc_kwargs['species'] = species

        tic = datetime.datetime.now()
        arc = ARC(**arc_kwargs)
        if not os.path.isfile(self.paths['ARC input']):
            save_yaml_file(path=self.paths['ARC input'], content=arc.as_dict())

        try:
            arc.execute()
        except Exception as e:
            self.logger.error(f'ARC crashed with {e.__class__}. Got the following error message:\n{e}')
            raise

        elapsed_time = time_lapse(tic)
        self.logger.info(f'ARC terminated, execution time: {elapsed_time}')

    def process_arc_run(self):
        """
        Process an ARC run.
        Sets the self.species[<key>]['converged'] and the self.rxns[<key>]['converged'] parameters.

        Todo:
            - Check for non-physical species in unconverged species.
        """
        unconverged_spc_keys, converged_spc_keys = list(), list()
        unconverged_rxn_keys, converged_rxn_keys = list(), list()
        if os.path.isfile(self.paths['ARC info']):
            content = read_yaml_file(path=self.paths['ARC info'])
            for species in content['species']:
                key = self.get_species_key(label=species['label'])
                if key is not None:
                    if species['success']:
                        converged_spc_keys.append(key)
                        self.species[key]['converged'] = True
                    else:
                        unconverged_spc_keys.append(key)
                        self.species[key]['converged'] = False
            for reaction in content['reactions']:
                key = self.get_reaction_key(label=reaction['label'])
                if key is not None:
                    if reaction['success']:
                        converged_rxn_keys.append(key)
                        self.reactions[key]['converged'] = True
                    else:
                        unconverged_rxn_keys.append(key)
                        self.reactions[key]['converged'] = False
        else:
            raise ValueError(f'ARC did not save a project_info.yml file (expected to find it in {self.paths["ARC info"]}, '
                             f'something must be wrong.')
        self.logger.log_unconverged_species_and_reactions(
            species_keys=unconverged_spc_keys,
            species_dict=self.species,
            reaction_keys=unconverged_rxn_keys,
            reaction_dict=self.reactions,
        )
        if len(converged_spc_keys) or len(converged_rxn_keys):
            # we calculated something, add to thermo/kinetic library
            self.add_to_rmg_libraries()
        # clear the calculated objects from self.qm:
        self.qm['species'], self.qm['reactions'] = list(), list()
        self.dump_species_and_reactions()

    def get_current_rmg_tol(self) -> float:
        """
        Get the current RMG tolerance.

        Returns:
            float: The current RMG move to core tolerance.
        """
        # self.iteration is 1-indexed
        return self.rmg['model']['core_tolerance'][self.iteration - 1] \
            if len(self.rmg['model']['core_tolerance']) >= self.iteration \
            else self.rmg['model']['core_tolerance'][-1]

    def run_rmg(self, restart_rmg: bool = False):
        """
        Run RMG.

        Raises:
            Various RMG Exceptions: if RMG crushed too many times.
        """
        self.logger.info(f'Running RMG (tolerance = {self.get_current_rmg_tol()}, iteration {self.iteration})...')

        # Use the RMG T3 libraries only if they exist and not already in use.
        # 1. thermo
        t3_thermo_lib = self.t3['options']['library_name'] if self.t3['options']['save_libraries_directly_in_rmgdb'] \
            else self.paths['RMG T3 thermo lib']
        if t3_thermo_lib not in self.rmg['database']['thermo_libraries'] \
                and os.path.isfile(self.paths['RMG T3 thermo lib']):
            self.rmg['database']['thermo_libraries'] = [t3_thermo_lib] + self.rmg['database']['thermo_libraries']
        elif t3_thermo_lib in self.rmg['database']['thermo_libraries'] \
                and not os.path.isfile(self.paths['RMG T3 thermo lib']):
            self.rmg['database']['thermo_libraries'].pop(self.rmg['database']['thermo_libraries'].index(t3_thermo_lib))
        # 2. kinetics
        t3_kinetics_lib = self.t3['options']['library_name'] if self.t3['options']['save_libraries_directly_in_rmgdb'] \
            else self.paths['RMG T3 kinetics lib']
        if t3_kinetics_lib not in self.rmg['database']['kinetics_libraries'] \
                and os.path.isdir(self.paths['RMG T3 kinetics lib']):
            self.rmg['database']['kinetics_libraries'] = [t3_kinetics_lib] + self.rmg['database']['kinetics_libraries']
        elif t3_kinetics_lib in self.rmg['database']['kinetics_libraries'] \
                and not os.path.isdir(self.paths['RMG T3 kinetics lib']):
            self.rmg['database']['kinetics_libraries'].pop(self.rmg['database']['kinetics_libraries'].index(t3_kinetics_lib))


    # Creating the RMG Adapter class - allow for SSH
        # We will need self.rmg
        rmg_adapter = RMGAdapter(
                                rmg=self.rmg,
                                t3=self.t3,
                                iteration=self.iteration,
                                paths=self.paths,
                                logger=self.logger,
                                walltime=self.t3['options']['max_RMG_walltime'],
                                max_iterations=self.t3['options']['max_rmg_iterations'],
                                verbose=self.verbose,
                                t3_project_name=self.project,
                                restart_rmg=restart_rmg,
                                )
        rmg_adapter.run_rmg()
        if not os.path.isfile(self.paths['RMG input']):
            raise ValueError(f"The RMG input file {self.paths['RMG input']} could not be written.")
        tic = datetime.datetime.now()

        max_rmg_exceptions_allowed = self.t3['options']['max_RMG_exceptions_allowed']

        if rmg_adapter.rmg_exception_encountered:
            self.rmg_exceptions_counter += 1
            if self.rmg_exceptions_counter > max_rmg_exceptions_allowed:
                self.logger.error(f'This is the {get_number_with_ordinal_indicator(self.rmg_exceptions_counter)} '
                                  f'exception raised by RMG.\nCannot allow more than {max_rmg_exceptions_allowed} '
                                  f'RMG exceptions during a T3 run.\nNot allowing additional exceptions, terminating.')
                raise ValueError('Terminating due to RMG exceptions.')
            else:
                self.logger.warning(f'RMG did not converge. '
                                    f'This is the {get_number_with_ordinal_indicator(self.rmg_exceptions_counter)} '
                                    f'exception raised by RMG.\n'
                                    f'The maximum number of exceptions allowed is {max_rmg_exceptions_allowed}.')
        elapsed_time = time_lapse(tic)
        self.logger.info(f'RMG terminated, execution time: {elapsed_time}')


    #
    def determine_species_and_reactions_to_calculate(self) -> bool:
        """
        Determine which species and reactions in the executed RMG job should be calculated.
        Species/reactions which were previously attempted to be calculated but did not converge
        will not be reconsidered.

        Updates:
            self.rmg_species
            self.rmg_reactions
            self.species - via self.add_species()
            self.qm['species'] - via self.add_species()
            self.reactions - via self.add_reaction()
            self.qm['reactions'] - via self.add_reaction()
            self.executed_networks - via self.determine_species_from_pdep_network()

        Returns:
            bool: Whether additional calculations are required.
        """
        species_keys, reaction_keys, coll_vio_spc_keys, coll_vio_rxn_keys = list(), list(), list(), list()

        self.rmg_species, self.rmg_reactions = self.load_species_and_reactions_from_chemkin_file()
        self.logger.info(f'This RMG model has {len(self.rmg_species)} species '
                         f'and {len(self.rmg_reactions)} reactions in its core.')

        sa_observables_exist = False
        for input_species in self.rmg['species']:
            if input_species['observable'] or input_species['SA_observable']:
                sa_observables_exist = True
                break

        if self.t3['options']['collision_violators_thermo'] or self.t3['options']['collision_violators_rates']:
            coll_vio_spc_keys, coll_vio_rxn_keys = self.determine_species_and_reactions_based_on_collision_violators()

        if self.t3['options']['all_core_species']:
            for species in self.rmg_species:
                if self.species_requires_refinement(species=species):
                    key = self.add_species(species=species, reasons=[f'(i {self.iteration}) All core species'])
                    if key is not None:
                        species_keys.append(key)

        # 1. Species
        else:
            # 1.1. SA observables
            for input_species in self.rmg['species']:
                if (input_species['observable'] or input_species['SA_observable']) and \
                        self.species_requires_refinement(species=get_species_by_label(input_species['label'],
                                                                                      self.rmg_species)):
                    key = self.add_species(species=get_species_by_label(input_species['label'], self.rmg_species),
                                           reasons=['SA observable'])
                    if key is not None:
                        species_keys.append(key)
            # 1.2. SA
            if sa_observables_exist:
                species_keys.extend(self.determine_species_based_on_sa())
            # 1.3. collision violators
            if self.t3['options']['collision_violators_thermo']:
                species_keys.extend(coll_vio_spc_keys)

        # 2. Reactions
        # 2.1. SA
        if sa_observables_exist:
            reaction_keys.extend(self.determine_reactions_based_on_sa())
        # 2.2. collision violators
        if self.t3['options']['collision_violators_rates']:
            reaction_keys.extend(coll_vio_rxn_keys)

        species_keys = list(set([key for key in species_keys if key is not None]))
        reaction_keys = list(set([key for key in reaction_keys if key is not None]))

        additional_calcs_required = bool(len(species_keys)) or bool(len(reaction_keys)) \
            or any(spc['converged'] is None for spc in self.species.values())

        self.logger.info(f'Additional calculations required: {additional_calcs_required}\n')
        if len(species_keys):
            self.logger.log_species_to_calculate(species_keys, self.species)
        if len(reaction_keys):
            self.logger.log_reactions_to_calculate(reaction_keys, self.reactions)
        return additional_calcs_required

    def determine_species_based_on_sa(self) -> List[int]:
        """
        Determine species to calculate based on sensitivity analysis.

        Returns:
            List[int]: Entries are T3 species indices of species determined to be calculated based on SA.
        """
        species_keys, pdep_rxns_to_explore = list(), list()
        if self.sa_dict is None:
            self.logger.error(f"T3's sa_dict was None. Please check that the input file contains a proper 'sensitivity' "
                              f"block and/or that SA was run successfully.\n"
                              f"Not performing refinement based on sensitivity analysis!")
            return species_keys

        sa_dict_max = {'kinetics': dict(), 'thermo': dict()}
        for key in ['kinetics', 'thermo']:
            for observable_label in self.sa_dict[key].keys():
                if observable_label not in sa_dict_max[key]:
                    sa_dict_max[key][observable_label] = list()
                for parameter in self.sa_dict[key][observable_label].keys():
                    entry = dict()
                    entry['parameter'] = parameter  # rxn number (int) or spc label (str)
                    entry['max_sa'] = max(self.sa_dict[key][observable_label][parameter].max(),
                                          abs(self.sa_dict[key][observable_label][parameter].min()))
                    sa_dict_max[key][observable_label].append(entry)

            for observable_label, sa_list in sa_dict_max['kinetics'].items():
                sa_list_sorted = sorted(sa_list, key=lambda item: item['max_sa'], reverse=True)
                for i in range(min(self.t3['sensitivity']['top_SA_reactions'], len(sa_list_sorted))):
                    reaction = get_reaction_by_index(sa_list_sorted[i]['parameter'] - 1, self.rmg_reactions)
                    if reaction is None:
                        continue
                    for species in reaction.reactants + reaction.products:
                        if self.species_requires_refinement(species=species):
                            num = f'{i+1}{get_ordinal_indicator(i+1)} ' if i else ''
                            reason = f'(i {self.iteration}) participates in the {num}most sensitive reaction ' \
                                     f'for {observable_label}: {reaction}'
                            key = self.add_species(species=species, reasons=reason)
                            if key is not None:
                                species_keys.append(key)
                    if reaction.kinetics.is_pressure_dependent() \
                            and reaction not in [rxn_tup[0] for rxn_tup in pdep_rxns_to_explore] \
                            and self.t3['sensitivity']['pdep_SA_threshold'] is not None:
                        pdep_rxns_to_explore.append((reaction, i, observable_label))
            for observable_label, sa_list in sa_dict_max['thermo'].items():
                sa_list_sorted = sorted(sa_list, key=lambda item: item['max_sa'], reverse=True)
                for i in range(min(self.t3['sensitivity']['top_SA_species'], len(sa_list_sorted))):
                    species = get_species_by_label(sa_list_sorted[i]['parameter'], self.rmg_species)
                    if species is None:
                        self.logger.error(f"Could not identify species {sa_list_sorted[i]['parameter']}!")
                    if self.species_requires_refinement(species=species):
                        num = f'{i+1}{get_ordinal_indicator(i+1)} ' if i else ''
                        reason = f'(i {self.iteration}) the {num}most sensitive species thermo for {observable_label}'
                        key = self.add_species(species=species, reasons=reason)
                        if key is not None:
                            species_keys.append(key)

        species_keys.extend(self.determine_species_from_pdep_network(pdep_rxns_to_explore=pdep_rxns_to_explore))

        return species_keys

    def determine_reactions_based_on_sa(self) -> List[int]:
        """
        Determine reaction rate coefficients to calculate based on sensitivity analysis.

        Returns:
            List[int]: Entries are T3 reaction indices of reactions determined to be calculated based on SA.
        """
        reaction_keys, pdep_rxns_to_explore = list(), list()
        if not os.path.isdir(self.paths['SA solver']):
            self.logger.error("Could not find the path to the SA solver output folder.\n"
                              "Not performing refinement based on sensitivity analysis!")
            return reaction_keys

        sa_dict_max = dict()
        for observable_label in self.sa_dict['kinetics'].keys():
            if observable_label not in sa_dict_max:
                sa_dict_max[observable_label] = list()
            for parameter in self.sa_dict['kinetics'][observable_label].keys():
                entry = dict()
                entry['parameter'] = parameter  # rxn number (int)
                entry['max_sa'] = max(self.sa_dict['kinetics'][observable_label][parameter].max(),
                                      abs(self.sa_dict['kinetics'][observable_label][parameter].min()))
                sa_dict_max[observable_label].append(entry)

        for observable_label, sa_list in sa_dict_max.items():
            sa_list_sorted = sorted(sa_list, key=lambda item: item['max_sa'], reverse=True)
            for i in range(min(self.t3['sensitivity']['top_SA_reactions'], len(sa_list_sorted))):
                reaction = get_reaction_by_index(sa_list_sorted[i]['parameter'] - 1, self.rmg_reactions)
                if self.reaction_requires_refinement(reaction):
                    num = f'{i+1}{get_ordinal_indicator(i+1)} ' if i else ''
                    reason = f'(i {self.iteration}) the {num}most sensitive reaction for {observable_label}'
                    key = self.add_reaction(reaction=reaction, reasons=reason)
                    if key is not None:
                        reaction_keys.append(key)

        return reaction_keys

    def determine_species_from_pdep_network(self,
                                            pdep_rxns_to_explore: List[Tuple[Union[Reaction, PDepReaction], int, str]],
                                            ) -> List[int]:
        """
        Determine species to calculate based on a pressure dependent network
        by spawning network sensitivity analyses.

        Args:
            pdep_rxns_to_explore (List[Tuple[Reaction, int, str]]):
                Entries are tuples of (Reaction, SA rank index, observable_label).

        Returns:
            List[int]: Entries are T3 species indices of species determined to be calculated based on SA.
        """
        species_keys = list()
        if self.t3['sensitivity']['pdep_SA_threshold'] is None:
            return species_keys
        if not os.path.isdir(self.paths['PDep SA']):
            os.mkdir(self.paths['PDep SA'])

        for reaction_tuple in pdep_rxns_to_explore:
            reaction = reaction_tuple[0]

            if not isinstance(reaction, PDepReaction):
                continue

            # identify the network name and file name
            network_file_names = list()
            for (_, _, files) in os.walk(self.paths['RMG PDep']):
                network_file_names.extend(files)
                break  # don't continue to explore subdirectories
            network_file_names = [network_file_name for network_file_name in network_file_names
                                  if f'network{reaction.network.index}_' in network_file_name]
            if not network_file_names:
                # this PDepReaction did not stem from a network file, it is probably a library reaction
                if hasattr(reaction, 'library'):
                    self.logger.info(f'Not exploring library reaction {reaction} with PES SA.')
                else:
                    self.logger.warning(f'Not exploring reaction {reaction} with PES SA '
                                        f'since it does not have a `network` attribute.')
                continue
            network_version = max([int(network_file_name.split('.')[0].split('_')[1])
                                   for network_file_name in network_file_names])
            network_name = f'network{reaction.network.index}_{network_version}'  # w/o the '.py' extension

            # Try running this network using user-specified methods by order.
            sa_coefficients_path, arkane = None, None
            errors = list()
            for method in self.t3['sensitivity']['ME_methods']:
                isomer_labels = write_pdep_network_file(
                    network_name=network_name,
                    method=method,
                    pdep_sa_path=self.paths['PDep SA'],
                    rmg_pdep_path=self.paths['RMG PDep'],
                )
                arkane = Arkane(input_file=os.path.join(self.paths['PDep SA'], network_name, method, 'input.py'),
                                output_directory=os.path.join(self.paths['PDep SA'], network_name, method))
                arkane.plot = True
                self.logger.info(f'\nRunning PDep SA for network {network_name} using the {method} method\n'
                                 f'to examine reaction {reaction} (iteration {self.iteration})...')
                try:
                    arkane.execute()
                except (AttributeError,
                        ChemicallySignificantEigenvaluesError,
                        ModifiedStrongCollisionError,
                        NetworkError,
                        TypeError,
                        UnboundLocalError,
                        ValueError,
                        ) as e:
                    errors.append(e)
                else:
                    # Network execution was successful, mark network as executed and don't run the next method.
                    self.logger.info(f'Successfully executed a PDep SA for network {network_name} '
                                     f'using the {method} method.\n')
                    self.executed_networks.append(isomer_labels)
                    sa_coefficients_path = os.path.join(self.paths['PDep SA'], network_name, method,
                                                        'sensitivity', 'sa_coefficients.yml')
                    break
            else:
                self.logger.error(f"Could not execute a PDep SA for network {network_name} using "
                                  f"{self.t3['sensitivity']['ME_methods']}.\nGot the following errors:")
                for method, e in zip(self.t3['sensitivity']['ME_methods'], errors):
                    self.logger.info(f'{e.__class__} for method {method}:\n{e}\n')

            if sa_coefficients_path is not None:
                sa_dict = read_yaml_file(sa_coefficients_path)
                reactants_label = ' + '.join([reactant.to_chemkin() for reactant in reaction.reactants])
                products_label = ' + '.join([product.to_chemkin() for product in reaction.products])
                chemkin_reaction_str = f'{reactants_label} <=> {products_label}'
                labels_map = dict()  # Keys are network species labels, values are Chemkin labels of the RMG species.
                for network_label, adj in sa_dict['structures'].items():
                    labels_map[network_label] = get_species_label_by_structure(adj=adj, species_list=self.rmg_species)

                reactants_label = ' + '.join([key_by_val(labels_map, reactant.label) for reactant in reaction.reactants])
                products_label = ' + '.join([key_by_val(labels_map, product.label) for product in reaction.products])
                network_reaction_str = f'{reactants_label} <=> {products_label}'
                if network_reaction_str not in sa_dict:
                    self.logger.error(f'Could not locate reaction {network_reaction_str} '
                                      f'in SA output for network {network_name}.')
                else:
                    # identify wells in this network this reaction is sensitive to
                    sensitive_wells_dict = dict()  # keys are wells labels, values are lists of sensitive conditions
                    for condition, sa_data in sa_dict[network_reaction_str].items():
                        max_sa_coeff = max([sa_coeff for sa_coeff in sa_data.values()])
                        for entry, sa_coeff in sa_data.items():
                            if '(TS)' not in entry \
                                    and sa_coeff > max_sa_coeff * self.t3['sensitivity']['pdep_SA_threshold']:
                                if entry not in sensitive_wells_dict:
                                    sensitive_wells_dict[entry] = [condition]
                                else:
                                    sensitive_wells_dict[entry].append(condition)
                    if sensitive_wells_dict:
                        # extract species from wells and add to species_to_calc if thermo is uncertain
                        for well, conditions in sensitive_wells_dict.items():
                            species_list = list()
                            for label in well.split(' + '):
                                spc_label = labels_map[label]
                                species = None
                                if spc_label is not None:
                                    species = get_species_by_label(label=labels_map[label],
                                                                   species_list=self.rmg_species)
                                elif arkane is not None:
                                    # this is an Edge species which is missing from the Core rmg_species list
                                    species = get_species_by_label(label=label,
                                                                   species_list=arkane.species_dict.values())
                                if species is not None:
                                    species_list.append(species)
                            for species in species_list:
                                if self.species_requires_refinement(species=species):
                                    num = f'{reaction_tuple[1] + 1}{get_ordinal_indicator(reaction_tuple[1] + 1)} ' \
                                        if reaction_tuple[1] else ''
                                    reason = f'(i {self.iteration}) a sensitive well in PDep ' \
                                        f'network {network_name} from which {chemkin_reaction_str} was ' \
                                        f'derived, which is the {num}most sensitive reaction for observable ' \
                                        f'{reaction_tuple[2]}, at {conditions}.'
                                    key = self.add_species(species=species, reasons=reason)
                                    if key is not None:
                                        species_keys.append(key)

        return species_keys

    def determine_species_and_reactions_based_on_collision_violators(self) -> Tuple[List[int], List[int]]:
        """
        Determine species to calculate based on collision rate violating reactions.

        Returns:
            Tuple[List[int], List[int]]:
                - Entries are T3 species indices of species determined to be calculated based on SA.
                - Entries are T3 reaction indices of reactions determined to be calculated based on SA.
        """
        species_keys, reaction_keys = list(), list()
        if not os.path.isfile(self.paths['RMG coll vio']):
            self.logger.info('No collision rate violating reactions identified in this model.')
            return species_keys, reaction_keys

        with open(self.paths['RMG coll vio'], 'r') as f:
            lines = f.readlines()

        for line in lines: