GFCP-reader/Read_input.py at master · 41bY/GFCP-reader · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
# -*- coding: utf-8 -*-
"""
Created on Sat Oct  1 10:22:10 2022

@author: simula
"""

import numpy as np
import Utils as ut

###############################################################################

def read_CP_input(file_path : str):
    """
    Entry point of this module: read a 'cp.x' input file and return the
    values of the tags needed for the data extraction.

    The file is loaded into a list of lines, 'search_input_tags' collects the
    string values associated to the tags and 'check_founds' validates them and
    converts them to their final types.

    Parameters
    ----------
    file_path : str
        'cp.x' input file path.

    Raises
    ------
    ValueError
        If 'check_founds' reports a non-empty error message, i.e. the number
        of found values differs from what is expected or one of them is
        corrupted.

    Returns
    -------
    found_list : list
        Output list containing the values associated to each tag of the proper
        type depending on the tag:
        'iprint' -> int; 'dt' -> float; 'celldm' -> float; 'nat' -> int; 'ntyp' -> int;
        'CELL_PARAMETERS' -> np.array(float); 'ATOMIC_SPECIES' -> list of [str, float];
        'ATOMIC_POSITIONS' -> list of str

    """

    #Input files are small so they can be loaded into memory at once.
    #The context manager closes the handle even if reading raises.
    with open(file_path, 'r') as file:
        lines = file.readlines()

    #Search tags in lines and collect their string values
    found_list_str = search_input_tags(lines)

    #Check formats and assign the right types to the list values
    found_list, err_msg = check_founds(found_list_str)

    #A non-empty message means a tag is missing or corrupted
    if err_msg != '':
        raise ValueError(err_msg)

    return found_list

###############################################################################

def search_input_tags(read_list : list):
    """
    Search the tags needed to perform the data extraction. The search is
    performed within 'read_list', which holds the content, line per line, of
    the 'cp.x' input file. It returns a list containing the string values
    associated to each tag, always arranged in this order:
    'iprint', 'dt', 'celldm', 'nat', 'ntyp', 'CELL_PARAMETERS', 'ATOMIC_SPECIES',
    'ATOMIC_POSITIONS'

    The order of the returned values does not depend on the order in which
    the tags appear in the file, so that positional consumers such as
    'check_founds' always find each tag at its expected index.

    Parameters
    ----------
    read_list : list
        Input list containing the lines of 'cp.x' input file.

    Returns
    -------
    found_list : list of str
        Output list containing the str values associated to each tag.
        If a tag is corrupted the associated str will be '' (or a void list
        for 'CELL_PARAMETERS', 'ATOMIC_SPECIES' and 'ATOMIC_POSITIONS').
        If a tag is missing 'found_list' will have a smaller size.
        If a tag matches more than one line every match is kept, so
        'found_list' will have a larger size.

    Notes
    -----
    Tags are detected with a plain substring test on each line and the first
    matching branch wins. 'ATOMIC_SPECIES' and 'ATOMIC_POSITIONS' are processed
    with the 'ntyp' and 'nat' values read so far (0 if not read yet), so these
    two blocks are expected to come after 'nat' and 'ntyp' in the file.
    """

    #Fixed order of the tags in the returned list
    tag_order = ('iprint', 'dt', 'celldm', 'nat', 'ntyp',
                 'CELL_PARAMETERS', 'ATOMIC_SPECIES', 'ATOMIC_POSITIONS')

    #One bucket per tag: every match is kept so that duplicated tags still
    #change the size of the output and can be detected by the caller
    found = {tag: [] for tag in tag_order}
    nat = 0
    ntyp = 0

    #Search loop
    for num_line, line in enumerate(read_list):

        if 'iprint' in line:
            found['iprint'].append(process_tag(line))

        elif 'dt' in line:
            found['dt'].append(process_tag(line))

        elif 'celldm' in line:
            found['celldm'].append(process_tag(line))

        elif 'nat' in line:
            value_str = process_tag(line)
            found['nat'].append(value_str)
            if value_str != '': nat = int(value_str)

        elif 'ntyp' in line:
            value_str = process_tag(line)
            found['ntyp'].append(value_str)
            if value_str != '': ntyp = int(value_str)

        elif 'CELL_PARAMETERS' in line:
            found['CELL_PARAMETERS'].append(process_cell(read_list[num_line:]))

        elif 'ATOMIC_SPECIES' in line:
            found['ATOMIC_SPECIES'].append(process_species(read_list[num_line:], ntyp))

        elif 'ATOMIC_POSITIONS' in line:
            found['ATOMIC_POSITIONS'].append(process_atoms(read_list[num_line:], nat))

    #Flatten the buckets following the fixed tag order
    found_list = [value for tag in tag_order for value in found[tag]]

    return found_list

###############################################################################

def process_tag(input_str : str):
    """
    Extract the value string of a 'tag = value' line.

    The line is split at every '=' and the piece right after the first '='
    is handed to 'ut.trim_string' with mode 'num'; whatever that helper
    returns is the result (presumably the numeric part of the piece, with ''
    signalling a corrupted field -- to be confirmed against 'Utils').

    Parameters
    ----------
    input_str : str
        Input string associated to a tag.

    Returns
    -------
    value_str : str
        The trimmed right-hand side of the line.
        Null string if the line contains no '='.

    """

    pieces = input_str.split('=')

    #No '=' in the line: there is no right-hand side, report a blank string
    if len(pieces) < 2:
        return ''

    #Only the piece between the first and the second '=' is considered
    return ut.trim_string(pieces[1], mode = 'num')

###############################################################################

def process_cell(input_list_str : list):
    """
    Read the lattice vectors following a 'CELL_PARAMETERS' tag in a 'cp.x'
    input file. The three lines below the tag are split on whitespace and
    each component is passed through 'ut.trim_string' (mode 'num') to build
    a 3x3 matrix of strings.

    Parameters
    ----------
    input_list_str : list
        Input list of str whose first element is the tag line and whose next
        three elements are the lattice vectors. Further elements are ignored.

    Returns
    -------
    cell : list of str
        3x3 list of str with the cartesian components of the three lattice
        vectors. A void list is returned if fewer than four lines are given,
        if a vector line does not hold exactly three components or if a
        trimmed component is ''.

    """

    #The tag line plus three vector lines are required
    if len(input_list_str) < 4:
        return []

    lattice = []

    for vector_line in input_list_str[1:4]:
        components = vector_line.split()

        #A lattice vector must have exactly three components
        if len(components) != 3:
            return []

        vector = []
        for component in components:
            trimmed = ut.trim_string(component, mode = 'num')

            #A blank trimmed component marks the whole field as corrupted
            if trimmed == '':
                return []

            vector.append(trimmed)

        lattice.append(vector)

    return lattice

###############################################################################

def process_species(input_list_str : list, ntyp : int):
    """
    Read the atomic species following an 'ATOMIC_SPECIES' tag in a 'cp.x'
    input file. The 'ntyp' lines below the tag are split on whitespace; the
    first field is taken as the atomic label and the second one, passed
    through 'ut.trim_string' (mode 'num'), as the atomic mass.

    Parameters
    ----------
    input_list_str : list
        Input list of str whose first element is the tag line, followed by
        at least 'ntyp' lines, each with at least three fields (label, mass
        and pseudopotential). Further elements are ignored.
    ntyp : int
        Number of different atomic species present in the 'cp.x' input file.

    Returns
    -------
    species : list
        List of dimension 'ntyp'x2 containing the [label, mass] strings of
        each species. A void list is returned if fewer than 'ntyp'+1 lines
        are given, if a line holds fewer than three fields or if a trimmed
        mass is ''. With 'ntyp' equal to 0 the result is a void list too.

    """

    #The tag line plus 'ntyp' species lines are required
    if len(input_list_str) < ntyp + 1:
        return []

    table = []

    for species_line in input_list_str[1:ntyp + 1]:
        fields = species_line.split()

        #Label, mass and pseudopotential are expected on every line
        if len(fields) < 3:
            return []

        label = fields[0]
        mass = ut.trim_string(fields[1], mode = 'num')

        #A blank trimmed mass marks the whole field as corrupted
        if mass == '':
            return []

        table.append([label, mass])

    return table

###############################################################################

def process_atoms(input_list_str : list, nat : int):
    """
    Read the atomic labels following an 'ATOMIC_POSITIONS' tag in a 'cp.x'
    input file. The 'nat' lines below the tag are split on whitespace and the
    first field of each line is kept as the label of that atom; the remaining
    fields are not inspected.

    Parameters
    ----------
    input_list_str : list
        Input list of str whose first element is the tag line, followed by
        at least 'nat' atom lines. Further elements are ignored.
    nat : int
        Total number of atoms in the 'cp.x' input file.

    Returns
    -------
    atoms : list
        List of str of dimension 'nat' containing the atomic label of each
        atom. A void list is returned if fewer than 'nat'+1 lines are given
        or if one of the atom lines is blank.

    """

    #The tag line plus 'nat' atom lines are required
    if len(input_list_str) < nat + 1:
        return []

    labels = []

    for atom_line in input_list_str[1:nat + 1]:
        fields = atom_line.split()

        #A blank line among the atoms marks the whole field as corrupted
        if len(fields) < 1:
            return []

        labels.append(fields[0])

    return labels

###############################################################################

def check_founds(found_list : list):
    """
    Validate the values collected from a 'cp.x' input file and convert them
    to their final types.

    'found_list' must hold exactly eight entries, one per tag, in the order
    'iprint', 'dt', 'celldm', 'nat', 'ntyp', 'CELL_PARAMETERS',
    'ATOMIC_SPECIES', 'ATOMIC_POSITIONS'. Entries are checked one after the
    other: a blank string (first five tags) or a void list (last three tags)
    stops the check and is reported through the returned error message.
    No exception is raised here for such entries; the caller decides how to
    handle the message.

    Parameters
    ----------
    found_list : list of str
        Input list containing the str values associated to each tag.
        If a tag is corrupted the associated entry will be '' or a void list.
        If a tag is missing 'found_list' will have a smaller size.
        The list is converted in place.

    Returns
    -------
    found_list : list
        The same list object received in input. On success it contains the
        values associated to each tag of the proper type depending on the tag:
        'iprint' -> int; 'dt' -> float; 'celldm' -> float; 'nat' -> int; 'ntyp' -> int;
        'CELL_PARAMETERS' -> np.array(float); 'ATOMIC_SPECIES' -> list of [str, float];
        'ATOMIC_POSITIONS' -> list of str
        On failure the entries preceding the faulty one are already converted.
    err_msg : str
        Null string on success, otherwise a description of the problem.

    """

    #Any size other than eight means that tags are missing or repeated
    if len(found_list) != 8:
        err_msg = ("Tags: 'iprint' | 'dt' | 'celldm' | 'ntyp' |"
                   " 'nat' | 'CELL_PARAMETERS' | 'ATOMIC_POSITIONS' | 'ATOMIC_SPECIES'"
                   " not found in CP input file.")
        return found_list, err_msg

    def to_cell(rows):
        #Lattice vectors become a float array
        return np.array(rows, dtype=float)

    def to_species(rows):
        #The label stays a str, the mass becomes a float
        return [[fnd[0], float(fnd[1])] for fnd in rows]

    def keep(labels):
        #Atomic labels need no conversion
        return labels

    #For each position: tag name, value marking a corrupted tag, converter
    rules = (
        ('iprint', '', int),
        ('dt', '', float),
        ('celldm', '', float),
        ('nat', '', int),
        ('ntyp', '', int),
        ('CELL_PARAMETERS', [], to_cell),
        ('ATOMIC_SPECIES', [], to_species),
        ('ATOMIC_POSITIONS', [], keep),
    )

    for index, (tag, corrupted, convert) in enumerate(rules):

        #Stop at the first corrupted tag, earlier entries stay converted
        if found_list[index] != corrupted:
            found_list[index] = convert(found_list[index])
        else:
            return found_list, 'Corrupted tag \'' + tag + '\' in CP input file'

    return found_list, ''
###############################################################################