Skip to content

Commit fd08648

Browse files
authored
Add DirectImport Method for Flexible Sigma Profile Management (#30)
* Add DirectImport method for importing COSMO-SAC profiles from any location
* Updated profile_db.hpp to remove unnecessary substr argument and use std::optional for dispersion_eoverkB
* Update CMake version to 3.10 and set C++ standard to C++17
  - Changed minimum required CMake version from 3.0 to 3.10
  - Set C++ standard to C++17 with CMAKE_CXX_STANDARD, CMAKE_CXX_STANDARD_REQUIRED, and CMAKE_CXX_EXTENSIONS
  - Added newline to maintain code style consistency
* Fix potential null value issue in dispersion_eoverkB
  - Replaced direct access to dispersion_eoverkB with value_or(0.0) to handle optional values
  - Ensured default value of 0.0 is used if dispersion_eoverkB is not set
* Replace deprecated std::not1 and std::ptr_fun with lambda functions
  - Updated strlstrip and strrstrip functions to use lambda expressions instead of deprecated std::not1 and std::ptr_fun
  - Ensured compatibility with modern C++ standards
  - Maintained functionality of trimming whitespace from strings
1 parent 9388a88 commit fd08648

12 files changed

Lines changed: 856 additions & 10 deletions

File tree

CMakeLists.txt

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# cmake_minimum_required() has to run before project() so that the requested
# policy settings are already in effect when the project is configured.
cmake_minimum_required(VERSION 3.10)
project(cCOSMO)

# Set the C++ standard to C++17 (the headers use std::optional); require it
# strictly and disable compiler-specific extensions.
set (CMAKE_CXX_STANDARD 17)
set (CMAKE_CXX_STANDARD_REQUIRED ON)
set (CMAKE_CXX_EXTENSIONS OFF)
48

59
string(CONFIGURE "#define COSMO_SAC_HOME \"${CMAKE_CURRENT_SOURCE_DIR}\"" CONFIG_HEADER)
610
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/HOME_PATH.h" ${CONFIG_HEADER})
@@ -11,6 +15,7 @@ set(MY_INCLUDES
1115
"${CMAKE_CURRENT_BINARY_DIR}"
1216
"${CMAKE_CURRENT_SOURCE_DIR}/externals/nlohmann/single_include"
1317
)
18+
1419
add_executable(main "${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp")
1520
target_sources(main PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/externals/Eigen/debug/msvc/eigen.natvis")
1621
target_include_directories(main PUBLIC ${MY_INCLUDES})
@@ -22,4 +27,4 @@ target_include_directories(cCOSMO PUBLIC ${MY_INCLUDES})
2227

2328
add_executable(catch_tests "${CMAKE_CURRENT_SOURCE_DIR}/tests/catch_tests.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/tests/catch_main.cpp")
2429
target_sources(catch_tests PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/externals/Eigen/debug/msvc/eigen.natvis" "${CMAKE_CURRENT_SOURCE_DIR}/externals/nlohmann/nlohmann_json.natvis")
25-
target_include_directories(catch_tests PUBLIC ${MY_INCLUDES} "${CMAKE_CURRENT_SOURCE_DIR}/externals/Catch/single_include" )
30+
target_include_directories(catch_tests PUBLIC ${MY_INCLUDES} "${CMAKE_CURRENT_SOURCE_DIR}/externals/Catch/single_include" )

include/COSMO_SAC/COSMO.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,8 @@ namespace COSMOSAC {
518518
w = -0.27027;
519519
}
520520

521-
double ekB0 = m_fluids[0].dispersion_eoverkB, ekB1 = m_fluids[1].dispersion_eoverkB;
521+
double ekB0 = m_fluids[0].dispersion_eoverkB.value_or(0.0);
522+
double ekB1 = m_fluids[1].dispersion_eoverkB.value_or(0.0);
522523
double A = w*(0.5*(ekB0+ekB1) - sqrt(ekB0*ekB1));
523524
EigenArray lngamma_dsp(2);
524525
lngamma_dsp(0) = A*x[1]*x[1];

include/COSMO_SAC/profile_db.hpp

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <algorithm>
1010
#include <cctype>
1111
#include <functional>
12+
#include <optional>
1213
#include "nlohmann/json.hpp"
1314

1415
namespace COSMOSAC {
@@ -41,14 +42,14 @@ struct SigmaProfileSet {
4142

4243
/// The sigma profiles (and some more metadata) associated with the fluid
4344
struct FluidProfiles {
44-
enum class dispersion_classes{DISP_WATER, DISP_COOH, DISP_NHB, DISP_ONLY_ACCEPTOR, DISP_DONOR_ACCEPTOR};
45+
enum class dispersion_classes {DISP_WATER, DISP_COOH, DISP_NHB, DISP_ONLY_ACCEPTOR, DISP_DONOR_ACCEPTOR};
4546
SigmaProfileSet profiles;
4647
std::string name;
4748
std::size_t VTnumber;
4849
double A_COSMO_A2; ///< The surface area of the molecule as calculated by COSMO-SAC, in A^2
4950
double V_COSMO_A3; ///< The volume of the molecule as calculated by COSMO-SAC, in A^3
5051
dispersion_classes dispersion_flag;
51-
double dispersion_eoverkB;
52+
std::optional<double> dispersion_eoverkB;
5253
nlohmann::json meta; ///< Any additional metadata, stored in JSON format
5354
};
5455

@@ -346,6 +347,90 @@ class DelawareProfileDatabase : public ProfileDatabase {
346347
}
347348
};
348349

350+
/// The DirectImport feature for importing profiles based on "filename" + "path"
351+
class DirectImport : public ProfileDatabase {
352+
private:
353+
std::string m_dbpath;
354+
355+
public:
356+
DirectImport() {
357+
}
358+
359+
void add_profile(const std::string& identifier, const std::string& dbpath = ".") {
360+
m_dbpath = dbpath;
361+
// Now we load the sigma profile(s) from the file
362+
auto lines = str_split(get_file_contents(m_dbpath + "/" + identifier + ".sigma"));
363+
364+
std::vector<double> _sigma, _psigmaA;
365+
FluidProfiles fluid;
366+
std::string meta;
367+
for (auto &&line : lines) {
368+
if (line.substr(0,8) == "# Name: ") {
369+
std::string check_name = line.substr(8);
370+
continue;
371+
}
372+
if (line.substr(0,8) == "# CASn: ") {
373+
std::string check_CAS = line.substr(8);
374+
continue;
375+
}
376+
if (line.substr(0,8) == "# meta: ") {
377+
meta = line.substr(8);
378+
continue;
379+
}
380+
if (line[0] == '#'){ continue; }
381+
auto v = str_split(strrstrip(line), " ");
382+
if (v.empty() || (v.size() == 1 && v[0].empty())) { continue; }
383+
_sigma.push_back(mystrtod(v[0]));
384+
_psigmaA.push_back(mystrtod(v[1]));
385+
}
386+
Eigen::Map<Eigen::ArrayXd> sigma(&(_sigma[0]), _sigma.size());
387+
Eigen::Map<Eigen::ArrayXd> psigmaA(&(_psigmaA[0]), _psigmaA.size());
388+
fluid.name = identifier;
389+
390+
if (sigma.size() == 51 && psigmaA.size() == 51) {
391+
fluid.profiles.nhb = SigmaProfile(sigma, psigmaA);
392+
fluid.A_COSMO_A2 = fluid.profiles.nhb.psigmaA().sum();
393+
}
394+
else if (sigma.size() == 51*3 && psigmaA.size() == 51*3){
395+
fluid.profiles.nhb = SigmaProfile(sigma.segment(0*51, 51), psigmaA.segment(0*51, 51));
396+
fluid.profiles.oh = SigmaProfile(sigma.segment(1*51, 51), psigmaA.segment(1*51, 51));
397+
fluid.profiles.ot = SigmaProfile(sigma.segment(2*51, 51), psigmaA.segment(2*51, 51));
398+
double check_Area2 = fluid.profiles.nhb.psigmaA().sum() + fluid.profiles.oh.psigmaA().sum() + fluid.profiles.ot.psigmaA().sum();
399+
fluid.A_COSMO_A2 = check_Area2;
400+
}
401+
else{
402+
throw std::invalid_argument("Length of sigma profile ["+std::to_string(sigma.size())+"] is neither 51 nor 51*3");
403+
}
404+
fluid.meta = nlohmann::json::parse(meta);
405+
fluid.V_COSMO_A3 = fluid.meta["volume [A^3]"];
406+
std::string flag = fluid.meta["disp. flag"];
407+
if (flag == "COOH") {
408+
fluid.dispersion_flag = FluidProfiles::dispersion_classes::DISP_COOH;
409+
}
410+
else if (flag == "H2O") {
411+
fluid.dispersion_flag = FluidProfiles::dispersion_classes::DISP_WATER;
412+
}
413+
else if (flag == "NHB") {
414+
fluid.dispersion_flag = FluidProfiles::dispersion_classes::DISP_NHB;
415+
}
416+
else if (flag == "HB-ACCEPTOR") {
417+
fluid.dispersion_flag = FluidProfiles::dispersion_classes::DISP_ONLY_ACCEPTOR;
418+
}
419+
else if (flag == "HB-DONOR-ACCEPTOR") {
420+
fluid.dispersion_flag = FluidProfiles::dispersion_classes::DISP_DONOR_ACCEPTOR;
421+
}
422+
else {
423+
throw std::invalid_argument("Unable to match dispersion flag: \""+flag+"\"");
424+
}
425+
if (fluid.meta["disp. e/kB [K]"].is_null()) {
426+
fluid.dispersion_eoverkB = std::nullopt;
427+
} else {
428+
fluid.dispersion_eoverkB = fluid.meta["disp. e/kB [K]"].get<double>();
429+
}
430+
add_to_db(identifier, std::move(fluid));
431+
}
432+
};
433+
349434
} /* namespace COSMOSAC */
350435

351436
#endif

include/COSMO_SAC/util.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,16 +72,18 @@ static std::vector<std::string> str_split(const std::string &s,
7272
}
7373

7474
/// The following code for the trim functions was taken from http://stackoverflow.com/questions/216823/whats-the-best-way-to-trim-stdstring
75-
// trim from start
75+
// trim from start
7676
/// Remove leading whitespace from @p s in place.
/// @param s String to trim; modified directly
/// @return Reference to @p s, so calls can be chained
static std::string& strlstrip(std::string& s) {
    // The lambda takes unsigned char because std::isspace is only defined for
    // values representable as unsigned char (or EOF)
    const auto first_kept = std::find_if(s.begin(), s.end(), [](unsigned char c) { return !std::isspace(c); });
    s.erase(s.begin(), first_kept);
    return s;
}
80+
8081
// trim from end
8182
/// Remove trailing whitespace from @p s in place.
/// @param s String to trim; modified directly
/// @return Reference to @p s, so calls can be chained
static std::string& strrstrip(std::string& s) {
    // Search backwards for the last non-space character; base() converts the
    // reverse iterator into the forward position just past that character
    const auto last_kept = std::find_if(s.rbegin(), s.rend(), [](unsigned char c) { return !std::isspace(c); });
    s.erase(last_kept.base(), s.end());
    return s;
}
86+
8587
// trim from both ends
8688
static std::string& strstrip(std::string& s) {
8789
return strlstrip(strrstrip(s));

src/pybind11_interface.cxx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,13 @@ void init_COSMO(py::module &m) {
7070
py::class_<DelawareProfileDatabase, ProfileDatabase >(m, "DelawareProfileDatabase")
7171
.def(py::init<const std::string &, const std::string &>())
7272
.def("add_profile", &DelawareProfileDatabase::add_profile)
73-
.def("to_JSON", &DelawareProfileDatabase::to_JSON);
73+
.def("to_JSON", &DelawareProfileDatabase::to_JSON)
74+
;
75+
76+
py::class_<DirectImport, ProfileDatabase >(m, "DirectImport")
77+
.def(py::init<>())
78+
.def("add_profile", &DirectImport::add_profile, py::arg("name"), py::arg("path") = ".")
79+
;
7480

7581
using EigenArrayA = AbstractCOSMOModel::EigenArray;
7682
py::class_<AbstractCOSMOModel >(m, "AbstractCOSMOModel")

tests/direct_import/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# DirectImport for COSMO-SAC
2+
3+
## Introduction
4+
The `DirectImport` feature is a custom enhancement to the [COSMO-SAC package](https://github.com/usnistgov/COSMOSAC), designed to provide more flexibility and organization in handling sigma profiles for different components. Unlike the standard import methods (`VirginiaTechProfileDatabase`, `DelawareProfileDatabase`) in COSMO-SAC, which require all sigma profiles to be stored in one folder and listed in a TXT file, `DirectImport` allows users to store sigma profiles in separate directories. This feature enables specifying the path and name of the sigma file for each component individually, offering a significant improvement in directory structure and accessibility.
5+
6+
## Features
7+
- **Enhanced Organization:** With `DirectImport`, users can separate sigma profiles into different folders, such as one for polymers and another for APIs (Active Pharmaceutical Ingredients), and provide their paths separately. This organization is particularly beneficial for maintaining a clear and structured directory.
8+
- **Flexible Testing:** `DirectImport` facilitates the testing of different sigma profiles for the same component by allowing users to select which sigma profile to use explicitly. This flexibility is invaluable for research and development purposes, where multiple iterations of sigma profiles may need to be evaluated.
9+
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
"""
2+
This module enhances the COSMO-SAC package with the `DirectImport` feature,
3+
providing a more flexible and organized method for managing sigma profiles.
4+
Unlike the standard import method requiring sigma profiles in a single folder,
5+
`DirectImport` allows for storing profiles in separate directories and specifying
6+
the path and name of the sigma file for each component. This approach facilitates
7+
enhanced organization and flexible testing of different sigma profiles for the
8+
same component.
9+
10+
Author: Ivan Antolovic
11+
E-Mail: ivan.antolovic@tu-berlin.de
12+
13+
Note: The `DirectImport` method originated during the work on the paper:
14+
https://pubs.acs.org/doi/10.1021/acs.molpharmaceut.4c00342
15+
Example profiles are taken from: https://github.com/ivanantolo/cosmopharm
16+
"""
17+
18+
import cCOSMO
19+
from pathlib import Path
20+
21+
# Directory that contains this script; every location below is resolved
# relative to it so the example works from any working directory.
script_dir = Path(__file__).resolve().parent

# Locations of the example profiles that accompany this script
_profiles_root = script_dir / "profiles"
PROFILES_API = _profiles_root / "pharmaceuticals"   # profiles loaded via DirectImport
PROFILES_POLY = _profiles_root / "polymers"         # profiles loaded via DirectImport
PATH_TO_SIGMAS = _profiles_root / "sigma3"          # profile folder for DelawareProfileDatabase
PATH_TO_COMPLIST = _profiles_root / "complist.txt"  # component list for DelawareProfileDatabase
# One directory per component, in the same order as the component names
PATH_TO_PROFILES = [PROFILES_API, PROFILES_POLY]
30+
31+
def import_delaware(names, path_to_sigmas, path_to_complist):
    """
    Load sigma profiles through ``cCOSMO.DelawareProfileDatabase``.

    Parameters:
    - names: A list of component names.
    - path_to_sigmas: The directory holding the sigma profiles.
    - path_to_complist: The file path to the component list.

    Returns:
    A tuple ``(COSMO3 object, database object)``.

    Considerations:
    - All sigma profiles must be located in the same directory.
    - All sigma profiles must be listed in complist.txt.
    - Using .xlsx files for adding new profiles and converting them to .txt is recommended.
    - Much of the information in complist.txt is not necessary for importing sigma profiles.
    - Names used in complist.txt do not correspond to unique identifiers.
    - One name corresponds to one identifier, which poses a challenge when dealing with modifications.
    - The same name may be associated with different .sigma files.
    """
    # The database constructor takes the component list first, then the profile folder
    complist = str(path_to_complist)
    sigma_dir = str(path_to_sigmas)
    database = cCOSMO.DelawareProfileDatabase(complist, sigma_dir)
    for component in names:
        database.add_profile(component)
    model = cCOSMO.COSMO3(names, database)
    return model, database
56+
57+
def import_direct(names, paths):
    """
    Directly imports sigma profiles using DirectImport.

    Parameters:
    - names: A list of component names.
    - paths: A list of directories, one per component and in the same order
      as ``names``; ``paths[i]`` is the directory searched for ``names[i]``.

    Returns:
    A tuple containing the COSMO3 object and the database object.

    Raises:
    - ValueError: if ``names`` and ``paths`` do not have the same length.

    Advantages:
    - Profiles can be stored in separate directories.
    - No need for a .txt file listing all profiles.
    - Provides flexibility for testing different sigma profiles for the same component.
    - Enhances the organization of profiles.
    """
    names = list(names)
    paths = list(paths)
    # zip() would silently drop the surplus entries and the missing profile
    # would only surface later, so reject a mismatch up front.
    if len(names) != len(paths):
        raise ValueError(
            f"names and paths must have the same length (got {len(names)} names and {len(paths)} paths)"
        )
    db = cCOSMO.DirectImport()
    for name, path in zip(names, paths):
        db.add_profile(name, str(path))
    return cCOSMO.COSMO3(names, db), db
78+
79+
def display_profile(db, name):
    """
    Print the name, surface area and volume of one profile held by ``db``.

    Any exception raised while fetching or printing the profile is caught and
    reported on standard output instead of being propagated.

    Parameters:
    - db: The database object containing the profiles.
    - name: The name for the profile to display.
    """
    # (label, attribute) pairs, printed in this order
    fields = (
        ("Name", "name"),
        ("Surface Area (A^2)", "A_COSMO_A2"),
        ("Volume (A^3)", "V_COSMO_A3"),
    )
    try:
        profile = db.get_profile(name)
        # Print field by field so that everything before a failing attribute
        # has already been shown when the error message appears.
        for label, attribute in fields:
            print(f"{label}: {getattr(profile, attribute)}")
    except Exception as e:
        print(f"Error retrieving profile for {name}: {e}")
96+
97+
if __name__ == "__main__":
    names = ['SIM', 'PLGA50']

    # Load the same two components once through each database type
    cosmo_delaware, db_delaware = import_delaware(names, PATH_TO_SIGMAS, PATH_TO_COMPLIST)
    cosmo_direct, db_direct = import_direct(names=names, paths=PATH_TO_PROFILES)

    # Show what each database holds, Delaware first and DirectImport second
    for loader_name, database in (
        ("DelawareProfileDatabase", db_delaware),
        ("DirectImport", db_direct),
    ):
        print(f"\nProfiles imported using {loader_name}:")
        for name in names:
            display_profile(database, name)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ID FORMULA CAS# NAME SMILES INCHI INCHIKEY
2+
1 ??? ??? SIM ??? InChI=??? SIM
3+
2 ??? ??? PLGA50 ??? InChI=??? PLGA50

0 commit comments

Comments
 (0)