8 changes: 8 additions & 0 deletions src/somef/export/json_export.py
@@ -974,6 +974,14 @@ def unify_results(repo_data: dict) -> dict:
            else:
                canonical = canonicalize_value(value, value_type)
                key = str(canonical)
        elif category == constants.CAT_REQUIREMENTS:
            req_name = result.get("name", "").strip().lower()
            req_version = result.get("version", "").strip()
            if req_name:
                key = f"REQ-{req_name}-{req_version}"
            else:
                canonical = canonicalize_value(value, value_type)
                key = str(canonical)
        else:
            # Normal behavior for the rest of the categories
            canonical = canonicalize_value(value, value_type)
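For context, here is a standalone sketch of the keying scheme above, using hypothetical inputs that mirror the 'pracma' example from the new test. The helper name `requirement_key` is illustrative only, not part of somef:

```python
# Illustrative only: reproduces the REQ-<name>-<version> key built above,
# outside of unify_results, to show how two raw forms collapse to one entry.
def requirement_key(result: dict) -> str:
    req_name = result.get("name", "").strip().lower()
    req_version = result.get("version", "").strip()
    return f"REQ-{req_name}-{req_version}" if req_name else str(result.get("value"))

# Raw values differ ('pracma (>= 2.0.7)' vs 'pracma==>= 2.0.7'), but the
# normalized (name, version) pair yields the same key, so the entries merge.
a = {"name": "pracma", "version": ">= 2.0.7", "value": "pracma (>= 2.0.7)"}
b = {"name": "Pracma", "version": ">= 2.0.7", "value": "pracma==>= 2.0.7"}
assert requirement_key(a) == requirement_key(b)  # both: 'REQ-pracma->= 2.0.7'
```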
18 changes: 15 additions & 3 deletions src/somef/header_analysis.py
@@ -138,13 +138,25 @@ def extract_header_content(text: str) -> Tuple[pd.DataFrame, str | None]:

    content, none_header_content = mardown_parser.extract_content_per_header(text, headers)
    parents = mardown_parser.extract_headers_parents(text)

    # parent_list = [parents.get(h) for h in header_list]

    content_map = dict(zip(header_list, content))

    aligned_content = [content_map[h] for h in header_list]
    aligned_parents = [parents.get(h) for h in header_list]

    # df = pd.DataFrame({
    #     'Header': header_list,
    #     'Content': content,
    #     'ParentHeader': [parents.get(h) for h in header_list],
    # })

    df = pd.DataFrame({
        'Header': header_list,
        'Content': aligned_content,
        'ParentHeader': aligned_parents,
    })

    # df['Content'].replace('', np.nan, inplace=True)
    df['Content'] = df['Content'].replace('', np.nan)
    df = df.dropna(subset=['Content'])
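As a quick illustration of the alignment fix above, here is a minimal sketch with toy data. The header names and contents are made up; only the zip/lookup pattern matches the change:

```python
# Toy demonstration of keeping Header, Content and ParentHeader aligned by
# looking both columns up per header instead of relying on list order.
import pandas as pd

header_list = ["Installation", "Usage"]
content = ["pip install somef", "somef describe -r <url>"]
parents = {"Usage": "Installation"}  # headers with no parent are simply absent

content_map = dict(zip(header_list, content))
df = pd.DataFrame({
    'Header': header_list,
    'Content': [content_map[h] for h in header_list],
    'ParentHeader': [parents.get(h) for h in header_list],
})
print(df)  # ParentHeader is None for 'Installation', 'Installation' for 'Usage'
```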
25 changes: 22 additions & 3 deletions src/somef/parser/mardown_parser.py
@@ -19,9 +19,13 @@ def extract_headers(original_text):
        if is_header(line):
            #text = re.sub(regex, '', line)
            text = get_tag_content(line)
            # Check if the header is actually a separator such as -----------
            if is_separator_header(text):
                index += 1
                continue
            if index + 1 >= limit:
                output[text] = True
            elif not splitted[index + 1].startswith("<h") or not is_header(splitted[index + 1]):
                output[text] = True
            else:
                output[text] = False
@@ -40,10 +44,23 @@ def extract_headers_with_tags(original_text):
        line = splitted[index]
        if line.startswith("<h"):
            if is_header(line):
                # ignore separators
                if is_separator_header(get_tag_content(line)):
                    index += 1
                    continue
                output.append(replace_html_tags(line))
        index += 1
    return output

def is_separator_header(text):
    """Detects false headers generated by separators (--- or ===)
    that are incorrectly converted to <h2> elements by the Markdown parser."""
    return re.match(r'^[-=]+$', text.strip()) is not None


def remove_separator_lines(text: str) -> str:
    """Removes separator lines (--- or ===) from extracted content."""
    lines = text.split('\n')
    return '\n'.join(line for line in lines if not is_separator_header(line))
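A quick usage illustration of the two helpers above, with toy strings (assuming the module-level `import re` already present in this file):

```python
# Separator-only strings are flagged as false headers; real titles are not.
assert is_separator_header("-----") is True
assert is_separator_header("===") is True
assert is_separator_header("Installation") is False

# Separator lines are stripped from extracted section content.
text = "Video tutorial\n------------------------------\nsome content"
assert remove_separator_lines(text) == "Video tutorial\nsome content"
```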

def extract_content_per_header(original_text, headers):
    keys = list(headers.keys())
@@ -86,7 +103,9 @@ def extract_content_per_header(original_text, headers):
        header_content = get_text(top_index + offset, -1, text_tokenized)
        if header_content.startswith('\n'):
            header_content = header_content[1:]
        if headers[top]:
            content.append(header_content)
        # content.append(header_content)
        output[top] = header_content
    return content, none_header_content

@@ -120,7 +139,7 @@ def get_text(init_index, end_index, text_tokenized):
    while init_index < end_index:
        output = output + '\n' + text_tokenized[init_index]
        init_index += 1
    return remove_separator_lines(output)


def extract_bash(text):
128 changes: 128 additions & 0 deletions src/somef/test/test_JSON_export.py
@@ -874,5 +874,133 @@ def test_issue_953_publication_reconciliation(self):
        assert bibtex_cit is not None, "Bibtex citation (Text_excerpt) not found"
        assert bibtex_cit["result"]["format"] == "bibtex"
        assert bibtex_cit["result"]["year"] == "2019"

        os.remove(output_path)


    def test_issue_reconciliation_workloop(self):
        """Checks that key metadata is correctly extracted and reconciled from codemeta.json and README.
        Issue with canonicalization of values."""

        output_path = test_data_path + "test_issue_reconciliation_workloopr.json"

        somef_cli.run_cli(threshold=0.8,
                          ignore_classifiers=False,
                          repo_url=None,
                          local_repo=test_data_repositories + "workloopr",
                          doc_src=None,
                          in_file=None,
                          output=output_path,
                          graph_out=None,
                          graph_format="turtle",
                          codemeta_out=None,
                          pretty=True,
                          missing=False,
                          readme_only=False)

        with open(output_path, "r") as text_file:
            json_content = json.loads(text_file.read())

        assert constants.CAT_AUTHORS in json_content
        authors = json_content[constants.CAT_AUTHORS]
        assert len(authors) == 2
        author_names = [a["result"]["value"] for a in authors]
        assert "Vikram B. Baliga" in author_names
        assert "Shreeram Senthivasan" in author_names

        assert constants.CAT_INSTALLATION in json_content
        installation = json_content[constants.CAT_INSTALLATION]
        assert any("devtools::install_github" in i["result"]["value"] for i in installation)

        assert constants.CAT_CITATION in json_content
        citations = json_content[constants.CAT_CITATION]
        # print(citations)
        assert any("workloopR" in c["result"]["value"] for c in citations)

        os.remove(output_path)



    def test_issue_reconciliation_cropwater(self):
        """Checks that key metadata is correctly extracted and reconciled from codemeta.json and README.
        Issue with canonicalization of values."""

        output_path = test_data_path + "test_issue_reconciliation_cropwater.json"

        somef_cli.run_cli(threshold=0.8,
                          ignore_classifiers=False,
                          repo_url=None,
                          local_repo=test_data_repositories + "cropwater",
                          doc_src=None,
                          in_file=None,
                          output=output_path,
                          graph_out=None,
                          graph_format="turtle",
                          codemeta_out=None,
                          pretty=True,
                          missing=False,
                          readme_only=False)

        with open(output_path, "r") as text_file:
            json_content = json.loads(text_file.read())

        assert constants.CAT_AUTHORS in json_content
        authors = json_content[constants.CAT_AUTHORS]
        assert len(authors) == 5
        author_names = [a["result"]["value"] for a in authors]
        assert "Gabriel Constantino Blain" in author_names
        assert "Adam H. Sparks" in author_names

        assert constants.CAT_INSTALLATION in json_content
        installation = json_content[constants.CAT_INSTALLATION]
        assert any("gabrielblain/CropWaterBalance" in i["result"]["value"] for i in installation)

        assert constants.CAT_LICENSE in json_content
        licenses = json_content[constants.CAT_LICENSE]
        spdx = next((l for l in licenses if l["result"].get("spdx_id") == "MIT"), None)
        assert spdx is not None, "MIT license not found"

        assert constants.CAT_DESCRIPTION in json_content

        os.remove(output_path)


    def test_issue_980_reconciliation_requirements(self):
        """Checks that requirements with the same name and version are correctly reconciled
        into a single entry merging sources, even when their raw values are not the same
        (e.g. 'pracma (>= 2.0.7)' vs 'pracma==>= 2.0.7')."""

        output_path = test_data_path + "test_issue_980_reconciliation_requirements.json"

        somef_cli.run_cli(threshold=0.8,
                          ignore_classifiers=False,
                          repo_url=None,
                          local_repo=test_data_repositories + "ggstatsplot",
                          doc_src=None,
                          in_file=None,
                          output=output_path,
                          graph_out=None,
                          graph_format="turtle",
                          codemeta_out=None,
                          pretty=True,
                          missing=False,
                          readme_only=False)

        with open(output_path, "r") as text_file:
            json_content = json.loads(text_file.read())

        requirements = json_content[constants.CAT_REQUIREMENTS]
        # print(requirements)

        seen_keys = [(r["result"]["name"], r["result"].get("version", "")) for r in requirements]
        unique_keys = set(seen_keys)

        assert len(seen_keys) == len(unique_keys), (
            f"Duplicate requirements found: "
            f"{[k for k in unique_keys if seen_keys.count(k) > 1]}"
        )

        os.remove(output_path)


123 changes: 123 additions & 0 deletions src/somef/test/test_data/README-laueNN.md
@@ -0,0 +1,123 @@

![til](https://github.com/BM32ESRF/LaueNN/blob/main/docs/idea_lauenn/frames_medres.gif)

[![Conda](https://img.shields.io/conda/pn/bm32esrf/lauetoolsnn?color=green&label=supported%20platform)](https://anaconda.org/bm32esrf/lauetoolsnn)
[![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/BM32ESRF/LaueNN?color=blue&label=Github%20tag)](https://github.com/BM32ESRF/LaueNN)

[![Lint, test, build, and publish](https://github.com/BM32ESRF/LaueNN/actions/workflows/complete_workflow.yml/badge.svg)](https://github.com/BM32ESRF/LaueNN/actions/workflows/complete_workflow.yml)
[![PyPI](https://img.shields.io/pypi/v/lauetoolsnn)](https://pypi.python.org/pypi/lauetoolsnn/)
[![PyPI pyversions](https://img.shields.io/pypi/pyversions/lauetoolsnn.svg)](https://pypi.python.org/pypi/lauetoolsnn/)

[![Anaconda-Server Badge](https://anaconda.org/bm32esrf/lauetoolsnn/badges/license.svg)](https://anaconda.org/bm32esrf/lauetoolsnn)
[![Conda](https://img.shields.io/conda/v/bm32esrf/lauetoolsnn?style=flat-square)](https://anaconda.org/bm32esrf/lauetoolsnn)


[![contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/BM32ESRF/LaueNN/issues)


# lauetoolsnn/LaueNN
An autonomous feed-forward neural network (FFNN) model to predict the HKL indices in single-grain, multi-grain, and multi-phase Laue patterns with high efficiency and accuracy is introduced.

Laue diffraction indexation (especially for Laue images comprising diffraction signals from several polycrystals or multi-phase materials) can be a very tedious and CPU-intensive process. To tackle this, LaueNN (or LauetoolsNN) was developed, employing the power of neural networks to speed up part of the indexation process. The LaueNN presentation (https://github.com/BM32ESRF/LaueNN/tree/main/presentations/LaueNN_presentation.pdf) describes the several steps of Laue pattern indexation with the classical approach. We have replaced the most CPU-intensive step with neural networks: the step where the Laue indices hkl of each spot are determined. Alongside the hkl index of each spot, the neural network also predicts the material that spot belongs to, which can be useful for Laue images comprising diffraction signals from multiple phases.
LaueNN uses the existing modules of Lauetools to generate simulated Laue patterns. The whole workflow and the application of this tool are illustrated in this article: https://onlinelibrary.wiley.com/iucr/doi/10.1107/S1600576722004198

For classical indexation of Laue patterns (GUI and scripts), check out the sister package: https://github.com/BM32ESRF/lauetools


### Video tutorial
------------------------------
- Video 1: Working with Jupyter notebook scripts: https://cloud.esrf.fr/s/6q4DJfAn7K46BGN
- Video 2: Working with the lauetoolsnn GUI: https://cloud.esrf.fr/s/AeGow4CoqZRJiyx


### Requirements (latest version of each library accessed on 03/04/2022)
------------------------------
- PyQt5 (GUI)
- matplotlib
- Keras
- tensorflow
- numpy
- scipy (scipy transform rotation is used)
- h5py (required for writing neural network model files)
- scikit-learn (required for generating trained model classification reports)
- fabio (used for opening raw Laue tiff images)
- networkx (to be replaced with numpy in the future)
- scikit-image (used for hough based analysis of Laue patterns)
- tqdm (required only for notebook scripts)
- opencv (for LoG-based peak search)
- pandas and pytables (for writing pickle to h5)


### Installation
------------------------------
Lauetoolsnn can be installed either via PyPI or conda using the following commands in a terminal (this installs all dependencies automatically):

https://pypi.org/project/lauetoolsnn/

https://anaconda.org/bm32esrf/lauetoolsnn

``` bash
$ pip install lauetoolsnn
# or
$ conda install -c bm32esrf lauetoolsnn -c conda-forge
```
For macOS users, please use the conda installation to avoid build errors; alternatively, the package can be compiled and installed locally via the setup.py file. Download the GitHub repository and type the following in a terminal. In this case, the dependencies have to be installed manually. The latest version of each dependency works as of 01/04/2022.
``` bash
$ python setup.py install
```
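A quick way to confirm the installation succeeded, whichever route was used (a hedged sketch; the `__version__` attribute is an assumption and may not exist in all releases):

```python
# Post-install sanity check: import the package installed above.
# 'lauetoolsnn' is the PyPI/conda package name; __version__ is assumed.
import lauetoolsnn
print(getattr(lauetoolsnn, "__version__", "lauetoolsnn imported successfully"))
```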

See procedure_usage_lauetoolsnn.pdf for installation and for how to write the configuration file to be used with the GUI.
This project is also hosted on SourceForge: https://lauetoolsnn.sourceforge.io


### Documentation
------------------------------
Documentation (under construction) for lauetoolsnn/lauenn is available on the following webpage:
https://lauenn.readthedocs.io/en/latest/


### Example case
------------------------------
Two example case studies are included in the lauetoolsnn\examples folder.
Run the GUI either by launching it directly from the terminal using the 'lauetoolsnn' command or by running it locally with the `python lauetoolsneuralnetwork.py` command.

The first step is to load the config.txt from the example folder; it sets all the values of the GUI for the case study.
In the GUI:
- Step 1: File --> load config. Select the config file from the example directory.
- Step 1a: If the config file is not available, one can set parameters in the configure parameters window directly.
- Step 2: Press the configure parameters button and press the Accept button at the end (the values are loaded from the config file).
- Step 3: Press the Generate Training dataset button. This will generate the training and validation datasets for the neural network.
- Step 4: Press the Train Neural network button. This will start the training process and, once finished, will save the trained model.
- Step 5: Press the Live prediction with IPF map button to start the prediction on the predefined experimental dataset. An example datafile is included in the examples folder.
- Step 6: Once analyzed, the results can be saved using the save results button.

In addition, all the above-mentioned steps can be done without the GUI; they are detailed in the lauetoolsnn\example_notebook_scripts folder.
Jupyter notebook scripts are provided to run all the steps sequentially.

The indexed orientation matrix is also written in ".ctf" format, which can then be opened with Channel 5, Aztec, or MTEX software for post-processing related to orientation analysis. An MTEX post-processing script is also included at lauetoolsnn\util_script\MTEX_plot.m.


### Citation
------------------------------
If you use this software, please cite it using the metadata available in the citation_bibtex.cff file in the repository root.
```
Purushottam Raj Purohit, R. R. P., Tardif, S., Castelnau, O., Eymery, J., Guinebretiere, R., Robach, O., Ors, T. & Micha, J.-S. (2022). J. Appl. Cryst. 55, 737-750.
```


### Known Issues
------------------------------
So far, there is an issue with the H5py and HDF5 versions in the Windows installation with conda. If an H5py version-mismatch error appears after conda installation, please try "pip install lauetoolsnn" on Windows, as this should not have this problem. The other possibility is to install H5py with pip before or after installing lauetoolsnn with conda.


### Support
------------------------------
Do not hesitate to contact the development team at [purushot@esrf.fr](mailto:purushot@esrf.fr) or [micha@esrf.fr](mailto:micha@esrf.fr).

### Funding
------------------------------
This code was developed as a result of a French-German project funded respectively by the ANR and DFG (HoTMiX project, reference number ANR-19-CE09-0035-01): https://www.bam.de/Content/EN/Projects/HoTMiX/hotmix.html

### Maintainer(s)
------------------------------
* [Ravi PURUSHOTTAM](https://github.com/ravipurohit1991)