|
3 | 3 | from dependency_injector.containers import DeclarativeContainer |
4 | 4 | from dependency_injector.providers import Factory, List, Singleton # noqa: WOT001 |
5 | 5 |
|
6 | | -from extractor_api_lib.impl.api_endpoints.general_source_extractor import GeneralSourceExtractor |
| 6 | +from extractor_api_lib.impl.api_endpoints.general_file_extractor import ( |
| 7 | + GeneralFileExtractor, |
| 8 | +) |
| 9 | +from extractor_api_lib.impl.api_endpoints.general_source_extractor import ( |
| 10 | + GeneralSourceExtractor, |
| 11 | +) |
7 | 12 | from extractor_api_lib.impl.extractors.confluence_extractor import ConfluenceExtractor |
8 | | -from extractor_api_lib.impl.extractors.file_extractors.ms_docs_extractor import MSDocsExtractor |
| 13 | +from extractor_api_lib.impl.extractors.file_extractors.epub_extractor import ( |
| 14 | + EpubExtractor, |
| 15 | +) |
| 16 | +from extractor_api_lib.impl.extractors.file_extractors.ms_docs_extractor import ( |
| 17 | + MSDocsExtractor, |
| 18 | +) |
9 | 19 | from extractor_api_lib.impl.extractors.file_extractors.pdf_extractor import PDFExtractor |
10 | 20 | from extractor_api_lib.impl.extractors.file_extractors.xml_extractor import XMLExtractor |
11 | | -from extractor_api_lib.impl.api_endpoints.general_file_extractor import GeneralFileExtractor |
12 | 21 | from extractor_api_lib.impl.extractors.sitemap_extractor import SitemapExtractor |
13 | 22 | from extractor_api_lib.impl.file_services.s3_service import S3Service |
14 | 23 | from extractor_api_lib.impl.mapper.confluence_langchain_document2information_piece import ( |
|
17 | 26 | from extractor_api_lib.impl.mapper.internal2external_information_piece import ( |
18 | 27 | Internal2ExternalInformationPiece, |
19 | 28 | ) |
20 | | -from extractor_api_lib.impl.mapper.sitemap_document2information_piece import SitemapLangchainDocument2InformationPiece |
| 29 | +from extractor_api_lib.impl.mapper.langchain_document2information_piece import ( |
| 30 | + LangchainDocument2InformationPiece, |
| 31 | +) |
| 32 | +from extractor_api_lib.impl.mapper.sitemap_document2information_piece import ( |
| 33 | + SitemapLangchainDocument2InformationPiece, |
| 34 | +) |
21 | 35 | from extractor_api_lib.impl.settings.pdf_extractor_settings import PDFExtractorSettings |
22 | 36 | from extractor_api_lib.impl.settings.s3_settings import S3Settings |
23 | 37 | from extractor_api_lib.impl.table_converter.dataframe2markdown import DataFrame2Markdown |
@@ -45,8 +59,11 @@ class DependencyContainer(DeclarativeContainer): |
45 | 59 |
|
46 | 60 | intern2external = Singleton(Internal2ExternalInformationPiece) |
47 | 61 | confluence_document2information_piece = Singleton(ConfluenceLangchainDocument2InformationPiece) |
| 62 | + langchain_document2information_piece = Singleton(LangchainDocument2InformationPiece) |
48 | 63 | sitemap_document2information_piece = Singleton(SitemapLangchainDocument2InformationPiece) |
49 | | - file_extractors = List(pdf_extractor, ms_docs_extractor, xml_extractor) |
| 64 | + epub_extractor = Singleton(EpubExtractor, file_service, langchain_document2information_piece) |
| 65 | + |
| 66 | + file_extractors = List(pdf_extractor, ms_docs_extractor, xml_extractor, epub_extractor) |
50 | 67 |
|
51 | 68 | general_file_extractor = Singleton(GeneralFileExtractor, file_service, file_extractors, intern2external) |
52 | 69 | confluence_extractor = Singleton(ConfluenceExtractor, mapper=confluence_document2information_piece) |
|
0 commit comments