Skip to content

Commit f078293

Browse files
fix: Enhance classifier prompt to avoid explanation (#511)
* Enhance classifier prompt to avoid explanation
* Bumped classifier in tool registry
1 parent d2ea640 commit f078293

6 files changed

Lines changed: 21 additions & 22 deletions

File tree

backend/adapter_processor/constants.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@ class AdapterKeys:
2121
X2TEXT_DEFAULT = "x2text_default"
2222
SHARED_USERS = "shared_users"
2323
ADAPTER_NAME_EXISTS = (
24-
"Configuration with this Name already exists. "
25-
"Please try with a different Name"
24+
"Configuration with this name already exists within your organisation. "
25+
"Please try with a different name."
2626
)
2727
ADAPTER_CREATED_BY = "created_by_email"
2828
ADAPTER_CONTEXT_WINDOW_SIZE = "context_window_size"

backend/adapter_processor_v2/constants.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@ class AdapterKeys:
2121
X2TEXT_DEFAULT = "x2text_default"
2222
SHARED_USERS = "shared_users"
2323
ADAPTER_NAME_EXISTS = (
24-
"Configuration with this Name already exists. "
25-
"Please try with a different Name"
24+
"Configuration with this name already exists within your organisation. "
25+
"Please try with a different name"
2626
)
2727
ADAPTER_CREATED_BY = "created_by_email"
2828
ADAPTER_CONTEXT_WINDOW_SIZE = "context_window_size"

tools/classifier/sample.env

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,7 @@
1-
PLATFORM_SERVICE_HOST=
2-
PLATFORM_SERVICE_PORT=
3-
PLATFORM_SERVICE_API_KEY=
4-
TOOL_DATA_DIR=
1+
PLATFORM_SERVICE_HOST=http://unstract-platform-service
2+
PLATFORM_SERVICE_PORT=3001
3+
PLATFORM_SERVICE_API_KEY=<add_platform_key_from_Unstract_frontend>
4+
TOOL_DATA_DIR=../data_dir
55

6-
# Azure OpenAI
7-
OPENAI_API_KEY=
8-
OPENAI_API_BASE=
9-
OPENAI_API_VERSION=
10-
OPENAI_API_ENGINE=
11-
OPENAI_API_MODEL=
6+
X2TEXT_HOST=http://unstract-x2text-service
7+
X2TEXT_PORT=3004

tools/classifier/src/config/properties.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
"schemaVersion": "0.0.1",
33
"displayName": "File Classifier",
44
"functionName": "classify",
5-
"toolVersion": "0.0.26",
5+
"toolVersion": "0.0.27",
66
"description": "Classifies a file into a bin based on its contents",
77
"input": {
88
"description": "File to be classified"

tools/classifier/src/main.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,9 @@ def validate(self, input_file: str, settings: dict[str, Any]) -> None:
2727
if not llm_adapter_instance_id:
2828
self.stream_error_and_exit("Choose an LLM to process the classifier")
2929
if not text_extraction_adapter_id:
30-
self.stream_error_and_exit("Choose an LLM to extract the documents")
30+
self.stream_error_and_exit(
31+
"Choose a text extractor to extract the documents"
32+
)
3133

3234
def run(
3335
self,
@@ -99,9 +101,10 @@ def run(
99101

100102
prompt = (
101103
f"Classify the following text into one of the following categories: {' '.join(bins_with_quotes)}.\n\n" # noqa: E501
102-
f"Your categorization should be strictly exactly one of the items in the " # noqa: E501
103-
f"categories given. Find a semantic match of category if possible. If it does not categorize well " # noqa: E501
104-
f"into any of the listed categories, categorize it as 'unknown'.\n\nText:\n\n{text}\n\n\nCategory:" # noqa: E501
104+
"Your categorization should be strictly exactly one of the items in the " # noqa: E501
105+
"categories given, do not provide any explanation. Find a semantic match of category if possible. " # noqa: E501
106+
"If it does not categorize well into any of the listed categories, categorize it as 'unknown'.\n\n" # noqa: E501
107+
f"Text:\n\n{text}\n\n\nCategory:"
105108
)
106109

107110
settings_string = "".join(str(value) for value in settings.values())

unstract/tool-registry/src/unstract/tool_registry/public_tools.json

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
"schemaVersion": "0.0.1",
66
"displayName": "File Classifier",
77
"functionName": "classify",
8-
"toolVersion": "0.0.26",
8+
"toolVersion": "0.0.27",
99
"description": "Classifies a file into a bin based on its contents",
1010
"input": {
1111
"description": "File to be classified"
@@ -139,9 +139,9 @@
139139
}
140140
},
141141
"icon": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<svg\n enable-background=\"new 0 0 20 20\"\n height=\"48\"\n viewBox=\"0 0 20 20\"\n width=\"48\"\n fill=\"#000000\"\n version=\"1.1\"\n id=\"svg8109\"\n sodipodi:docname=\"folder_copy_black_48dp.svg\"\n xmlns:inkscape=\"http://www.inkscape.org/namespaces/inkscape\"\n xmlns:sodipodi=\"http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd\"\n xmlns=\"http://www.w3.org/2000/svg\"\n xmlns:svg=\"http://www.w3.org/2000/svg\">\n <defs\n id=\"defs8113\" />\n <sodipodi:namedview\n id=\"namedview8111\"\n pagecolor=\"#ffffff\"\n bordercolor=\"#000000\"\n borderopacity=\"0.25\"\n inkscape:showpageshadow=\"2\"\n inkscape:pageopacity=\"0.0\"\n inkscape:pagecheckerboard=\"0\"\n inkscape:deskcolor=\"#d1d1d1\"\n showgrid=\"false\" />\n <g\n id=\"g8099\">\n <rect\n fill=\"none\"\n height=\"20\"\n width=\"20\"\n x=\"0\"\n id=\"rect8097\"\n y=\"0\" />\n </g>\n <g\n id=\"g8107\"\n style=\"fill:#ff4d6d;fill-opacity:1\">\n <g\n id=\"g8105\"\n style=\"fill:#ff4d6d;fill-opacity:1\">\n <path\n d=\"M 2.5,5 H 1 V 15.5 C 1,16.33 1.67,17 2.5,17 H 15.68 V 15.5 H 2.5 Z\"\n id=\"path8101\"\n style=\"fill:#ff4d6d;fill-opacity:1\" />\n <path\n d=\"M 16.5,4 H 11 L 9,2 H 5.5 C 4.67,2 4,2.67 4,3.5 v 9 C 4,13.33 4.67,14 5.5,14 h 11 c 0.83,0 1.5,-0.67 1.5,-1.5 v -7 C 18,4.67 17.33,4 16.5,4 Z m 0,8.5 h -11 v -9 h 2.88 l 2,2 h 6.12 z\"\n id=\"path8103\"\n style=\"fill:#ff4d6d;fill-opacity:1\" />\n </g>\n </g>\n</svg>\n",
142-
"image_url": "docker:unstract/tool-classifier:0.0.26",
142+
"image_url": "docker:unstract/tool-classifier:0.0.27",
143143
"image_name": "unstract/tool-classifier",
144-
"image_tag": "0.0.26"
144+
"image_tag": "0.0.27"
145145
},
146146
"text_extractor": {
147147
"tool_uid": "text_extractor",

0 commit comments

Comments
 (0)