Skip to content

Commit f24a245

Browse files
committed
Add interactive language selection and command-line options for OCR4Linux
- Implemented interactive language selection using rofi in the shell script.
- Added support for specifying languages via command-line arguments.
- Updated README.md to reflect new features and usage instructions.
1 parent 021b230 commit f24a245

3 files changed

Lines changed: 283 additions & 59 deletions

File tree

OCR4Linux.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class TesseractConfig:
4646
4747
langs (str): The languages to be used by Tesseract for OCR.
4848
custom_config (str): Custom configuration string for Tesseract.
49-
ouput_encoding (str): The encoding to be used for the output file.
49+
output_encoding (str): The encoding to be used for the output file.
5050
5151
Methods:
5252
__init__(self, image_path: str, output_path: str):
@@ -60,28 +60,37 @@ class TesseractConfig:
6060
text extraction, and saves the extracted text to an output file. Returns 0 if successful, 1 otherwise.
6161
"""
6262

63-
def __init__(self, image_path: str, output_path: str):
63+
def __init__(self, image_path: str, output_path: str, langs: str = None):
6464
"""
6565
Initializes the OCR4Linux class with command-line arguments.
6666
6767
Attributes:
6868
image_path (str): The path to the input image file.
6969
output_path (str): The path to the output file where results will be saved.
70+
langs (str): The languages to be used by Tesseract for OCR (optional).
7071
oem_mode (int): The OCR Engine Mode (OEM) for Tesseract.
7172
psm_mode (int): The Page Segmentation Mode (PSM) for Tesseract.
72-
langs (str): The languages to be used by Tesseract for OCR.
7373
custom_config (str): Custom configuration string for Tesseract.
74-
ouput_encoding (str): The encoding to be used for the output file.
74+
output_encoding (str): The encoding to be used for the output file.
7575
"""
7676
self.image_path = image_path
7777
self.output_path = output_path
7878
self.oem_mode = 3 # Default LSTM engine
7979
self.psm_mode = 6 # Uniform block of text
8080
self.available_langs = pytesseract.get_languages()
81-
self.langs = '+'.join(filter(None, self.available_langs)
82-
) if self.available_langs else 'eng'
81+
82+
# Use provided languages or default to all available languages
83+
if langs and langs.strip():
84+
self.langs = langs
85+
print(f"Using specified languages: {langs}", file=sys.stderr)
86+
else:
87+
self.langs = '+'.join(filter(None, self.available_langs)
88+
) if self.available_langs else 'eng'
89+
print(
90+
f"Using all available languages: {self.langs}", file=sys.stderr)
91+
8392
self.custom_config = f'--oem {self.oem_mode} --psm {self.psm_mode}'
84-
self.ouput_encoding = 'utf-8'
93+
self.output_encoding = 'utf-8'
8594

8695
def extract_text_with_lines(self, image: Image) -> str:
8796
"""
@@ -115,7 +124,7 @@ def main(self) -> int:
115124
extracted_text = self.extract_text_with_lines(image)
116125

117126
# Save the extracted text to a file
118-
with open(self.output_path, 'w', encoding=self.ouput_encoding) as file:
127+
with open(self.output_path, 'w', encoding=self.output_encoding) as file:
119128
file.write(extracted_text)
120129

121130
return 0
@@ -149,19 +158,21 @@ def __init__(self):
149158
" and text extraction using Tesseract OCR. The script takes an input\n" + \
150159
" based on the language in the image."
151160
self.useges = [
152-
"python OCR4Linux.py <image_path> <output_path>",
161+
"python OCR4Linux.py <image_path> <output_path> [--langs <languages>]",
153162
"python OCR4Linux.py [-l | --list-langs]",
154163
"python OCR4Linux.py [-h | --help]"
155164
]
156165
self.examples = [
157166
"python OCR4Linux.py screenshot.png output.txt",
167+
"python OCR4Linux.py screenshot.png output.txt --langs eng+fra+deu",
158168
"python OCR4Linux.py -l",
159169
"python OCR4Linux.py -h"
160170
]
161171
self.arguments = [
162172
"file_path: Path to the python script",
163173
"image_path: Path to the image file",
164174
"output_path: Path to the output text file",
175+
"--langs: Specify languages for OCR (e.g., eng+fra+deu)",
165176
"-l, --list-langs: List all available languages for OCR in the system",
166177
"-h, --help: Display this help message, then exit"
167178
]
@@ -199,20 +210,24 @@ def check_arguments(self) -> int:
199210
Checks the command line arguments for validity.
200211
201212
Handles the following options:
202-
- Standard usage: <image_path> <output_path>
213+
- Standard usage: <image_path> <output_path> [--langs <languages>]
203214
- Help: -h or --help
204215
- List languages: -l or --list-langs
205216
206217
Returns:
207-
bool: True if arguments are valid, False otherwise.
218+
int: 0 if help/list was shown, 1 if error, 2 if valid arguments for processing.
208219
"""
209220
if len(sys.argv) == 2 and sys.argv[1] in ['-l', '--list-langs']:
210221
self.list_available_languages()
211222
return 0
212223
elif len(sys.argv) == 2 and sys.argv[1] in ['-h', '--help']:
213224
self.help()
214225
return 0
215-
elif len(sys.argv) != self.args_num:
226+
elif len(sys.argv) < self.args_num or len(sys.argv) > 5:
227+
# Valid patterns:
228+
# 3 args: script image_path output_path
229+
# 4 args: script image_path output_path --langs=languages
230+
# 5 args: script image_path output_path --langs languages
216231
self.help()
217232
return 1
218233
return 2
@@ -252,7 +267,8 @@ def main(self):
252267
This function performs the following steps:
253268
1. Checks if the correct number of arguments is provided.
254269
2. Verifies if the image file exists.
255-
3. Creates an instance of the TesseractConfig class and runs the OCR process.
270+
3. Parses language arguments if provided.
271+
4. Creates an instance of the TesseractConfig class and runs the OCR process.
256272
257273
Returns:
258274
int: Returns 1 if there is an error with the arguments or image path, otherwise returns the result of the TesseractConfig main function.
@@ -268,8 +284,15 @@ def main(self):
268284
if not self.check_image_path(sys.argv[1]):
269285
return 1
270286

287+
# Parse language arguments
288+
langs = None
289+
if len(sys.argv) >= 4 and sys.argv[3] == '--langs' and len(sys.argv) == 5:
290+
langs = sys.argv[4]
291+
elif len(sys.argv) == 4 and sys.argv[3].startswith('--langs='):
292+
langs = sys.argv[3].split('=', 1)[1]
293+
271294
# Create an instance of the TesseractConfig class
272-
tesseract = TesseractConfig(sys.argv[1], sys.argv[2])
295+
tesseract = TesseractConfig(sys.argv[1], sys.argv[2], langs)
273296
return tesseract.main()
274297

275298

OCR4Linux.sh

Lines changed: 119 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
# - tesseract-ocr: For text extraction
2424
# - grimblast/scrot: For screenshot capture
2525
# - wl-clipboard/xclip: For clipboard operations
26+
# - rofi: For language selection menu
2627
# - Python 3.x: For image processing
2728
#
2829
# Usage:
@@ -39,37 +40,61 @@ LOGS_FILE_NAME="OCR4Linux.log"
3940
SLEEP_DURATION=0.5
4041
REMOVE_SCREENSHOT=false
4142
KEEP_LOGS=false
43+
LANG_SPECIFIED=false
44+
SPECIFIED_LANGS=""
45+
46+
langs=()
4247

4348
# Display help message
4449
show_help() {
4550
echo "Usage: $(basename "$0") [OPTIONS]"
4651
echo "Options:"
47-
echo " -r Remove screenshot in the screenshot directory"
48-
echo " -d DIRECTORY Set screenshot directory (default: $SCREENSHOT_DIRECTORY)"
49-
echo " -l Keep logs"
50-
echo " -h Show this help message, then exit"
52+
echo " -r Remove screenshot in the screenshot directory"
53+
echo " -d DIRECTORY Set screenshot directory (default: $SCREENSHOT_DIRECTORY)"
54+
echo " -l Keep logs"
55+
echo " --lang LANGUAGES Specify OCR languages (e.g., 'all', 'eng', 'eng+ara')"
56+
echo " -h Show this help message, then exit"
5157
echo "Example:"
52-
echo " OCR4Linux.sh -s -d $HOME/screenshots -l"
53-
echo " OCR4Linux.sh -s -l"
58+
echo " OCR4Linux.sh -d $HOME/screenshots -l"
59+
echo " OCR4Linux.sh --lang eng+ara"
60+
echo " OCR4Linux.sh --lang all -l"
5461
echo " OCR4Linux.sh -h"
5562
echo "Note:"
56-
echo " if you run \`OCR4Linux.sh\` only without any arguments, it will save the screenshot in the default directory $SCREENSHOT_DIRECTORY."
63+
echo " - If --lang is not specified, an interactive language selection menu will appear"
64+
echo " - Use 'all' to select all available languages"
65+
echo " - Use '+' to separate multiple languages (e.g., 'eng+ara+fra')"
66+
echo " - Without arguments, screenshots are saved to $SCREENSHOT_DIRECTORY"
5767
}
5868

5969
# Parse command line arguments
60-
while getopts "rd:lh" opt; do
61-
case $opt in
62-
r) REMOVE_SCREENSHOT=true ;;
63-
d) SCREENSHOT_DIRECTORY="$OPTARG" ;;
64-
l) KEEP_LOGS=true ;;
65-
h)
66-
show_help
67-
exit 0
68-
;;
69-
*)
70-
show_help
71-
exit 1
72-
;;
70+
while [[ $# -gt 0 ]]; do
71+
case $1 in
72+
-r)
73+
REMOVE_SCREENSHOT=true
74+
shift
75+
;;
76+
-d)
77+
SCREENSHOT_DIRECTORY="$2"
78+
shift 2
79+
;;
80+
-l)
81+
KEEP_LOGS=true
82+
shift
83+
;;
84+
--lang)
85+
SPECIFIED_LANGS="$2"
86+
LANG_SPECIFIED=true
87+
shift 2
88+
;;
89+
-h)
90+
show_help
91+
exit 0
92+
;;
93+
*)
94+
echo "Unknown option: $1"
95+
show_help
96+
exit 1
97+
;;
7398
esac
7499
done
75100

@@ -89,6 +114,12 @@ log_message() {
89114
check_if_files_exist() {
90115
log_message "Checking required files and directories..."
91116

117+
# Check if rofi is installed
118+
if ! command -v rofi &> /dev/null; then
119+
log_message "ERROR: rofi is not installed. Please install rofi to use language selection."
120+
exit 1
121+
fi
122+
92123
# Validate screenshot directory
93124
if [ ! -d "$SCREENSHOT_DIRECTORY" ]; then
94125
log_message "Creating screenshot directory: $SCREENSHOT_DIRECTORY since it does not exist."
@@ -112,6 +143,50 @@ check_if_files_exist() {
112143
fi
113144
}
114145

146+
# Process specified languages from command line
147+
process_specified_langs() {
148+
log_message "Processing specified languages: $SPECIFIED_LANGS"
149+
150+
# Handle "all" case
151+
if [[ "$SPECIFIED_LANGS" == "all" ]]; then
152+
mapfile -t langs < <(tesseract --list-langs | awk 'FNR>1')
153+
log_message "Using ALL available languages: $(IFS=+ ; echo "${langs[*]}")"
154+
else
155+
# Split the language string by '+' and populate the langs array
156+
IFS='+' read -ra langs <<< "$SPECIFIED_LANGS"
157+
log_message "Using specified languages: $(IFS=+ ; echo "${langs[*]}")"
158+
159+
# Validate that the specified languages are available
160+
available_langs=$(tesseract --list-langs | awk 'FNR>1')
161+
for lang in "${langs[@]}"; do
162+
if ! echo "$available_langs" | grep -q "^$lang$"; then
163+
log_message "WARNING: Language '$lang' is not available on this system"
164+
fi
165+
done
166+
fi
167+
}
168+
169+
# Choose languages for OCR using rofi
170+
choose_lang() {
171+
log_message "Fetching available languages for OCR selection..."
172+
173+
# Get available languages and add "ALL" option at the beginning
174+
mapfile -t langs < <(tesseract --list-langs | awk 'BEGIN {print "ALL" } FNR>1' | rofi -dmenu -multi-select -p "Select OCR Languages:")
175+
176+
if [ ${#langs[@]} -eq 0 ]; then
177+
log_message "CANCELLED: User aborted language selection"
178+
exit 1
179+
fi
180+
181+
# If "ALL" is selected, use all available languages
182+
if [[ " ${langs[*]} " =~ " ALL " ]]; then
183+
mapfile -t langs < <(tesseract --list-langs | awk 'FNR>1')
184+
log_message "Selected ALL languages: $(IFS=+ ; echo "${langs[*]}")"
185+
else
186+
log_message "Selected languages: $(IFS=+ ; echo "${langs[*]}")"
187+
fi
188+
}
189+
115190
# take shots using grimblast for wayland
116191
takescreenshot_wayland() {
117192
sleep $SLEEP_DURATION
@@ -136,9 +211,22 @@ takescreenshot() {
136211

137212
# Pass the screenshot to OCR tool to extract text from the image.
138213
extract_text() {
139-
python "$OCR4Linux_HOME/$OCR4Linux_PYTHON_NAME" \
140-
"$SCREENSHOT_DIRECTORY/$SCREENSHOT_NAME" \
141-
"$OCR4Linux_HOME/$TEXT_OUTPUT_FILE_NAME"
214+
# Create language string for passing to Python script
215+
local lang_string=""
216+
if [ ${#langs[@]} -gt 0 ]; then
217+
lang_string=$(IFS=+; echo "${langs[*]}")
218+
fi
219+
220+
if [ -n "$lang_string" ]; then
221+
python "$OCR4Linux_HOME/$OCR4Linux_PYTHON_NAME" \
222+
"$SCREENSHOT_DIRECTORY/$SCREENSHOT_NAME" \
223+
"$OCR4Linux_HOME/$TEXT_OUTPUT_FILE_NAME" \
224+
--langs "$lang_string"
225+
else
226+
python "$OCR4Linux_HOME/$OCR4Linux_PYTHON_NAME" \
227+
"$SCREENSHOT_DIRECTORY/$SCREENSHOT_NAME" \
228+
"$OCR4Linux_HOME/$TEXT_OUTPUT_FILE_NAME"
229+
fi
142230
log_message "Text extraction completed successfully"
143231
}
144232

@@ -175,6 +263,14 @@ remove_image() {
175263
# Run the functions
176264
main() {
177265
check_if_files_exist
266+
267+
# Handle language selection
268+
if [ "$LANG_SPECIFIED" = true ]; then
269+
process_specified_langs
270+
else
271+
choose_lang
272+
fi
273+
178274
takescreenshot
179275
extract_text
180276
run_copy_to_clipboard

0 commit comments

Comments
 (0)