From 2618e0ac90091d1f3beee0beefd96a1daf9fa592 Mon Sep 17 00:00:00 2001 From: Manuel Cortez Date: Mon, 29 May 2017 02:25:50 +0400 Subject: [PATCH] Language selection for OCR. Fixes #107 --- doc/changelog.md | 1 + src/controller/mainController.py | 7 ++++++- src/controller/settings.py | 6 ++++-- src/extra/ocr/OCRSpace.py | 4 +++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/doc/changelog.md b/doc/changelog.md index c97642a1..3479319d 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -9,6 +9,7 @@ * SndUp users will be able to upload audio in their account by using their API Key again. ([#134](https://github.com/manuelcortez/TWBlue/issues/134)) * old tweets shouldn't be added as new items in buffers. ([#116,](https://github.com/manuelcortez/TWBlue/issues/116)) ([#133](https://github.com/manuelcortez/TWBlue/issues/133)) * All mentionned users should be displayed correctly in Twishort's long tweets. ([#116,](https://github.com/manuelcortez/TWBlue/issues/116)) ([#135](https://github.com/manuelcortez/TWBlue/issues/135)) +* It is possible to select a language for the OCR service from the extras panel, in the account settings dialogue. You can, however, set this to detect automatically. OCR should work better in languages with special characters or non-English symbols. 
([#107](https://github.com/manuelcortez/TWBlue/issues/107)) * And more ([#136,](https://github.com/manuelcortez/TWBlue/issues/136)) ## Changes in version 0.90 diff --git a/src/controller/mainController.py b/src/controller/mainController.py index b208677d..487e9ab6 100644 --- a/src/controller/mainController.py +++ b/src/controller/mainController.py @@ -1598,9 +1598,14 @@ class Controller(object): return else: img = tweet["entities"]["media"][0] + if buffer.session.settings["mysc"]["ocr_language"] != "": + ocr_lang = buffer.session.settings["mysc"]["ocr_language"] + else: + ocr_lang = ocr.OCRSpace.short_langs.index(tweet["lang"]) + ocr_lang = ocr.OCRSpace.OcrLangs[ocr_lang] api = ocr.OCRSpace.OCRSpaceAPI() try: - text = api.OCR_URL(img["media_url"]) + text = api.OCR_URL(img["media_url"], lang=ocr_lang) except ocr.OCRSpace.APIError as er: output.speak(_(u"Unable to extract text")) return diff --git a/src/controller/settings.py b/src/controller/settings.py index b8836d10..f98c5353 100644 --- a/src/controller/settings.py +++ b/src/controller/settings.py @@ -167,6 +167,8 @@ class accountSettingsController(globalSettingsController): self.dialog.set_value("sound", "indicate_img", self.config["sound"]["indicate_img"]) self.dialog.create_extras(OCRSpace.translatable_langs) self.dialog.set_value("extras", "sndup_apiKey", self.config["sound"]["sndup_api_key"]) + language_index = OCRSpace.OcrLangs.index(self.config["mysc"]["ocr_language"]) + self.dialog.extras.ocr_lang.SetSelection(language_index) self.dialog.realize() self.dialog.set_title(_(u"Account settings for %s") % (self.user,)) self.response = self.dialog.get_response() @@ -202,7 +204,7 @@ class accountSettingsController(globalSettingsController): if set(self.config["general"]["buffer_order"]) != set(buffers_list) or buffers_list != self.config["general"]["buffer_order"]: self.needs_restart = True self.config["general"]["buffer_order"] = buffers_list - + self.config["mysc"]["ocr_language"] = 
OCRSpace.OcrLangs[self.dialog.extras.ocr_lang.GetSelection()] # if self.config["other_buffers"]["show_followers"] != self.dialog.get_value("buffers", "followers"): # self.config["other_buffers"]["show_followers"] = self.dialog.get_value("buffers", "followers") # pub.sendMessage("create-new-buffer", buffer="followers", account=self.user, create=self.config["other_buffers"]["show_followers"]) @@ -239,7 +241,7 @@ class accountSettingsController(globalSettingsController): self.config["sound"]["indicate_audio"] = self.dialog.get_value("sound", "indicate_audio") self.config["sound"]["indicate_geo"] = self.dialog.get_value("sound", "indicate_geo") self.config["sound"]["indicate_img"] = self.dialog.get_value("sound", "indicate_img") - self.config["sound"]["sndup_api_key"] = self.dialog.get_value("extras", "apiKey") + self.config["sound"]["sndup_api_key"] = self.dialog.get_value("extras", "sndup_apiKey") self.buffer.session.sound.config = self.config["sound"] self.buffer.session.sound.check_soundpack() self.config.write() diff --git a/src/extra/ocr/OCRSpace.py b/src/extra/ocr/OCRSpace.py index 9249b484..0b4f4821 100644 --- a/src/extra/ocr/OCRSpace.py +++ b/src/extra/ocr/OCRSpace.py @@ -15,12 +15,14 @@ class OCRSpaceAPI(object): self.key = key self.url = url - def OCR_URL(self, url, overlay=False): + def OCR_URL(self, url, overlay=False, lang=None): payload = { 'url': url, 'isOverlayRequired': overlay, 'apikey': self.key, } + if lang != None: + payload.update(language=lang) r = requests.post(self.url, data=payload) result = r.json()['ParsedResults'][0] if result['ErrorMessage']: