Added OCR support via the ocr.space API. Closes #93

2025-07-17 21:56:07 -04:00 · 2017-01-03 08:30:58 -06:00
parent 516acb501a
commit ec58d02bb3
10 changed files with 78 additions and 6 deletions
--- a/src/controller/mainController.py
+++ b/src/controller/mainController.py
@@ -6,7 +6,7 @@ if system == "Windows":
 from update import updater
 from wxUI import (view, dialogs, commonMessageDialogs, sysTrayIcon)
 import settings
- from extra import SoundsTutorial
+ from extra import SoundsTutorial, ocr
 import keystrokeEditor
 from keyboard_handler.wx_handler import WXKeyboardHandler
 import userActionsController
@@ -142,7 +142,7 @@ class Controller(object):
   widgetUtils.connect_event(self.view, widgetUtils.MENU, self.find, menuitem=self.view.find)
   widgetUtils.connect_event(self.view, widgetUtils.MENU, self.accountConfiguration, menuitem=self.view.account_settings)
   widgetUtils.connect_event(self.view, widgetUtils.MENU, self.configuration, menuitem=self.view.prefs)
-
+   widgetUtils.connect_event(self.view, widgetUtils.MENU, self.ocr_image, menuitem=self.view.ocr)
  widgetUtils.connect_event(self.view, widgetUtils.MENU, self.learn_sounds, menuitem=self.view.sounds_tutorial)
  widgetUtils.connect_event(self.view, widgetUtils.MENU, self.exit, menuitem=self.view.close)
  widgetUtils.connect_event(self.view, widgetUtils.CLOSE_EVENT, self.exit)
@@ -1520,6 +1520,32 @@ class Controller(object):
  buffer_index = self.view.search(buffer.name, buffer.account)
  self.view.set_page_title(buffer_index, title)

+ def ocr_image(self, *args, **kwargs):
+  buffer = self.get_current_buffer()
+  if hasattr(buffer, "get_right_tweet") == False:
+   output.speak(_(u"Invalid buffer"))
+   return
+  tweet = buffer.get_right_tweet()
+  if tweet.has_key("entities") == False or tweet["entities"].has_key("media") == False:
+   output.speak(_(u"This tweet doesn't contain images"))
+   return
+  if len(tweet["entities"]["media"]) > 1:
+   image_list = [_(u"Picture {0}").format(i,) for i in xrange(0, len(tweet["entities"]["media"]))]
+   dialog = dialogs.urlList.urlList(title=_(u"Select the picture"))
+   if dialog.get_response() == widgetUtils.OK:
+    img = tweet["entities"]["media"][dialog.get_item()]
+   else:
+    return
+  else:
+   img = tweet["entities"]["media"][0]
+  api = ocr.OCRSpace.OCRSpaceAPI()
+  try:
+   text = api.OCR_URL(img["media_url"])
+  except ocr.OCRSpace.APIError as er:
+   output.speak(_(u"Unable to extract text"))
+   return
+  msg = messages.viewTweet(text["ParsedText"], [], False)
+
 def save_data_in_db(self):
  for i in session_.sessions:
   session_.sessions[i].shelve()
--- a/src/extra/ocr/OCRSpace.py
+++ b/src/extra/ocr/OCRSpace.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+""" original module taken and modified from https://github.com/ctoth/cloudOCR"""
+import requests
+
+class APIError(Exception):
+	pass
+
+class OCRSpaceAPI(object):
+
+	def __init__(self, key="4e72ae996f88957", url='https://api.ocr.space/parse/image'):
+		self.key = key
+		self.url = url
+
+	def OCR_URL(self, url, overlay=False):
+		payload = {
+			'url': url,
+			'isOverlayRequired': overlay,
+			'apikey': self.key,
+		}
+		r = requests.post(self.url, data=payload)
+		result = r.json()['ParsedResults'][0]
+		if result['ErrorMessage']:
+			raise APIError(result['ErrorMessage'])
+		return result
+
+	def OCR_file(self, fileobj, overlay=False):
+		payload = {
+			'isOverlayRequired': overlay,
+			'apikey': self.key,
+			'lang': 'es',
+		}
+		r = requests.post(self.url, data=payload, files={'file': fileobj})
+		results = r.json()['ParsedResults']
+		if results[0]['ErrorMessage']:
+			raise APIError(results[0]['ErrorMessage'])
+		return results
+
--- a/src/extra/ocr/__init__.py
+++ b/src/extra/ocr/__init__.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+import OCRSpace
--- a/src/keymaps/Windows
+++ b/src/keymaps/Windows
@@ -53,4 +53,5 @@ check_for_updates = string(default="alt+win+u")
 list_manager = string(default="alt+win+shift+l")
 configuration = string(default="control+win+o")
 accountConfiguration = string(default="control+win+shift+o")
-update_buffer = string(default="control+alt+shift+u")
+update_buffer = string(default="control+alt+shift+u")
+ocr_image = string(default="win+alt+o")
--- a/src/keymaps/default.keymap
+++ b/src/keymaps/default.keymap
@@ -54,4 +54,5 @@ check_for_updates = string(default="control+win+u")
 list_manager = string(default="control+win+shift+l")
 configuration = string(default="control+win+o")
 accountConfiguration = string(default="control+win+shift+o")
-update_buffer = string(default="control+win+shift+u")
+update_buffer = string(default="control+win+shift+u")
+ocr_image = string(default="win+alt+o")
--- a/src/keystrokeEditor/constants.py
+++ b/src/keystrokeEditor/constants.py
@@ -52,4 +52,5 @@ actions = {
 "accountConfiguration": _(u"Opens the account settings dialogue"),
 "audio": _(u"Try to play an audio file"),
 "update_buffer": _(u"Updates the buffer and retrieves possible lost items there."),
+"ocr_image": _(u"Extracts the text from a picture and displays the result in a dialog."),
 }
--- a/src/wxUI/dialogs/urlList.py
+++ b/src/wxUI/dialogs/urlList.py
@@ -2,8 +2,8 @@
 import wx

 class urlList(wx.Dialog):
- def __init__(self):
-  super(urlList, self).__init__(parent=None, title=_(u"Select URL"))
+ def __init__(self, title=_(u"Select URL")):
+  super(urlList, self).__init__(parent=None, title=title)
  panel = wx.Panel(self)
  self.lista = wx.ListBox(panel, -1)
  self.lista.SetFocus()
--- a/src/wxUI/view.py
+++ b/src/wxUI/view.py
@@ -32,6 +32,7 @@ class mainFrame(wx.Frame):
  self.view = tweet.Append(wx.NewId(), _(u"&Show tweet"))
  self.view_coordinates = tweet.Append(wx.NewId(), _(u"View &address"))
  self.view_conversation = tweet.Append(wx.NewId(), _(u"View conversa&tion"))
+  self.ocr = tweet.Append(wx.NewId(), _(u"Read text in pictures"))
  self.delete = tweet.Append(wx.NewId(), _(u"&Delete"))

  # User menu