interpreter --profile local-os for Local OS Mode

OpenInterpreter · May 13, 2024 · 0db16d4 · 0db16d4
1 parent ae76502
commit 0db16d4
Show file tree

Hide file tree

Showing 6 changed files with 220 additions and 114 deletions.
diff --git a/interpreter/core/computer/computer.py b/interpreter/core/computer/computer.py
@@ -53,6 +53,9 @@ def __init__(self, interpreter):
 
         self.import_skills = False
         self._has_imported_skills = False
+        self.max_output = (
+            self.interpreter.max_output
+        )  # Should mirror interpreter.max_output
 
     # Shortcut for computer.terminal.languages
     @property

diff --git a/interpreter/core/computer/display/display.py b/interpreter/core/computer/display/display.py
@@ -24,6 +24,7 @@
 np = lazy_import("numpy")
 plt = lazy_import("matplotlib.pyplot")
 screeninfo = lazy_import("screeninfo")
+pywinctl = lazy_import("pywinctl")
 
 
 from ..utils.computer_vision import find_text_in_image, pytesseract_get_text
@@ -64,7 +65,7 @@ def center(self):
 
     def info(self):
         """
-        Returns a list of all connected montitor/displays and thir information
+        Returns a list of all connected monitor/displays and their information
         """
         return get_displays()
 
@@ -84,7 +85,7 @@ def screenshot(
         screen=0,
         show=True,
         quadrant=None,
-        active_app_only=False,
+        active_app_only=True,
         force_image=False,
         combine_screens=True,
     ):
@@ -99,31 +100,50 @@ def screenshot(
             description = self.computer.vision.query(pil_image=screenshot)
             print("A DESCRIPTION OF WHAT'S ON THE SCREEN: " + description)
 
-            print("ALL OF THE TEXT ON THE SCREEN: ")
-            text = self.get_text_as_list_of_lists(screenshot=screenshot)
-            pp = pprint.PrettyPrinter(indent=4)
-            pretty_text = pp.pformat(text)  # language models like it pretty!
-            pretty_text = format_to_recipient(pretty_text, "assistant")
-            print(pretty_text)
-            print(
-                format_to_recipient(
-                    "To recieve the text above as a Python object, run computer.display.get_text_as_list_of_lists()",
-                    "assistant",
+            if self.computer.max_output > 600:
+                print("ALL OF THE TEXT ON THE SCREEN: ")
+                text = self.get_text_as_list_of_lists(screenshot=screenshot)
+                pp = pprint.PrettyPrinter(indent=4)
+                pretty_text = pp.pformat(text)  # language models like it pretty!
+                pretty_text = format_to_recipient(pretty_text, "assistant")
+                print(pretty_text)
+                print(
+                    format_to_recipient(
+                        "To recieve the text above as a Python object, run computer.display.get_text_as_list_of_lists()",
+                        "assistant",
+                    )
                 )
-            )
             return
 
         if quadrant == None:
-            # Implement active_app_only!
             if active_app_only:
-                region = self.get_active_window()["region"]
-                screenshot = pyautogui.screenshot(region=region)
+                active_window = pywinctl.getActiveWindow()
+                if active_window:
+                    screenshot = pyautogui.screenshot(
+                        region=(
+                            active_window.left,
+                            active_window.top,
+                            active_window.width,
+                            active_window.height,
+                        )
+                    )
+                    message = format_to_recipient(
+                        "Taking a screenshot of the active app (recommended). To take a screenshot of the entire screen (uncommon), use computer.display.view(active_app_only=False).",
+                        "assistant",
+                    )
+                    print(message)
+                else:
+                    screenshot = pyautogui.screenshot()
+
             else:
                 screenshot = take_screenshot_to_pil(
                     screen=screen, combine_screens=combine_screens
                 )  #  this function uses pyautogui.screenshot which works fine for all OS (mac, linux and windows)
-                # message = format_to_recipient("Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will recieve it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.", "assistant")
-                # print(message)
+                message = format_to_recipient(
+                    "Taking a screenshot of the entire screen. This is not recommended. You (the language model assistant) will recieve it with low resolution.\n\nTo maximize performance, use computer.display.view(active_app_only=True). This will produce an ultra high quality image of the active application.",
+                    "assistant",
+                )
+                print(message)
 
         else:
             screen_width, screen_height = pyautogui.size()

diff --git a/interpreter/core/respond.py b/interpreter/core/respond.py
@@ -112,10 +112,7 @@ def respond(interpreter):
                     )
             elif interpreter.offline and not interpreter.os:
                 print(traceback.format_exc())
-                raise Exception(
-                    "Error occurred. "
-                    + str(e)
-                )
+                raise Exception("Error occurred. " + str(e))
             else:
                 raise
 
@@ -191,18 +188,22 @@ def respond(interpreter):
                     )
                     code = re.sub(r"import computer\.\w+\n", "pass\n", code)
                     # If it does this it sees the screenshot twice (which is expected jupyter behavior)
-                    if any(code.split("\n")[-1].startswith(text) for text in [
-                        "computer.display.view",
-                        "computer.display.screenshot",
-                        "computer.view",
-                        "computer.screenshot",
-                    ]):
+                    if any(
+                        code.split("\n")[-1].startswith(text)
+                        for text in [
+                            "computer.display.view",
+                            "computer.display.screenshot",
+                            "computer.view",
+                            "computer.screenshot",
+                        ]
+                    ):
                         code = code + "\npass"
 
                 # sync up some things (is this how we want to do this?)
                 interpreter.computer.verbose = interpreter.verbose
                 interpreter.computer.debug = interpreter.debug
                 interpreter.computer.emit_images = interpreter.llm.supports_vision
+                interpreter.computer.max_output = interpreter.max_output
 
                 # sync up the interpreter's computer with your computer
                 try: