-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
264 lines (204 loc) · 7.74 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import pywinauto
import wmi
import psutil
import pyautogui
import PIL
import keyboard
import win32gui
import win32process # "pip install win32api"
import win32api
import win10toast
import numpy as np
import time
import pprint
import cv2 # "pip install opencv-python"
import pytesseract # google tesseract OCR
import os
import openai
# Module-level configuration for the OpenAI client, Tesseract OCR and toast notifications.

# Read the API key from the environment instead of committing it to source control.
# NOTE(review): the key that was previously hard-coded on this line should be treated
# as leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this script."
    )

# Location of the Tesseract executable used by pytesseract.
# NOTE(review): this is a raw string, so the doubled backslashes are kept literally;
# presumably Windows tolerates the repeated separators - confirm, or switch to single ones.
pytesseract.pytesseract.tesseract_cmd = r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

# Pretty-printer for debugging the per-sample ``data`` dicts.
pp = pprint.PrettyPrinter(depth=4)

# Shared toast notifier.
toaster = win10toast.ToastNotifier()
def is_visible_and_active(hwnd):
    """Report whether a window is visible and has a non-trivial client area.

    Args:
        hwnd: Window handle, e.g. the value returned by
            ``win32gui.GetForegroundWindow()``.

    Returns:
        bool: ``True`` when ``win32gui.IsWindowVisible`` reports the window as
        visible and both dimensions of its client rectangle exceed 10;
        ``False`` otherwise.
    """
    # Always return an explicit bool: falling off the end would yield None,
    # and a caller comparing the result with ``== False`` would never match.
    if not win32gui.IsWindowVisible(hwnd):
        return False
    left, top, right, bottom = win32gui.GetClientRect(hwnd)
    return (right - left > 10) and (bottom - top > 10)
def distracted(data):
    """Return True when the sampled process counts as a distraction.

    Args:
        data: Mapping with a ``"process_exe"`` entry.

    Returns:
        bool: ``True`` only when ``data["process_exe"]`` equals
        ``"cmd.exebooga"``; ``False`` for any other value.
    """
    return bool(data["process_exe"] == "cmd.exebooga")
def get_full_screenshot_text():
    """Take a screenshot with pyautogui and return the text Tesseract reads from it.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the capture.
    """
    # note: this function can take up to 5 seconds to run
    frame_rgb = np.array(pyautogui.screenshot())
    # Convert the RGB capture to BGR channel order before OCR.
    frame_bgr = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR)
    return pytesseract.image_to_string(frame_bgr)
def get_window_screenshot_text(hwnd):
    """Grab the screen region covered by a window and return its OCR text.

    Args:
        hwnd: Handle of the window to capture.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the region
        given by ``win32gui.GetWindowRect(hwnd)``.
    """
    # Import the submodule explicitly: a bare ``import PIL`` does not load
    # ``PIL.ImageGrab``, so ``PIL.ImageGrab.grab`` only worked when some other
    # module had already imported it as a side effect.
    from PIL import ImageGrab

    bbox = win32gui.GetWindowRect(hwnd)
    img = ImageGrab.grab(bbox)
    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    # After tests, Tesseract model inference is by far the most time-consuming
    # step of this function.
    return pytesseract.image_to_string(img)
def gpt4(input_text, max_retries=None, retry_delay=10):
    """Send a single user message to the ``gpt-4`` chat model and return the reply text.

    Args:
        input_text: Content of the user message.
        max_retries: Maximum number of retries after a failed request. ``None``
            (the default) retries indefinitely, which is the original behaviour.
        retry_delay: Seconds to sleep between attempts (default 10).

    Returns:
        The ``content`` of the first choice's message in the API response.

    Raises:
        Exception: The last API error, re-raised once ``max_retries`` is
            exhausted (never raised when ``max_retries`` is ``None``).
    """
    messages = [{"role": "user", "content": input_text}]
    failures = 0
    while True:
        try:
            result = openai.ChatCompletion.create(
                model="gpt-4",
                messages=messages,
                temperature=1,
                top_p=1,
                presence_penalty=0,
                frequency_penalty=0,
            )
            break
        except Exception as e:
            # Best-effort retry on any API failure; report it and try again.
            print(e)
            print("OpenAI API error go brrr..")
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            time.sleep(retry_delay)
    return result["choices"][0]["message"]["content"]
def gpt_embedding(text, model="text-embedding-ada-002", max_retries=None, retry_delay=10):
    """Request an embedding for ``text`` and return it as a NumPy array.

    Args:
        text: The string to embed. It is sent as a one-element input list.
        model: Name of the embedding model to use.
        max_retries: Maximum number of retries after a failed request. ``None``
            (the default) retries indefinitely, which is the original behaviour.
        retry_delay: Seconds to sleep between attempts (default 10).

    Returns:
        ``np.ndarray`` built from the first item's ``embedding`` in the response.

    Raises:
        Exception: The last API error, re-raised once ``max_retries`` is
            exhausted (never raised when ``max_retries`` is ``None``).
    """
    # NOTE: newline replacement (text.replace("\n", " ")) was tried here and
    # is deliberately left disabled.
    failures = 0
    while True:
        try:
            result = openai.Embedding.create(input=[text], model=model)
            break
        except Exception as e:
            # Best-effort retry on any API failure; report it and try again.
            print(e)
            print("OpenAI API error go brr...")
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            time.sleep(retry_delay)
    return np.array(result['data'][0]['embedding'])
# create an ada embedding database
# Width of each stored embedding row (text-embedding-ada-002 vectors are 1536-d
# according to the OpenAI embeddings documentation).
ada_DB_D = 1536
# Maximum number of rows the pre-allocated table can hold.
ada_DB_N_max = 1000
# Zero-initialise instead of np.empty: rows that have not been filled yet must
# not contain arbitrary memory, because similarity scores are computed
# against this table.
ada_DB = np.zeros((ada_DB_N_max, ada_DB_D))
# Source text for each embedding; text_DB[k] corresponds to ada_DB[k].
text_DB = []
# index to fill next entry in ada_DB and text_DB
DB_i = 0
# Sampling loop: every 5 seconds, record metadata about the foreground window,
# OCR its contents, embed the OCR text and store it in ada_DB / text_DB.
# while True:
for i in range(10):
    # Stop sampling once the pre-allocated embedding table is full, so the
    # row assignment below can never index past the end of ada_DB.
    if DB_i >= ada_DB_N_max:
        break

    time.sleep(5)

    data = {}

    # save current time
    data["time"] = time.time()

    hwnd_fore = win32gui.GetForegroundWindow()
    # Use ``not`` rather than ``== False``: a None result would never compare
    # equal to False. Skip this sample instead of raising, so the entries
    # collected so far are not lost.
    if not is_visible_and_active(hwnd_fore):
        print("foreground window is not visible, skipping this sample")
        continue

    # get title of window
    data["window_text"] = win32gui.GetWindowText(hwnd_fore)

    # get thread and process that "created" the window
    (thread_id, process_id) = win32process.GetWindowThreadProcessId(hwnd_fore)
    data["thread_id"] = thread_id
    data["process_id"] = process_id

    # get name of program that created this process, and some data about that process
    p = psutil.Process(process_id)
    with p.oneshot():
        data["process_name"] = p.name()
        data["process_exe"] = p.exe()
        data["process_status"] = p.status()
        data["process_create_time"] = p.create_time()

    # take screenshot of fullscreen or foreground window, and extract text from that
    # data["full_screenshot_text"] = get_full_screenshot_text()
    print("taking screenshot")
    data["window_screenshot_text"] = get_window_screenshot_text(hwnd_fore)

    # pp.pprint(data)

    # print("calling GPT4 API")
    # gpt4_input = "This is a screenshot of my window converted to text. What am I currently doing?\n\n\"\"\"" + data["window_screenshot_text"] + "\"\"\""
    # result = gpt4(gpt4_input)
    # print(result)

    # Keep text_DB and ada_DB aligned: entry DB_i of each describes the same sample.
    text_DB.append(data["window_screenshot_text"])
    print("calling ada embedding API")
    embed = gpt_embedding(data["window_screenshot_text"])
    # print(embed, len(embed))
    ada_DB[DB_i, :] = embed
    DB_i += 1
    print(ada_DB)

    toaster.show_toast("Screenshot taken", str(i), duration=1)

    # if distracted(data) == True:
    #     toaster.show_toast("Stay focussed.", "You may be getting distracted", duration=10)
# Interactive question loop: embed the user's question, pick the stored
# screenshots whose embeddings score highest against it, and ask GPT-4 the
# question with those screenshot texts appended.
while True:
    print("type a question as input: ")
    prompt = input("(User:) ")

    print("\ncalling ada embedding API")
    embed = gpt_embedding(prompt)

    print("finding most similar embeddings")
    # Only the first DB_i rows hold real embeddings; scoring the whole table
    # could select unfilled rows and then index past the end of text_DB.
    sims = np.sum(embed * ada_DB[:DB_i], axis=1)
    # Never ask for more matches than there are stored screenshots.
    n = min(3, DB_i)
    # argsort is ascending, so the last n indices are the n highest scores.
    # Guard n == 0 explicitly because a [-0:] slice would select everything.
    top_n_indices = np.argsort(sims)[-n:] if n else []

    print("found most similar embeddings. calling GPT4 with your query now")
    parts = [
        prompt,
        "\n\nI have been regularly taking screenshots of my computer and converting them to text. Here are some that may be most relevant to answering the above question.",
    ]
    for i, db_index in enumerate(top_n_indices):
        parts.append(f"\n\nScreenshot {i}:\n\"\"\"")
        parts.append(text_DB[db_index])
        parts.append("\n\"\"\"")
    gpt4_input = "".join(parts)

    gpt4_result = gpt4(gpt4_input)
    print(f"\n(GPT4:) {gpt4_result}")
# LOTS OF UNUSED CODE
# NOT DELETED IN CASE I NEED TO REFER TO IT
# print("aaa")
# recorded = keyboard.record(until='esc')
# print(recorded)
# temp = input("enter")
# # gets all open windows using pywinauto
# windows = pywinauto.Desktop(backend="uia").windows()
# print(windows)
# print([w.window_text() for w in windows])
# # gets all processes using WMI
# f = wmi.WMI()
# print("pid Process name")
# for process in f.Win32_Process():
# print(f"{process.ProcessId:<10} {process.Name}")
# # get current python process, and all its attributes, using psutil
# p = psutil.Process()
# attr_all = list(p.as_dict().keys())
# # ['io_counters', 'environ', 'cpu_percent', 'create_time', 'cpu_affinity', 'memory_full_info', 'memory_info', 'cpu_times', 'exe', 'nice', 'ionice', 'threads', 'username', 'ppid', 'memory_percent', 'num_ctx_switches', 'pid', 'cwd', 'num_threads', 'name', 'status', 'cmdline', 'open_files', 'num_handles', 'memory_maps', 'connections']
# attr_some = ['exe', 'name', 'pid', 'ppid', 'status']
# print(p)
# print(attr_all)
# # get all chrome-associated processes (note these may be child or parent processes, and may or may not have active windows)
# chrome_pids = []
# for proc in psutil.process_iter(attr_some):
# if proc.info["name"] == "chrome.exe":
# chrome_pids.append(proc.info["pid"])
# print(chrome_pids)
# def callback(hwnd, extra):
# if win32gui.IsWindowVisible(hwnd):
# window_text = win32gui.GetWindowText(hwnd)
# client_rect = win32gui.GetClientRect(hwnd)
# if (client_rect[2] - client_rect[0] > 10) and (client_rect[3] - client_rect[1] > 10):
# print(f"window text: {window_text}")
# print(f"client rect: {client_rect}")
# (thread_id, process_id) = win32process.GetWindowThreadProcessId(hwnd)
# print(thread_id, process_id)
# return (thread_id, process_id)
# Enumerate all active windows that occupy space on screen, and parent processes and threads
# win32gui.EnumWindows(callback, None)
# # given a chrome process id, try to maximise any window it is handling - code might not work
# app = pywinauto.application.Application()
# for pid in chrome_pids:
# print(pid)
# try:
# app1 = app.connect(process=pid)
# # app1.maximize()
# app1.top_window().set_focus()
# print("in focus")
# except Exception as e:
# print(e)
# p = psutil.Process()
# with p.oneshot():
# print(p.name(), p.cpu_times(), p.cpu_percent(), p.create_time(), p.ppid(), p.status())