diff --git a/.gitignore b/.gitignore
index dddbacb..3e11501 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@ dist/
 flojoy.egg-info
 .idea
 **/__pycache__
+flojoy/aiprompter/PyCodeGPT
 build
diff --git a/flojoy/__init__.py b/flojoy/__init__.py
index ef87cd1..2554439 100644
--- a/flojoy/__init__.py
+++ b/flojoy/__init__.py
@@ -5,3 +5,4 @@
 from .module_scraper import FlojoyWrapper
 from .job_result_utils import get_next_nodes, get_next_directions, get_job_result
 from .data_container import DataContainer
+from .aiprompter import *
\ No newline at end of file
diff --git a/flojoy/aiprompter/__init__.py b/flojoy/aiprompter/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/flojoy/aiprompter/instrumentation/__main__.py b/flojoy/aiprompter/instrumentation/__main__.py
new file mode 100644
index 0000000..715a304
--- /dev/null
+++ b/flojoy/aiprompter/instrumentation/__main__.py
@@ -0,0 +1,225 @@
+import argparse
+import json
+import os
+import re
+
+import openai
+from tenacity import retry, stop_after_attempt, wait_random_exponential
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed
+
+openai.api_key = os.environ["OPENAI_KEY"]
+
+
+def prompt_pycodegpt(prompt, model_params):
+    def load_generation_pipe(model_name_or_path: str, gpu_device: int = 0):
+        model = AutoModelForCausalLM.from_pretrained(model_name_or_path)
+        tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+
+        pipe = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            use_fast=False,
+            device="cpu",  # no CUDA installed; see https://discuss.huggingface.co/t/is-transformers-using-gpu-by-default/8500
+        )
+
+        print(
+            "Loaded generation pipeline from {}: vocab size = {}, eos id = {}, gpu device = {}.".format(
+                model_name_or_path, len(tokenizer), tokenizer.eos_token_id, gpu_device
+            )
+        )
+
+        return pipe
+
+    def extract_function_block(string):
+        return re.split("\nclass|\ndef|\n#|\n@|\nprint|\nif", string)[0].rstrip()
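+    # A worked example of the truncation above (hypothetical model output):
+    #
+    #   extract_function_block("    return a + b\n\ndef next_fn():\n    pass")
+    #   -> "    return a + b"
+    #
+    # Everything from the first top-level `class`/`def`/comment/decorator/
+    # `print`/`if` onwards is discarded, keeping only the first completion.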
+    def run_code_generation(pipe, prompt, num_completions=1, **gen_kwargs):
+        set_seed(123)
+
+        code_gens = pipe(prompt, num_return_sequences=num_completions, **gen_kwargs)
+
+        return [
+            extract_function_block(code_gen["generated_text"][len(prompt) :])
+            for code_gen in code_gens
+        ]
+
+    print("#" * 72 + "\nPyCodeGPT prompting\n" + "#" * 72)
+    pipe = load_generation_pipe("Daoguang/PyCodeGPT", 0)
+    gen_kwargs = {
+        "pad_token_id": pipe.tokenizer.pad_token_id
+        if pipe.tokenizer.pad_token_id
+        else pipe.tokenizer.eos_token_id,
+        "eos_token_id": pipe.tokenizer.eos_token_id,
+    }
+    gen_kwargs.update(model_params)
+    for code_gen in run_code_generation(pipe, prompt, num_completions=1, **gen_kwargs):
+        print(code_gen)
+
+
+@retry(
+    wait=wait_random_exponential(min=1, max=40),
+    stop=stop_after_attempt(3),
+    after=lambda retry_state: print(f"Attempt: {retry_state.attempt_number}"),
+)
+def prompt_gpt35(prompt, model_params, experimental=False):
+    print(
+        "#" * 72
+        + f'\nGPT-3.5-Turbo prompting{" with experimental run" if experimental else ""}\n'
+        + "#" * 72
+    )
+    if not experimental:
+        response_retval = openai.Completion.create(
+            model="text-davinci-003", prompt=prompt, **model_params
+        )
+        print(response_retval.choices[0]["text"])
+    else:
+        messages = [
+            {
+                "role": "user",
+                "content": "Generate an instrument driver for the Agilent 34400A using the QCodes library",
+            }
+        ]
+        functions = [
+            {
+                "name": "get_driver_information",
+                "description": "Prints the driver in a human readable format",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "instrument_name": {
+                            "type": "string",
+                            "description": "The name of the instrument",
+                        },
+                        "description": {
+                            "type": "string",
+                            "description": "A description of the instrument",
+                        },
+                        "set_methods": {
+                            "type": "string",
+                            "description": "A comma separated list of driver methods that can set parameter values on the instrument, beginning with 'set_'",
+                        },
+                        "get_methods": {
+                            "type": "string",
+                            "description": "A comma separated list of driver methods that can get parameter values on the instrument, beginning with 'get_'",
+                        },
+                    },
+                    "required": [
+                        "instrument_name",
+                        "description",
+                        "set_methods",
+                        "get_methods",
+                    ],
+                },
+            }
+        ]
+        response_retval = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo-0613",
+            messages=messages,
+            functions=functions,
+            function_call="auto",  # "auto" is the default, but we'll be explicit
+        )
+        print(
+            "Driver info:",
+            response_retval["choices"][0]["message"]["function_call"]["arguments"],
+        )
+        function_args = json.loads(
+            response_retval["choices"][0]["message"]["function_call"]["arguments"]
+        )
+        # generate one function per getter reported by the model
+        COMPLETE_FUNC = []
+        for getter in function_args.get("get_methods").split(","):
+            head, tail = getter.strip().split("_")[:2]
+            messages = [
+                {
+                    "role": "user",
+                    "content": f"Generate a Python3.10 function to {head} the {tail} on the {function_args.get('instrument_name')} using the QCodes library and VisaInstrument class of QCodes",
+                }
+            ]
+            response_retval = openai.ChatCompletion.create(
+                model="gpt-3.5-turbo-0613", messages=messages, **model_params
+            )
+            result = response_retval["choices"][0]["message"]["content"]
+            if "import VisaInstrument" not in result:
+                raise TypeError("Incorrect response returned.")
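+            # The slicing below assumes a reply shaped roughly like this
+            # (hypothetical model output):
+            #
+            #   Here is the function:
+            #   ```python
+            #   from qcodes import VisaInstrument
+            #   def get_voltage(...): ...
+            #   # Example usage
+            #   ...
+            #   ```
+            #
+            # i.e. one fenced code block whose first line is ```python, possibly
+            # followed by an "# Example usage" section that we strip out.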
+            # ===============================================================
+            # Now we need to process the fragments into one single class ...
+            # ===============================================================
+            # first, keep only the Python code delimited by the markdown fences
+            fragment = result[result.find("```") : result.find("```", result.find("```") + 1)]
+            if "# Example usage" in fragment:
+                fragment = fragment[: fragment.find("# Example usage")]
+            # now, we remove the leading ```python line
+            fragment = "\n".join(fragment.split("\n")[1:])
+            COMPLETE_FUNC.append(fragment)
+        # deduplicate the import lines shared by the fragments, then let the
+        # model merge the remaining function bodies into one class
+        headers = []
+        functionality = []
+        for fragment in COMPLETE_FUNC:
+            for line in fragment.split("\n"):
+                if line.startswith("from") or line.startswith("import"):
+                    headers.append(line)
+                else:
+                    functionality.append(line)
+        redundant = "\n".join(set(headers)) + "\n" + "\n".join(functionality)
+        messages = [
+            {
+                "role": "user",
+                "content": f"Combine the following Python3.10 code into a single class: {redundant}",
+            }
+        ]
+        response_retval = openai.ChatCompletion.create(
+            model="gpt-3.5-turbo-0613", messages=messages, **model_params
+        )
+        result = response_retval["choices"][0]["message"]["content"]
+
+        print(result)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-P", "--pycodegpt", dest="pycodegpt", action="store_true")
+    parser.add_argument("-G", "--gpt35", dest="gpt35", action="store_true")
+    parser.add_argument(
+        "-E", "--experimental", dest="experimental", action="store_true"
+    )
+    args = parser.parse_args()
+    if args.gpt35:
+        model_params = {
+            "max_tokens": 2048,  # Adjust as per your requirements
+            # "n": 1,  # Number of completions to generate
+            "top_p": 1,
+            "temperature": 0.0,  # Controls randomness of the output
+            "frequency_penalty": 0.0,
+            "presence_penalty": 0.0,
+        }
+        prompt_gpt35(
+            prompt="Write an instrument driver in Python 3.10 for the Agilent 34400A using the QCodes library",
+            model_params=model_params,
+            experimental=args.experimental,
+        )
+    if args.pycodegpt:
+        model_params = {
+            "do_sample": True,
+            "temperature": 0.8,
+            "max_new_tokens": 500,
+            "top_p": 1.0,
+            "top_k": 0,
+        }
+        prompt_pycodegpt(
+            prompt="How to make an instrument driver in Python 3.10 for the Agilent 34400A with QCodes?",  # prompt requires question syntax for some reason
+            model_params=model_params,
+        )
diff --git a/flojoy/aiprompter/numpy_random/__main__.py b/flojoy/aiprompter/numpy_random/__main__.py
new file mode 100644
index 0000000..4ea586d
--- /dev/null
+++ b/flojoy/aiprompter/numpy_random/__main__.py
@@ -0,0 +1,177 @@
+import argparse
+import json
+import os
+import subprocess
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+
+import numpy as np
+import openai
+import yaml
+from tqdm import tqdm
+
+openai.api_key = os.environ["OPENAI_KEY"]
+
+
+def find_2nd(string, substring):
+    """Return the index of the second occurrence of `substring` in `string`."""
+    return string.find(substring, string.find(substring) + 1)
+
+
+primary_functions = [
+    func_name
+    for func_name in dir(np.random)
+    if callable(getattr(np.random, func_name)) and not func_name.startswith("_")
+]
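+# For orientation (hypothetical values): find_2nd("abcabc", "abc") returns 3,
+# and primary_functions will contain names such as "normal", "uniform" and
+# "randint" -- the exact list depends on the installed numpy version.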
+def generate_wrapper_davinci(function):
+    try:
+        getattr(np.random, function).__name__
+    except AttributeError:
+        return "", ""
+    message = [
+        {
+            "role": "user",
+            "content": f"Provide to me a wrapper in Python for numpy.random.{function} that includes the following: the input parameters of the wrapper must be 'dc' and 'params', the name of the wrapper must be {getattr(np.random, function).__name__.upper()}, the primary argument of the function must be 'dc[0].y', all optional arguments must be strictly typed and taken as keys of the same name from the dictionary 'params', all internal variables must be strictly typed, and the doc string used in the wrapper must be the original doc string of the function. The functions must enforce strict typing using Python 3.10 syntax.",
+        }
+    ]
+    response = openai.Completion.create(
+        model="text-davinci-003",
+        prompt=message[0]["content"],
+        max_tokens=2048,  # Adjust as per your requirements
+        # n=1,  # Number of completions to generate
+        top_p=1,
+        temperature=0.0,  # Controls randomness of the output
+        frequency_penalty=0.0,
+        presence_penalty=0.0,
+    )
+
+    response_retval = openai.Completion.create(
+        model="text-davinci-003",
+        prompt=f"Change the following Python3.10 function to return a custom class 'DataContainer', instantiated with the arguments 'x=dc[0].y, y=<retval>', where <retval> is the return value of the function: {response.choices[0]['text']}",
+        max_tokens=2048,  # Adjust as per your requirements
+        # n=1,  # Number of completions to generate
+        top_p=1,
+        temperature=0.0,  # Controls randomness of the output
+        frequency_penalty=0.0,
+        presence_penalty=0.0,
+    )
+    return function, response_retval.choices[0]["text"]
+
+
+def generate_manifest(function):
+    response = openai.Completion.create(
+        model="text-davinci-003",
+        prompt=f"Return all arguments of the numpy.random.{function} function as keys of a YAML tree of the name 'parameters', giving their default values as a sub-key 'default' and the type of the parameter as a sub-key 'type'. If they have no default value, leave the entry blank.",
+        max_tokens=2048,  # Adjust as per your requirements
+        # n=1,  # Number of completions to generate
+        top_p=1,
+        temperature=0.0,  # Controls randomness of the output
+        frequency_penalty=0.0,
+        presence_penalty=0.0,
+    )
+    data = yaml.safe_load(response.choices[0]["text"])
+    for key in data:
+        val = data[key]
+        if ":" not in val:
+            continue
+        # flatten any 'name: value' strings into a nested dict
+        data[key] = tmp = {}
+        for x in val.split():
+            x = x.split(":", 1)
+            tmp[x[0]] = x[1]
+    return function, data
+
+
+def write_wrapper_to_file(wkdir, function, string):
+    LOCAL_DIR = wkdir / Path(function.upper())
+    LOCAL_DIR.mkdir(exist_ok=True)
+    with open(LOCAL_DIR / Path(function.upper() + ".py"), "w") as fh:
+        # we need to redo the doc string so that it matches the original, in the
+        # same conventions as the other autogenerated nodes.
+        # First, let's get the original docstring
+        og_docstring = getattr(np.random, function).__doc__
+        og_explanation = og_docstring[og_docstring.find("Parameters") :]
+        head = string[: string.find("Parameters")]
+        tail = string[find_2nd(string, '"""') :]
+        flojoy_disclaimer = (
+            "-." * 36
+            + "\nThe parameters of the function in this Flojoy wrapper are given below."
+            + "\n"
+            + "-." * 36
+            + "\n"
+        )
+        string = head + "\n" + flojoy_disclaimer + og_explanation + "\n\n" + tail
+        string = string.replace("\t\t", "\t")
+        fh.write(
+            f"import numpy as np\nfrom flojoy import flojoy, DataContainer\nfrom typing import Optional, Union, Tuple, List\n@flojoy\n{string}"
+        )
+    subprocess.call(
+        ["black", f"{LOCAL_DIR / Path(function.upper() + '.py')}"],
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.STDOUT,
+    )
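+# For orientation, a written node is expected to look roughly like the
+# following (hypothetical output for numpy.random.normal; the actual body
+# comes back from the model and is then reformatted by black):
+#
+#   import numpy as np
+#   from flojoy import flojoy, DataContainer
+#   from typing import Optional, Union, Tuple, List
+#   @flojoy
+#   def NORMAL(dc, params):
+#       """..."""
+#       loc: float = float(params["loc"])
+#       ...
+#       return DataContainer(x=dc[0].y, y=np.random.normal(...))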
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--generate_nodes", "-G", dest="generate_nodes", action="store_true"
+    )
+    parser.add_argument(
+        "--generate_manifests", "-M", dest="generate_manifest", action="store_true"
+    )
+
+    args = parser.parse_args()
+
+    DIRECTORY = Path("RANDOM")
+    DIRECTORY.mkdir(exist_ok=True)
+
+    MANIFEST_DIRECTORY = DIRECTORY / Path("../../MANIFEST")
+
+    if args.generate_nodes:
+        DATA_FNAME = "numpy.random.json"
+
+        WRAPPERS = {}
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            future_to_stuff = [
+                executor.submit(generate_wrapper_davinci, function)
+                for function in primary_functions
+            ]
+            for future in tqdm(
+                as_completed(future_to_stuff), total=len(primary_functions)
+            ):
+                res = future.result()
+                WRAPPERS[res[0]] = res[1]
+                if "def" in res[1] and "return" in res[1]:
+                    write_wrapper_to_file(DIRECTORY, res[0], res[1])
+        with open(DATA_FNAME, "w", encoding="utf-8") as f:
+            json.dump(WRAPPERS, f, ensure_ascii=False, indent=4)
+
+    if args.generate_manifest:
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            future_to_stuff = [
+                executor.submit(generate_manifest, function)
+                for function in primary_functions
+            ]
+            for future in tqdm(
+                as_completed(future_to_stuff), total=len(primary_functions)
+            ):
+                res = future.result()
+                func, MANIFEST = res
+                MANIFEST["name"] = func
+                MANIFEST["key"] = func.upper()
+                MANIFEST["type"] = "NUMPY_RANDOM"
+                if "size" in MANIFEST["parameters"]:
+                    MANIFEST["parameters"]["size"] = {
+                        "default": "dc[0].y.shape",
+                        "type": "string",
+                    }
+                with open(
+                    MANIFEST_DIRECTORY / Path(func.lower() + ".manifest.yaml"), "w"
+                ) as fh:
+                    yaml.safe_dump({"COMMAND": MANIFEST}, fh, default_flow_style=False)
+    # for function in primary_functions:
+    #     node_exists = os.path.exists(
+    #         DIRECTORY / Path(function.upper()) / Path(function.upper() + ".py")
+    #     )
+    #     # print(f"{function} included: {node_exists}")
+    #     if not node_exists:
+    #         os.remove(MANIFEST_DIRECTORY / Path(function.lower() + ".manifest.yaml"))
+    #         print("Deleted manifest of", function)
diff --git a/requirements.txt b/requirements.txt
index e4312a0..7a34cad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -64,3 +64,4 @@ uptime==3.0.1
 urllib3==1.26.15
 wrapt==1.15.0
 zope.interface==6.0
+openai==0.27.8
\ No newline at end of file