From d65cc5fef12531aa95e5c6c001fef4db7eb25a5b Mon Sep 17 00:00:00 2001 From: if Date: Wed, 26 Apr 2023 01:27:00 +0300 Subject: [PATCH] init phind --- ai_talks/chat.py | 1 + ai_talks/src/utils/agi/phind.py | 293 +++++++++++++++++++++++++++++ ai_talks/src/utils/conversation.py | 21 +++ requirements.txt | 1 + 4 files changed, 316 insertions(+) create mode 100644 ai_talks/src/utils/agi/phind.py diff --git a/ai_talks/chat.py b/ai_talks/chat.py index 0496b2d..0dbbaf0 100644 --- a/ai_talks/chat.py +++ b/ai_talks/chat.py @@ -27,6 +27,7 @@ AI_MODEL_OPTIONS: list[str] = [ "gpt-4", "gpt-4-32k", "bard", + "phind-gpt-4", ] st.set_page_config(page_title=PAGE_TITLE, page_icon=PAGE_ICON) diff --git a/ai_talks/src/utils/agi/phind.py b/ai_talks/src/utils/agi/phind.py new file mode 100644 index 0000000..20a7e5d --- /dev/null +++ b/ai_talks/src/utils/agi/phind.py @@ -0,0 +1,293 @@ +from datetime import datetime +from queue import Empty, Queue +from threading import Thread +from time import time +from urllib.parse import quote + +from curl_cffi.requests import post + +cf_clearance = "" +user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36" # noqa: E501 + + +class PhindResponse: + class Completion: + class Choices: + def __init__(self, choice: dict) -> None: + self.text = choice["text"] + self.content = self.text.encode() + self.index = choice["index"] + self.logprobs = choice["logprobs"] + self.finish_reason = choice["finish_reason"] + + def __repr__(self) -> str: + return f"""<__main__.APIResponse.Completion.Choices(\n text = {self.text.encode()},\n index = {self.index},\n logprobs = {self.logprobs},\n finish_reason = {self.finish_reason})object at 0x1337>""" # noqa: E501 + + def __init__(self, choices: dict) -> None: + self.choices = [self.Choices(choice) for choice in choices] + + class Usage: + def __init__(self, usage_dict: dict) -> None: + self.prompt_tokens = usage_dict["prompt_tokens"] + 
self.completion_tokens = usage_dict["completion_tokens"] + self.total_tokens = usage_dict["total_tokens"] + + def __repr__(self): + return f"""<__main__.APIResponse.Usage(\n prompt_tokens = {self.prompt_tokens},\n completion_tokens = {self.completion_tokens},\n total_tokens = {self.total_tokens})object at 0x1337>""" # noqa: E501 + + def __init__(self, response_dict: dict) -> None: + self.response_dict = response_dict + self.id = response_dict["id"] + self.object = response_dict["object"] + self.created = response_dict["created"] + self.model = response_dict["model"] + self.completion = self.Completion(response_dict["choices"]) + self.usage = self.Usage(response_dict["usage"]) + + def json(self) -> dict: + return self.response_dict + + +class Search: + @staticmethod + def create(prompt: str, actual_search: bool = True, language: str = "en") -> dict: # None = no search + if user_agent == "": + raise ValueError("user_agent must be set, refer to documentation") + + if not actual_search: + return { + "_type": "SearchResponse", + "queryContext": { + "originalQuery": prompt + }, + "webPages": { + "webSearchUrl": f"https://www.bing.com/search?q={quote(prompt)}", + "totalEstimatedMatches": 0, + "value": [] + }, + "rankingResponse": { + "mainline": { + "items": [] + } + } + } + + headers = { + "authority": "www.phind.com", + "accept": "*/*", + "accept-language": "en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3", + "cookie": f"cf_clearance={cf_clearance}", + "origin": "https://www.phind.com", + "referer": "https://www.phind.com/search?q=hi&c=&source=searchbox&init=true", + "sec-ch-ua": '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "user-agent": user_agent + } + + res = post("https://www.phind.com/api/bing/search", headers=headers, json={ + "q": prompt, + "userRankList": 
{}, + "browserLanguage": language}) + import streamlit as st + st.markdown(res.content) + st.code(res.status_code) + st.warning(res.dict) + return post("https://www.phind.com/api/bing/search", headers=headers, json={ + "q": prompt, + "userRankList": {}, + "browserLanguage": language}).json()["rawBingResults"] + + +class Completion: + @staticmethod + def create( + model="gpt-4", + prompt: str = "", + results: dict = None, + creative: bool = False, + detailed: bool = False, + code_context: str = "", + language: str = "en") -> PhindResponse: + + if user_agent == "": + raise ValueError("user_agent must be set, refer to documentation") + + if results is None: + results = Search.create(prompt, actual_search=True) + + if len(code_context) > 2999: + raise ValueError("codeContext must be less than 3000 characters") + + models = { + "gpt-4": "expert", + "gpt-3.5-turbo": "intermediate", + "gpt-3.5": "intermediate", + } + + json_data = { + "question": prompt, + "bingResults": results, # response.json()['rawBingResults'], + "codeContext": code_context, + "options": { + "skill": models[model], + "date": datetime.now().strftime("%d/%m/%Y"), + "language": language, + "detailed": detailed, + "creative": creative + } + } + + headers = { + "authority": "www.phind.com", + "accept": "*/*", + "accept-language": "en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3", + "content-type": "application/json", + "cookie": f"cf_clearance={cf_clearance}", + "origin": "https://www.phind.com", + "referer": "https://www.phind.com/search?q=hi&c=&source=searchbox&init=true", + "sec-ch-ua": '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "user-agent": user_agent + } + + completion = "" + response = post("https://www.phind.com/api/infer/answer", headers=headers, json=json_data, timeout=99999, + 
impersonate="chrome110") + for line in response.text.split("\r\n\r\n"): + completion += (line.replace("data: ", "")) + + return PhindResponse({ + "id": f"cmpl-1337-{int(time())}", + "object": "text_completion", + "created": int(time()), + "model": models[model], + "choices": [{ + "text": completion, + "index": 0, + "logprobs": None, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": len(prompt), + "completion_tokens": len(completion), + "total_tokens": len(prompt) + len(completion) + } + }) + + +class StreamingCompletion: + message_queue = Queue() + stream_completed = False + + @staticmethod + def request(model, prompt, results, creative, detailed, code_context, language) -> None: + + models = { + "gpt-4": "expert", + "gpt-3.5-turbo": "intermediate", + "gpt-3.5": "intermediate", + } + + json_data = { + "question": prompt, + "bingResults": results, + "codeContext": code_context, + "options": { + "skill": models[model], + "date": datetime.now().strftime("%d/%m/%Y"), + "language": language, + "detailed": detailed, + "creative": creative + } + } + + headers = { + "authority": "www.phind.com", + "accept": "*/*", + "accept-language": "en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3", + "content-type": "application/json", + "cookie": f"cf_clearance={cf_clearance}", + "origin": "https://www.phind.com", + "referer": "https://www.phind.com/search?q=hi&c=&source=searchbox&init=true", + "sec-ch-ua": '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "user-agent": user_agent + } + + post("https://www.phind.com/api/infer/answer", + headers=headers, json=json_data, timeout=99999, impersonate="chrome110", + content_callback=StreamingCompletion.handle_stream_response) + + StreamingCompletion.stream_completed = True + + @staticmethod + def create( + model: str = "gpt-4", 
+ prompt: str = "", + results: dict = None, + creative: bool = False, + detailed: bool = False, + code_context: str = "", + language: str = "en"): + + if user_agent == "": + raise ValueError("user_agent must be set, refer to documentation") + + if results is None: + results = Search.create(prompt, actual_search=True) + + if len(code_context) > 2999: + raise ValueError("codeContext must be less than 3000 characters") + + Thread(target=StreamingCompletion.request, args=[ + model, prompt, results, creative, detailed, code_context, language]).start() + + while StreamingCompletion.stream_completed is not True or not StreamingCompletion.message_queue.empty(): + try: + chunk = StreamingCompletion.message_queue.get(timeout=0) + + if chunk == b"data: \r\ndata: \r\ndata: \r\n\r\n": + chunk = b"data: \n\n\r\n\r\n" + + chunk = chunk.decode() + + chunk = chunk.replace("data: \r\n\r\ndata: ", "data: \n") + chunk = chunk.replace("\r\ndata: \r\ndata: \r\n\r\n", "\n\n\r\n\r\n") + chunk = chunk.replace("data: ", "").replace("\r\n\r\n", "") + + yield PhindResponse({ + "id": f"cmpl-1337-{int(time())}", + "object": "text_completion", + "created": int(time()), + "model": model, + "choices": [{ + "text": chunk, + "index": 0, + "logprobs": None, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": len(prompt), + "completion_tokens": len(chunk), + "total_tokens": len(prompt) + len(chunk) + } + }) + + except Empty: + pass + + @staticmethod + def handle_stream_response(response): + StreamingCompletion.message_queue.put(response) diff --git a/ai_talks/src/utils/conversation.py b/ai_talks/src/utils/conversation.py index f789e11..b1eac13 100644 --- a/ai_talks/src/utils/conversation.py +++ b/ai_talks/src/utils/conversation.py @@ -5,11 +5,15 @@ from openai.error import InvalidRequestError, OpenAIError from requests.exceptions import TooManyRedirects from streamlit_chat import message +from .agi import phind from .agi.bard import BardChat from .agi.chat_gpt import create_gpt_completion 
 from .stt import show_voice_input
 from .tts import show_audio_player
 
+phind.cf_clearance = ""
+phind.user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"  # noqa: E501
+
 
 def clear_chat() -> None:
     st.session_state.generated = []
@@ -88,6 +92,21 @@ def show_bard_conversation() -> None:
         st.error(err)
 
 
+def phind_get_answer(question: str):
+    try:
+        result = phind.Completion.create(
+            model="gpt-4",
+            prompt=question,
+            results=phind.Search.create(question, actual_search=True),
+            creative=False,
+            detailed=False,
+            code_context=""
+        )
+        st.markdown(result.completion.choices[0].text)
+    except Exception as e:
+        st.error(e)
+
+
 def show_conversation() -> None:
     if st.session_state.messages:
         st.session_state.messages.append({"role": "user", "content": st.session_state.user_text})
@@ -99,5 +118,7 @@
     ]
     if st.session_state.model == "bard":
         show_bard_conversation()
+    elif st.session_state.model == "phind-gpt-4":
+        phind_get_answer(st.session_state.user_text)
     else:
         show_gpt_conversation()
diff --git a/requirements.txt b/requirements.txt
index 031b175..cfec1d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ gtts>=2.3.1
 bokeh==2.4.2
 streamlit-bokeh-events>=0.1.2
 watchdog>=3.0.0
+curl_cffi>=0.5.5