From d65cc5fef12531aa95e5c6c001fef4db7eb25a5b Mon Sep 17 00:00:00 2001 From: if Date: Wed, 26 Apr 2023 01:27:00 +0300 Subject: [PATCH] init phind --- ai_talks/chat.py | 1 + ai_talks/src/utils/agi/phind.py | 293 +++++++++++++++++++++++++++++ ai_talks/src/utils/conversation.py | 21 +++ requirements.txt | 1 + 4 files changed, 316 insertions(+) create mode 100644 ai_talks/src/utils/agi/phind.py diff --git a/ai_talks/chat.py b/ai_talks/chat.py index 0496b2d..0dbbaf0 100644 --- a/ai_talks/chat.py +++ b/ai_talks/chat.py @@ -27,6 +27,7 @@ AI_MODEL_OPTIONS: list[str] = [ "gpt-4", "gpt-4-32k", "bard", + "phind-gpt-4", ] st.set_page_config(page_title=PAGE_TITLE, page_icon=PAGE_ICON) diff --git a/ai_talks/src/utils/agi/phind.py b/ai_talks/src/utils/agi/phind.py new file mode 100644 index 0000000..20a7e5d --- /dev/null +++ b/ai_talks/src/utils/agi/phind.py @@ -0,0 +1,293 @@ +from datetime import datetime +from queue import Empty, Queue +from threading import Thread +from time import time +from urllib.parse import quote + +from curl_cffi.requests import post + +cf_clearance = "" +user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36" # noqa: E501 + + +class PhindResponse: + class Completion: + class Choices: + def __init__(self, choice: dict) -> None: + self.text = choice["text"] + self.content = self.text.encode() + self.index = choice["index"] + self.logprobs = choice["logprobs"] + self.finish_reason = choice["finish_reason"] + + def __repr__(self) -> str: + return f"""<__main__.APIResponse.Completion.Choices(\n text = {self.text.encode()},\n index = {self.index},\n logprobs = {self.logprobs},\n finish_reason = {self.finish_reason})object at 0x1337>""" # noqa: E501 + + def __init__(self, choices: dict) -> None: + self.choices = [self.Choices(choice) for choice in choices] + + class Usage: + def __init__(self, usage_dict: dict) -> None: + self.prompt_tokens = usage_dict["prompt_tokens"] + 
self.completion_tokens = usage_dict["completion_tokens"] + self.total_tokens = usage_dict["total_tokens"] + + def __repr__(self): + return f"""<__main__.APIResponse.Usage(\n prompt_tokens = {self.prompt_tokens},\n completion_tokens = {self.completion_tokens},\n total_tokens = {self.total_tokens})object at 0x1337>""" # noqa: E501 + + def __init__(self, response_dict: dict) -> None: + self.response_dict = response_dict + self.id = response_dict["id"] + self.object = response_dict["object"] + self.created = response_dict["created"] + self.model = response_dict["model"] + self.completion = self.Completion(response_dict["choices"]) + self.usage = self.Usage(response_dict["usage"]) + + def json(self) -> dict: + return self.response_dict + + +class Search: + @staticmethod + def create(prompt: str, actual_search: bool = True, language: str = "en") -> dict: # None = no search + if user_agent == "": + raise ValueError("user_agent must be set, refer to documentation") + + if not actual_search: + return { + "_type": "SearchResponse", + "queryContext": { + "originalQuery": prompt + }, + "webPages": { + "webSearchUrl": f"https://www.bing.com/search?q={quote(prompt)}", + "totalEstimatedMatches": 0, + "value": [] + }, + "rankingResponse": { + "mainline": { + "items": [] + } + } + } + + headers = { + "authority": "www.phind.com", + "accept": "*/*", + "accept-language": "en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3", + "cookie": f"cf_clearance={cf_clearance}", + "origin": "https://www.phind.com", + "referer": "https://www.phind.com/search?q=hi&c=&source=searchbox&init=true", + "sec-ch-ua": '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "user-agent": user_agent + } + + res = post("https://www.phind.com/api/bing/search", headers=headers, json={ + "q": prompt, + "userRankList": 
{}, + "browserLanguage": language}) + import streamlit as st + st.markdown(res.content) + st.code(res.status_code) + st.warning(res.dict) + return post("https://www.phind.com/api/bing/search", headers=headers, json={ + "q": prompt, + "userRankList": {}, + "browserLanguage": language}).json()["rawBingResults"] + + +class Completion: + @staticmethod + def create( + model="gpt-4", + prompt: str = "", + results: dict = None, + creative: bool = False, + detailed: bool = False, + code_context: str = "", + language: str = "en") -> PhindResponse: + + if user_agent == "": + raise ValueError("user_agent must be set, refer to documentation") + + if results is None: + results = Search.create(prompt, actual_search=True) + + if len(code_context) > 2999: + raise ValueError("codeContext must be less than 3000 characters") + + models = { + "gpt-4": "expert", + "gpt-3.5-turbo": "intermediate", + "gpt-3.5": "intermediate", + } + + json_data = { + "question": prompt, + "bingResults": results, # response.json()['rawBingResults'], + "codeContext": code_context, + "options": { + "skill": models[model], + "date": datetime.now().strftime("%d/%m/%Y"), + "language": language, + "detailed": detailed, + "creative": creative + } + } + + headers = { + "authority": "www.phind.com", + "accept": "*/*", + "accept-language": "en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3", + "content-type": "application/json", + "cookie": f"cf_clearance={cf_clearance}", + "origin": "https://www.phind.com", + "referer": "https://www.phind.com/search?q=hi&c=&source=searchbox&init=true", + "sec-ch-ua": '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "user-agent": user_agent + } + + completion = "" + response = post("https://www.phind.com/api/infer/answer", headers=headers, json=json_data, timeout=99999, + 
impersonate="chrome110") + for line in response.text.split("\r\n\r\n"): + completion += (line.replace("data: ", "")) + + return PhindResponse({ + "id": f"cmpl-1337-{int(time())}", + "object": "text_completion", + "created": int(time()), + "model": models[model], + "choices": [{ + "text": completion, + "index": 0, + "logprobs": None, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": len(prompt), + "completion_tokens": len(completion), + "total_tokens": len(prompt) + len(completion) + } + }) + + +class StreamingCompletion: + message_queue = Queue() + stream_completed = False + + @staticmethod + def request(model, prompt, results, creative, detailed, code_context, language) -> None: + + models = { + "gpt-4": "expert", + "gpt-3.5-turbo": "intermediate", + "gpt-3.5": "intermediate", + } + + json_data = { + "question": prompt, + "bingResults": results, + "codeContext": code_context, + "options": { + "skill": models[model], + "date": datetime.now().strftime("%d/%m/%Y"), + "language": language, + "detailed": detailed, + "creative": creative + } + } + + headers = { + "authority": "www.phind.com", + "accept": "*/*", + "accept-language": "en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3", + "content-type": "application/json", + "cookie": f"cf_clearance={cf_clearance}", + "origin": "https://www.phind.com", + "referer": "https://www.phind.com/search?q=hi&c=&source=searchbox&init=true", + "sec-ch-ua": '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"', + "sec-ch-ua-mobile": "?0", + "sec-ch-ua-platform": '"macOS"', + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + "user-agent": user_agent + } + + post("https://www.phind.com/api/infer/answer", + headers=headers, json=json_data, timeout=99999, impersonate="chrome110", + content_callback=StreamingCompletion.handle_stream_response) + + StreamingCompletion.stream_completed = True + + @staticmethod + def create( + model: str = "gpt-4", 
+ prompt: str = "", + results: dict = None, + creative: bool = False, + detailed: bool = False, + code_context: str = "", + language: str = "en"): + + if user_agent == "": + raise ValueError("user_agent must be set, refer to documentation") + + if results is None: + results = Search.create(prompt, actual_search=True) + + if len(code_context) > 2999: + raise ValueError("codeContext must be less than 3000 characters") + + Thread(target=StreamingCompletion.request, args=[ + model, prompt, results, creative, detailed, code_context, language]).start() + + while StreamingCompletion.stream_completed is not True or not StreamingCompletion.message_queue.empty(): + try: + chunk = StreamingCompletion.message_queue.get(timeout=0) + + if chunk == b"data: \r\ndata: \r\ndata: \r\n\r\n": + chunk = b"data: \n\n\r\n\r\n" + + chunk = chunk.decode() + + chunk = chunk.replace("data: \r\n\r\ndata: ", "data: \n") + chunk = chunk.replace("\r\ndata: \r\ndata: \r\n\r\n", "\n\n\r\n\r\n") + chunk = chunk.replace("data: ", "").replace("\r\n\r\n", "") + + yield PhindResponse({ + "id": f"cmpl-1337-{int(time())}", + "object": "text_completion", + "created": int(time()), + "model": model, + "choices": [{ + "text": chunk, + "index": 0, + "logprobs": None, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": len(prompt), + "completion_tokens": len(chunk), + "total_tokens": len(prompt) + len(chunk) + } + }) + + except Empty: + pass + + @staticmethod + def handle_stream_response(response): + StreamingCompletion.message_queue.put(response) diff --git a/ai_talks/src/utils/conversation.py b/ai_talks/src/utils/conversation.py index f789e11..b1eac13 100644 --- a/ai_talks/src/utils/conversation.py +++ b/ai_talks/src/utils/conversation.py @@ -5,11 +5,15 @@ from openai.error import InvalidRequestError, OpenAIError from requests.exceptions import TooManyRedirects from streamlit_chat import message +from .agi import phind from .agi.bard import BardChat from .agi.chat_gpt import create_gpt_completion 
 from .stt import show_voice_input
 from .tts import show_audio_player
 
+phind.cf_clearance = ""
+phind.user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"  # noqa: E501
+
 
 def clear_chat() -> None:
     st.session_state.generated = []
@@ -88,6 +92,21 @@ def show_bard_conversation() -> None:
         st.error(err)
 
 
+def phind_get_answer(question: str):
+    try:
+        result = phind.Completion.create(
+            model="gpt-4",
+            prompt=question,
+            results=phind.Search.create(question, actual_search=True),
+            creative=False,
+            detailed=False,
+            code_context=""
+        )
+        st.markdown(result.completion.choices[0].text)
+    except Exception as e:
+        st.error(e)
+
+
 def show_conversation() -> None:
     if st.session_state.messages:
         st.session_state.messages.append({"role": "user", "content": st.session_state.user_text})
@@ -99,5 +118,7 @@
     ]
     if st.session_state.model == "bard":
         show_bard_conversation()
+    elif st.session_state.model == "phind-gpt-4":
+        phind_get_answer(st.session_state.user_text)
     else:
         show_gpt_conversation()
diff --git a/requirements.txt b/requirements.txt
index 031b175..cfec1d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,4 @@ gtts>=2.3.1
 bokeh==2.4.2
 streamlit-bokeh-events>=0.1.2
 watchdog>=3.0.0
+curl_cffi>=0.5.5