From 6e81fa844c2f7253001518cca57c83af8b430dfa Mon Sep 17 00:00:00 2001
From: s452624 <vlaser@st.amu.edu.pl>
Date: Mon, 10 Jun 2024 11:31:31 +0200
Subject: [PATCH] Data collection in a separate CSV file

---
 backend/backend.py | 108 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 87 insertions(+), 21 deletions(-)

diff --git a/backend/backend.py b/backend/backend.py
index 66448e5..a5a11d2 100644
--- a/backend/backend.py
+++ b/backend/backend.py
@@ -14,6 +14,8 @@ import joblib
 import imaplib
 import email
 from email.header import decode_header
+import base64
+import csv
 
 app = Flask(__name__)
 CORS(app)
@@ -27,8 +29,10 @@ API_SERVICE_NAME = 'gmail'
 API_VERSION = 'v1'
 
 SAFE_EMAILS_FILE = 'safe_emails.json'
+SAFE_TRASH_FILE = 'safe_trash.csv'
 
 # Load safe emails from file
+
 def load_safe_emails():
     if os.path.exists(SAFE_EMAILS_FILE):
         with open(SAFE_EMAILS_FILE, 'r') as file:
@@ -36,10 +40,23 @@ def load_safe_emails():
     return []
 
 # Save safe emails to file
+
 def save_safe_emails(safe_emails):
     with open(SAFE_EMAILS_FILE, 'w') as file:
         json.dump(safe_emails, file)
 
+def save_safe_trash(safe_trash):
+    """Append one trashed email to the SAFE_TRASH_FILE CSV data set.
+
+    safe_trash: dict holding the 'from', 'subject' and 'body' column values.
+    """
+    with open(SAFE_TRASH_FILE, 'a', newline='', encoding='utf-8') as file:
+        writer = csv.DictWriter(file, fieldnames=['from', 'subject', 'body'])
+        # Append mode starts at end of file: offset 0 means new or empty file.
+        if file.tell() == 0:
+            writer.writeheader()
+        writer.writerow(safe_trash)
+
 safe_emails = load_safe_emails()
 
 model = joblib.load('spam_classifier_model.pkl')
@@ -60,6 +77,7 @@ def load_credentials():
     return None
 
 @app.route('/authorize')
+
 def authorize():
     client_secrets = load_client_secrets()
     flow = google_auth_oauthlib.flow.Flow.from_client_secrets_file(
@@ -86,7 +104,6 @@ def oauth2callback():
 
     authorization_response = request.url
     app.logger.info(f"Authorization response: {authorization_response}")
-
     try:
         flow.fetch_token(authorization_response=authorization_response)
         credentials = flow.credentials
@@ -107,13 +124,12 @@ def oauth2callback():
         app.logger.error(f"Exception: {str(e)}")
         app.logger.error("Traceback: " + traceback.format_exc())
         return jsonify({'error': 'Exception', 'message': str(e)}), 500
-
 @app.route('/validate-outlook-login', methods=['POST'])
+
 def validate_outlook_login():
     data = request.json
     username = data['email']
     password = data['password']
-
     try:
         mail = imaplib.IMAP4_SSL("outlook.office365.com")
         mail.login(username, password)
@@ -125,11 +141,11 @@ def validate_outlook_login():
         return jsonify({"success": False, "error": "Encoding error"}), 400
     
 @app.route('/fetch-emails', methods=['POST'])
+
 def fetch_emails():
     data = request.json
     username = data['username']
     password = data['password']
-
     try:
         mail = imaplib.IMAP4_SSL("outlook.office365.com")
         mail.login(username, password)
@@ -138,14 +154,11 @@ def fetch_emails():
         return jsonify({"error": "Login failed. Check your email and password."}), 401
     except UnicodeEncodeError:
         return jsonify({"error": "Encoding error. Ensure your email and password contain only valid characters."}), 400
-
     status, messages = mail.search(None, "ALL")
     if status != "OK":
         return jsonify({"error": "Failed to retrieve emails."}), 500
-
     status, messages = mail.search(None, "ALL")
     email_ids = messages[0].split()
-
     emails = []
 
     for email_id in email_ids:
@@ -168,7 +181,6 @@ def fetch_emails():
                 email_vectorized = vectorizer.transform([body])
                 prediction = model.predict(email_vectorized)
                 result = True if prediction == 1 or contains_suspicious_links(body) or contains_phishing_indicators(subject,  email_address,body) else False
-
                 emails.append({"id": email_id.decode(), 
                                "from": email_address,
                                "subject": subject, 
@@ -179,6 +191,7 @@ def fetch_emails():
     return jsonify(emails)
 
 @app.route('/check_auth_status', methods=['GET'])
+
 def check_auth_status():
     if 'credentials' in session:
         credentials = google.oauth2.credentials.Credentials(**session['credentials'])
@@ -187,20 +200,24 @@ def check_auth_status():
     return jsonify({'logged_in': False})
 
 @app.route('/check_mail')
+
 def check_mail():
     if 'credentials' not in session:
         return redirect('authorize')
-
+    
     credentials = google.oauth2.credentials.Credentials(
         **session['credentials'])
-
+    
     gmail = googleapiclient.discovery.build(
         'gmail', 'v1', credentials=credentials)
-
+    
     results = gmail.users().messages().list(userId='me', labelIds=['INBOX']).execute()
     messages = results.get('messages', [])
-
+    
+    
+    
     emails = []
+
     for message in messages:
         msg = gmail.users().messages().get(userId='me', id=message['id']).execute()
         snippet = msg.get('snippet', '')
@@ -218,7 +235,6 @@ def check_mail():
         prediction = model.predict(email_vectorized)
         if message['id'] not in safe_emails:
             result = True if prediction == 1 or contains_suspicious_links(snippet) or contains_phishing_indicators(subject, sender,snippet) else False
-
         emails.append({
             'subject': subject,
             'from': sender,
@@ -227,32 +243,57 @@ def check_mail():
             'suspicious': result
         })
     return jsonify(emails)
-
 @app.route('/logout', methods=['POST'])
+
 def logout():
     session.clear()
     app.logger.info("Session cleared. User logged out.")
     return jsonify({'message': 'Logged out'}), 200
-
 @app.route('/mark_safe/<email_id>', methods=['POST'])
+
 def mark_safe(email_id):
     global safe_emails
     safe_emails.append(email_id)
     save_safe_emails(safe_emails)
     app.logger.info(f'Email {email_id} marked as safe')
     return jsonify({"message": f"Email {email_id} marked as safe"}), 200
-
 @app.route('/move_trash/<email_id>', methods=['POST'])
+
 def move_trash(email_id):
     if 'credentials' not in session:
         return jsonify({'message': 'Not logged in'}), 401
-
     credentials = google.oauth2.credentials.Credentials(
         **session['credentials'])
     gmail = googleapiclient.discovery.build(
         'gmail', 'v1', credentials=credentials)
-
+    
     try:
+        message = gmail.users().messages().get(userId='me', id=email_id).execute()
+        payload = message.get('payload', {})
+        # Header names are case-insensitive, so normalise them before the lookup.
+        headers = {h['name'].lower(): h['value'] for h in payload.get('headers', [])}
+        subject = headers.get('subject', '')
+        sender = headers.get('from', '')
+        # Walk the MIME tree: text/plain may sit on the top-level payload
+        # (no 'parts' key) or inside nested multipart containers.
+        body_chunks = []
+        pending = [payload]
+        while pending:
+            part = pending.pop(0)
+            pending[:0] = part.get('parts', [])
+            data = part.get('body', {}).get('data')
+            if part.get('mimeType') == 'text/plain' and data:
+                # Data is base64url; re-pad in case trailing '=' was stripped.
+                raw = base64.urlsafe_b64decode(data + '=' * (-len(data) % 4))
+                body_chunks.append(raw.decode('utf-8', errors='replace'))
+        body = ''.join(body_chunks)
+        safe_trash = {
+            "from": sender,
+            "subject": subject,
+            "body": body
+        }
+        save_safe_trash(safe_trash)
+        
         gmail.users().messages().modify(
             userId='me', 
             id=email_id, 
@@ -263,29 +304,54 @@ def move_trash(email_id):
     except Exception as e:
         app.logger.error(f"Exception moving email to trash: {str(e)}")
         return jsonify({"error": str(e)}), 500
-
+    
 @app.route('/delete-email', methods=['POST'])
+
 def delete_email():
     data = request.json
     email_id = data['email_id']
     username = data['username']
     password = data['password']
-    
+
     try:
         mail = imaplib.IMAP4_SSL("outlook.office365.com")
         mail.login(username, password)
         mail.select("inbox")
+        status, message_data = mail.fetch(email_id, "(RFC822)")
+        # An unknown id can still answer "OK" with [None]; treat that as a failure too.
+        if status != "OK" or not message_data or message_data[0] is None:
+            return jsonify({"error": "Failed to fetch email"}), 500
+
+        msg = email.message_from_bytes(message_data[0][1])
+        # The Subject header may be missing or split into several encoded fragments.
+        subject = ""
+        for fragment, encoding in decode_header(msg.get("Subject", "")):
+            if isinstance(fragment, bytes):
+                fragment = fragment.decode(encoding or "utf-8", errors="replace")
+            subject += fragment
+        from_ = msg.get("From", "")
+        # Multipart: keep inline text/plain parts; otherwise take the single payload as-is.
+        if msg.is_multipart():
+            parts = [part for part in msg.walk()
+                     if part.get_content_type() == "text/plain" and part.get("Content-Disposition") is None]
+        else:
+            parts = [msg]
+        payloads = [(part.get_payload(decode=True), part.get_content_charset()) for part in parts]
+        body = "".join(raw.decode(charset or "utf-8", errors="replace")
+                       for raw, charset in payloads if raw is not None)
+        safe_trash = {"from": from_, "subject": subject, "body": body}
+        save_safe_trash(safe_trash)
         mail.store(email_id, '+FLAGS', '\\Deleted')
         mail.expunge()
         return jsonify({"message": f"Email {email_id} deleted"})
     except imaplib.IMAP4.error:
         return jsonify({"error": "Failed to delete email"}), 500
 
-
 def contains_suspicious_links(snippet):
     url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
     urls = url_pattern.findall(snippet)
     suspicious_domains = ['phishingsite.com', 'malicious.com']
+
     for url in urls:
         for domain in suspicious_domains:
             if domain in url: