From 6e81fa844c2f7253001518cca57c83af8b430dfa Mon Sep 17 00:00:00 2001 From: s452624 Date: Mon, 10 Jun 2024 11:31:31 +0200 Subject: [PATCH] Data collection in a separate CSV file --- backend/backend.py | 108 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 21 deletions(-) diff --git a/backend/backend.py b/backend/backend.py index 66448e5..a5a11d2 100644 --- a/backend/backend.py +++ b/backend/backend.py @@ -14,6 +14,8 @@ import joblib import imaplib import email from email.header import decode_header +import base64 +import csv app = Flask(__name__) CORS(app) @@ -27,8 +29,10 @@ API_SERVICE_NAME = 'gmail' API_VERSION = 'v1' SAFE_EMAILS_FILE = 'safe_emails.json' +SAFE_TRASH_FILE = 'safe_trash.csv' # Load safe emails from file + def load_safe_emails(): if os.path.exists(SAFE_EMAILS_FILE): with open(SAFE_EMAILS_FILE, 'r') as file: @@ -36,10 +40,23 @@ def load_safe_emails(): return [] # Save safe emails to file + def save_safe_emails(safe_emails): with open(SAFE_EMAILS_FILE, 'w') as file: json.dump(safe_emails, file) +def save_safe_trash(safe_trash): + file_exists = os.path.isfile(SAFE_TRASH_FILE) + + with open(SAFE_TRASH_FILE, 'a', newline='', encoding='utf-8') as file: + fieldnames = ['from', 'subject', 'body'] + writer = csv.DictWriter(file, fieldnames=fieldnames) + + if not file_exists: + writer.writeheader() + + writer.writerow(safe_trash) + safe_emails = load_safe_emails() model = joblib.load('spam_classifier_model.pkl') @@ -60,6 +77,7 @@ def load_credentials(): return None @app.route('/authorize') + def authorize(): client_secrets = load_client_secrets() flow = google_auth_oauthlib.flow.Flow.from_client_secrets_file( @@ -86,7 +104,6 @@ def oauth2callback(): authorization_response = request.url app.logger.info(f"Authorization response: {authorization_response}") - try: flow.fetch_token(authorization_response=authorization_response) credentials = flow.credentials @@ -107,13 +124,12 @@ def oauth2callback(): app.logger.error(f"Exception: {str(e)}") app.logger.error("Traceback: " + traceback.format_exc()) return jsonify({'error': 'Exception', 'message': str(e)}), 500 - @app.route('/validate-outlook-login', methods=['POST']) + def validate_outlook_login(): data = request.json username = data['email'] password = data['password'] - try: mail = imaplib.IMAP4_SSL("outlook.office365.com") mail.login(username, password) @@ -125,11 +141,11 @@ def validate_outlook_login(): return jsonify({"success": False, "error": "Encoding error"}), 400 @app.route('/fetch-emails', methods=['POST']) + def fetch_emails(): data = request.json username = data['username'] password = data['password'] - try: mail = imaplib.IMAP4_SSL("outlook.office365.com") mail.login(username, password) @@ -138,14 +154,11 @@ def fetch_emails(): return jsonify({"error": "Login failed. Check your email and password."}), 401 except UnicodeEncodeError: return jsonify({"error": "Encoding error. Ensure your email and password contain only valid characters."}), 400 - status, messages = mail.search(None, "ALL") if status != "OK": return jsonify({"error": "Failed to retrieve emails."}), 500 - status, messages = mail.search(None, "ALL") email_ids = messages[0].split() - emails = [] for email_id in email_ids: @@ -168,7 +181,6 @@ def fetch_emails(): email_vectorized = vectorizer.transform([body]) prediction = model.predict(email_vectorized) result = True if prediction == 1 or contains_suspicious_links(body) or contains_phishing_indicators(subject, email_address,body) else False - emails.append({"id": email_id.decode(), "from": email_address, "subject": subject, @@ -179,6 +191,7 @@ def fetch_emails(): return jsonify(emails) @app.route('/check_auth_status', methods=['GET']) + def check_auth_status(): if 'credentials' in session: credentials = google.oauth2.credentials.Credentials(**session['credentials']) @@ -187,20 +200,24 @@ def check_auth_status(): return jsonify({'logged_in': False}) @app.route('/check_mail') + def check_mail(): if 'credentials' not in session: return redirect('authorize') - + credentials = google.oauth2.credentials.Credentials( **session['credentials']) - + gmail = googleapiclient.discovery.build( 'gmail', 'v1', credentials=credentials) - + results = gmail.users().messages().list(userId='me', labelIds=['INBOX']).execute() messages = results.get('messages', []) - + + + emails = [] + for message in messages: msg = gmail.users().messages().get(userId='me', id=message['id']).execute() snippet = msg.get('snippet', '') @@ -218,7 +235,6 @@ def check_mail(): prediction = model.predict(email_vectorized) if message['id'] not in safe_emails: result = True if prediction == 1 or contains_suspicious_links(snippet) or contains_phishing_indicators(subject, sender,snippet) else False - emails.append({ 'subject': subject, 'from': sender, @@ -227,32 +243,57 @@ def check_mail(): 'suspicious': result }) return jsonify(emails) - @app.route('/logout', methods=['POST']) + def logout(): session.clear() app.logger.info("Session cleared. User logged out.") return jsonify({'message': 'Logged out'}), 200 - @app.route('/mark_safe/', methods=['POST']) + def mark_safe(email_id): global safe_emails safe_emails.append(email_id) save_safe_emails(safe_emails) app.logger.info(f'Email {email_id} marked as safe') return jsonify({"message": f"Email {email_id} marked as safe"}), 200 - @app.route('/move_trash/', methods=['POST']) + def move_trash(email_id): if 'credentials' not in session: return jsonify({'message': 'Not logged in'}), 401 - credentials = google.oauth2.credentials.Credentials( **session['credentials']) gmail = googleapiclient.discovery.build( 'gmail', 'v1', credentials=credentials) - + try: + message = gmail.users().messages().get(userId='me', id=email_id).execute() + payload = message.get('payload', {}) + headers = payload.get('headers', []) + subject = '' + sender = '' + body = '' + + for header in headers: + if header['name'] == 'Subject': + subject = header['value'] + if header['name'] == 'From': + sender = header['value'] + + if 'parts' in payload: + for part in payload['parts']: + if part['mimeType'] == 'text/plain': + body = part['body']['data'] + body = base64.urlsafe_b64decode(body).decode('utf-8') + + safe_trash = { + "from": sender, + "subject": subject, + "body": body + } + save_safe_trash(safe_trash) + gmail.users().messages().modify( userId='me', id=email_id, @@ -263,29 +304,54 @@ def move_trash(email_id): except Exception as e: app.logger.error(f"Exception moving email to trash: {str(e)}") return jsonify({"error": str(e)}), 500 - + @app.route('/delete-email', methods=['POST']) + def delete_email(): data = request.json email_id = data['email_id'] username = data['username'] password = data['password'] - + try: mail = imaplib.IMAP4_SSL("outlook.office365.com") mail.login(username, password) mail.select("inbox") + status, message_data = mail.fetch(email_id, "(RFC822)") + + if status != "OK": + return jsonify({"error": "Failed to fetch email"}), 500 + + msg = email.message_from_bytes(message_data[0][1]) + subject, encoding = decode_header(msg["Subject"])[0] + if isinstance(subject, bytes): + subject = subject.decode(encoding if encoding else "utf-8") + from_ = msg.get("From") + name, email_address = email.utils.parseaddr(from_) + body = "" + if msg.is_multipart(): + for part in msg.walk(): + if part.get_content_type() == "text/plain" and part.get("Content-Disposition") is None: + body += part.get_payload(decode=True).decode(part.get_content_charset() or "utf-8") + else: + body = msg.get_payload(decode=True).decode(msg.get_content_charset() or "utf-8") + safe_trash = { + "from": from_, + "subject": subject, + "body": body + } + save_safe_trash(safe_trash) mail.store(email_id, '+FLAGS', '\\Deleted') mail.expunge() return jsonify({"message": f"Email {email_id} deleted"}) except imaplib.IMAP4.error: return jsonify({"error": "Failed to delete email"}), 500 - def contains_suspicious_links(snippet): url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+') urls = url_pattern.findall(snippet) suspicious_domains = ['phishingsite.com', 'malicious.com'] + for url in urls: for domain in suspicious_domains: if domain in url: