Data collection in a separate CSV file

This commit is contained in:
s452624 2024-06-10 11:31:31 +02:00
parent a84edc687c
commit 6e81fa844c

View File

@ -14,6 +14,8 @@ import joblib
import imaplib import imaplib
import email import email
from email.header import decode_header from email.header import decode_header
import base64
import csv
app = Flask(__name__) app = Flask(__name__)
CORS(app) CORS(app)
@ -27,8 +29,10 @@ API_SERVICE_NAME = 'gmail'
API_VERSION = 'v1' API_VERSION = 'v1'
SAFE_EMAILS_FILE = 'safe_emails.json' SAFE_EMAILS_FILE = 'safe_emails.json'
SAFE_TRASH_FILE = 'safe_trash.csv'
# Load safe emails from file # Load safe emails from file
def load_safe_emails(): def load_safe_emails():
if os.path.exists(SAFE_EMAILS_FILE): if os.path.exists(SAFE_EMAILS_FILE):
with open(SAFE_EMAILS_FILE, 'r') as file: with open(SAFE_EMAILS_FILE, 'r') as file:
@ -36,10 +40,23 @@ def load_safe_emails():
return [] return []
def save_safe_emails(safe_emails):
    """Save safe emails to file.

    Serializes *safe_emails* as JSON into ``SAFE_EMAILS_FILE``,
    overwriting whatever the file held before.
    """
    with open(SAFE_EMAILS_FILE, 'w') as out_file:
        json.dump(safe_emails, out_file)
def save_safe_trash(safe_trash):
    """Append one trashed email as a row of ``SAFE_TRASH_FILE`` (CSV).

    Args:
        safe_trash: Mapping with exactly the keys ``'from'``, ``'subject'``
            and ``'body'``; ``csv.DictWriter`` raises ``ValueError`` on any
            extra key.

    The header row is written whenever the file is empty at the moment it is
    opened -- this covers both a file that did not exist yet and one that
    exists but has no content.
    """
    fieldnames = ['from', 'subject', 'body']
    with open(SAFE_TRASH_FILE, 'a', newline='', encoding='utf-8') as file:
        # In append mode the stream starts at end-of-file, so position 0
        # means "nothing written yet". Checking after the open (instead of
        # os.path.isfile before it) also removes the check-then-open race.
        needs_header = file.tell() == 0
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        if needs_header:
            writer.writeheader()
        writer.writerow(safe_trash)
safe_emails = load_safe_emails() safe_emails = load_safe_emails()
model = joblib.load('spam_classifier_model.pkl') model = joblib.load('spam_classifier_model.pkl')
@ -60,6 +77,7 @@ def load_credentials():
return None return None
@app.route('/authorize') @app.route('/authorize')
def authorize(): def authorize():
client_secrets = load_client_secrets() client_secrets = load_client_secrets()
flow = google_auth_oauthlib.flow.Flow.from_client_secrets_file( flow = google_auth_oauthlib.flow.Flow.from_client_secrets_file(
@ -86,7 +104,6 @@ def oauth2callback():
authorization_response = request.url authorization_response = request.url
app.logger.info(f"Authorization response: {authorization_response}") app.logger.info(f"Authorization response: {authorization_response}")
try: try:
flow.fetch_token(authorization_response=authorization_response) flow.fetch_token(authorization_response=authorization_response)
credentials = flow.credentials credentials = flow.credentials
@ -107,13 +124,12 @@ def oauth2callback():
app.logger.error(f"Exception: {str(e)}") app.logger.error(f"Exception: {str(e)}")
app.logger.error("Traceback: " + traceback.format_exc()) app.logger.error("Traceback: " + traceback.format_exc())
return jsonify({'error': 'Exception', 'message': str(e)}), 500 return jsonify({'error': 'Exception', 'message': str(e)}), 500
@app.route('/validate-outlook-login', methods=['POST']) @app.route('/validate-outlook-login', methods=['POST'])
def validate_outlook_login(): def validate_outlook_login():
data = request.json data = request.json
username = data['email'] username = data['email']
password = data['password'] password = data['password']
try: try:
mail = imaplib.IMAP4_SSL("outlook.office365.com") mail = imaplib.IMAP4_SSL("outlook.office365.com")
mail.login(username, password) mail.login(username, password)
@ -125,11 +141,11 @@ def validate_outlook_login():
return jsonify({"success": False, "error": "Encoding error"}), 400 return jsonify({"success": False, "error": "Encoding error"}), 400
@app.route('/fetch-emails', methods=['POST']) @app.route('/fetch-emails', methods=['POST'])
def fetch_emails(): def fetch_emails():
data = request.json data = request.json
username = data['username'] username = data['username']
password = data['password'] password = data['password']
try: try:
mail = imaplib.IMAP4_SSL("outlook.office365.com") mail = imaplib.IMAP4_SSL("outlook.office365.com")
mail.login(username, password) mail.login(username, password)
@ -138,14 +154,11 @@ def fetch_emails():
return jsonify({"error": "Login failed. Check your email and password."}), 401 return jsonify({"error": "Login failed. Check your email and password."}), 401
except UnicodeEncodeError: except UnicodeEncodeError:
return jsonify({"error": "Encoding error. Ensure your email and password contain only valid characters."}), 400 return jsonify({"error": "Encoding error. Ensure your email and password contain only valid characters."}), 400
status, messages = mail.search(None, "ALL") status, messages = mail.search(None, "ALL")
if status != "OK": if status != "OK":
return jsonify({"error": "Failed to retrieve emails."}), 500 return jsonify({"error": "Failed to retrieve emails."}), 500
status, messages = mail.search(None, "ALL") status, messages = mail.search(None, "ALL")
email_ids = messages[0].split() email_ids = messages[0].split()
emails = [] emails = []
for email_id in email_ids: for email_id in email_ids:
@ -168,7 +181,6 @@ def fetch_emails():
email_vectorized = vectorizer.transform([body]) email_vectorized = vectorizer.transform([body])
prediction = model.predict(email_vectorized) prediction = model.predict(email_vectorized)
result = True if prediction == 1 or contains_suspicious_links(body) or contains_phishing_indicators(subject, email_address,body) else False result = True if prediction == 1 or contains_suspicious_links(body) or contains_phishing_indicators(subject, email_address,body) else False
emails.append({"id": email_id.decode(), emails.append({"id": email_id.decode(),
"from": email_address, "from": email_address,
"subject": subject, "subject": subject,
@ -179,6 +191,7 @@ def fetch_emails():
return jsonify(emails) return jsonify(emails)
@app.route('/check_auth_status', methods=['GET']) @app.route('/check_auth_status', methods=['GET'])
def check_auth_status(): def check_auth_status():
if 'credentials' in session: if 'credentials' in session:
credentials = google.oauth2.credentials.Credentials(**session['credentials']) credentials = google.oauth2.credentials.Credentials(**session['credentials'])
@ -187,20 +200,24 @@ def check_auth_status():
return jsonify({'logged_in': False}) return jsonify({'logged_in': False})
@app.route('/check_mail') @app.route('/check_mail')
def check_mail(): def check_mail():
if 'credentials' not in session: if 'credentials' not in session:
return redirect('authorize') return redirect('authorize')
credentials = google.oauth2.credentials.Credentials( credentials = google.oauth2.credentials.Credentials(
**session['credentials']) **session['credentials'])
gmail = googleapiclient.discovery.build( gmail = googleapiclient.discovery.build(
'gmail', 'v1', credentials=credentials) 'gmail', 'v1', credentials=credentials)
results = gmail.users().messages().list(userId='me', labelIds=['INBOX']).execute() results = gmail.users().messages().list(userId='me', labelIds=['INBOX']).execute()
messages = results.get('messages', []) messages = results.get('messages', [])
emails = [] emails = []
for message in messages: for message in messages:
msg = gmail.users().messages().get(userId='me', id=message['id']).execute() msg = gmail.users().messages().get(userId='me', id=message['id']).execute()
snippet = msg.get('snippet', '') snippet = msg.get('snippet', '')
@ -218,7 +235,6 @@ def check_mail():
prediction = model.predict(email_vectorized) prediction = model.predict(email_vectorized)
if message['id'] not in safe_emails: if message['id'] not in safe_emails:
result = True if prediction == 1 or contains_suspicious_links(snippet) or contains_phishing_indicators(subject, sender,snippet) else False result = True if prediction == 1 or contains_suspicious_links(snippet) or contains_phishing_indicators(subject, sender,snippet) else False
emails.append({ emails.append({
'subject': subject, 'subject': subject,
'from': sender, 'from': sender,
@ -227,32 +243,57 @@ def check_mail():
'suspicious': result 'suspicious': result
}) })
return jsonify(emails) return jsonify(emails)
@app.route('/logout', methods=['POST'])
def logout():
    """Clear the current session and confirm the logout.

    Returns:
        A JSON body ``{'message': 'Logged out'}`` with HTTP status 200.
    """
    session.clear()
    app.logger.info("Session cleared. User logged out.")
    confirmation = jsonify({'message': 'Logged out'})
    return confirmation, 200
@app.route('/mark_safe/<email_id>', methods=['POST'])
def mark_safe(email_id):
    """Record *email_id* in the safe list and persist the list.

    Args:
        email_id: Message id taken from the URL path.

    Returns:
        A JSON confirmation message with HTTP status 200. The response is
        the same whether or not the id was already on the list.
    """
    # The module-level list is mutated in place, never rebound, so no
    # `global` statement is required.
    if email_id not in safe_emails:
        # Skip ids that are already present so repeated requests do not
        # pile up duplicate entries or rewrite the file needlessly.
        safe_emails.append(email_id)
        save_safe_emails(safe_emails)
    app.logger.info(f'Email {email_id} marked as safe')
    return jsonify({"message": f"Email {email_id} marked as safe"}), 200
@app.route('/move_trash/<email_id>', methods=['POST']) @app.route('/move_trash/<email_id>', methods=['POST'])
def move_trash(email_id): def move_trash(email_id):
if 'credentials' not in session: if 'credentials' not in session:
return jsonify({'message': 'Not logged in'}), 401 return jsonify({'message': 'Not logged in'}), 401
credentials = google.oauth2.credentials.Credentials( credentials = google.oauth2.credentials.Credentials(
**session['credentials']) **session['credentials'])
gmail = googleapiclient.discovery.build( gmail = googleapiclient.discovery.build(
'gmail', 'v1', credentials=credentials) 'gmail', 'v1', credentials=credentials)
try: try:
message = gmail.users().messages().get(userId='me', id=email_id).execute()
payload = message.get('payload', {})
headers = payload.get('headers', [])
subject = ''
sender = ''
body = ''
for header in headers:
if header['name'] == 'Subject':
subject = header['value']
if header['name'] == 'From':
sender = header['value']
if 'parts' in payload:
for part in payload['parts']:
if part['mimeType'] == 'text/plain':
body = part['body']['data']
body = base64.urlsafe_b64decode(body).decode('utf-8')
safe_trash = {
"from": sender,
"subject": subject,
"body": body
}
save_safe_trash(safe_trash)
gmail.users().messages().modify( gmail.users().messages().modify(
userId='me', userId='me',
id=email_id, id=email_id,
@ -263,29 +304,54 @@ def move_trash(email_id):
except Exception as e: except Exception as e:
app.logger.error(f"Exception moving email to trash: {str(e)}") app.logger.error(f"Exception moving email to trash: {str(e)}")
return jsonify({"error": str(e)}), 500 return jsonify({"error": str(e)}), 500
def _decode_mail_bytes(raw, charset):
    """Decode *raw* bytes with *charset*, never raising on bad input.

    ``None`` yields an empty string, undecodable bytes are replaced, and a
    charset label Python does not know falls back to UTF-8.
    """
    if raw is None:
        return ""
    try:
        return raw.decode(charset or "utf-8", errors="replace")
    except LookupError:
        return raw.decode("utf-8", errors="replace")


def _decode_mail_subject(raw_subject):
    """Return a Subject header as text, joining all of its decoded chunks."""
    if raw_subject is None:
        return ""
    pieces = []
    for chunk, charset in decode_header(raw_subject):
        if isinstance(chunk, bytes):
            chunk = _decode_mail_bytes(chunk, charset)
        pieces.append(chunk)
    return "".join(pieces)


@app.route('/delete-email', methods=['POST'])
def delete_email():
    """Archive an inbox message to the trash CSV, then delete it over IMAP.

    Reads ``email_id``, ``username`` and ``password`` from the JSON request
    body, logs in to ``outlook.office365.com``, fetches the message, stores
    its sender, subject and plain-text body via ``save_safe_trash`` and
    finally flags the message ``\\Deleted`` and expunges the mailbox.

    Returns:
        ``{"message": ...}`` on success, or ``{"error": ...}`` with status
        500 when the fetch fails or imaplib reports an error.
    """
    data = request.json
    email_id = data['email_id']
    username = data['username']
    password = data['password']

    mail = None
    try:
        mail = imaplib.IMAP4_SSL("outlook.office365.com")
        mail.login(username, password)
        mail.select("inbox")

        status, message_data = mail.fetch(email_id, "(RFC822)")
        # A fetch can answer "OK" without carrying a (header, bytes) tuple,
        # e.g. when the id matches nothing; treat that as a failed fetch.
        if (status != "OK" or not message_data
                or not isinstance(message_data[0], tuple)):
            return jsonify({"error": "Failed to fetch email"}), 500

        msg = email.message_from_bytes(message_data[0][1])
        subject = _decode_mail_subject(msg["Subject"])
        from_ = msg.get("From")

        if msg.is_multipart():
            # Concatenate every inline text/plain part; parts carrying a
            # Content-Disposition header (attachments) are skipped.
            body = "".join(
                _decode_mail_bytes(part.get_payload(decode=True),
                                   part.get_content_charset())
                for part in msg.walk()
                if part.get_content_type() == "text/plain"
                and part.get("Content-Disposition") is None
            )
        else:
            body = _decode_mail_bytes(msg.get_payload(decode=True),
                                      msg.get_content_charset())

        save_safe_trash({
            "from": from_,
            "subject": subject,
            "body": body
        })

        mail.store(email_id, '+FLAGS', '\\Deleted')
        mail.expunge()
        return jsonify({"message": f"Email {email_id} deleted"})
    except imaplib.IMAP4.error:
        return jsonify({"error": "Failed to delete email"}), 500
    finally:
        # Always release the server connection, whichever path returned.
        if mail is not None:
            try:
                mail.logout()
            except (imaplib.IMAP4.error, OSError):
                # Best-effort cleanup: the response is already decided and
                # a failing logout must not replace it.
                pass
def contains_suspicious_links(snippet): def contains_suspicious_links(snippet):
url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+') url_pattern = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
urls = url_pattern.findall(snippet) urls = url_pattern.findall(snippet)
suspicious_domains = ['phishingsite.com', 'malicious.com'] suspicious_domains = ['phishingsite.com', 'malicious.com']
for url in urls: for url in urls:
for domain in suspicious_domains: for domain in suspicious_domains:
if domain in url: if domain in url: