{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "UMA_projekt.ipynb", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "gpuClass": "standard" }, "cells": [ { "cell_type": "code", "source": [
 "import gc\n",
 "import time\n",
 "\n",
 "# Imported here so this cell does not depend on a later cell having run first.\n",
 "import torch\n",
 "\n",
 "# Timing utilities\n",
 "start_time = None\n",
 "\n",
 "\n",
 "def start_timer():\n",
 "    \"\"\"Free cached memory, reset CUDA peak-memory stats and mark the start of a timed section.\n",
 "\n",
 "    Stores the start timestamp in the module-level ``start_time``.\n",
 "    \"\"\"\n",
 "    global start_time\n",
 "    gc.collect()\n",
 "    torch.cuda.empty_cache()\n",
 "    # reset_max_memory_allocated() is deprecated; reset_peak_memory_stats() replaces it.\n",
 "    torch.cuda.reset_peak_memory_stats()\n",
 "    # Wait for queued GPU work so it is not attributed to the timed section.\n",
 "    torch.cuda.synchronize()\n",
 "    # perf_counter() is monotonic, so the interval is immune to wall-clock adjustments.\n",
 "    start_time = time.perf_counter()\n",
 "\n",
 "\n",
 "def end_timer_and_print(local_msg):\n",
 "    \"\"\"Print ``local_msg``, the time elapsed since ``start_timer()`` and the peak tensor memory.\n",
 "\n",
 "    Args:\n",
 "        local_msg: Text printed (after a blank line) ahead of the measurements.\n",
 "\n",
 "    Raises:\n",
 "        RuntimeError: If ``start_timer()`` has not been called yet.\n",
 "    \"\"\"\n",
 "    if start_time is None:\n",
 "        raise RuntimeError(\"start_timer() must be called before end_timer_and_print()\")\n",
 "    # Make sure all GPU work launched inside the section has finished before stopping the clock.\n",
 "    torch.cuda.synchronize()\n",
 "    end_time = time.perf_counter()\n",
 "    print(\"\\n\" + local_msg)\n",
 "    print(\"Total execution time = {:.3f} sec\".format(end_time - start_time))\n",
 "    print(\"Max memory used by tensors = {} bytes\".format(torch.cuda.max_memory_allocated()))"
 ], "metadata": { "id": "tWf7BQXI3Epz" }, "execution_count": 232, "outputs": [] }, { "cell_type": "code", "execution_count": 233, "metadata": { "id": "OFdF8yc6z9QK", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "6b2863d8-cbd3-40c8-f356-c57efb696aae" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[name: \"/device:CPU:0\"\n", " device_type: \"CPU\"\n", " memory_limit: 268435456\n", " locality {\n", " }\n", " incarnation: 7116988186229065702\n", " xla_global_id: -1, name: \"/device:GPU:0\"\n", " device_type: \"GPU\"\n", " memory_limit: 14465892352\n", " locality {\n", " bus_id: 1\n", " links {\n", " }\n", " }\n", " incarnation: 10048785647988876421\n", " physical_device_desc: \"device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5\"\n", " xla_global_id: 416903419]" ] }, "metadata": {}, "execution_count": 233 } ], "source": [ "from tensorflow.python.client import device_lib\n", "device_lib.list_local_devices()" ] }, { "cell_type": "code", "source": [ "import pandas as pd\n", 
"import numpy as np\n", "from sklearn.datasets import fetch_20newsgroups\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.model_selection import train_test_split\n", "import torch\n", "import scipy" ], "metadata": { "id": "TIdeqZPs0aON" }, "execution_count": 234, "outputs": [] }, { "cell_type": "code", "source": [ "# !unzip real-or-fake-fake-jobposting-prediction.zip" ], "metadata": { "id": "Rf2cOL69qJ7D" }, "execution_count": 235, "outputs": [] }, { "cell_type": "code", "source": [
 "# Load the postings, keep only the text/label columns, shuffle, and drop incomplete rows.\n",
 "# Built as a single chain from the CSV so re-running this cell always yields the same frame.\n",
 "data = (\n",
 "    pd.read_csv('fake_job_postings.csv', engine='python')\n",
 "    [[\"company_profile\", \"fraudulent\"]]\n",
 "    # Fixed seed: without it every run shuffles differently and results are not reproducible.\n",
 "    .sample(frac=1, random_state=42)\n",
 "    # Drop rows where either selected column is missing.\n",
 "    .dropna()\n",
 ")"
 ], "metadata": { "id": "NO98S-QDsV6j" }, "execution_count": 236, "outputs": [] }, { "cell_type": "code", "source": [ "data" ], "metadata": { "id": "3Xd0Uvi4stMg", "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "outputId": "55e073ac-74f2-44f9-90de-d39094f84369" }, "execution_count": 237, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " company_profile fraudulent\n", "16503 At Hayes-Corp, we create the fun stuff. With ... 0\n", "16706 Tribal Worldwide Athens is a digitally centric... 0\n", "3364 About ECHOING GREEN: Echoing Green unleashes ... 0\n", "16856 Daily Secret is the fastest growing digital me... 0\n", "1566 ding* is the world’s largest top-up provider. ... 0\n", "... ... ...\n", "7607 Established on the principles that full time e... 0\n", "682 AGOGO creates a personalized audio channel by ... 0\n", "2759 We are a family run business that has been in ... 0\n", "5751 We have aggressive growth plans in place for t... 1\n", "3629 Want to build a 21st century financial service... 0\n", "\n", "[14572 rows x 2 columns]" ], "text/html": [ "\n", "
\n", " | company_profile | \n", "fraudulent | \n", "
---|---|---|
16503 | \n", "At Hayes-Corp, we create the fun stuff. With ... | \n", "0 | \n", "
16706 | \n", "Tribal Worldwide Athens is a digitally centric... | \n", "0 | \n", "
3364 | \n", "About ECHOING GREEN: Echoing Green unleashes ... | \n", "0 | \n", "
16856 | \n", "Daily Secret is the fastest growing digital me... | \n", "0 | \n", "
1566 | \n", "ding* is the world’s largest top-up provider. ... | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "
7607 | \n", "Established on the principles that full time e... | \n", "0 | \n", "
682 | \n", "AGOGO creates a personalized audio channel by ... | \n", "0 | \n", "
2759 | \n", "We are a family run business that has been in ... | \n", "0 | \n", "
5751 | \n", "We have aggressive growth plans in place for t... | \n", "1 | \n", "
3629 | \n", "Want to build a 21st century financial service... | \n", "0 | \n", "
14572 rows × 2 columns
\n", "