Compare commits

..

12 Commits

37 changed files with 1425 additions and 2 deletions

238
.gitignore vendored
View File

@ -518,4 +518,240 @@ FodyWeavers.xsd
### VisualStudio Patch ### ### VisualStudio Patch ###
# Additional files built by Visual Studio # Additional files built by Visual Studio
# End of https://www.toptal.com/developers/gitignore/api/visualstudio,aspnetcore # End of https://www.toptal.com/developers/gitignore/api/visualstudio,aspnetcore
### Flask ###
instance/*
!instance/.gitignore
.webassets-cache
.env
### Flask.Python Stack ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python ###
# Byte-compiled / optimized / DLL files
# C extensions
# Distribution / packaging
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
# Installer logs
# Unit test / coverage reports
# Translations
# Django stuff:
# Flask stuff:
# Scrapy stuff:
# Sphinx documentation
# PyBuilder
# Jupyter Notebook
# IPython
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
# Celery stuff
# SageMath parsed files
# Environments
# Spyder project settings
# Rope project settings
# mkdocs documentation
# mypy
# Pyre type checker
# pytype static type analyzer
# Cython debug symbols
# PyCharm
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# End of https://www.toptal.com/developers/gitignore/api/python,flask

View File

@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<PropertyGroup>
<TargetFramework>net5.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Newtonsoft.Json" Version="13.0.1" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="5.6.3" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,109 @@
using BitSearch.API.Models;
using Microsoft.AspNetCore.Mvc;
using Microsoft.AspNetCore.WebUtilities;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using Microsoft.Net.Http.Headers;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Net.Http.Json;
using System.Threading.Tasks;
namespace BitSearch.API.Controllers
{
[ApiController]
[Route("[controller]")]
public class SearchController : ControllerBase
{
private readonly ILogger<SearchController> _logger;
private readonly IConfiguration Configuration;
private readonly IHttpClientFactory _httpClientFactory;
public SearchController(ILogger<SearchController> logger, IConfiguration configuration, IHttpClientFactory httpClientFactory)
{
_logger = logger;
_httpClientFactory = httpClientFactory;
Configuration = configuration;
}
[HttpGet("ranking")]
public async Task<IActionResult> GetRanking()
{
var cryptoHash = new Dictionary<string, string>()
{
{"Bitcoin", "#btc"},
{"Etherum", "#eth"},
{"Tron", "#trx"},
{"Tether", "#usdt"},
{"Binance Coin", "#bnb"},
{"Cardano", "#ada"},
{"Dash", "#dash"},
{"Neo", "#neo"},
{"Steem", "#steem"},
{"Zcash", "#zec"}
};
var token = Configuration["Token"];
var twitterClient = _httpClientFactory.CreateClient();
twitterClient.BaseAddress = new Uri("https://api.twitter.com/2/tweets/counts/");
twitterClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token);
twitterClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
var ranking = new List<CryptoRanking>();
foreach (KeyValuePair<string, string> crypto in cryptoHash)
{
var requestUri = QueryHelpers.AddQueryString("recent", "query", crypto.Value);
var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
var response = await twitterClient.SendAsync(request);
if (response.IsSuccessStatusCode)
{
var tweetsDto = JsonConvert.DeserializeObject<TweetCount>(await response.Content.ReadAsStringAsync());
ranking.Add(new CryptoRanking() { Name = crypto.Key, TweetAmount = tweetsDto.meta.total_tweet_count });
}
}
return Ok(ranking.OrderByDescending(r => r.TweetAmount));
}
[HttpGet("analise/{hash}")]
public async Task<IActionResult> GetAnalysis(string hash)
{
var queryString = new Dictionary<string, string>()
{
{"query", $"#{hash}"},
{"max_results", "100"},
{"tweet.fields", "lang,referenced_tweets"},
};
var token = Configuration["Token"];
var twitterClient = _httpClientFactory.CreateClient();
twitterClient.BaseAddress = new Uri("https://api.twitter.com/2/tweets/search/");
var requestUri = QueryHelpers.AddQueryString("recent", queryString);
var twiiterRequest = new HttpRequestMessage(HttpMethod.Get, requestUri);
twitterClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token);
twitterClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
var response = await twitterClient.SendAsync(twiiterRequest);
if (response.IsSuccessStatusCode)
{
var tweetsDto = JsonConvert.DeserializeObject<TweeterResponse>(await response.Content.ReadAsStringAsync());
tweetsDto.data = tweetsDto.data.Where(d => d.lang == "en");
var microServiceClient = _httpClientFactory.CreateClient();
var microServiceRequest = new HttpRequestMessage(HttpMethod.Get, "http://127.0.0.1:5000/analysis");
microServiceRequest.Content = JsonContent.Create(tweetsDto);
var microServiceResponse = await microServiceClient.SendAsync(microServiceRequest);
return Ok(await microServiceResponse.Content.ReadAsStringAsync());
//return Ok(tweetsDto);
}
return Ok(response);
}
}
}

View File

@ -0,0 +1,12 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace BitSearch.API.Models
{
public class BaseTweetsDto<T>
{
public IEnumerable<T> data { get; set; }
}
}

View File

@ -0,0 +1,13 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace BitSearch.API.Models
{
public class CryptoRanking
{
public string Name { get; set; }
public int TweetAmount { get; set; }
}
}

View File

@ -0,0 +1,12 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace BitSearch.API.Models
{
public class MetaData
{
public int total_tweet_count { get; set; }
}
}

View File

@ -0,0 +1,13 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace BitSearch.API.Models
{
public class ReferencedTweets
{
public string type { get; set; }
public string id { get; set; }
}
}

View File

@ -0,0 +1,12 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace BitSearch.API.Models
{
public class TweetCount
{
public MetaData meta { get; set; }
}
}

View File

@ -0,0 +1,11 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace BitSearch.API.Models
{
public class TweeterResponse : BaseTweetsDto<TweetsDto>
{
}
}

View File

@ -0,0 +1,15 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace BitSearch.API.Models
{
public class TweetsDto
{
public string id { get; set; }
public string text { get; set; }
public string lang { get; set; }
public IEnumerable<ReferencedTweets> referenced_tweets { get; set; }
}
}

View File

@ -0,0 +1,26 @@
using Microsoft.AspNetCore.Hosting;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace BitSearch.API
{
public class Program
{
public static void Main(string[] args)
{
CreateHostBuilder(args).Build().Run();
}
public static IHostBuilder CreateHostBuilder(string[] args) =>
Host.CreateDefaultBuilder(args)
.ConfigureWebHostDefaults(webBuilder =>
{
webBuilder.UseStartup<Startup>();
});
}
}

View File

@ -0,0 +1,31 @@
{
"iisSettings": {
"windowsAuthentication": false,
"anonymousAuthentication": true,
"iisExpress": {
"applicationUrl": "http://localhost:62241",
"sslPort": 0
}
},
"$schema": "http://json.schemastore.org/launchsettings.json",
"profiles": {
"IIS Express": {
"commandName": "IISExpress",
"launchBrowser": true,
"launchUrl": "swagger",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
}
},
"BitSearch.API": {
"commandName": "Project",
"launchBrowser": true,
"launchUrl": "swagger",
"environmentVariables": {
"ASPNETCORE_ENVIRONMENT": "Development"
},
"dotnetRunMessages": "true",
"applicationUrl": "https://localhost:5004;http://localhost:5005"
}
}
}

View File

@ -0,0 +1,60 @@
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
using Microsoft.AspNetCore.HttpsPolicy;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Microsoft.OpenApi.Models;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace BitSearch.API
{
public class Startup
{
public Startup(IConfiguration configuration)
{
Configuration = configuration;
}
public IConfiguration Configuration { get; }
// This method gets called by the runtime. Use this method to add services to the container.
public void ConfigureServices(IServiceCollection services)
{
services.AddControllers();
services.AddSwaggerGen(c =>
{
c.SwaggerDoc("v1", new OpenApiInfo { Title = "BitSearch.API", Version = "v1" });
});
services.AddHttpClient();
}
// This method gets called by the runtime. Use this method to configure the HTTP request pipeline.
public void Configure(IApplicationBuilder app, IWebHostEnvironment env)
{
if (env.IsDevelopment())
{
app.UseDeveloperExceptionPage();
app.UseSwagger();
app.UseSwaggerUI(c => c.SwaggerEndpoint("/swagger/v1/swagger.json", "BitSearch.API v1"));
}
app.UseHttpsRedirection();
app.UseRouting();
app.UseAuthorization();
app.UseEndpoints(endpoints =>
{
endpoints.MapControllers();
});
}
}
}

View File

@ -0,0 +1,9 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information"
}
}
}

View File

@ -0,0 +1,12 @@
{
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft": "Warning",
"Microsoft.Hosting.Lifetime": "Information"
}
},
"AllowedHosts": "*",
"Token": "AAAAAAAAAAAAAAAAAAAAAJm8XwEAAAAAWyiq2ESxmQgVp5gCbvccbnLzfrE%3D1g4gSrKfNveU4E382aWh6JYkESNw09tsraYMFcdv6Wk87kQixh"
}

View File

@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net5.0</TargetFramework>
<IsPackable>false</IsPackable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="NUnit" Version="3.12.0" />
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.5.0" />
<PackageReference Include="Selenium.WebDriver" Version="4.1.0" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,70 @@
using NUnit.Framework;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
namespace BitSearch.Tests
{
public class Browser_ops
{
IWebDriver webDriver;
public void Init_Browser()
{
var options = new ChromeOptions();
options.AcceptInsecureCertificates = true;
webDriver = new ChromeDriver("C:\\Program Files (x86)\\Google\\Chrome\\Application", options);
webDriver.Manage().Window.Maximize();
}
public string Title
{
get { return webDriver.Title; }
}
public void Goto(string url)
{
webDriver.Url = url;
}
public void Close()
{
webDriver.Quit();
}
public IWebDriver getDriver
{
get { return webDriver; }
}
}
public class LoginTests
{
Browser_ops brow = new Browser_ops();
string test_url = "https://localhost:44371/Identity/Account/Login";
IWebDriver driver;
[SetUp]
public void Setup()
{
brow.Init_Browser();
driver = brow.getDriver;
}
[Test]
public void VerifyValidLogin()
{
brow.Goto(test_url);
System.Threading.Thread.Sleep(2000);
driver.FindElement(By.XPath("//*[@id='Input_Email']")).SendKeys("testowy@mail.com");
driver.FindElement(By.XPath("//*[@id='Input_Password']")).SendKeys("12#qwE");
driver.FindElement(By.XPath("//*[@id='login-submit']")).Click();
Assert.AreEqual(driver.Url, "https://localhost:44371/");
}
[Test]
public void VerifyInvalidLogin()
{
brow.Goto(test_url);
System.Threading.Thread.Sleep(2000);
driver.FindElement(By.XPath("//*[@id=\"Input_Email\"]")).SendKeys("wrong@mail.com");
driver.FindElement(By.XPath("//*[@id='Input_Password']")).SendKeys("invalid");
driver.FindElement(By.XPath("//*[@id='login-submit']")).Click();
Assert.AreEqual(driver.FindElement(By.XPath("//*[@id='account']/div[1]/ul/li")).Text, "Invalid login attempt.");
}
}
}

View File

@ -3,7 +3,11 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16 # Visual Studio Version 16
VisualStudioVersion = 16.0.31112.23 VisualStudioVersion = 16.0.31112.23
MinimumVisualStudioVersion = 10.0.40219.1 MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BitSearch", "BitSearch\BitSearch.csproj", "{F5F997CE-0BBE-478A-B7A3-828B0EAB8B99}" Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitSearch", "BitSearch\BitSearch.csproj", "{F5F997CE-0BBE-478A-B7A3-828B0EAB8B99}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BitSearch.Tests", "BitSearch.Tests\BitSearch.Tests.csproj", "{8B871554-1F3D-4828-9642-B191B499A80C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BitSearch.API", "BitSearch.API\BitSearch.API.csproj", "{08FC64E2-3128-4517-ADCA-D1BF8AF126F0}"
EndProject EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
@ -15,6 +19,14 @@ Global
{F5F997CE-0BBE-478A-B7A3-828B0EAB8B99}.Debug|Any CPU.Build.0 = Debug|Any CPU {F5F997CE-0BBE-478A-B7A3-828B0EAB8B99}.Debug|Any CPU.Build.0 = Debug|Any CPU
{F5F997CE-0BBE-478A-B7A3-828B0EAB8B99}.Release|Any CPU.ActiveCfg = Release|Any CPU {F5F997CE-0BBE-478A-B7A3-828B0EAB8B99}.Release|Any CPU.ActiveCfg = Release|Any CPU
{F5F997CE-0BBE-478A-B7A3-828B0EAB8B99}.Release|Any CPU.Build.0 = Release|Any CPU {F5F997CE-0BBE-478A-B7A3-828B0EAB8B99}.Release|Any CPU.Build.0 = Release|Any CPU
{8B871554-1F3D-4828-9642-B191B499A80C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8B871554-1F3D-4828-9642-B191B499A80C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8B871554-1F3D-4828-9642-B191B499A80C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8B871554-1F3D-4828-9642-B191B499A80C}.Release|Any CPU.Build.0 = Release|Any CPU
{08FC64E2-3128-4517-ADCA-D1BF8AF126F0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{08FC64E2-3128-4517-ADCA-D1BF8AF126F0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{08FC64E2-3128-4517-ADCA-D1BF8AF126F0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{08FC64E2-3128-4517-ADCA-D1BF8AF126F0}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

View File

@ -0,0 +1,51 @@
from flask import Flask, request
import flask.scaffold
flask.helpers._endpoint_from_view_func = flask.scaffold._endpoint_from_view_func
from flask_restful import Resource
import werkzeug
werkzeug.cached_property = werkzeug.utils.cached_property
from flask_restplus import Namespace, Resource, fields, Api
from flask import Blueprint
from twitter_pred import Predictor
app = Flask(__name__)
api = Api(app)
namespace = Namespace("test", "opis")
test_model = namespace.model("Test", {
'result': fields.String(
readonly = True,
description = 'test message'
)
})
predictor = Predictor()
@namespace.route('')
class Analysis(Resource):
@namespace.marshal_list_with(test_model)
@namespace.response(500, "error")
def get(self):
json = request.get_json()
data = predictor.anylise([tweet["text"] for tweet in json['data']])
return {"result": data}
blueprint = Blueprint('documented_api', __name__, url_prefix='/documented_api')
api_extension = Api(
blueprint,
title='Flask RESTplus Demo',
version='1.0',
description='Application tutorial to demonstrate Flask RESTplus extension\
for better project structure and auto generated documentation',
doc='/doc'
)
api_extension.add_namespace(namespace)
app.config['RESTPLUS_MASK_SWAGGER'] = False
app.register_blueprint(blueprint)
api.add_resource(Analysis, "/analysis")
if __name__ == "__main__":
app.run(debug=True)

Binary file not shown.

View File

@ -0,0 +1,76 @@
# %%
import pickle
import re
import json
# %%
class Predictor:
change_dict = {
# tokens
" username ": ['@\w+|@'],
" url ": ['http\S*'],
" emoji ": ["[;:][dbop\(\)\[\]]|[^\w][dbop\(\)\[\]][;:]|xd+|\S*&\S*", "[^\w\s,.?!:;#\'\"\(\)\$\-\+%\[\]\|]"],
" number ": ["[\+\-\$]?[\d]+[,\.\:k]?[\d]?[%]?"],
# standardization
', ': ['\s,'],
'. ': ['\s\.'],
' ': ['\s{2,}', '\n', '^rt[\s]+', '\s\:\s'],
"'": ["�"],
'?': ["\s\?"],
'!': ["\s\!"],
'".': ["\s\"\."],
'",': ["\s\"\,"],
'" ': ["\s\"\s"]
}
def clean_lines(self, line, change_dict):
line = str(line).lower()
for change_to, change_regex_list in change_dict.items():
for change_regex in change_regex_list:
line = re.sub(change_regex, change_to, line)
return line
def get_rep_idx_to_cut_out_from_str(self, line):
occurence = 0
idx_to_cut = []
for idx, letter in enumerate(line):
if idx > 0:
occurence = occurence + 1 if line[idx - 1] == letter else 0
if occurence >= 2:
idx_to_cut.append(idx)
return idx_to_cut
def truncate_duplicated_letters_to_two(self, line):
idx_to_cut = self.get_rep_idx_to_cut_out_from_str(line)
str_out = ''
for i, s in enumerate(line):
if i not in idx_to_cut:
str_out += s
return str_out
def clean_data(self, l):
text = [self.clean_lines(x, self.change_dict) for x in l]
text = [self.truncate_duplicated_letters_to_two(x).strip() for x in text]
return text
def __init__(self):
with open('model_pkl' , 'rb') as f:
self.model = pickle.load(f)
# %%
with open('vectorizer_pkl' , 'rb') as f:
self.vectorizer = pickle.load(f)
def anylise(self, text_to_predict):
# %%
text_to_predict = self.clean_data(text_to_predict)
test_matrix = self.vectorizer.transform(text_to_predict)
data_predicted = self.model.predict(test_matrix).tolist()
# %%
positives = sum([1 for x in data_predicted if x == 1])
negatives = sum([1 for x in data_predicted if x == -1])
# %%
data_to_send = {"pos_perc": positives/(positives+negatives),
"neg_perc": negatives/(positives+negatives)}
return data_to_send

View File

@ -0,0 +1,398 @@
<#
.Synopsis
Activate a Python virtual environment for the current PowerShell session.
.Description
Pushes the python executable for a virtual environment to the front of the
$Env:PATH environment variable and sets the prompt to signify that you are
in a Python virtual environment. Makes use of the command line switches as
well as the `pyvenv.cfg` file values present in the virtual environment.
.Parameter VenvDir
Path to the directory that contains the virtual environment to activate. The
default value for this is the parent of the directory that the Activate.ps1
script is located within.
.Parameter Prompt
The prompt prefix to display when this virtual environment is activated. By
default, this prompt is the name of the virtual environment folder (VenvDir)
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
.Example
Activate.ps1
Activates the Python virtual environment that contains the Activate.ps1 script.
.Example
Activate.ps1 -Verbose
Activates the Python virtual environment that contains the Activate.ps1 script,
and shows extra information about the activation as it executes.
.Example
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
Activates the Python virtual environment located in the specified location.
.Example
Activate.ps1 -Prompt "MyPython"
Activates the Python virtual environment that contains the Activate.ps1 script,
and prefixes the current prompt with the specified string (surrounded in
parentheses) while the virtual environment is active.
.Notes
On Windows, it may be required to enable this Activate.ps1 script by setting the
execution policy for the user. You can do this by issuing the following PowerShell
command:
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
For more information on Execution Policies:
https://go.microsoft.com/fwlink/?LinkID=135170
#>
Param(
[Parameter(Mandatory = $false)]
[String]
$VenvDir,
[Parameter(Mandatory = $false)]
[String]
$Prompt
)
<# Function declarations --------------------------------------------------- #>
<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
addition of the virtual environment's Python executable from the beginning of
the PATH variable.
.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.
#>
function global:deactivate ([switch]$NonDestructive) {
# Revert to original values
# The prior prompt:
if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
}
# The prior PYTHONHOME:
if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
}
# The prior PATH:
if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
Remove-Item -Path Env:_OLD_VIRTUAL_PATH
}
# Just remove the VIRTUAL_ENV altogether:
if (Test-Path -Path Env:VIRTUAL_ENV) {
Remove-Item -Path env:VIRTUAL_ENV
}
# Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
}
# Leave deactivate function in the global namespace if requested:
if (-not $NonDestructive) {
Remove-Item -Path function:deactivate
}
}
<#
.Description
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
given folder, and returns them in a map.
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
two strings separated by `=` (with any amount of whitespace surrounding the =)
then it is considered a `key = value` line. The left hand string is the key,
the right hand is the value.
If the value starts with a `'` or a `"` then the first and last character is
stripped from the value before being captured.
.Parameter ConfigDir
Path to the directory that contains the `pyvenv.cfg` file.
#>
function Get-PyVenvConfig(
[String]
$ConfigDir
) {
Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
# Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
$pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
# An empty map will be returned if no config file is found.
$pyvenvConfig = @{ }
if ($pyvenvConfigPath) {
Write-Verbose "File exists, parse `key = value` lines"
$pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
$pyvenvConfigContent | ForEach-Object {
$keyval = $PSItem -split "\s*=\s*", 2
if ($keyval[0] -and $keyval[1]) {
$val = $keyval[1]
# Remove extraneous quotations around a string value.
if ("'""".Contains($val.Substring(0, 1))) {
$val = $val.Substring(1, $val.Length - 2)
}
$pyvenvConfig[$keyval[0]] = $val
Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
}
}
}
return $pyvenvConfig
}
<# Begin Activate script --------------------------------------------------- #>
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
$VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
Write-Verbose "VenvDir=$VenvDir"
}
# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
$Prompt = $pyvenvCfg['prompt'];
}
else {
Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virutal environment)"
Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
$Prompt = Split-Path -Path $venvDir -Leaf
}
}
Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"
# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive
# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir
if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
Write-Verbose "Setting prompt to '$Prompt'"
# Set the prompt to include the env name
# Make sure _OLD_VIRTUAL_PROMPT is global
function global:_OLD_VIRTUAL_PROMPT { "" }
Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
function global:prompt {
Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
_OLD_VIRTUAL_PROMPT
}
}
# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
Remove-Item -Path Env:PYTHONHOME
}
# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
# SIG # Begin signature block
# MIIcwAYJKoZIhvcNAQcCoIIcsTCCHK0CAQExDzANBglghkgBZQMEAgEFADB5Bgor
# BgEEAYI3AgEEoGswaTA0BgorBgEEAYI3AgEeMCYCAwEAAAQQH8w7YFlLCE63JNLG
# KX7zUQIBAAIBAAIBAAIBAAIBADAxMA0GCWCGSAFlAwQCAQUABCAwnDYwEHaCQq0n
# 8NAvsN7H7BO7/48rXCNwrg891FS5vaCCC38wggUwMIIEGKADAgECAhAECRgbX9W7
# ZnVTQ7VvlVAIMA0GCSqGSIb3DQEBCwUAMGUxCzAJBgNVBAYTAlVTMRUwEwYDVQQK
# EwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xJDAiBgNV
# BAMTG0RpZ2lDZXJ0IEFzc3VyZWQgSUQgUm9vdCBDQTAeFw0xMzEwMjIxMjAwMDBa
# Fw0yODEwMjIxMjAwMDBaMHIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2Vy
# dCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xMTAvBgNVBAMTKERpZ2lD
# ZXJ0IFNIQTIgQXNzdXJlZCBJRCBDb2RlIFNpZ25pbmcgQ0EwggEiMA0GCSqGSIb3
# DQEBAQUAA4IBDwAwggEKAoIBAQD407Mcfw4Rr2d3B9MLMUkZz9D7RZmxOttE9X/l
# qJ3bMtdx6nadBS63j/qSQ8Cl+YnUNxnXtqrwnIal2CWsDnkoOn7p0WfTxvspJ8fT
# eyOU5JEjlpB3gvmhhCNmElQzUHSxKCa7JGnCwlLyFGeKiUXULaGj6YgsIJWuHEqH
# CN8M9eJNYBi+qsSyrnAxZjNxPqxwoqvOf+l8y5Kh5TsxHM/q8grkV7tKtel05iv+
# bMt+dDk2DZDv5LVOpKnqagqrhPOsZ061xPeM0SAlI+sIZD5SlsHyDxL0xY4PwaLo
# LFH3c7y9hbFig3NBggfkOItqcyDQD2RzPJ6fpjOp/RnfJZPRAgMBAAGjggHNMIIB
# yTASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjATBgNVHSUEDDAK
# BggrBgEFBQcDAzB5BggrBgEFBQcBAQRtMGswJAYIKwYBBQUHMAGGGGh0dHA6Ly9v
# Y3NwLmRpZ2ljZXJ0LmNvbTBDBggrBgEFBQcwAoY3aHR0cDovL2NhY2VydHMuZGln
# aWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNydDCBgQYDVR0fBHow
# eDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJl
# ZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0Rp
# Z2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDBPBgNVHSAESDBGMDgGCmCGSAGG/WwA
# AgQwKjAoBggrBgEFBQcCARYcaHR0cHM6Ly93d3cuZGlnaWNlcnQuY29tL0NQUzAK
# BghghkgBhv1sAzAdBgNVHQ4EFgQUWsS5eyoKo6XqcQPAYPkt9mV1DlgwHwYDVR0j
# BBgwFoAUReuir/SSy4IxLVGLp6chnfNtyA8wDQYJKoZIhvcNAQELBQADggEBAD7s
# DVoks/Mi0RXILHwlKXaoHV0cLToaxO8wYdd+C2D9wz0PxK+L/e8q3yBVN7Dh9tGS
# dQ9RtG6ljlriXiSBThCk7j9xjmMOE0ut119EefM2FAaK95xGTlz/kLEbBw6RFfu6
# r7VRwo0kriTGxycqoSkoGjpxKAI8LpGjwCUR4pwUR6F6aGivm6dcIFzZcbEMj7uo
# +MUSaJ/PQMtARKUT8OZkDCUIQjKyNookAv4vcn4c10lFluhZHen6dGRrsutmQ9qz
# sIzV6Q3d9gEgzpkxYz0IGhizgZtPxpMQBvwHgfqL2vmCSfdibqFT+hKUGIUukpHq
# aGxEMrJmoecYpJpkUe8wggZHMIIFL6ADAgECAhADPtXtoGXRuMkd/PkqbJvYMA0G
# CSqGSIb3DQEBCwUAMHIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJ
# bmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xMTAvBgNVBAMTKERpZ2lDZXJ0
# IFNIQTIgQXNzdXJlZCBJRCBDb2RlIFNpZ25pbmcgQ0EwHhcNMTgxMjE4MDAwMDAw
# WhcNMjExMjIyMTIwMDAwWjCBgzELMAkGA1UEBhMCVVMxFjAUBgNVBAgTDU5ldyBI
# YW1wc2hpcmUxEjAQBgNVBAcTCVdvbGZlYm9ybzEjMCEGA1UEChMaUHl0aG9uIFNv
# ZnR3YXJlIEZvdW5kYXRpb24xIzAhBgNVBAMTGlB5dGhvbiBTb2Z0d2FyZSBGb3Vu
# ZGF0aW9uMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAqr2kS7J1uW7o
# JRxlsdrETAjKarfoH5TI8PWST6Yb2xPooP7vHT4iaVXyL5Lze1f53Jw67Sp+u524
# fJXf30qHViEWxumy2RWG0nciU2d+mMqzjlaAWSZNF0u4RcvyDJokEV0RUOqI5CG5
# zPI3W9uQ6LiUk3HCYW6kpH177A5T3pw/Po8O8KErJGn1anaqtIICq99ySxrMad/2
# hPMBRf6Ndah7f7HPn1gkSSTAoejyuqF5h+B0qI4+JK5+VLvz659VTbAWJsYakkxZ
# xVWYpFv4KeQSSwoo0DzMvmERsTzNvVBMWhu9OriJNg+QfFmf96zVTu93cZ+r7xMp
# bXyfIOGKhHMaRuZ8ihuWIx3gI9WHDFX6fBKR8+HlhdkaiBEWIsXRoy+EQUyK7zUs
# +FqOo2sRYttbs8MTF9YDKFZwyPjn9Wn+gLGd5NUEVyNvD9QVGBEtN7vx87bduJUB
# 8F4DylEsMtZTfjw/au6AmOnmneK5UcqSJuwRyZaGNk7y3qj06utx+HTTqHgi975U
# pxfyrwAqkovoZEWBVSpvku8PVhkBXcLmNe6MEHlFiaMoiADAeKmX5RFRkN+VrmYG
# Tg4zajxfdHeIY8TvLf48tTfmnQJd98geJQv/01NUy/FxuwqAuTkaez5Nl1LxP0Cp
# THhghzO4FRD4itT2wqTh4jpojw9QZnsCAwEAAaOCAcUwggHBMB8GA1UdIwQYMBaA
# FFrEuXsqCqOl6nEDwGD5LfZldQ5YMB0GA1UdDgQWBBT8Kr9+1L6s84KcpM97IgE7
# uI8H8jAOBgNVHQ8BAf8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUHAwMwdwYDVR0f
# BHAwbjA1oDOgMYYvaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL3NoYTItYXNzdXJl
# ZC1jcy1nMS5jcmwwNaAzoDGGL2h0dHA6Ly9jcmw0LmRpZ2ljZXJ0LmNvbS9zaGEy
# LWFzc3VyZWQtY3MtZzEuY3JsMEwGA1UdIARFMEMwNwYJYIZIAYb9bAMBMCowKAYI
# KwYBBQUHAgEWHGh0dHBzOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMwCAYGZ4EMAQQB
# MIGEBggrBgEFBQcBAQR4MHYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2lj
# ZXJ0LmNvbTBOBggrBgEFBQcwAoZCaHR0cDovL2NhY2VydHMuZGlnaWNlcnQuY29t
# L0RpZ2lDZXJ0U0hBMkFzc3VyZWRJRENvZGVTaWduaW5nQ0EuY3J0MAwGA1UdEwEB
# /wQCMAAwDQYJKoZIhvcNAQELBQADggEBAEt1oS21X0axiafPjyY+vlYqjWKuUu/Y
# FuYWIEq6iRRaFabNDhj9RBFQF/aJiE5msrQEOfAD6/6gVSH91lZWBqg6NEeG9T9S
# XbiAPvJ9CEWFsdkXUrjbWhvCnuZ7kqUuU5BAumI1QRbpYgZL3UA+iZXkmjbGh1ln
# 8rUhWIxbBYL4Sg2nqpB44p7CUFYkPj/MbwU2gvBV2pXjj5WaskoZtsACMv5g42BN
# oVLoRAi+ev6s07POt+JtHRIm87lTyuc8wh0swTPUwksKbLU1Zdj9CpqtzXnuVE0w
# 50exJvRSK3Vt4g+0vigpI3qPmDdpkf9+4Mvy0XMNcqrthw20R+PkIlMxghCXMIIQ
# kwIBATCBhjByMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkw
# FwYDVQQLExB3d3cuZGlnaWNlcnQuY29tMTEwLwYDVQQDEyhEaWdpQ2VydCBTSEEy
# IEFzc3VyZWQgSUQgQ29kZSBTaWduaW5nIENBAhADPtXtoGXRuMkd/PkqbJvYMA0G
# CWCGSAFlAwQCAQUAoIGaMBkGCSqGSIb3DQEJAzEMBgorBgEEAYI3AgEEMBwGCisG
# AQQBgjcCAQsxDjAMBgorBgEEAYI3AgEVMC4GCisGAQQBgjcCAQwxIDAeoByAGgBQ
# AHkAdABoAG8AbgAgADMALgA4AC4AMQAwMC8GCSqGSIb3DQEJBDEiBCAGueLiZxG/
# uwwkezGlE6NGik7PbC5BWsmef6UPtfvL6DANBgkqhkiG9w0BAQEFAASCAgBFCSGD
# sxcFCakg+TKDhbuPBHuY6y9GIgOIPoDAKzSNTmGd38nlGq9GNiOCDzYDS2tEdwwc
# 3Kt4G9Yc6OF8aPhYSjOg2N2DGoRDk5FYg1VAZYtdZGWIWkbxHKv1//04J38a18Wd
# QpSk+54Nu4ith/3HXeiUxb1IjHECHyq/cBj34op7Kl3SiSfLt4qW6n1FyQnrAq9M
# 8EyAJwx1q7sX5ugzvHTUE/stbLkxXO/k06MQ96GPt+knJgWy77EOgdwMQmnQoLQV
# XFyMQsa4SxpytVOtOgpdzAavrrmC6qbifbqLeUcioA4a2pm9Pa2xVetUFDilvSyM
# rIHRGx5LUCK1FGYccjXidJ4NFvINNT6pOylwkraxZdWJsptTrdR9oRUo8Lh6QAh1
# JPAw4mod88kbI5/5H0rOcTsa7P8jAtxkp0uwvxvGUxT+M+A5hwu81wTYsQZbEaVn
# H+JI2ADE4CnquMwhoUqQsqgVctZGX1r4AA7LhveEZEHOQ7m6KPYepdTKAGBf0KhO
# eEOSjJpnjQzaVmTaP2pfxgaoHHTSQ4AwIdIyO9m5wDoIznWT9T8618T6mzL0m8W5
# cX1fHREbifIJLSv3PXiYde0OWVqYIQO9PS7uTJ345Th+TCZUc+SYrC6Vq0PUsRZH
# IHLP6UH7SZfsjmlQbD/IiHR9g3kDInlij+IgQ6GCDUQwgg1ABgorBgEEAYI3AwMB
# MYINMDCCDSwGCSqGSIb3DQEHAqCCDR0wgg0ZAgEDMQ8wDQYJYIZIAWUDBAIBBQAw
# dwYLKoZIhvcNAQkQAQSgaARmMGQCAQEGCWCGSAGG/WwHATAxMA0GCWCGSAFlAwQC
# AQUABCBuuoKKI5VzqiwEcymFiOxOYAKyJj+lwucjo+Pb4yWr0QIQTCCWuLxarqMV
# tBcxlyFhXBgPMjAyMTA1MDMxMTUyNDhaoIIKNzCCBP4wggPmoAMCAQICEA1CSuC+
# Ooj/YEAhzhQA8N0wDQYJKoZIhvcNAQELBQAwcjELMAkGA1UEBhMCVVMxFTATBgNV
# BAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3LmRpZ2ljZXJ0LmNvbTExMC8G
# A1UEAxMoRGlnaUNlcnQgU0hBMiBBc3N1cmVkIElEIFRpbWVzdGFtcGluZyBDQTAe
# Fw0yMTAxMDEwMDAwMDBaFw0zMTAxMDYwMDAwMDBaMEgxCzAJBgNVBAYTAlVTMRcw
# FQYDVQQKEw5EaWdpQ2VydCwgSW5jLjEgMB4GA1UEAxMXRGlnaUNlcnQgVGltZXN0
# YW1wIDIwMjEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDC5mGEZ8WK
# 9Q0IpEXKY2tR1zoRQr0KdXVNlLQMULUmEP4dyG+RawyW5xpcSO9E5b+bYc0VkWJa
# uP9nC5xj/TZqgfop+N0rcIXeAhjzeG28ffnHbQk9vmp2h+mKvfiEXR52yeTGdnY6
# U9HR01o2j8aj4S8bOrdh1nPsTm0zinxdRS1LsVDmQTo3VobckyON91Al6GTm3dOP
# L1e1hyDrDo4s1SPa9E14RuMDgzEpSlwMMYpKjIjF9zBa+RSvFV9sQ0kJ/SYjU/aN
# Y+gaq1uxHTDCm2mCtNv8VlS8H6GHq756WwogL0sJyZWnjbL61mOLTqVyHO6fegFz
# +BnW/g1JhL0BAgMBAAGjggG4MIIBtDAOBgNVHQ8BAf8EBAMCB4AwDAYDVR0TAQH/
# BAIwADAWBgNVHSUBAf8EDDAKBggrBgEFBQcDCDBBBgNVHSAEOjA4MDYGCWCGSAGG
# /WwHATApMCcGCCsGAQUFBwIBFhtodHRwOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMw
# HwYDVR0jBBgwFoAU9LbhIB3+Ka7S5GGlsqIlssgXNW4wHQYDVR0OBBYEFDZEho6k
# urBmvrwoLR1ENt3janq8MHEGA1UdHwRqMGgwMqAwoC6GLGh0dHA6Ly9jcmwzLmRp
# Z2ljZXJ0LmNvbS9zaGEyLWFzc3VyZWQtdHMuY3JsMDKgMKAuhixodHRwOi8vY3Js
# NC5kaWdpY2VydC5jb20vc2hhMi1hc3N1cmVkLXRzLmNybDCBhQYIKwYBBQUHAQEE
# eTB3MCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2VydC5jb20wTwYIKwYB
# BQUHMAKGQ2h0dHA6Ly9jYWNlcnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFNIQTJB
# c3N1cmVkSURUaW1lc3RhbXBpbmdDQS5jcnQwDQYJKoZIhvcNAQELBQADggEBAEgc
# 3LXpmiO85xrnIA6OZ0b9QnJRdAojR6OrktIlxHBZvhSg5SeBpU0UFRkHefDRBMOG
# 2Tu9/kQCZk3taaQP9rhwz2Lo9VFKeHk2eie38+dSn5On7UOee+e03UEiifuHokYD
# Tvz0/rdkd2NfI1Jpg4L6GlPtkMyNoRdzDfTzZTlwS/Oc1np72gy8PTLQG8v1Yfx1
# CAB2vIEO+MDhXM/EEXLnG2RJ2CKadRVC9S0yOIHa9GCiurRS+1zgYSQlT7LfySmo
# c0NR2r1j1h9bm/cuG08THfdKDXF+l7f0P4TrweOjSaH6zqe/Vs+6WXZhiV9+p7SO
# Z3j5NpjhyyjaW4emii8wggUxMIIEGaADAgECAhAKoSXW1jIbfkHkBdo2l8IVMA0G
# CSqGSIb3DQEBCwUAMGUxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJ
# bmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xJDAiBgNVBAMTG0RpZ2lDZXJ0
# IEFzc3VyZWQgSUQgUm9vdCBDQTAeFw0xNjAxMDcxMjAwMDBaFw0zMTAxMDcxMjAw
# MDBaMHIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNV
# BAsTEHd3dy5kaWdpY2VydC5jb20xMTAvBgNVBAMTKERpZ2lDZXJ0IFNIQTIgQXNz
# dXJlZCBJRCBUaW1lc3RhbXBpbmcgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAw
# ggEKAoIBAQC90DLuS82Pf92puoKZxTlUKFe2I0rEDgdFM1EQfdD5fU1ofue2oPSN
# s4jkl79jIZCYvxO8V9PD4X4I1moUADj3Lh477sym9jJZ/l9lP+Cb6+NGRwYaVX4L
# J37AovWg4N4iPw7/fpX786O6Ij4YrBHk8JkDbTuFfAnT7l3ImgtU46gJcWvgzyIQ
# D3XPcXJOCq3fQDpct1HhoXkUxk0kIzBdvOw8YGqsLwfM/fDqR9mIUF79Zm5WYScp
# iYRR5oLnRlD9lCosp+R1PrqYD4R/nzEU1q3V8mTLex4F0IQZchfxFwbvPc3WTe8G
# Qv2iUypPhR3EHTyvz9qsEPXdrKzpVv+TAgMBAAGjggHOMIIByjAdBgNVHQ4EFgQU
# 9LbhIB3+Ka7S5GGlsqIlssgXNW4wHwYDVR0jBBgwFoAUReuir/SSy4IxLVGLp6ch
# nfNtyA8wEgYDVR0TAQH/BAgwBgEB/wIBADAOBgNVHQ8BAf8EBAMCAYYwEwYDVR0l
# BAwwCgYIKwYBBQUHAwgweQYIKwYBBQUHAQEEbTBrMCQGCCsGAQUFBzABhhhodHRw
# Oi8vb2NzcC5kaWdpY2VydC5jb20wQwYIKwYBBQUHMAKGN2h0dHA6Ly9jYWNlcnRz
# LmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEFzc3VyZWRJRFJvb3RDQS5jcnQwgYEGA1Ud
# HwR6MHgwOqA4oDaGNGh0dHA6Ly9jcmw0LmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEFz
# c3VyZWRJRFJvb3RDQS5jcmwwOqA4oDaGNGh0dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNv
# bS9EaWdpQ2VydEFzc3VyZWRJRFJvb3RDQS5jcmwwUAYDVR0gBEkwRzA4BgpghkgB
# hv1sAAIEMCowKAYIKwYBBQUHAgEWHGh0dHBzOi8vd3d3LmRpZ2ljZXJ0LmNvbS9D
# UFMwCwYJYIZIAYb9bAcBMA0GCSqGSIb3DQEBCwUAA4IBAQBxlRLpUYdWac3v3dp8
# qmN6s3jPBjdAhO9LhL/KzwMC/cWnww4gQiyvd/MrHwwhWiq3BTQdaq6Z+CeiZr8J
# qmDfdqQ6kw/4stHYfBli6F6CJR7Euhx7LCHi1lssFDVDBGiy23UC4HLHmNY8ZOUf
# SBAYX4k4YU1iRiSHY4yRUiyvKYnleB/WCxSlgNcSR3CzddWThZN+tpJn+1Nhiaj1
# a5bA9FhpDXzIAbG5KHW3mWOFIoxhynmUfln8jA/jb7UBJrZspe6HUSHkWGCbugwt
# K22ixH67xCUrRwIIfEmuE7bhfEJCKMYYVs9BNLZmXbZ0e/VWMyIvIjayS6JKldj1
# po5SMYICTTCCAkkCAQEwgYYwcjELMAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lD
# ZXJ0IEluYzEZMBcGA1UECxMQd3d3LmRpZ2ljZXJ0LmNvbTExMC8GA1UEAxMoRGln
# aUNlcnQgU0hBMiBBc3N1cmVkIElEIFRpbWVzdGFtcGluZyBDQQIQDUJK4L46iP9g
# QCHOFADw3TANBglghkgBZQMEAgEFAKCBmDAaBgkqhkiG9w0BCQMxDQYLKoZIhvcN
# AQkQAQQwHAYJKoZIhvcNAQkFMQ8XDTIxMDUwMzExNTI0OFowKwYLKoZIhvcNAQkQ
# AgwxHDAaMBgwFgQU4deCqOGRvu9ryhaRtaq0lKYkm/MwLwYJKoZIhvcNAQkEMSIE
# IF4QsNmEZSxyIoRHXsGFuji0l/UWCqW9PHVzCEWUlsAfMA0GCSqGSIb3DQEBAQUA
# BIIBAGLTB2GJcXOg7O2cTQmsIoasfSqq+sCpeV4z2od18Lx4IIdnj3R0gKY8UH2T
# 2j0JMcPogZDZxqxKY//0KP5AL1SzHwD5tN/61Fg/oVk6Yp7dw8HN1V5Kayg9IrXf
# xyfway7Zc6YAWWzRtf5vv7xpgRKGTUahGrYZwxJPnAyhW643vymwhkQ/cRodTJIz
# d3qdy4sTHORPKwPUzhxjhsGah6GBAe+Rho03JiRIvQsvUaF5igjA4fJ1QSFKZvqz
# rA0oiJNZckQHYEPxh1AQShen9Jhr7fd2j5bVVBpaWAALmRdr8Q12CiFlQyk4KKy7
# AEIHi2Rbf06++s+R2qJ5Tzfggs4=
# SIG # End signature block

View File

@ -0,0 +1,76 @@
# This file must be used with "source bin/activate" *from bash*
# you cannot run it directly
deactivate () {
# reset old environment variables
if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
PATH="${_OLD_VIRTUAL_PATH:-}"
export PATH
unset _OLD_VIRTUAL_PATH
fi
if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
export PYTHONHOME
unset _OLD_VIRTUAL_PYTHONHOME
fi
# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
hash -r
fi
if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
PS1="${_OLD_VIRTUAL_PS1:-}"
export PS1
unset _OLD_VIRTUAL_PS1
fi
unset VIRTUAL_ENV
if [ ! "${1:-}" = "nondestructive" ] ; then
# Self destruct!
unset -f deactivate
fi
}
# unset irrelevant variables
deactivate nondestructive
VIRTUAL_ENV="J:\Desktop\Projekt\bitsearch"
export VIRTUAL_ENV
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/Scripts:$PATH"
export PATH
# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
_OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
unset PYTHONHOME
fi
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
_OLD_VIRTUAL_PS1="${PS1:-}"
if [ "x(bitsearch) " != x ] ; then
PS1="(bitsearch) ${PS1:-}"
else
if [ "`basename \"$VIRTUAL_ENV\"`" = "__" ] ; then
# special case for Aspen magic directories
# see https://aspen.io/
PS1="[`basename \`dirname \"$VIRTUAL_ENV\"\``] $PS1"
else
PS1="(`basename \"$VIRTUAL_ENV\"`)$PS1"
fi
fi
export PS1
fi
# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
hash -r
fi

View File

@ -0,0 +1,33 @@
@echo off
rem This file is UTF-8 encoded, so we need to update the current code page while executing it
for /f "tokens=2 delims=:." %%a in ('"%SystemRoot%\System32\chcp.com"') do (
set _OLD_CODEPAGE=%%a
)
if defined _OLD_CODEPAGE (
"%SystemRoot%\System32\chcp.com" 65001 > nul
)
set VIRTUAL_ENV=J:\Desktop\Projekt\bitsearch
if not defined PROMPT set PROMPT=$P$G
if defined _OLD_VIRTUAL_PROMPT set PROMPT=%_OLD_VIRTUAL_PROMPT%
if defined _OLD_VIRTUAL_PYTHONHOME set PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME%
set _OLD_VIRTUAL_PROMPT=%PROMPT%
set PROMPT=(bitsearch) %PROMPT%
if defined PYTHONHOME set _OLD_VIRTUAL_PYTHONHOME=%PYTHONHOME%
set PYTHONHOME=
if defined _OLD_VIRTUAL_PATH set PATH=%_OLD_VIRTUAL_PATH%
if not defined _OLD_VIRTUAL_PATH set _OLD_VIRTUAL_PATH=%PATH%
set PATH=%VIRTUAL_ENV%\Scripts;%PATH%
:END
if defined _OLD_CODEPAGE (
"%SystemRoot%\System32\chcp.com" %_OLD_CODEPAGE% > nul
set _OLD_CODEPAGE=
)

View File

@ -0,0 +1,21 @@
@echo off
if defined _OLD_VIRTUAL_PROMPT (
set "PROMPT=%_OLD_VIRTUAL_PROMPT%"
)
set _OLD_VIRTUAL_PROMPT=
if defined _OLD_VIRTUAL_PYTHONHOME (
set "PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME%"
set _OLD_VIRTUAL_PYTHONHOME=
)
if defined _OLD_VIRTUAL_PATH (
set "PATH=%_OLD_VIRTUAL_PATH%"
)
set _OLD_VIRTUAL_PATH=
set VIRTUAL_ENV=
:END

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,3 @@
home = C:\software\python3
include-system-site-packages = false
version = 3.8.10

View File

@ -0,0 +1,23 @@
aniso8601==9.0.1
attrs==21.4.0
click==8.0.3
colorama==0.4.4
Flask==2.0.2
Flask-RESTful==0.3.9
flask-restplus==0.13.0
importlib-resources==5.4.0
itsdangerous==2.0.1
Jinja2==3.0.3
joblib==1.1.0
jsonschema==4.4.0
MarkupSafe==2.0.1
numpy==1.22.1
pyrsistent==0.18.1
pytz==2021.3
scikit-learn==1.0.2
scipy==1.7.3
six==1.16.0
sklearn==0.0
threadpoolctl==3.0.0
Werkzeug==2.0.2
zipp==3.7.0

61
twitter.py Normal file
View File

@ -0,0 +1,61 @@
# %%
import pandas as pd
import os
import re
# %% [markdown]
### Reading data - this part need changing when data
# %%
path = os.getcwd()
filename = 'training_data_clean.csv'
filepath = path+'/'+filename
data = pd.read_csv(filepath, header=None,
delimiter=',', encoding_errors='surrogateescape')
data.columns = ['index', 'id','date', 'query', 'user', 'text']
# %% [markdown]
### Function definitions
# %%
change_dict = {
# tokens
"USERNAME": ['@\w+|@'],
"URL": ['http\S*'],
"EMOJI": ["[;:][dbop\(\)\[\]]|[dbop\(\)\[\]][;:]|xd+|\S*&\S*"],
# standardization
', ': ['\s,'],
'. ': ['\s\.'],
' ': ['\s{2,}'],
"'": ["<EFBFBD>"],
'?': ["\s\?+|\?+"],
'!': ["\s\!+|\!+"]
}
def clean_lines(line, change_dict):
line = line.lower()
for change_to, change_regex_list in change_dict.items():
for change_regex in change_regex_list:
line = re.sub(change_regex, change_to, line)
return line
def get_rep_idx_to_cut_out_from_str(line):
occurence = 0
idx_to_cut = []
for idx, letter in enumerate(line):
if idx > 0:
occurence = occurence+1 if line[idx-1] == letter else 0
if occurence >= 2:
idx_to_cut.append(idx)
return idx_to_cut
def truncate_duplicated_letters_to_two(line):
idx_to_cut = get_rep_idx_to_cut_out_from_str(line)
str_out =''
for i,s in enumerate(line):
if i not in idx_to_cut:
str_out += s
return str_out
# %% [markdown]
### Cleaning
# %%
text = [clean_lines(x, change_dict) for x in data.loc[:, 'text'].values.tolist()]
text = [truncate_duplicated_letters_to_two(x).strip() for x in text]
data.text = text
# %%