Compare commits
11 Commits
feature/se
...
master
Author | SHA1 | Date | |
---|---|---|---|
96e9623413 | |||
|
76a2396a06 | ||
|
2225596d53 | ||
91701a6ab5 | |||
1095e5cce4 | |||
98f0649a4a | |||
3a32bdcdaa | |||
ced5b1e4e1 | |||
3dd504a287 | |||
b080e031c7 | |||
bc8df9aa86 |
238
.gitignore
vendored
238
.gitignore
vendored
@ -518,4 +518,240 @@ FodyWeavers.xsd
|
||||
### VisualStudio Patch ###
|
||||
# Additional files built by Visual Studio
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/visualstudio,aspnetcore
|
||||
# End of https://www.toptal.com/developers/gitignore/api/visualstudio,aspnetcore
|
||||
|
||||
### Flask ###
|
||||
instance/*
|
||||
!instance/.gitignore
|
||||
.webassets-cache
|
||||
.env
|
||||
|
||||
### Flask.Python Stack ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
|
||||
# C extensions
|
||||
|
||||
# Distribution / packaging
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
|
||||
# Installer logs
|
||||
|
||||
# Unit test / coverage reports
|
||||
|
||||
# Translations
|
||||
|
||||
# Django stuff:
|
||||
|
||||
# Flask stuff:
|
||||
|
||||
# Scrapy stuff:
|
||||
|
||||
# Sphinx documentation
|
||||
|
||||
# PyBuilder
|
||||
|
||||
# Jupyter Notebook
|
||||
|
||||
# IPython
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
||||
|
||||
# Celery stuff
|
||||
|
||||
# SageMath parsed files
|
||||
|
||||
# Environments
|
||||
|
||||
# Spyder project settings
|
||||
|
||||
# Rope project settings
|
||||
|
||||
# mkdocs documentation
|
||||
|
||||
# mypy
|
||||
|
||||
# Pyre type checker
|
||||
|
||||
# pytype static type analyzer
|
||||
|
||||
# Cython debug symbols
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/python,flask
|
@ -11,6 +11,7 @@ using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Net.Http;
|
||||
using System.Net.Http.Headers;
|
||||
using System.Net.Http.Json;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace BitSearch.API.Controllers
|
||||
@ -32,11 +33,42 @@ namespace BitSearch.API.Controllers
|
||||
}
|
||||
|
||||
[HttpGet("ranking")]
|
||||
public IActionResult GetRanking()
|
||||
public async Task<IActionResult> GetRanking()
|
||||
{
|
||||
//var request = new HttpRequestMessage(HttpMethod.Get, "https://api.twitter.com/2/tweets/counts/recent");
|
||||
var cryptoHash = new Dictionary<string, string>()
|
||||
{
|
||||
{"Bitcoin", "#btc"},
|
||||
{"Etherum", "#eth"},
|
||||
{"Tron", "#trx"},
|
||||
{"Tether", "#usdt"},
|
||||
{"Binance Coin", "#bnb"},
|
||||
{"Cardano", "#ada"},
|
||||
{"Dash", "#dash"},
|
||||
{"Neo", "#neo"},
|
||||
{"Steem", "#steem"},
|
||||
{"Zcash", "#zec"}
|
||||
};
|
||||
|
||||
return Ok();
|
||||
var token = Configuration["Token"];
|
||||
var twitterClient = _httpClientFactory.CreateClient();
|
||||
twitterClient.BaseAddress = new Uri("https://api.twitter.com/2/tweets/counts/");
|
||||
twitterClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token);
|
||||
twitterClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
|
||||
var ranking = new List<CryptoRanking>();
|
||||
|
||||
foreach (KeyValuePair<string, string> crypto in cryptoHash)
|
||||
{
|
||||
var requestUri = QueryHelpers.AddQueryString("recent", "query", crypto.Value);
|
||||
var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
|
||||
var response = await twitterClient.SendAsync(request);
|
||||
if (response.IsSuccessStatusCode)
|
||||
{
|
||||
var tweetsDto = JsonConvert.DeserializeObject<TweetCount>(await response.Content.ReadAsStringAsync());
|
||||
ranking.Add(new CryptoRanking() { Name = crypto.Key, TweetAmount = tweetsDto.meta.total_tweet_count });
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(ranking.OrderByDescending(r => r.TweetAmount));
|
||||
}
|
||||
|
||||
[HttpGet("analise/{hash}")]
|
||||
@ -44,23 +76,31 @@ namespace BitSearch.API.Controllers
|
||||
{
|
||||
var queryString = new Dictionary<string, string>()
|
||||
{
|
||||
{"query", hash},
|
||||
{"query", $"#{hash}"},
|
||||
{"max_results", "100"},
|
||||
{"tweet.fields", "lang,referenced_tweets"},
|
||||
};
|
||||
|
||||
var token = Configuration["Token"];
|
||||
var httpClient = _httpClientFactory.CreateClient();
|
||||
httpClient.BaseAddress = new Uri("https://api.twitter.com/2/tweets/search/");
|
||||
var twitterClient = _httpClientFactory.CreateClient();
|
||||
twitterClient.BaseAddress = new Uri("https://api.twitter.com/2/tweets/search/");
|
||||
var requestUri = QueryHelpers.AddQueryString("recent", queryString);
|
||||
var request = new HttpRequestMessage(HttpMethod.Get, requestUri);
|
||||
httpClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token);
|
||||
httpClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
|
||||
var response = await httpClient.SendAsync(request);
|
||||
var twiiterRequest = new HttpRequestMessage(HttpMethod.Get, requestUri);
|
||||
twitterClient.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", token);
|
||||
twitterClient.DefaultRequestHeaders.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
|
||||
var response = await twitterClient.SendAsync(twiiterRequest);
|
||||
|
||||
if (response.IsSuccessStatusCode)
|
||||
{
|
||||
return Ok(await response.Content.ReadAsStringAsync());
|
||||
var tweetsDto = JsonConvert.DeserializeObject<TweeterResponse>(await response.Content.ReadAsStringAsync());
|
||||
tweetsDto.data = tweetsDto.data.Where(d => d.lang == "en");
|
||||
var microServiceClient = _httpClientFactory.CreateClient();
|
||||
var microServiceRequest = new HttpRequestMessage(HttpMethod.Get, "http://127.0.0.1:5000/analysis");
|
||||
microServiceRequest.Content = JsonContent.Create(tweetsDto);
|
||||
var microServiceResponse = await microServiceClient.SendAsync(microServiceRequest);
|
||||
|
||||
return Ok(await microServiceResponse.Content.ReadAsStringAsync());
|
||||
//return Ok(tweetsDto);
|
||||
}
|
||||
|
||||
return Ok(response);
|
||||
|
12
BitSearch/BitSearch.API/Models/BaseTweetsDto.cs
Normal file
12
BitSearch/BitSearch.API/Models/BaseTweetsDto.cs
Normal file
@ -0,0 +1,12 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace BitSearch.API.Models
|
||||
{
|
||||
/// <summary>
/// Generic envelope for JSON responses that carry their payload in a
/// top-level <c>data</c> array.
/// </summary>
/// <typeparam name="T">Type of each element of the <c>data</c> array.</typeparam>
public class BaseTweetsDto<T>
{
    /// <summary>
    /// Items of the response. Starts as an empty list rather than <c>null</c>
    /// so that a response without a <c>data</c> field can still be filtered
    /// or enumerated by callers (e.g. with LINQ <c>Where</c>) without a
    /// <see cref="System.NullReferenceException"/>.
    /// </summary>
    public IEnumerable<T> data { get; set; } = new List<T>();
}
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace BitSearch.API.Models
|
||||
{
|
||||
public class CryptoAnalysis
|
||||
{
|
||||
public string Name { get; set; }
|
||||
public decimal Positive { get; set; }
|
||||
public decimal Negative { get; set; }
|
||||
|
||||
}
|
||||
}
|
@ -6,9 +6,8 @@ using System.Threading.Tasks;
|
||||
namespace BitSearch.API.Models
|
||||
{
|
||||
public class CryptoRanking
|
||||
{
|
||||
{
|
||||
public string Name { get; set; }
|
||||
public string Hasztag { get; set; }
|
||||
public int TweetAmount { get; set; }
|
||||
}
|
||||
}
|
||||
|
12
BitSearch/BitSearch.API/Models/MetaData.cs
Normal file
12
BitSearch/BitSearch.API/Models/MetaData.cs
Normal file
@ -0,0 +1,12 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace BitSearch.API.Models
|
||||
{
|
||||
/// <summary>
/// The <c>meta</c> object of a tweet-count response; exposed through
/// <c>TweetCount.meta</c>.
/// </summary>
public class MetaData
{
    /// <summary>
    /// Total number of tweets counted for the query. The name mirrors the
    /// JSON field it is deserialized from.
    /// </summary>
    public int total_tweet_count { get; set; }
}
|
||||
}
|
13
BitSearch/BitSearch.API/Models/ReferencedTweets.cs
Normal file
13
BitSearch/BitSearch.API/Models/ReferencedTweets.cs
Normal file
@ -0,0 +1,13 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace BitSearch.API.Models
|
||||
{
|
||||
/// <summary>
/// One entry of a tweet's <c>referenced_tweets</c> array
/// (see <c>TweetsDto.referenced_tweets</c>).
/// </summary>
public class ReferencedTweets
{
    /// <summary>Kind of reference, as delivered in the JSON <c>type</c> field.</summary>
    public string type { get; set; }

    /// <summary>Identifier of the referenced tweet, kept as a string.</summary>
    public string id { get; set; }
}
|
||||
}
|
12
BitSearch/BitSearch.API/Models/TweetCount.cs
Normal file
12
BitSearch/BitSearch.API/Models/TweetCount.cs
Normal file
@ -0,0 +1,12 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace BitSearch.API.Models
|
||||
{
|
||||
/// <summary>
/// Shape of a tweet-count response as far as this application reads it:
/// only the <c>meta</c> object is mapped.
/// </summary>
public class TweetCount
{
    /// <summary>Aggregate information, including the total tweet count.</summary>
    public MetaData meta { get; set; }
}
|
||||
}
|
11
BitSearch/BitSearch.API/Models/TweeterResponse.cs
Normal file
11
BitSearch/BitSearch.API/Models/TweeterResponse.cs
Normal file
@ -0,0 +1,11 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace BitSearch.API.Models
|
||||
{
|
||||
/// <summary>
/// Tweet search response: a <see cref="BaseTweetsDto{T}"/> envelope whose
/// <c>data</c> items are <see cref="TweetsDto"/> objects. Adds no members of
/// its own.
/// </summary>
public class TweeterResponse : BaseTweetsDto<TweetsDto>
{
}
|
||||
}
|
15
BitSearch/BitSearch.API/Models/TweetsDto.cs
Normal file
15
BitSearch/BitSearch.API/Models/TweetsDto.cs
Normal file
@ -0,0 +1,15 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace BitSearch.API.Models
|
||||
{
|
||||
/// <summary>
/// A single tweet as returned inside the <c>data</c> array of a search
/// response. Property names mirror the JSON field names.
/// </summary>
public class TweetsDto
{
    /// <summary>Tweet identifier, kept as a string.</summary>
    public string id { get; set; }

    /// <summary>Text content of the tweet.</summary>
    public string text { get; set; }

    /// <summary>Language code of the tweet (the controller keeps only <c>"en"</c>).</summary>
    public string lang { get; set; }

    /// <summary>Tweets this tweet refers to; may be absent in the response.</summary>
    public IEnumerable<ReferencedTweets> referenced_tweets { get; set; }
}
|
||||
}
|
@ -1,13 +1,13 @@
|
||||
{
|
||||
"$schema": "http://json.schemastore.org/launchsettings.json",
|
||||
{
|
||||
"iisSettings": {
|
||||
"windowsAuthentication": false,
|
||||
"anonymousAuthentication": true,
|
||||
"iisExpress": {
|
||||
"applicationUrl": "http://localhost:62241",
|
||||
"sslPort": 44393
|
||||
"sslPort": 0
|
||||
}
|
||||
},
|
||||
"$schema": "http://json.schemastore.org/launchsettings.json",
|
||||
"profiles": {
|
||||
"IIS Express": {
|
||||
"commandName": "IISExpress",
|
||||
@ -19,13 +19,13 @@
|
||||
},
|
||||
"BitSearch.API": {
|
||||
"commandName": "Project",
|
||||
"dotnetRunMessages": "true",
|
||||
"launchBrowser": true,
|
||||
"launchUrl": "swagger",
|
||||
"applicationUrl": "https://localhost:5001;http://localhost:5000",
|
||||
"environmentVariables": {
|
||||
"ASPNETCORE_ENVIRONMENT": "Development"
|
||||
}
|
||||
},
|
||||
"dotnetRunMessages": "true",
|
||||
"applicationUrl": "https://localhost:5004;http://localhost:5005"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
51
BitSearch/Bitsearch.Microservice/BitSearch/main.py
Normal file
51
BitSearch/Bitsearch.Microservice/BitSearch/main.py
Normal file
@ -0,0 +1,51 @@
|
||||
from flask import Flask, request
|
||||
import flask.scaffold
|
||||
flask.helpers._endpoint_from_view_func = flask.scaffold._endpoint_from_view_func
|
||||
from flask_restful import Resource
|
||||
import werkzeug
|
||||
werkzeug.cached_property = werkzeug.utils.cached_property
|
||||
from flask_restplus import Namespace, Resource, fields, Api
|
||||
from flask import Blueprint
|
||||
from twitter_pred import Predictor
|
||||
|
||||
|
||||
# Flask application and the root REST API bound directly to it.
app = Flask(__name__)
api = Api(app)

# Namespace mounted on the documented blueprint; "opis" (Polish for
# "description") is passed as the namespace description.
namespace = Namespace("test", "opis")

# Response schema used to marshal the Analysis resource's output.
test_model = namespace.model(
    "Test",
    {
        'result': fields.String(readonly=True, description='test message'),
    },
)

# Created once at import time; the constructor loads the pickled model and
# vectorizer from disk.
predictor = Predictor()
|
||||
|
||||
# Sentiment endpoint: expects a JSON body of the form
# {"data": [{"text": ...}, ...]} and returns the predictor's summary under
# the "result" key.
# Plain comments are used instead of docstrings on purpose: Flask-RESTPlus
# copies docstrings into the generated Swagger document.
@namespace.route('')
class Analysis(Resource):
    @namespace.marshal_list_with(test_model)
    @namespace.response(500, "error")
    def get(self):
        payload = request.get_json()
        # Only the tweet texts are forwarded to the predictor.
        tweet_texts = [entry["text"] for entry in payload['data']]
        return {"result": predictor.anylise(tweet_texts)}
|
||||
|
||||
|
||||
# Blueprint hosting the Swagger-documented API under /documented_api.
blueprint = Blueprint('documented_api', __name__, url_prefix='/documented_api')

api_extension = Api(
    blueprint,
    title='Flask RESTplus Demo',
    version='1.0',
    # Adjacent literals are joined by the parser; the explicit trailing space
    # keeps "extension" and "for" apart (a backslash continuation inside the
    # literal does not insert a clean single space between the two lines).
    description=(
        'Application tutorial to demonstrate Flask RESTplus extension '
        'for better project structure and auto generated documentation'
    ),
    doc='/doc',
)
api_extension.add_namespace(namespace)
# Leave the field-mask header out of the generated Swagger documentation.
app.config['RESTPLUS_MASK_SWAGGER'] = False
app.register_blueprint(blueprint)

# Also expose the resource on the root API as /analysis.
api.add_resource(Analysis, "/analysis")

if __name__ == "__main__":
    # NOTE(review): debug=True turns on the Werkzeug debugger and reloader;
    # suitable for local development only, not for a deployed service.
    app.run(debug=True)
|
BIN
BitSearch/Bitsearch.Microservice/BitSearch/model_pkl
Normal file
BIN
BitSearch/Bitsearch.Microservice/BitSearch/model_pkl
Normal file
Binary file not shown.
76
BitSearch/Bitsearch.Microservice/BitSearch/twitter_pred.py
Normal file
76
BitSearch/Bitsearch.Microservice/BitSearch/twitter_pred.py
Normal file
@ -0,0 +1,76 @@
|
||||
# %%
|
||||
import pickle
|
||||
import re
|
||||
import json
|
||||
# %%
|
||||
|
||||
class Predictor:
    """Clean tweet texts and summarise their predicted sentiment.

    The model and the vectorizer are unpickled from the files ``model_pkl``
    and ``vectorizer_pkl`` in the current working directory when an instance
    is created.
    """

    # Replacement text -> regex patterns replaced by it. Rules are applied in
    # this order, so later rules see the output of earlier ones. Raw strings
    # keep the patterns free of invalid string escape sequences.
    change_dict = {
        # tokens
        " username ": [r'@\w+|@'],
        " url ": [r'http\S*'],
        " emoji ": [
            r"[;:][dbop\(\)\[\]]|[^\w][dbop\(\)\[\]][;:]|xd+|\S*&\S*",
            r'''[^\w\s,.?!:;#'"\(\)\$\-\+%\[\]\|]''',
        ],
        " number ": [r"[\+\-\$]?[\d]+[,\.\:k]?[\d]?[%]?"],
        # standardization
        ', ': [r'\s,'],
        '. ': [r'\s\.'],
        ' ': [r'\s{2,}', '\n', r'^rt[\s]+', r'\s\:\s'],
        # U+FFFD (replacement character) is mapped to an apostrophe.
        "'": ["\ufffd"],
        '?': [r"\s\?"],
        '!': [r"\s\!"],
        '".': [r'\s"\.'],
        '",': [r'\s"\,'],
        '" ': [r'\s"\s'],
    }

    def __init__(self):
        """Load the pickled model and vectorizer from the working directory."""
        # NOTE(review): pickle.load can execute arbitrary code; these files
        # must only ever come from a trusted source.
        with open('model_pkl', 'rb') as f:
            self.model = pickle.load(f)
        with open('vectorizer_pkl', 'rb') as f:
            self.vectorizer = pickle.load(f)

    def clean_lines(self, line, change_dict):
        """Lower-case *line* and apply every substitution in *change_dict*.

        *change_dict* maps a replacement string to a list of regex patterns;
        rules are applied in the mapping's iteration order.
        """
        line = str(line).lower()
        for change_to, change_regex_list in change_dict.items():
            for change_regex in change_regex_list:
                line = re.sub(change_regex, change_to, line)
        return line

    def get_rep_idx_to_cut_out_from_str(self, line):
        """Return the indices of characters that extend a run beyond two.

        For ``"aaaa"`` the result is ``[2, 3]``: the first two characters of
        a run are kept, every further repetition is marked for removal.
        """
        run_length = 0
        idx_to_cut = []
        for idx, (previous, current) in enumerate(zip(line, line[1:]), start=1):
            run_length = run_length + 1 if previous == current else 0
            if run_length >= 2:
                idx_to_cut.append(idx)
        return idx_to_cut

    def truncate_duplicated_letters_to_two(self, line):
        """Return *line* with runs of one character shortened to two."""
        # A set gives O(1) membership tests, keeping the pass linear.
        idx_to_cut = set(self.get_rep_idx_to_cut_out_from_str(line))
        return ''.join(s for i, s in enumerate(line) if i not in idx_to_cut)

    def clean_data(self, l):
        """Return the cleaned, run-truncated and stripped form of each text in *l*."""
        text = [self.clean_lines(x, self.change_dict) for x in l]
        return [self.truncate_duplicated_letters_to_two(x).strip() for x in text]

    def anylise(self, text_to_predict):
        """Return the share of positive and negative predictions.

        *text_to_predict* is an iterable of raw texts. Predictions equal to
        ``1`` count as positive and ``-1`` as negative; any other label is
        ignored. The result is ``{"pos_perc": float, "neg_perc": float}``.
        Both values are ``0.0`` when there is nothing to predict or when no
        prediction is positive or negative, instead of dividing by zero.
        """
        no_signal = {"pos_perc": 0.0, "neg_perc": 0.0}

        text_to_predict = self.clean_data(text_to_predict)
        if not text_to_predict:
            return no_signal

        test_matrix = self.vectorizer.transform(text_to_predict)
        data_predicted = self.model.predict(test_matrix).tolist()

        positives = data_predicted.count(1)
        negatives = data_predicted.count(-1)
        total = positives + negatives
        if total == 0:
            return no_signal

        return {"pos_perc": positives / total, "neg_perc": negatives / total}
|
BIN
BitSearch/Bitsearch.Microservice/BitSearch/vectorizer_pkl
Normal file
BIN
BitSearch/Bitsearch.Microservice/BitSearch/vectorizer_pkl
Normal file
Binary file not shown.
398
BitSearch/Bitsearch.Microservice/Scripts/Activate.ps1
Normal file
398
BitSearch/Bitsearch.Microservice/Scripts/Activate.ps1
Normal file
@ -0,0 +1,398 @@
|
||||
<#
|
||||
.Synopsis
|
||||
Activate a Python virtual environment for the current PowerShell session.
|
||||
|
||||
.Description
|
||||
Pushes the python executable for a virtual environment to the front of the
|
||||
$Env:PATH environment variable and sets the prompt to signify that you are
|
||||
in a Python virtual environment. Makes use of the command line switches as
|
||||
well as the `pyvenv.cfg` file values present in the virtual environment.
|
||||
|
||||
.Parameter VenvDir
|
||||
Path to the directory that contains the virtual environment to activate. The
|
||||
default value for this is the parent of the directory that the Activate.ps1
|
||||
script is located within.
|
||||
|
||||
.Parameter Prompt
|
||||
The prompt prefix to display when this virtual environment is activated. By
|
||||
default, this prompt is the name of the virtual environment folder (VenvDir)
|
||||
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
|
||||
|
||||
.Example
|
||||
Activate.ps1
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Verbose
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and shows extra information about the activation as it executes.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
|
||||
Activates the Python virtual environment located in the specified location.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Prompt "MyPython"
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and prefixes the current prompt with the specified string (surrounded in
|
||||
parentheses) while the virtual environment is active.
|
||||
|
||||
.Notes
|
||||
On Windows, it may be required to enable this Activate.ps1 script by setting the
|
||||
execution policy for the user. You can do this by issuing the following PowerShell
|
||||
command:
|
||||
|
||||
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
||||
|
||||
For more information on Execution Policies:
|
||||
https://go.microsoft.com/fwlink/?LinkID=135170
|
||||
|
||||
#>
|
||||
Param(
|
||||
[Parameter(Mandatory = $false)]
|
||||
[String]
|
||||
$VenvDir,
|
||||
[Parameter(Mandatory = $false)]
|
||||
[String]
|
||||
$Prompt
|
||||
)
|
||||
|
||||
<# Function declarations --------------------------------------------------- #>
|
||||
|
||||
<#
|
||||
.Synopsis
|
||||
Remove all shell session elements added by the Activate script, including the
|
||||
addition of the virtual environment's Python executable from the beginning of
|
||||
the PATH variable.
|
||||
|
||||
.Parameter NonDestructive
|
||||
If present, do not remove this function from the global namespace for the
|
||||
session.
|
||||
|
||||
#>
|
||||
function global:deactivate ([switch]$NonDestructive) {
|
||||
# Revert to original values
|
||||
|
||||
# The prior prompt:
|
||||
if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
|
||||
Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
|
||||
Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
|
||||
}
|
||||
|
||||
# The prior PYTHONHOME:
|
||||
if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
|
||||
Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
|
||||
Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
|
||||
}
|
||||
|
||||
# The prior PATH:
|
||||
if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
|
||||
Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
|
||||
Remove-Item -Path Env:_OLD_VIRTUAL_PATH
|
||||
}
|
||||
|
||||
# Just remove the VIRTUAL_ENV altogether:
|
||||
if (Test-Path -Path Env:VIRTUAL_ENV) {
|
||||
Remove-Item -Path env:VIRTUAL_ENV
|
||||
}
|
||||
|
||||
# Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
|
||||
if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
|
||||
Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
|
||||
}
|
||||
|
||||
# Leave deactivate function in the global namespace if requested:
|
||||
if (-not $NonDestructive) {
|
||||
Remove-Item -Path function:deactivate
|
||||
}
|
||||
}
|
||||
|
||||
<#
|
||||
.Description
|
||||
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
|
||||
given folder, and returns them in a map.
|
||||
|
||||
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
|
||||
two strings separated by `=` (with any amount of whitespace surrounding the =)
|
||||
then it is considered a `key = value` line. The left hand string is the key,
|
||||
the right hand is the value.
|
||||
|
||||
If the value starts with a `'` or a `"` then the first and last character is
|
||||
stripped from the value before being captured.
|
||||
|
||||
.Parameter ConfigDir
|
||||
Path to the directory that contains the `pyvenv.cfg` file.
|
||||
#>
|
||||
function Get-PyVenvConfig(
|
||||
[String]
|
||||
$ConfigDir
|
||||
) {
|
||||
Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
|
||||
|
||||
# Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
|
||||
$pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
|
||||
|
||||
# An empty map will be returned if no config file is found.
|
||||
$pyvenvConfig = @{ }
|
||||
|
||||
if ($pyvenvConfigPath) {
|
||||
|
||||
Write-Verbose "File exists, parse `key = value` lines"
|
||||
$pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
|
||||
|
||||
$pyvenvConfigContent | ForEach-Object {
|
||||
$keyval = $PSItem -split "\s*=\s*", 2
|
||||
if ($keyval[0] -and $keyval[1]) {
|
||||
$val = $keyval[1]
|
||||
|
||||
# Remove extraneous quotations around a string value.
|
||||
if ("'""".Contains($val.Substring(0, 1))) {
|
||||
$val = $val.Substring(1, $val.Length - 2)
|
||||
}
|
||||
|
||||
$pyvenvConfig[$keyval[0]] = $val
|
||||
Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
|
||||
}
|
||||
}
|
||||
}
|
||||
return $pyvenvConfig
|
||||
}
|
||||
|
||||
|
||||
<# Begin Activate script --------------------------------------------------- #>
|
||||
|
||||
# Determine the containing directory of this script
|
||||
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
||||
$VenvExecDir = Get-Item -Path $VenvExecPath
|
||||
|
||||
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
|
||||
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
|
||||
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
|
||||
|
||||
# Set values required in priority: CmdLine, ConfigFile, Default
|
||||
# First, get the location of the virtual environment, it might not be
|
||||
# VenvExecDir if specified on the command line.
|
||||
if ($VenvDir) {
|
||||
Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
|
||||
}
|
||||
else {
|
||||
Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
|
||||
$VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
|
||||
Write-Verbose "VenvDir=$VenvDir"
|
||||
}
|
||||
|
||||
# Next, read the `pyvenv.cfg` file to determine any required value such
|
||||
# as `prompt`.
|
||||
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
|
||||
|
||||
# Next, set the prompt from the command line, or the config file, or
|
||||
# just use the name of the virtual environment folder.
|
||||
if ($Prompt) {
|
||||
Write-Verbose "Prompt specified as argument, using '$Prompt'"
|
||||
}
|
||||
else {
|
||||
Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
|
||||
if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
|
||||
Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
|
||||
$Prompt = $pyvenvCfg['prompt'];
|
||||
}
|
||||
else {
|
||||
Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virutal environment)"
|
||||
Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
|
||||
$Prompt = Split-Path -Path $venvDir -Leaf
|
||||
}
|
||||
}
|
||||
|
||||
Write-Verbose "Prompt = '$Prompt'"
|
||||
Write-Verbose "VenvDir='$VenvDir'"
|
||||
|
||||
# Deactivate any currently active virtual environment, but leave the
|
||||
# deactivate function in place.
|
||||
deactivate -nondestructive
|
||||
|
||||
# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
|
||||
# that there is an activated venv.
|
||||
$env:VIRTUAL_ENV = $VenvDir
|
||||
|
||||
if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
|
||||
|
||||
Write-Verbose "Setting prompt to '$Prompt'"
|
||||
|
||||
# Set the prompt to include the env name
|
||||
# Make sure _OLD_VIRTUAL_PROMPT is global
|
||||
function global:_OLD_VIRTUAL_PROMPT { "" }
|
||||
Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
|
||||
New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
|
||||
|
||||
function global:prompt {
|
||||
Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
|
||||
_OLD_VIRTUAL_PROMPT
|
||||
}
|
||||
}
|
||||
|
||||
# Clear PYTHONHOME
|
||||
if (Test-Path -Path Env:PYTHONHOME) {
|
||||
Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
|
||||
Remove-Item -Path Env:PYTHONHOME
|
||||
}
|
||||
|
||||
# Add the venv to the PATH
|
||||
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
|
||||
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
|
||||
|
||||
# SIG # Begin signature block
|
||||
# MIIcwAYJKoZIhvcNAQcCoIIcsTCCHK0CAQExDzANBglghkgBZQMEAgEFADB5Bgor
|
||||
# BgEEAYI3AgEEoGswaTA0BgorBgEEAYI3AgEeMCYCAwEAAAQQH8w7YFlLCE63JNLG
|
||||
# KX7zUQIBAAIBAAIBAAIBAAIBADAxMA0GCWCGSAFlAwQCAQUABCAwnDYwEHaCQq0n
|
||||
# 8NAvsN7H7BO7/48rXCNwrg891FS5vaCCC38wggUwMIIEGKADAgECAhAECRgbX9W7
|
||||
# ZnVTQ7VvlVAIMA0GCSqGSIb3DQEBCwUAMGUxCzAJBgNVBAYTAlVTMRUwEwYDVQQK
|
||||
# EwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xJDAiBgNV
|
||||
# BAMTG0RpZ2lDZXJ0IEFzc3VyZWQgSUQgUm9vdCBDQTAeFw0xMzEwMjIxMjAwMDBa
|
||||
# Fw0yODEwMjIxMjAwMDBaMHIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2Vy
|
||||
# dCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xMTAvBgNVBAMTKERpZ2lD
|
||||
# ZXJ0IFNIQTIgQXNzdXJlZCBJRCBDb2RlIFNpZ25pbmcgQ0EwggEiMA0GCSqGSIb3
|
||||
# DQEBAQUAA4IBDwAwggEKAoIBAQD407Mcfw4Rr2d3B9MLMUkZz9D7RZmxOttE9X/l
|
||||
# qJ3bMtdx6nadBS63j/qSQ8Cl+YnUNxnXtqrwnIal2CWsDnkoOn7p0WfTxvspJ8fT
|
||||
# eyOU5JEjlpB3gvmhhCNmElQzUHSxKCa7JGnCwlLyFGeKiUXULaGj6YgsIJWuHEqH
|
||||
# CN8M9eJNYBi+qsSyrnAxZjNxPqxwoqvOf+l8y5Kh5TsxHM/q8grkV7tKtel05iv+
|
||||
# bMt+dDk2DZDv5LVOpKnqagqrhPOsZ061xPeM0SAlI+sIZD5SlsHyDxL0xY4PwaLo
|
||||
# LFH3c7y9hbFig3NBggfkOItqcyDQD2RzPJ6fpjOp/RnfJZPRAgMBAAGjggHNMIIB
|
||||
# yTASBgNVHRMBAf8ECDAGAQH/AgEAMA4GA1UdDwEB/wQEAwIBhjATBgNVHSUEDDAK
|
||||
# BggrBgEFBQcDAzB5BggrBgEFBQcBAQRtMGswJAYIKwYBBQUHMAGGGGh0dHA6Ly9v
|
||||
# Y3NwLmRpZ2ljZXJ0LmNvbTBDBggrBgEFBQcwAoY3aHR0cDovL2NhY2VydHMuZGln
|
||||
# aWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJlZElEUm9vdENBLmNydDCBgQYDVR0fBHow
|
||||
# eDA6oDigNoY0aHR0cDovL2NybDQuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0QXNzdXJl
|
||||
# ZElEUm9vdENBLmNybDA6oDigNoY0aHR0cDovL2NybDMuZGlnaWNlcnQuY29tL0Rp
|
||||
# Z2lDZXJ0QXNzdXJlZElEUm9vdENBLmNybDBPBgNVHSAESDBGMDgGCmCGSAGG/WwA
|
||||
# AgQwKjAoBggrBgEFBQcCARYcaHR0cHM6Ly93d3cuZGlnaWNlcnQuY29tL0NQUzAK
|
||||
# BghghkgBhv1sAzAdBgNVHQ4EFgQUWsS5eyoKo6XqcQPAYPkt9mV1DlgwHwYDVR0j
|
||||
# BBgwFoAUReuir/SSy4IxLVGLp6chnfNtyA8wDQYJKoZIhvcNAQELBQADggEBAD7s
|
||||
# DVoks/Mi0RXILHwlKXaoHV0cLToaxO8wYdd+C2D9wz0PxK+L/e8q3yBVN7Dh9tGS
|
||||
# dQ9RtG6ljlriXiSBThCk7j9xjmMOE0ut119EefM2FAaK95xGTlz/kLEbBw6RFfu6
|
||||
# r7VRwo0kriTGxycqoSkoGjpxKAI8LpGjwCUR4pwUR6F6aGivm6dcIFzZcbEMj7uo
|
||||
# +MUSaJ/PQMtARKUT8OZkDCUIQjKyNookAv4vcn4c10lFluhZHen6dGRrsutmQ9qz
|
||||
# sIzV6Q3d9gEgzpkxYz0IGhizgZtPxpMQBvwHgfqL2vmCSfdibqFT+hKUGIUukpHq
|
||||
# aGxEMrJmoecYpJpkUe8wggZHMIIFL6ADAgECAhADPtXtoGXRuMkd/PkqbJvYMA0G
|
||||
# CSqGSIb3DQEBCwUAMHIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJ
|
||||
# bmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xMTAvBgNVBAMTKERpZ2lDZXJ0
|
||||
# IFNIQTIgQXNzdXJlZCBJRCBDb2RlIFNpZ25pbmcgQ0EwHhcNMTgxMjE4MDAwMDAw
|
||||
# WhcNMjExMjIyMTIwMDAwWjCBgzELMAkGA1UEBhMCVVMxFjAUBgNVBAgTDU5ldyBI
|
||||
# YW1wc2hpcmUxEjAQBgNVBAcTCVdvbGZlYm9ybzEjMCEGA1UEChMaUHl0aG9uIFNv
|
||||
# ZnR3YXJlIEZvdW5kYXRpb24xIzAhBgNVBAMTGlB5dGhvbiBTb2Z0d2FyZSBGb3Vu
|
||||
# ZGF0aW9uMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAqr2kS7J1uW7o
|
||||
# JRxlsdrETAjKarfoH5TI8PWST6Yb2xPooP7vHT4iaVXyL5Lze1f53Jw67Sp+u524
|
||||
# fJXf30qHViEWxumy2RWG0nciU2d+mMqzjlaAWSZNF0u4RcvyDJokEV0RUOqI5CG5
|
||||
# zPI3W9uQ6LiUk3HCYW6kpH177A5T3pw/Po8O8KErJGn1anaqtIICq99ySxrMad/2
|
||||
# hPMBRf6Ndah7f7HPn1gkSSTAoejyuqF5h+B0qI4+JK5+VLvz659VTbAWJsYakkxZ
|
||||
# xVWYpFv4KeQSSwoo0DzMvmERsTzNvVBMWhu9OriJNg+QfFmf96zVTu93cZ+r7xMp
|
||||
# bXyfIOGKhHMaRuZ8ihuWIx3gI9WHDFX6fBKR8+HlhdkaiBEWIsXRoy+EQUyK7zUs
|
||||
# +FqOo2sRYttbs8MTF9YDKFZwyPjn9Wn+gLGd5NUEVyNvD9QVGBEtN7vx87bduJUB
|
||||
# 8F4DylEsMtZTfjw/au6AmOnmneK5UcqSJuwRyZaGNk7y3qj06utx+HTTqHgi975U
|
||||
# pxfyrwAqkovoZEWBVSpvku8PVhkBXcLmNe6MEHlFiaMoiADAeKmX5RFRkN+VrmYG
|
||||
# Tg4zajxfdHeIY8TvLf48tTfmnQJd98geJQv/01NUy/FxuwqAuTkaez5Nl1LxP0Cp
|
||||
# THhghzO4FRD4itT2wqTh4jpojw9QZnsCAwEAAaOCAcUwggHBMB8GA1UdIwQYMBaA
|
||||
# FFrEuXsqCqOl6nEDwGD5LfZldQ5YMB0GA1UdDgQWBBT8Kr9+1L6s84KcpM97IgE7
|
||||
# uI8H8jAOBgNVHQ8BAf8EBAMCB4AwEwYDVR0lBAwwCgYIKwYBBQUHAwMwdwYDVR0f
|
||||
# BHAwbjA1oDOgMYYvaHR0cDovL2NybDMuZGlnaWNlcnQuY29tL3NoYTItYXNzdXJl
|
||||
# ZC1jcy1nMS5jcmwwNaAzoDGGL2h0dHA6Ly9jcmw0LmRpZ2ljZXJ0LmNvbS9zaGEy
|
||||
# LWFzc3VyZWQtY3MtZzEuY3JsMEwGA1UdIARFMEMwNwYJYIZIAYb9bAMBMCowKAYI
|
||||
# KwYBBQUHAgEWHGh0dHBzOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMwCAYGZ4EMAQQB
|
||||
# MIGEBggrBgEFBQcBAQR4MHYwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2lj
|
||||
# ZXJ0LmNvbTBOBggrBgEFBQcwAoZCaHR0cDovL2NhY2VydHMuZGlnaWNlcnQuY29t
|
||||
# L0RpZ2lDZXJ0U0hBMkFzc3VyZWRJRENvZGVTaWduaW5nQ0EuY3J0MAwGA1UdEwEB
|
||||
# /wQCMAAwDQYJKoZIhvcNAQELBQADggEBAEt1oS21X0axiafPjyY+vlYqjWKuUu/Y
|
||||
# FuYWIEq6iRRaFabNDhj9RBFQF/aJiE5msrQEOfAD6/6gVSH91lZWBqg6NEeG9T9S
|
||||
# XbiAPvJ9CEWFsdkXUrjbWhvCnuZ7kqUuU5BAumI1QRbpYgZL3UA+iZXkmjbGh1ln
|
||||
# 8rUhWIxbBYL4Sg2nqpB44p7CUFYkPj/MbwU2gvBV2pXjj5WaskoZtsACMv5g42BN
|
||||
# oVLoRAi+ev6s07POt+JtHRIm87lTyuc8wh0swTPUwksKbLU1Zdj9CpqtzXnuVE0w
|
||||
# 50exJvRSK3Vt4g+0vigpI3qPmDdpkf9+4Mvy0XMNcqrthw20R+PkIlMxghCXMIIQ
|
||||
# kwIBATCBhjByMQswCQYDVQQGEwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkw
|
||||
# FwYDVQQLExB3d3cuZGlnaWNlcnQuY29tMTEwLwYDVQQDEyhEaWdpQ2VydCBTSEEy
|
||||
# IEFzc3VyZWQgSUQgQ29kZSBTaWduaW5nIENBAhADPtXtoGXRuMkd/PkqbJvYMA0G
|
||||
# CWCGSAFlAwQCAQUAoIGaMBkGCSqGSIb3DQEJAzEMBgorBgEEAYI3AgEEMBwGCisG
|
||||
# AQQBgjcCAQsxDjAMBgorBgEEAYI3AgEVMC4GCisGAQQBgjcCAQwxIDAeoByAGgBQ
|
||||
# AHkAdABoAG8AbgAgADMALgA4AC4AMQAwMC8GCSqGSIb3DQEJBDEiBCAGueLiZxG/
|
||||
# uwwkezGlE6NGik7PbC5BWsmef6UPtfvL6DANBgkqhkiG9w0BAQEFAASCAgBFCSGD
|
||||
# sxcFCakg+TKDhbuPBHuY6y9GIgOIPoDAKzSNTmGd38nlGq9GNiOCDzYDS2tEdwwc
|
||||
# 3Kt4G9Yc6OF8aPhYSjOg2N2DGoRDk5FYg1VAZYtdZGWIWkbxHKv1//04J38a18Wd
|
||||
# QpSk+54Nu4ith/3HXeiUxb1IjHECHyq/cBj34op7Kl3SiSfLt4qW6n1FyQnrAq9M
|
||||
# 8EyAJwx1q7sX5ugzvHTUE/stbLkxXO/k06MQ96GPt+knJgWy77EOgdwMQmnQoLQV
|
||||
# XFyMQsa4SxpytVOtOgpdzAavrrmC6qbifbqLeUcioA4a2pm9Pa2xVetUFDilvSyM
|
||||
# rIHRGx5LUCK1FGYccjXidJ4NFvINNT6pOylwkraxZdWJsptTrdR9oRUo8Lh6QAh1
|
||||
# JPAw4mod88kbI5/5H0rOcTsa7P8jAtxkp0uwvxvGUxT+M+A5hwu81wTYsQZbEaVn
|
||||
# H+JI2ADE4CnquMwhoUqQsqgVctZGX1r4AA7LhveEZEHOQ7m6KPYepdTKAGBf0KhO
|
||||
# eEOSjJpnjQzaVmTaP2pfxgaoHHTSQ4AwIdIyO9m5wDoIznWT9T8618T6mzL0m8W5
|
||||
# cX1fHREbifIJLSv3PXiYde0OWVqYIQO9PS7uTJ345Th+TCZUc+SYrC6Vq0PUsRZH
|
||||
# IHLP6UH7SZfsjmlQbD/IiHR9g3kDInlij+IgQ6GCDUQwgg1ABgorBgEEAYI3AwMB
|
||||
# MYINMDCCDSwGCSqGSIb3DQEHAqCCDR0wgg0ZAgEDMQ8wDQYJYIZIAWUDBAIBBQAw
|
||||
# dwYLKoZIhvcNAQkQAQSgaARmMGQCAQEGCWCGSAGG/WwHATAxMA0GCWCGSAFlAwQC
|
||||
# AQUABCBuuoKKI5VzqiwEcymFiOxOYAKyJj+lwucjo+Pb4yWr0QIQTCCWuLxarqMV
|
||||
# tBcxlyFhXBgPMjAyMTA1MDMxMTUyNDhaoIIKNzCCBP4wggPmoAMCAQICEA1CSuC+
|
||||
# Ooj/YEAhzhQA8N0wDQYJKoZIhvcNAQELBQAwcjELMAkGA1UEBhMCVVMxFTATBgNV
|
||||
# BAoTDERpZ2lDZXJ0IEluYzEZMBcGA1UECxMQd3d3LmRpZ2ljZXJ0LmNvbTExMC8G
|
||||
# A1UEAxMoRGlnaUNlcnQgU0hBMiBBc3N1cmVkIElEIFRpbWVzdGFtcGluZyBDQTAe
|
||||
# Fw0yMTAxMDEwMDAwMDBaFw0zMTAxMDYwMDAwMDBaMEgxCzAJBgNVBAYTAlVTMRcw
|
||||
# FQYDVQQKEw5EaWdpQ2VydCwgSW5jLjEgMB4GA1UEAxMXRGlnaUNlcnQgVGltZXN0
|
||||
# YW1wIDIwMjEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDC5mGEZ8WK
|
||||
# 9Q0IpEXKY2tR1zoRQr0KdXVNlLQMULUmEP4dyG+RawyW5xpcSO9E5b+bYc0VkWJa
|
||||
# uP9nC5xj/TZqgfop+N0rcIXeAhjzeG28ffnHbQk9vmp2h+mKvfiEXR52yeTGdnY6
|
||||
# U9HR01o2j8aj4S8bOrdh1nPsTm0zinxdRS1LsVDmQTo3VobckyON91Al6GTm3dOP
|
||||
# L1e1hyDrDo4s1SPa9E14RuMDgzEpSlwMMYpKjIjF9zBa+RSvFV9sQ0kJ/SYjU/aN
|
||||
# Y+gaq1uxHTDCm2mCtNv8VlS8H6GHq756WwogL0sJyZWnjbL61mOLTqVyHO6fegFz
|
||||
# +BnW/g1JhL0BAgMBAAGjggG4MIIBtDAOBgNVHQ8BAf8EBAMCB4AwDAYDVR0TAQH/
|
||||
# BAIwADAWBgNVHSUBAf8EDDAKBggrBgEFBQcDCDBBBgNVHSAEOjA4MDYGCWCGSAGG
|
||||
# /WwHATApMCcGCCsGAQUFBwIBFhtodHRwOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMw
|
||||
# HwYDVR0jBBgwFoAU9LbhIB3+Ka7S5GGlsqIlssgXNW4wHQYDVR0OBBYEFDZEho6k
|
||||
# urBmvrwoLR1ENt3janq8MHEGA1UdHwRqMGgwMqAwoC6GLGh0dHA6Ly9jcmwzLmRp
|
||||
# Z2ljZXJ0LmNvbS9zaGEyLWFzc3VyZWQtdHMuY3JsMDKgMKAuhixodHRwOi8vY3Js
|
||||
# NC5kaWdpY2VydC5jb20vc2hhMi1hc3N1cmVkLXRzLmNybDCBhQYIKwYBBQUHAQEE
|
||||
# eTB3MCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2VydC5jb20wTwYIKwYB
|
||||
# BQUHMAKGQ2h0dHA6Ly9jYWNlcnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFNIQTJB
|
||||
# c3N1cmVkSURUaW1lc3RhbXBpbmdDQS5jcnQwDQYJKoZIhvcNAQELBQADggEBAEgc
|
||||
# 3LXpmiO85xrnIA6OZ0b9QnJRdAojR6OrktIlxHBZvhSg5SeBpU0UFRkHefDRBMOG
|
||||
# 2Tu9/kQCZk3taaQP9rhwz2Lo9VFKeHk2eie38+dSn5On7UOee+e03UEiifuHokYD
|
||||
# Tvz0/rdkd2NfI1Jpg4L6GlPtkMyNoRdzDfTzZTlwS/Oc1np72gy8PTLQG8v1Yfx1
|
||||
# CAB2vIEO+MDhXM/EEXLnG2RJ2CKadRVC9S0yOIHa9GCiurRS+1zgYSQlT7LfySmo
|
||||
# c0NR2r1j1h9bm/cuG08THfdKDXF+l7f0P4TrweOjSaH6zqe/Vs+6WXZhiV9+p7SO
|
||||
# Z3j5NpjhyyjaW4emii8wggUxMIIEGaADAgECAhAKoSXW1jIbfkHkBdo2l8IVMA0G
|
||||
# CSqGSIb3DQEBCwUAMGUxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJ
|
||||
# bmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xJDAiBgNVBAMTG0RpZ2lDZXJ0
|
||||
# IEFzc3VyZWQgSUQgUm9vdCBDQTAeFw0xNjAxMDcxMjAwMDBaFw0zMTAxMDcxMjAw
|
||||
# MDBaMHIxCzAJBgNVBAYTAlVTMRUwEwYDVQQKEwxEaWdpQ2VydCBJbmMxGTAXBgNV
|
||||
# BAsTEHd3dy5kaWdpY2VydC5jb20xMTAvBgNVBAMTKERpZ2lDZXJ0IFNIQTIgQXNz
|
||||
# dXJlZCBJRCBUaW1lc3RhbXBpbmcgQ0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAw
|
||||
# ggEKAoIBAQC90DLuS82Pf92puoKZxTlUKFe2I0rEDgdFM1EQfdD5fU1ofue2oPSN
|
||||
# s4jkl79jIZCYvxO8V9PD4X4I1moUADj3Lh477sym9jJZ/l9lP+Cb6+NGRwYaVX4L
|
||||
# J37AovWg4N4iPw7/fpX786O6Ij4YrBHk8JkDbTuFfAnT7l3ImgtU46gJcWvgzyIQ
|
||||
# D3XPcXJOCq3fQDpct1HhoXkUxk0kIzBdvOw8YGqsLwfM/fDqR9mIUF79Zm5WYScp
|
||||
# iYRR5oLnRlD9lCosp+R1PrqYD4R/nzEU1q3V8mTLex4F0IQZchfxFwbvPc3WTe8G
|
||||
# Qv2iUypPhR3EHTyvz9qsEPXdrKzpVv+TAgMBAAGjggHOMIIByjAdBgNVHQ4EFgQU
|
||||
# 9LbhIB3+Ka7S5GGlsqIlssgXNW4wHwYDVR0jBBgwFoAUReuir/SSy4IxLVGLp6ch
|
||||
# nfNtyA8wEgYDVR0TAQH/BAgwBgEB/wIBADAOBgNVHQ8BAf8EBAMCAYYwEwYDVR0l
|
||||
# BAwwCgYIKwYBBQUHAwgweQYIKwYBBQUHAQEEbTBrMCQGCCsGAQUFBzABhhhodHRw
|
||||
# Oi8vb2NzcC5kaWdpY2VydC5jb20wQwYIKwYBBQUHMAKGN2h0dHA6Ly9jYWNlcnRz
|
||||
# LmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEFzc3VyZWRJRFJvb3RDQS5jcnQwgYEGA1Ud
|
||||
# HwR6MHgwOqA4oDaGNGh0dHA6Ly9jcmw0LmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydEFz
|
||||
# c3VyZWRJRFJvb3RDQS5jcmwwOqA4oDaGNGh0dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNv
|
||||
# bS9EaWdpQ2VydEFzc3VyZWRJRFJvb3RDQS5jcmwwUAYDVR0gBEkwRzA4BgpghkgB
|
||||
# hv1sAAIEMCowKAYIKwYBBQUHAgEWHGh0dHBzOi8vd3d3LmRpZ2ljZXJ0LmNvbS9D
|
||||
# UFMwCwYJYIZIAYb9bAcBMA0GCSqGSIb3DQEBCwUAA4IBAQBxlRLpUYdWac3v3dp8
|
||||
# qmN6s3jPBjdAhO9LhL/KzwMC/cWnww4gQiyvd/MrHwwhWiq3BTQdaq6Z+CeiZr8J
|
||||
# qmDfdqQ6kw/4stHYfBli6F6CJR7Euhx7LCHi1lssFDVDBGiy23UC4HLHmNY8ZOUf
|
||||
# SBAYX4k4YU1iRiSHY4yRUiyvKYnleB/WCxSlgNcSR3CzddWThZN+tpJn+1Nhiaj1
|
||||
# a5bA9FhpDXzIAbG5KHW3mWOFIoxhynmUfln8jA/jb7UBJrZspe6HUSHkWGCbugwt
|
||||
# K22ixH67xCUrRwIIfEmuE7bhfEJCKMYYVs9BNLZmXbZ0e/VWMyIvIjayS6JKldj1
|
||||
# po5SMYICTTCCAkkCAQEwgYYwcjELMAkGA1UEBhMCVVMxFTATBgNVBAoTDERpZ2lD
|
||||
# ZXJ0IEluYzEZMBcGA1UECxMQd3d3LmRpZ2ljZXJ0LmNvbTExMC8GA1UEAxMoRGln
|
||||
# aUNlcnQgU0hBMiBBc3N1cmVkIElEIFRpbWVzdGFtcGluZyBDQQIQDUJK4L46iP9g
|
||||
# QCHOFADw3TANBglghkgBZQMEAgEFAKCBmDAaBgkqhkiG9w0BCQMxDQYLKoZIhvcN
|
||||
# AQkQAQQwHAYJKoZIhvcNAQkFMQ8XDTIxMDUwMzExNTI0OFowKwYLKoZIhvcNAQkQ
|
||||
# AgwxHDAaMBgwFgQU4deCqOGRvu9ryhaRtaq0lKYkm/MwLwYJKoZIhvcNAQkEMSIE
|
||||
# IF4QsNmEZSxyIoRHXsGFuji0l/UWCqW9PHVzCEWUlsAfMA0GCSqGSIb3DQEBAQUA
|
||||
# BIIBAGLTB2GJcXOg7O2cTQmsIoasfSqq+sCpeV4z2od18Lx4IIdnj3R0gKY8UH2T
|
||||
# 2j0JMcPogZDZxqxKY//0KP5AL1SzHwD5tN/61Fg/oVk6Yp7dw8HN1V5Kayg9IrXf
|
||||
# xyfway7Zc6YAWWzRtf5vv7xpgRKGTUahGrYZwxJPnAyhW643vymwhkQ/cRodTJIz
|
||||
# d3qdy4sTHORPKwPUzhxjhsGah6GBAe+Rho03JiRIvQsvUaF5igjA4fJ1QSFKZvqz
|
||||
# rA0oiJNZckQHYEPxh1AQShen9Jhr7fd2j5bVVBpaWAALmRdr8Q12CiFlQyk4KKy7
|
||||
# AEIHi2Rbf06++s+R2qJ5Tzfggs4=
|
||||
# SIG # End signature block
|
76
BitSearch/Bitsearch.Microservice/Scripts/activate
Normal file
76
BitSearch/Bitsearch.Microservice/Scripts/activate
Normal file
@ -0,0 +1,76 @@
|
||||
# This file must be used with "source bin/activate" *from bash*
|
||||
# you cannot run it directly
|
||||
|
||||
deactivate () {
    # Undo everything the activation script changed in the current shell.
    # Pass "nondestructive" as the first argument to keep this function
    # defined afterwards (used by the activation script itself).

    # Put PATH back if a pre-activation copy was saved.
    if [ "${_OLD_VIRTUAL_PATH:+set}" = "set" ] ; then
        PATH="$_OLD_VIRTUAL_PATH"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi

    # Put PYTHONHOME back if a pre-activation copy was saved.
    if [ "${_OLD_VIRTUAL_PYTHONHOME:+set}" = "set" ] ; then
        PYTHONHOME="$_OLD_VIRTUAL_PYTHONHOME"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # bash and zsh cache command locations; drop the cache so the restored
    # PATH is actually honoured for commands that were already looked up.
    if [ -n "${BASH:-}" ] || [ -n "${ZSH_VERSION:-}" ] ; then
        hash -r
    fi

    # Put the prompt back if a pre-activation copy was saved.
    if [ "${_OLD_VIRTUAL_PS1:+set}" = "set" ] ; then
        PS1="$_OLD_VIRTUAL_PS1"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV

    # Unless asked to stay, remove this function as the final step.
    case "${1:-}" in
        nondestructive)
            ;;
        *)
            # Self destruct!
            unset -f deactivate
            ;;
    esac
}
|
||||
|
||||
# unset irrelevant variables
deactivate nondestructive

# The venv location is a Windows drive-letter path with backslashes. PATH in
# a POSIX shell is colon-separated, so prepending "J:\..." verbatim would be
# split at the drive colon. On MSYS (Git Bash) and Cygwin, convert it to the
# shell's own path form first; elsewhere keep the value unchanged.
if [ "${OSTYPE:-}" = "cygwin" ] || [ "${OSTYPE:-}" = "msys" ] ; then
    VIRTUAL_ENV=$(cygpath "J:\Desktop\Projekt\bitsearch")
else
    VIRTUAL_ENV="J:\Desktop\Projekt\bitsearch"
fi
export VIRTUAL_ENV

# Remember the current PATH so deactivate can restore it, then put the
# venv's Scripts directory in front.
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/Scripts:$PATH"
export PATH

# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

# Prefix the prompt with the environment name unless the user opted out.
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    # The literal below is never empty in this file, so the first branch is
    # always taken; the else branch is kept as it was.
    if [ "x(bitsearch) " != x ] ; then
        PS1="(bitsearch) ${PS1:-}"
    else
        if [ "`basename \"$VIRTUAL_ENV\"`" = "__" ] ; then
            # special case for Aspen magic directories
            # see https://aspen.io/
            PS1="[`basename \`dirname \"$VIRTUAL_ENV\"\``] $PS1"
        else
            PS1="(`basename \"$VIRTUAL_ENV\"`)$PS1"
        fi
    fi
    export PS1
fi

# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands.  Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
    hash -r
fi
|
33
BitSearch/Bitsearch.Microservice/Scripts/activate.bat
Normal file
33
BitSearch/Bitsearch.Microservice/Scripts/activate.bat
Normal file
@ -0,0 +1,33 @@
|
||||
@echo off

rem Activate the virtual environment in the current cmd.exe session:
rem set VIRTUAL_ENV, prefix PROMPT, clear PYTHONHOME and prepend the venv's
rem Scripts directory to PATH. Previous values are kept in _OLD_VIRTUAL_*
rem variables so deactivate.bat can restore them.
rem
rem Assignments that expand another variable use the quoted form
rem   set "NAME=value"
rem so characters such as ^& ^| ^< ^> inside PATH or PROMPT are not parsed as
rem command syntax and no trailing whitespace is captured. This matches the
rem style deactivate.bat already uses.

rem This file is UTF-8 encoded, so we need to update the current code page while executing it
for /f "tokens=2 delims=:." %%a in ('"%SystemRoot%\System32\chcp.com"') do (
    set _OLD_CODEPAGE=%%a
)
if defined _OLD_CODEPAGE (
    "%SystemRoot%\System32\chcp.com" 65001 > nul
)

set "VIRTUAL_ENV=J:\Desktop\Projekt\bitsearch"

if not defined PROMPT set PROMPT=$P$G

rem If an environment is already active, start again from the saved values
rem so repeated activation does not stack prompt prefixes.
if defined _OLD_VIRTUAL_PROMPT set "PROMPT=%_OLD_VIRTUAL_PROMPT%"
if defined _OLD_VIRTUAL_PYTHONHOME set "PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME%"

set "_OLD_VIRTUAL_PROMPT=%PROMPT%"
set "PROMPT=(bitsearch) %PROMPT%"

if defined PYTHONHOME set "_OLD_VIRTUAL_PYTHONHOME=%PYTHONHOME%"
set PYTHONHOME=

rem Same idea for PATH: restore the saved copy if there is one, otherwise
rem save the current value before modifying it.
if defined _OLD_VIRTUAL_PATH set "PATH=%_OLD_VIRTUAL_PATH%"
if not defined _OLD_VIRTUAL_PATH set "_OLD_VIRTUAL_PATH=%PATH%"

set "PATH=%VIRTUAL_ENV%\Scripts;%PATH%"

:END
rem Switch back to the code page that was active before this script ran.
if defined _OLD_CODEPAGE (
    "%SystemRoot%\System32\chcp.com" %_OLD_CODEPAGE% > nul
    set _OLD_CODEPAGE=
)
|
21
BitSearch/Bitsearch.Microservice/Scripts/deactivate.bat
Normal file
21
BitSearch/Bitsearch.Microservice/Scripts/deactivate.bat
Normal file
@ -0,0 +1,21 @@
|
||||
@echo off

rem Restore PROMPT, PYTHONHOME and PATH from the _OLD_VIRTUAL_* copies, when
rem those copies exist, and clear the bookkeeping variables and VIRTUAL_ENV.

rem Restore the prompt if a saved copy exists, then drop the copy.
if defined _OLD_VIRTUAL_PROMPT (
    set "PROMPT=%_OLD_VIRTUAL_PROMPT%"
)
set _OLD_VIRTUAL_PROMPT=

rem Restore PYTHONHOME only when a saved copy exists.
if defined _OLD_VIRTUAL_PYTHONHOME (
    set "PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME%"
    set _OLD_VIRTUAL_PYTHONHOME=
)

rem Restore PATH if a saved copy exists, then drop the copy.
if defined _OLD_VIRTUAL_PATH (
    set "PATH=%_OLD_VIRTUAL_PATH%"
)

set _OLD_VIRTUAL_PATH=

rem Clearing VIRTUAL_ENV marks the session as having no active environment.
set VIRTUAL_ENV=

:END
|
BIN
BitSearch/Bitsearch.Microservice/Scripts/f2py.exe
Normal file
BIN
BitSearch/Bitsearch.Microservice/Scripts/f2py.exe
Normal file
Binary file not shown.
BIN
BitSearch/Bitsearch.Microservice/Scripts/flask.exe
Normal file
BIN
BitSearch/Bitsearch.Microservice/Scripts/flask.exe
Normal file
Binary file not shown.
BIN
BitSearch/Bitsearch.Microservice/Scripts/jsonschema.exe
Normal file
BIN
BitSearch/Bitsearch.Microservice/Scripts/jsonschema.exe
Normal file
Binary file not shown.
BIN
BitSearch/Bitsearch.Microservice/Scripts/pip.exe
Normal file
BIN
BitSearch/Bitsearch.Microservice/Scripts/pip.exe
Normal file
Binary file not shown.
BIN
BitSearch/Bitsearch.Microservice/Scripts/pip3.8.exe
Normal file
BIN
BitSearch/Bitsearch.Microservice/Scripts/pip3.8.exe
Normal file
Binary file not shown.
BIN
BitSearch/Bitsearch.Microservice/Scripts/pip3.exe
Normal file
BIN
BitSearch/Bitsearch.Microservice/Scripts/pip3.exe
Normal file
Binary file not shown.
BIN
BitSearch/Bitsearch.Microservice/Scripts/python.exe
Normal file
BIN
BitSearch/Bitsearch.Microservice/Scripts/python.exe
Normal file
Binary file not shown.
BIN
BitSearch/Bitsearch.Microservice/Scripts/pythonw.exe
Normal file
BIN
BitSearch/Bitsearch.Microservice/Scripts/pythonw.exe
Normal file
Binary file not shown.
3
BitSearch/Bitsearch.Microservice/pyvenv.cfg
Normal file
3
BitSearch/Bitsearch.Microservice/pyvenv.cfg
Normal file
@ -0,0 +1,3 @@
|
||||
home = C:\software\python3
|
||||
include-system-site-packages = false
|
||||
version = 3.8.10
|
23
BitSearch/Bitsearch.Microservice/requirements.txt
Normal file
23
BitSearch/Bitsearch.Microservice/requirements.txt
Normal file
@ -0,0 +1,23 @@
|
||||
aniso8601==9.0.1
|
||||
attrs==21.4.0
|
||||
click==8.0.3
|
||||
colorama==0.4.4
|
||||
Flask==2.0.2
|
||||
Flask-RESTful==0.3.9
|
||||
flask-restplus==0.13.0
|
||||
importlib-resources==5.4.0
|
||||
itsdangerous==2.0.1
|
||||
Jinja2==3.0.3
|
||||
joblib==1.1.0
|
||||
jsonschema==4.4.0
|
||||
MarkupSafe==2.0.1
|
||||
numpy==1.22.1
|
||||
pyrsistent==0.18.1
|
||||
pytz==2021.3
|
||||
scikit-learn==1.0.2
|
||||
scipy==1.7.3
|
||||
six==1.16.0
|
||||
sklearn==0.0
|
||||
threadpoolctl==3.0.0
|
||||
Werkzeug==2.0.2
|
||||
zipp==3.7.0
|
61
twitter.py
Normal file
61
twitter.py
Normal file
@ -0,0 +1,61 @@
|
||||
# %%
|
||||
import pandas as pd
|
||||
import os
|
||||
import re
|
||||
# %% [markdown]
|
||||
### Reading data - this part needs changing when the data source changes
|
||||
# %%
|
||||
# Load the tweet CSV from the current working directory.
path = os.getcwd()
filename = 'training_data_clean.csv'
# os.path.join uses the platform's separator instead of a hard-coded '/'.
filepath = os.path.join(path, filename)
# header=None: the first row is read as data, so column names are assigned
# explicitly afterwards. Undecodable bytes are kept as surrogate escapes
# rather than raising.
data = pd.read_csv(filepath, header=None,
                   delimiter=',', encoding_errors='surrogateescape')
data.columns = ['index', 'id', 'date', 'query', 'user', 'text']
|
||||
# %% [markdown]
|
||||
### Function definitions
|
||||
# %%
|
||||
# Maps a replacement string to the list of regex patterns it replaces.
# Entries are applied in insertion order, so the placeholder tokens are
# substituted before the whitespace/punctuation standardization rules run.
# Patterns are raw strings so that regex escapes such as \w or \s are not
# treated as (invalid) Python string escapes.
change_dict = {
    # tokens
    "USERNAME": [r'@\w+|@'],
    "URL": [r'http\S*'],
    "EMOJI": [r"[;:][dbop\(\)\[\]]|[dbop\(\)\[\]][;:]|xd+|\S*&\S*"],
    # standardization
    ', ': [r'\s,'],
    '. ': [r'\s\.'],
    ' ': [r'\s{2,}'],
    # NOTE(review): the source showed this pattern as the text "<EFBFBD>",
    # which looks like the UTF-8 bytes of U+FFFD (the replacement character)
    # rendered as text. Both forms are matched here; confirm which one the
    # data actually contains.
    "'": [r"\ufffd|<EFBFBD>"],
    '?': [r"\s\?+|\?+"],
    '!': [r"\s\!+|\!+"]
}
|
||||
|
||||
def clean_lines(line, change_dict):
    """Return *line* lower-cased with every substitution in *change_dict* applied.

    ``change_dict`` maps a replacement string to a list of regex patterns.
    Substitutions run in the dictionary's iteration order, and within one
    entry in list order; each one operates on the result of the previous.
    """
    substitutions = [
        (pattern, replacement)
        for replacement, patterns in change_dict.items()
        for pattern in patterns
    ]
    cleaned = line.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
|
||||
|
||||
def get_rep_idx_to_cut_out_from_str(line):
    """Return the indices of characters beyond the second in each run.

    A run is a stretch of identical consecutive characters. The first two
    characters of a run are kept; the index of every further one is
    reported, in ascending order.
    """
    surplus_positions = []
    repeats = 0  # how many times the current character has repeated so far
    # Pair each character with its predecessor; indices therefore start at 1.
    for position, (before, current) in enumerate(zip(line, line[1:]), start=1):
        if before == current:
            repeats += 1
        else:
            repeats = 0
        if repeats >= 2:
            surplus_positions.append(position)
    return surplus_positions
|
||||
|
||||
def truncate_duplicated_letters_to_two(line):
    """Return *line* with every run of identical characters cut to two.

    For example ``"cooool!!!!"`` becomes ``"cool!!"``. Runs of one or two
    characters are left untouched.
    """
    # The helper returns a list; a set makes each membership test O(1), so
    # the filter is linear in the length of the line instead of quadratic.
    idx_to_cut = set(get_rep_idx_to_cut_out_from_str(line))
    # join builds the result in one pass instead of repeated concatenation.
    return ''.join(s for i, s in enumerate(line) if i not in idx_to_cut)
|
||||
# %% [markdown]
|
||||
### Cleaning
|
||||
# %%
|
||||
# Clean each tweet: apply the regex substitutions, cut repeated characters
# down to two, and strip surrounding whitespace. The cleaned values then
# replace the 'text' column.
text = [
    truncate_duplicated_letters_to_two(clean_lines(tweet, change_dict)).strip()
    for tweet in data.loc[:, 'text'].values.tolist()
]
data.text = text
|
||||
# %%
|
Loading…
Reference in New Issue
Block a user