Update repo
This commit is contained in:
parent
a31ff34818
commit
18257d0941
8
D/.idea/.gitignore
vendored
Normal file
8
D/.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
8
D/.idea/D.iml
Normal file
8
D/.idea/D.iml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
6
D/.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
D/.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
4
D/.idea/misc.xml
Normal file
4
D/.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (D)" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
D/.idea/modules.xml
Normal file
8
D/.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/D.iml" filepath="$PROJECT_DIR$/.idea/D.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
6
D/.idea/vcs.xml
Normal file
6
D/.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
BIN
D/__pycache__/calclex.cpython-310.pyc
Normal file
BIN
D/__pycache__/calclex.cpython-310.pyc
Normal file
Binary file not shown.
BIN
D/__pycache__/parsetab.cpython-310.pyc
Normal file
BIN
D/__pycache__/parsetab.cpython-310.pyc
Normal file
Binary file not shown.
76
D/calclex.py
Normal file
76
D/calclex.py
Normal file
@ -0,0 +1,76 @@
|
||||
from ply import lex
|
||||
|
||||
# Names of every token type produced by this lexer; yacc.py imports this tuple.
tokens = ('NUMBER', 'OPERATE', 'SIZE', 'KIND', 'COLOR', 'MATERIAL')
|
||||
|
||||
|
||||
def t_OPERATE(t):
    r'Buy | Sell'
    # The string above is the token's regular expression (PLY reads it from
    # the docstring), so it must stay the first statement of the function.
    # The matched word is handed to the parser unchanged.
    return t
|
||||
|
||||
|
||||
def t_NUMBER(t):
    r'\d+'
    # The string above is the token's regular expression; keep it first.
    # Replace the matched digit string with its integer value.
    digits = t.value
    t.value = int(digits)
    return t
|
||||
|
||||
|
||||
def t_SIZE(t):
    r'tiny | small | big | large'
    # The string above is the token's regular expression; keep it first.
    # Replace the matched size word with its numeric code; any other value is
    # left untouched.
    size_codes = {'tiny': 1, 'small': 2, 'big': 3, 'large': 4}
    t.value = size_codes.get(t.value, t.value)
    return t
|
||||
|
||||
|
||||
def t_COLOR(t):
    r'(black | white | red | green | blue)'
    # The string above is the token's regular expression; keep it first.
    # Replace the matched colour word with its numeric code; any other value
    # is left untouched.
    color_codes = {'black': 1, 'white': 2, 'red': 3, 'green': 4, 'blue': 5}
    t.value = color_codes.get(t.value, t.value)
    return t
|
||||
|
||||
|
||||
def t_MATERIAL(t):
    r'metal | plastic'
    # The string above is the token's regular expression; keep it first.
    # Replace the matched material word with its numeric code; any other
    # value is left untouched.
    material_codes = {'metal': 1, 'plastic': 2}
    t.value = material_codes.get(t.value, t.value)
    return t
|
||||
|
||||
|
||||
def t_KIND(t):
    r'box(es)? | ring(s)?'
    # The string above is the token's regular expression; keep it first.
    # Only the first letter is inspected: 'b' (box/boxes) gives code 1,
    # anything else (ring/rings) gives code 2.
    t.value = 1 if t.value[0] == 'b' else 2
    return t
|
||||
|
||||
|
||||
def t_error(t):
    # Lexing error hook: report the first character of the unmatched input
    # and skip it so scanning can continue with the next character.
    # The closing quote was missing from the original message.
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)
|
||||
|
||||
|
||||
# Characters silently skipped between tokens: space and tab.
t_ignore = ' \t'

# Build the lexer from the token rules defined above in this module.
lexer = lex.lex()
|
147
D/parser.out
Normal file
147
D/parser.out
Normal file
@ -0,0 +1,147 @@
|
||||
Created by PLY version 3.11 (http://www.dabeaz.com/ply)
|
||||
|
||||
Grammar
|
||||
|
||||
Rule 0 S' -> command
|
||||
Rule 1 command -> operate NUMBER article
|
||||
Rule 2 article -> attribute article
|
||||
Rule 3 attribute -> COLOR
|
||||
Rule 4 attribute -> MATERIAL
|
||||
Rule 5 attribute -> SIZE
|
||||
Rule 6 article -> KIND
|
||||
Rule 7 operate -> OPERATE
|
||||
|
||||
Terminals, with rules where they appear
|
||||
|
||||
COLOR : 3
|
||||
KIND : 6
|
||||
MATERIAL : 4
|
||||
NUMBER : 1
|
||||
OPERATE : 7
|
||||
SIZE : 5
|
||||
error :
|
||||
|
||||
Nonterminals, with rules where they appear
|
||||
|
||||
article : 1 2
|
||||
attribute : 2
|
||||
command : 0
|
||||
operate : 1
|
||||
|
||||
Parsing method: LALR
|
||||
|
||||
state 0
|
||||
|
||||
(0) S' -> . command
|
||||
(1) command -> . operate NUMBER article
|
||||
(7) operate -> . OPERATE
|
||||
|
||||
OPERATE shift and go to state 3
|
||||
|
||||
command shift and go to state 1
|
||||
operate shift and go to state 2
|
||||
|
||||
state 1
|
||||
|
||||
(0) S' -> command .
|
||||
|
||||
|
||||
|
||||
state 2
|
||||
|
||||
(1) command -> operate . NUMBER article
|
||||
|
||||
NUMBER shift and go to state 4
|
||||
|
||||
|
||||
state 3
|
||||
|
||||
(7) operate -> OPERATE .
|
||||
|
||||
NUMBER reduce using rule 7 (operate -> OPERATE .)
|
||||
|
||||
|
||||
state 4
|
||||
|
||||
(1) command -> operate NUMBER . article
|
||||
(2) article -> . attribute article
|
||||
(6) article -> . KIND
|
||||
(3) attribute -> . COLOR
|
||||
(4) attribute -> . MATERIAL
|
||||
(5) attribute -> . SIZE
|
||||
|
||||
KIND shift and go to state 7
|
||||
COLOR shift and go to state 8
|
||||
MATERIAL shift and go to state 9
|
||||
SIZE shift and go to state 10
|
||||
|
||||
article shift and go to state 5
|
||||
attribute shift and go to state 6
|
||||
|
||||
state 5
|
||||
|
||||
(1) command -> operate NUMBER article .
|
||||
|
||||
$end reduce using rule 1 (command -> operate NUMBER article .)
|
||||
|
||||
|
||||
state 6
|
||||
|
||||
(2) article -> attribute . article
|
||||
(2) article -> . attribute article
|
||||
(6) article -> . KIND
|
||||
(3) attribute -> . COLOR
|
||||
(4) attribute -> . MATERIAL
|
||||
(5) attribute -> . SIZE
|
||||
|
||||
KIND shift and go to state 7
|
||||
COLOR shift and go to state 8
|
||||
MATERIAL shift and go to state 9
|
||||
SIZE shift and go to state 10
|
||||
|
||||
attribute shift and go to state 6
|
||||
article shift and go to state 11
|
||||
|
||||
state 7
|
||||
|
||||
(6) article -> KIND .
|
||||
|
||||
$end reduce using rule 6 (article -> KIND .)
|
||||
|
||||
|
||||
state 8
|
||||
|
||||
(3) attribute -> COLOR .
|
||||
|
||||
KIND reduce using rule 3 (attribute -> COLOR .)
|
||||
COLOR reduce using rule 3 (attribute -> COLOR .)
|
||||
MATERIAL reduce using rule 3 (attribute -> COLOR .)
|
||||
SIZE reduce using rule 3 (attribute -> COLOR .)
|
||||
|
||||
|
||||
state 9
|
||||
|
||||
(4) attribute -> MATERIAL .
|
||||
|
||||
KIND reduce using rule 4 (attribute -> MATERIAL .)
|
||||
COLOR reduce using rule 4 (attribute -> MATERIAL .)
|
||||
MATERIAL reduce using rule 4 (attribute -> MATERIAL .)
|
||||
SIZE reduce using rule 4 (attribute -> MATERIAL .)
|
||||
|
||||
|
||||
state 10
|
||||
|
||||
(5) attribute -> SIZE .
|
||||
|
||||
KIND reduce using rule 5 (attribute -> SIZE .)
|
||||
COLOR reduce using rule 5 (attribute -> SIZE .)
|
||||
MATERIAL reduce using rule 5 (attribute -> SIZE .)
|
||||
SIZE reduce using rule 5 (attribute -> SIZE .)
|
||||
|
||||
|
||||
state 11
|
||||
|
||||
(2) article -> attribute article .
|
||||
|
||||
$end reduce using rule 2 (article -> attribute article .)
|
||||
|
37
D/parsetab.py
Normal file
37
D/parsetab.py
Normal file
@ -0,0 +1,37 @@
|
||||
|
||||
# parsetab.py
|
||||
# This file is automatically generated. Do not edit.
|
||||
# pylint: disable=W,C,R
|
||||
_tabversion = '3.10'
|
||||
|
||||
_lr_method = 'LALR'
|
||||
|
||||
_lr_signature = 'COLOR KIND MATERIAL NUMBER OPERATE SIZEcommand : operate NUMBER articlearticle : attribute articleattribute : COLORattribute : MATERIALattribute : SIZEarticle : KINDoperate : OPERATE'
|
||||
|
||||
_lr_action_items = {'OPERATE':([0,],[3,]),'$end':([1,5,7,11,],[0,-1,-6,-2,]),'NUMBER':([2,3,],[4,-7,]),'KIND':([4,6,8,9,10,],[7,7,-3,-4,-5,]),'COLOR':([4,6,8,9,10,],[8,8,-3,-4,-5,]),'MATERIAL':([4,6,8,9,10,],[9,9,-3,-4,-5,]),'SIZE':([4,6,8,9,10,],[10,10,-3,-4,-5,]),}
|
||||
|
||||
_lr_action = {}
|
||||
for _k, _v in _lr_action_items.items():
|
||||
for _x,_y in zip(_v[0],_v[1]):
|
||||
if not _x in _lr_action: _lr_action[_x] = {}
|
||||
_lr_action[_x][_k] = _y
|
||||
del _lr_action_items
|
||||
|
||||
_lr_goto_items = {'command':([0,],[1,]),'operate':([0,],[2,]),'article':([4,6,],[5,11,]),'attribute':([4,6,],[6,6,]),}
|
||||
|
||||
_lr_goto = {}
|
||||
for _k, _v in _lr_goto_items.items():
|
||||
for _x, _y in zip(_v[0], _v[1]):
|
||||
if not _x in _lr_goto: _lr_goto[_x] = {}
|
||||
_lr_goto[_x][_k] = _y
|
||||
del _lr_goto_items
|
||||
_lr_productions = [
|
||||
("S' -> command","S'",1,None,None,None),
|
||||
('command -> operate NUMBER article','command',3,'p_command','yacc.py',6),
|
||||
('article -> attribute article','article',2,'p_article_attribute','yacc.py',22),
|
||||
('attribute -> COLOR','attribute',1,'p_attribute_color','yacc.py',27),
|
||||
('attribute -> MATERIAL','attribute',1,'p_attribute_material','yacc.py',32),
|
||||
('attribute -> SIZE','attribute',1,'p_attribute_size','yacc.py',37),
|
||||
('article -> KIND','article',1,'p_article_kind','yacc.py',42),
|
||||
('operate -> OPERATE','operate',1,'p_operate','yacc.py',46),
|
||||
]
|
66
D/yacc.py
Normal file
66
D/yacc.py
Normal file
@ -0,0 +1,66 @@
|
||||
from ply import yacc
|
||||
from calclex import tokens
|
||||
|
||||
|
||||
def p_command(p):
    'command : operate NUMBER article'
    # The string above is the grammar rule (PLY reads it from the docstring),
    # so it must stay the first statement of the function.
    #
    # p[1] is the operation word ('Buy' or 'Sell'), p[2] the amount and p[3]
    # the numeric article index used as a position in the module-level stock
    # list `tab`.
    operation, amount, index = p[1], p[2], p[3]

    # Attribute codes are summed, so repeating attributes (for example five
    # times 'large' followed by 'box') yields an index past the end of `tab`.
    # Reject such input instead of crashing with IndexError.
    if not 0 <= index < len(tab):
        print('I do not know such an article.')
        return

    if operation == 'Buy':
        tab[index] += amount
        print(f'OK. I am buying {amount} new articles indexed as {index}.')
        print(f'No of articles in shop: {tab[index]}')
    elif operation == 'Sell':
        if amount > tab[index]:
            print('I do not have as many articles as you want.')
        else:
            tab[index] -= amount
            print(f'OK. I am selling {amount} articles indexed as {index}.')
            # NOTE(review): the original indentation of this line was not
            # recoverable; it is assumed to belong to the successful sale.
            print(f'No of articles in shop: {tab[index]}')
|
||||
|
||||
|
||||
def p_article_attribute(p):
    'article : attribute article'
    # The string above is the grammar rule; keep it first.
    # An article's code is the code of its leading attribute plus the code of
    # the rest of the article.
    attribute_code, rest_code = p[1], p[2]
    p[0] = attribute_code + rest_code
|
||||
|
||||
|
||||
def p_attribute_color(p):
    'attribute : COLOR'
    # The string above is the grammar rule; keep it first.
    # The colour code is used as-is.
    color_code = p[1]
    p[0] = color_code
|
||||
|
||||
|
||||
def p_attribute_material(p):
    'attribute : MATERIAL'
    # The string above is the grammar rule; keep it first.
    # The material code is scaled by ten.
    material_code = p[1]
    p[0] = material_code * 10
|
||||
|
||||
|
||||
def p_attribute_size(p):
    'attribute : SIZE'
    # The string above is the grammar rule; keep it first.
    # The size code is scaled by one hundred.
    size_code = p[1]
    p[0] = size_code * 100
|
||||
|
||||
|
||||
def p_article_kind(p):
    'article : KIND'
    # The string above is the grammar rule; keep it first.
    # The kind code is scaled by one thousand.
    kind_code = p[1]
    p[0] = kind_code * 1000
|
||||
|
||||
|
||||
def p_operate(p):
    'operate : OPERATE'
    # The string above is the grammar rule; keep it first.
    # Pass the operation word through to the enclosing command rule.
    operation = p[1]
    p[0] = operation
|
||||
|
||||
|
||||
def p_error(p):
    # Syntax error hook: the argument is not inspected, only a fixed message
    # is printed.
    print("Syntax error in input!")
|
||||
|
||||
|
||||
# Stock table: 3000 counters, one per article index, all starting at zero.
tab = [0] * 3000

parser = yacc.yacc()

# Read and parse commands until the user types 'Bye'.
while (s := input('What can I do for you? \n')) != 'Bye':
    parser.parse(s)
|
8
E/.idea/.gitignore
vendored
Normal file
8
E/.idea/.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
8
E/.idea/E.iml
Normal file
8
E/.idea/E.iml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
6
E/.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
E/.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
4
E/.idea/misc.xml
Normal file
4
E/.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
|
||||
</project>
|
8
E/.idea/modules.xml
Normal file
8
E/.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/E.iml" filepath="$PROJECT_DIR$/.idea/E.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
6
E/.idea/vcs.xml
Normal file
6
E/.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
6578142
E/PoliMorf-0.6.7.tab
Normal file
6578142
E/PoliMorf-0.6.7.tab
Normal file
File diff suppressed because it is too large
Load Diff
50
E/bpe.py
Normal file
50
E/bpe.py
Normal file
@ -0,0 +1,50 @@
|
||||
import string
|
||||
|
||||
|
||||
def bpe(sentence: str, V: int) -> set:
    """Learn a byte-pair-encoding vocabulary from a single sentence.

    Punctuation is removed and the sentence is lower-cased; every space is
    replaced by the boundary marker ``"<w>"`` and one marker is added at each
    end. Starting from the set of single symbols, the most frequent adjacent
    pair is merged repeatedly and added to the vocabulary.

    Args:
        sentence: Text to learn the vocabulary from.
        V: Target vocabulary size.

    Returns:
        The set of learned tokens. It may hold fewer than ``V`` entries when
        the sentence runs out of pairs to merge first.
    """
    # remove punctuation
    sentence = sentence.translate(str.maketrans("", "", string.punctuation)).lower()
    characters = ["<w>"]
    characters.extend("<w>" if ch == " " else ch for ch in sentence)
    characters.append("<w>")
    vocabulary = set(characters)
    while len(vocabulary) < V:
        bigrams = get_bigrams(characters)
        if not bigrams:
            # Everything has been merged into a single token; without this
            # guard max() below raises ValueError on the empty dict.
            break
        frequencies = get_frequencies(bigrams)
        most_freq_bigr = max(frequencies, key=frequencies.get)
        upd_sentence_with_bigram(characters, most_freq_bigr)
        vocabulary.add(most_freq_bigr)
    return vocabulary
|
||||
|
||||
|
||||
|
||||
def get_bigrams(characters: list) -> list:
    """Return the concatenation of every pair of adjacent items, in order."""
    return [left + right for left, right in zip(characters, characters[1:])]
|
||||
|
||||
|
||||
def get_frequencies(item: list) -> dict:
    """Count how many times each element occurs in *item*.

    Keys appear in the order in which each element is first seen.
    """
    counts = {}
    for entry in item:
        counts[entry] = counts.get(entry, 0) + 1
    return counts
|
||||
|
||||
def upd_sentence_with_bigram(chars: list, bigram: str) -> list:
    """Merge every adjacent pair in *chars* that concatenates to *bigram*.

    The scan runs left to right and a freshly merged token is not considered
    again as the left half of another pair. The list is modified in place and
    also returned.

    Args:
        chars: Token list to update.
        bigram: Merged token to substitute for matching pairs.

    Returns:
        The same list object, after merging.
    """
    merged = []
    pos = 0
    total = len(chars)
    # Build the result in one pass instead of pop/pop/insert on the live list,
    # which shifts the tail on every merge and makes the scan quadratic.
    while pos < total:
        if pos + 1 < total and chars[pos] + chars[pos + 1] == bigram:
            merged.append(bigram)
            pos += 2
        else:
            merged.append(chars[pos])
            pos += 1
    # Slice assignment keeps the caller's list object, as before.
    chars[:] = merged
    return chars
|
||||
|
||||
# Ask for a sentence and a target vocabulary size, learn the vocabulary and
# print each token on its own line.
usr_input = input('Podaj zdanie: ')
V = int(input('Podaj wielkosc slownika: '))
vocab = bpe(usr_input, V)
for token in vocab:
    print(token)
|
22
E/maxmatch.py
Normal file
22
E/maxmatch.py
Normal file
@ -0,0 +1,22 @@
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def max_match(sentence: str, dictionary: list) -> list:
    """Split *sentence* greedily into the longest words found in *dictionary*.

    At each position the longest prefix of the remaining text that is a
    dictionary word is taken; when no prefix matches, a single character is
    emitted and scanning continues after it.

    Args:
        sentence: Text without separators to segment.
        dictionary: Collection of known words.

    Returns:
        The segments in order; an empty list for an empty sentence.
    """
    if not sentence:
        return []

    # Hash-based lookup avoids scanning a long word list for every candidate.
    if isinstance(dictionary, (set, frozenset)):
        known = dictionary
    else:
        try:
            known = frozenset(dictionary)
        except TypeError:
            # Unhashable entries: fall back to the container as given.
            known = dictionary

    words = []
    start = 0
    total = len(sentence)
    # Iterative so that long inputs cannot exhaust the recursion limit.
    while start < total:
        # Longest candidate first, beginning with the whole remaining text
        # (the original range skipped it and probed the empty prefix instead).
        for end in range(total, start + 1, -1):
            if sentence[start:end] in known:
                break
        else:
            # No multi-character match: emit one character.
            end = start + 1
        words.append(sentence[start:end])
        start = end
    return words
|
||||
|
||||
|
||||
# Load the tab-separated PoliMorf file (no header row) and use its first
# column as the word list for segmenting a sample string.
pm = pd.read_csv("PoliMorf-0.6.7.tab", sep="\t", header=None)
polish_dict = list(pm[0])
result = max_match("Alamakota", polish_dict)
print(result)
|
26
G/G.py
Normal file
26
G/G.py
Normal file
@ -0,0 +1,26 @@
|
||||
import spacy
|
||||
|
||||
class Imp:
    """Holder for one imperative: the verb text and the words collected after it."""

    # Text of the imperative token; empty until assigned on an instance.
    action_s = ''
    # Space-terminated texts of the tokens that follow the imperative.
    object_s = ''
|
||||
|
||||
# Read one sentence, print each token with its part-of-speech tag, then split
# the sentence into imperative verbs and the words that follow each of them.
nlp = spacy.load("pl_core_news_sm")
inputed_text = input('Podaj zdanie: ')
doc = nlp(inputed_text)
for token in doc:
    print(token.text, token.pos_)

contains_imp = any(token.morph.get('Mood') == ['Imp'] for token in doc)
if not contains_imp:
    print('Bye!')
else:
    imp_list = []
    for token in doc:
        if token.tag_ == 'IMPT':
            # A new imperative starts a new action/object pair.
            current = Imp()
            current.action_s = token.text
            imp_list.append(current)
        elif imp_list:
            imp_list[-1].object_s += token.text + " "
        # Tokens before the first imperative have no action to attach to and
        # are skipped; indexing the still-empty list raised IndexError here.
    for imp in imp_list:
        print('Action: '+imp.action_s+", Object: "+imp.object_s)
|
375
P1/Movielens/main.ipynb
Normal file
375
P1/Movielens/main.ipynb
Normal file
@ -0,0 +1,375 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"import seaborn as sns\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"%matplotlib inline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>MovieID</th>\n",
|
||||
" <th>MovieName</th>\n",
|
||||
" <th>Category</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Toy Story (1995)</td>\n",
|
||||
" <td>Animation|Children's|Comedy</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>Jumanji (1995)</td>\n",
|
||||
" <td>Adventure|Children's|Fantasy</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>Grumpier Old Men (1995)</td>\n",
|
||||
" <td>Comedy|Romance</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>Waiting to Exhale (1995)</td>\n",
|
||||
" <td>Comedy|Drama</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>Father of the Bride Part II (1995)</td>\n",
|
||||
" <td>Comedy</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" MovieID MovieName Category\n",
|
||||
"0 1 Toy Story (1995) Animation|Children's|Comedy\n",
|
||||
"1 2 Jumanji (1995) Adventure|Children's|Fantasy\n",
|
||||
"2 3 Grumpier Old Men (1995) Comedy|Romance\n",
|
||||
"3 4 Waiting to Exhale (1995) Comedy|Drama\n",
|
||||
"4 5 Father of the Bride Part II (1995) Comedy"
|
||||
]
|
||||
},
|
||||
"execution_count": 48,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_movie = pd.read_csv('movies.dat',sep='::',encoding='latin1',engine='python',names=['MovieID','MovieName','Category'])\n",
|
||||
"df_movie.dropna(inplace=True)\n",
|
||||
"df_movie.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>ID</th>\n",
|
||||
" <th>MovieID</th>\n",
|
||||
" <th>Ratings</th>\n",
|
||||
" <th>TimeStamp</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1193</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>978300760</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>661</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>978302109</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>914</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>978301968</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>3408</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>978300275</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2355</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>978824291</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" ID MovieID Ratings TimeStamp\n",
|
||||
"0 1 1193 5 978300760\n",
|
||||
"1 1 661 3 978302109\n",
|
||||
"2 1 914 3 978301968\n",
|
||||
"3 1 3408 4 978300275\n",
|
||||
"4 1 2355 5 978824291"
|
||||
]
|
||||
},
|
||||
"execution_count": 49,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df_rating = pd.read_csv('ratings.dat',sep='::',encoding='latin1',engine='python',names=['ID','MovieID','Ratings','TimeStamp'])\n",
|
||||
"df_rating.dropna(inplace=True)\n",
|
||||
"df_rating.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>MovieID</th>\n",
|
||||
" <th>MovieName</th>\n",
|
||||
" <th>Category</th>\n",
|
||||
" <th>ID</th>\n",
|
||||
" <th>Ratings</th>\n",
|
||||
" <th>TimeStamp</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Toy Story (1995)</td>\n",
|
||||
" <td>Animation|Children's|Comedy</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>978824268</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Toy Story (1995)</td>\n",
|
||||
" <td>Animation|Children's|Comedy</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>978237008</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Toy Story (1995)</td>\n",
|
||||
" <td>Animation|Children's|Comedy</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>978233496</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Toy Story (1995)</td>\n",
|
||||
" <td>Animation|Children's|Comedy</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>978225952</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Toy Story (1995)</td>\n",
|
||||
" <td>Animation|Children's|Comedy</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>978226474</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" MovieID MovieName Category ID Ratings \\\n",
|
||||
"0 1 Toy Story (1995) Animation|Children's|Comedy 1 5 \n",
|
||||
"1 1 Toy Story (1995) Animation|Children's|Comedy 6 4 \n",
|
||||
"2 1 Toy Story (1995) Animation|Children's|Comedy 8 4 \n",
|
||||
"3 1 Toy Story (1995) Animation|Children's|Comedy 9 5 \n",
|
||||
"4 1 Toy Story (1995) Animation|Children's|Comedy 10 5 \n",
|
||||
"\n",
|
||||
" TimeStamp \n",
|
||||
"0 978824268 \n",
|
||||
"1 978237008 \n",
|
||||
"2 978233496 \n",
|
||||
"3 978225952 \n",
|
||||
"4 978226474 "
|
||||
]
|
||||
},
|
||||
"execution_count": 50,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df = pd.merge(df_movie,df_rating,left_on='MovieID',right_on='MovieID')\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"MovieID\n",
|
||||
"1 8.293693\n",
|
||||
"2 6.402282\n",
|
||||
"3 6.033473\n",
|
||||
"4 5.458824\n",
|
||||
"5 6.013514\n",
|
||||
" ... \n",
|
||||
"3948 7.271462\n",
|
||||
"3949 8.230263\n",
|
||||
"3950 7.333333\n",
|
||||
"3951 7.800000\n",
|
||||
"3952 7.561856\n",
|
||||
"Name: Ratings, Length: 3706, dtype: float64"
|
||||
]
|
||||
},
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"groupByMovie = df.groupby('MovieID')\n",
|
||||
"movieRatingsMean = groupByMovie['Ratings'].mean()*2\n",
|
||||
"movieRatingsMean.columns = ['MovieID','Mean']\n",
|
||||
"movieRatingsMean"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
3883
P1/Movielens/movies.dat
Normal file
3883
P1/Movielens/movies.dat
Normal file
File diff suppressed because it is too large
Load Diff
1000209
P1/Movielens/ratings.dat
Normal file
1000209
P1/Movielens/ratings.dat
Normal file
File diff suppressed because it is too large
Load Diff
6040
P1/Movielens/users.dat
Normal file
6040
P1/Movielens/users.dat
Normal file
File diff suppressed because it is too large
Load Diff
484
P1/imdb/main.ipynb
Normal file
484
P1/imdb/main.ipynb
Normal file
@ -0,0 +1,484 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"import seaborn as sns\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>MOVIES</th>\n",
|
||||
" <th>YEAR</th>\n",
|
||||
" <th>GENRE</th>\n",
|
||||
" <th>RATING</th>\n",
|
||||
" <th>ONE-LINE</th>\n",
|
||||
" <th>STARS</th>\n",
|
||||
" <th>VOTES</th>\n",
|
||||
" <th>RunTime</th>\n",
|
||||
" <th>Gross</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Blood Red Sky</td>\n",
|
||||
" <td>(2021)</td>\n",
|
||||
" <td>\\nAction, Horror, Thriller</td>\n",
|
||||
" <td>6.1</td>\n",
|
||||
" <td>\\nA woman with a mysterious illness is forced ...</td>\n",
|
||||
" <td>\\n Director:\\nPeter Thorwarth\\n| \\n Star...</td>\n",
|
||||
" <td>21,062</td>\n",
|
||||
" <td>121.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Masters of the Universe: Revelation</td>\n",
|
||||
" <td>(2021– )</td>\n",
|
||||
" <td>\\nAnimation, Action, Adventure</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>\\nThe war for Eternia begins again in what may...</td>\n",
|
||||
" <td>\\n \\n Stars:\\nChris Wood, \\nSara...</td>\n",
|
||||
" <td>17,870</td>\n",
|
||||
" <td>25.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>The Walking Dead</td>\n",
|
||||
" <td>(2010–2022)</td>\n",
|
||||
" <td>\\nDrama, Horror, Thriller</td>\n",
|
||||
" <td>8.2</td>\n",
|
||||
" <td>\\nSheriff Deputy Rick Grimes wakes up from a c...</td>\n",
|
||||
" <td>\\n \\n Stars:\\nAndrew Lincoln, \\n...</td>\n",
|
||||
" <td>885,805</td>\n",
|
||||
" <td>44.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Rick and Morty</td>\n",
|
||||
" <td>(2013– )</td>\n",
|
||||
" <td>\\nAnimation, Adventure, Comedy</td>\n",
|
||||
" <td>9.2</td>\n",
|
||||
" <td>\\nAn animated series that follows the exploits...</td>\n",
|
||||
" <td>\\n \\n Stars:\\nJustin Roiland, \\n...</td>\n",
|
||||
" <td>414,849</td>\n",
|
||||
" <td>23.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Army of Thieves</td>\n",
|
||||
" <td>(2021)</td>\n",
|
||||
" <td>\\nAction, Crime, Horror</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>\\nA prequel, set before the events of Army of ...</td>\n",
|
||||
" <td>\\n Director:\\nMatthias Schweighöfer\\n| \\n ...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" MOVIES YEAR \\\n",
|
||||
"0 Blood Red Sky (2021) \n",
|
||||
"1 Masters of the Universe: Revelation (2021– ) \n",
|
||||
"2 The Walking Dead (2010–2022) \n",
|
||||
"3 Rick and Morty (2013– ) \n",
|
||||
"4 Army of Thieves (2021) \n",
|
||||
"\n",
|
||||
" GENRE RATING \\\n",
|
||||
"0 \\nAction, Horror, Thriller 6.1 \n",
|
||||
"1 \\nAnimation, Action, Adventure 5.0 \n",
|
||||
"2 \\nDrama, Horror, Thriller 8.2 \n",
|
||||
"3 \\nAnimation, Adventure, Comedy 9.2 \n",
|
||||
"4 \\nAction, Crime, Horror NaN \n",
|
||||
"\n",
|
||||
" ONE-LINE \\\n",
|
||||
"0 \\nA woman with a mysterious illness is forced ... \n",
|
||||
"1 \\nThe war for Eternia begins again in what may... \n",
|
||||
"2 \\nSheriff Deputy Rick Grimes wakes up from a c... \n",
|
||||
"3 \\nAn animated series that follows the exploits... \n",
|
||||
"4 \\nA prequel, set before the events of Army of ... \n",
|
||||
"\n",
|
||||
" STARS VOTES RunTime Gross \n",
|
||||
"0 \\n Director:\\nPeter Thorwarth\\n| \\n Star... 21,062 121.0 NaN \n",
|
||||
"1 \\n \\n Stars:\\nChris Wood, \\nSara... 17,870 25.0 NaN \n",
|
||||
"2 \\n \\n Stars:\\nAndrew Lincoln, \\n... 885,805 44.0 NaN \n",
|
||||
"3 \\n \\n Stars:\\nJustin Roiland, \\n... 414,849 23.0 NaN \n",
|
||||
"4 \\n Director:\\nMatthias Schweighöfer\\n| \\n ... NaN NaN NaN "
|
||||
]
|
||||
},
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df = pd.read_csv('movies.csv',header=0)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>MOVIES</th>\n",
|
||||
" <th>YEAR</th>\n",
|
||||
" <th>GENRE</th>\n",
|
||||
" <th>RATING</th>\n",
|
||||
" <th>ONE-LINE</th>\n",
|
||||
" <th>STARS</th>\n",
|
||||
" <th>VOTES</th>\n",
|
||||
" <th>RunTime</th>\n",
|
||||
" <th>Gross</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>Blood Red Sky</td>\n",
|
||||
" <td>(2021)</td>\n",
|
||||
" <td>Action, Horror, Thriller</td>\n",
|
||||
" <td>6.1</td>\n",
|
||||
" <td>A woman with a mysterious illness is forced in...</td>\n",
|
||||
" <td>Director:Peter Thorwarth| Stars:Peri Baume...</td>\n",
|
||||
" <td>21,062</td>\n",
|
||||
" <td>121.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>Masters of the Universe: Revelation</td>\n",
|
||||
" <td>(2021– )</td>\n",
|
||||
" <td>Animation, Action, Adventure</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>The war for Eternia begins again in what may b...</td>\n",
|
||||
" <td>Stars:Chris Wood, Sarah Michelle Gellar, Lena ...</td>\n",
|
||||
" <td>17,870</td>\n",
|
||||
" <td>25.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>The Walking Dead</td>\n",
|
||||
" <td>(2010–2022)</td>\n",
|
||||
" <td>Drama, Horror, Thriller</td>\n",
|
||||
" <td>8.2</td>\n",
|
||||
" <td>Sheriff Deputy Rick Grimes wakes up from a com...</td>\n",
|
||||
" <td>Stars:Andrew Lincoln, Norman Reedus, Melissa M...</td>\n",
|
||||
" <td>885,805</td>\n",
|
||||
" <td>44.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>Rick and Morty</td>\n",
|
||||
" <td>(2013– )</td>\n",
|
||||
" <td>Animation, Adventure, Comedy</td>\n",
|
||||
" <td>9.2</td>\n",
|
||||
" <td>An animated series that follows the exploits o...</td>\n",
|
||||
" <td>Stars:Justin Roiland, Chris Parnell, Spencer G...</td>\n",
|
||||
" <td>414,849</td>\n",
|
||||
" <td>23.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>Army of Thieves</td>\n",
|
||||
" <td>(2021)</td>\n",
|
||||
" <td>Action, Crime, Horror</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>A prequel, set before the events of Army of th...</td>\n",
|
||||
" <td>Director:Matthias Schweighöfer| Stars:Matt...</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" MOVIES YEAR \\\n",
|
||||
"0 Blood Red Sky (2021) \n",
|
||||
"1 Masters of the Universe: Revelation (2021– ) \n",
|
||||
"2 The Walking Dead (2010–2022) \n",
|
||||
"3 Rick and Morty (2013– ) \n",
|
||||
"4 Army of Thieves (2021) \n",
|
||||
"\n",
|
||||
" GENRE RATING \\\n",
|
||||
"0 Action, Horror, Thriller 6.1 \n",
|
||||
"1 Animation, Action, Adventure 5.0 \n",
|
||||
"2 Drama, Horror, Thriller 8.2 \n",
|
||||
"3 Animation, Adventure, Comedy 9.2 \n",
|
||||
"4 Action, Crime, Horror NaN \n",
|
||||
"\n",
|
||||
" ONE-LINE \\\n",
|
||||
"0 A woman with a mysterious illness is forced in... \n",
|
||||
"1 The war for Eternia begins again in what may b... \n",
|
||||
"2 Sheriff Deputy Rick Grimes wakes up from a com... \n",
|
||||
"3 An animated series that follows the exploits o... \n",
|
||||
"4 A prequel, set before the events of Army of th... \n",
|
||||
"\n",
|
||||
" STARS VOTES RunTime Gross \n",
|
||||
"0 Director:Peter Thorwarth| Stars:Peri Baume... 21,062 121.0 NaN \n",
|
||||
"1 Stars:Chris Wood, Sarah Michelle Gellar, Lena ... 17,870 25.0 NaN \n",
|
||||
"2 Stars:Andrew Lincoln, Norman Reedus, Melissa M... 885,805 44.0 NaN \n",
|
||||
"3 Stars:Justin Roiland, Chris Parnell, Spencer G... 414,849 23.0 NaN \n",
|
||||
"4 Director:Matthias Schweighöfer| Stars:Matt... NaN NaN NaN "
|
||||
]
|
||||
},
|
||||
"execution_count": 53,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df['GENRE'] = df['GENRE'].str.replace('\\n','')\n",
|
||||
"df['ONE-LINE'] = df['ONE-LINE'].str.replace('\\n','')\n",
|
||||
"df['STARS'] = df['STARS'].str.replace('\\n','')\n",
|
||||
"\n",
|
||||
"df['GENRE'] = df['GENRE'].str.strip()\n",
|
||||
"df['ONE-LINE'] = df['ONE-LINE'].str.strip()\n",
|
||||
"df['STARS'] = df['STARS'].str.strip()\n",
|
||||
"\n",
|
||||
"# df['YEAR'] = df['YEAR'].str.strip('()')\n",
|
||||
"\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"0 Peri Baumeister, Carl Anton Koch, Alexander Sc...\n",
|
||||
"1 Chris Wood, Sarah Michelle Gellar, Lena Headey...\n",
|
||||
"2 Andrew Lincoln, Norman Reedus, Melissa McBride...\n",
|
||||
"3 Justin Roiland, Chris Parnell, Spencer Grammer...\n",
|
||||
"4 Matthias Schweighöfer, Nathalie Emmanuel, Ruby...\n",
|
||||
" ... \n",
|
||||
"9993 Felix Klare, Romina Küper, Anna Maria Mühe, Ro...\n",
|
||||
"9994 Morgan Taylor Campbell, Chris Cope, Iñaki Godo...\n",
|
||||
"9996 Prince Harry\n",
|
||||
"9997 Morgan Taylor Campbell, Iñaki Godoy, Rhianna J...\n",
|
||||
"9998 Morgan Taylor Campbell, Jennifer Cheon Garcia,...\n",
|
||||
"Name: Directors, Length: 9206, dtype: object"
|
||||
]
|
||||
},
|
||||
"execution_count": 54,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def extract_director(direct):\n",
|
||||
" result = re.search(r'(Director:|Directors:)(.*)\\|',direct)\n",
|
||||
" if result:\n",
|
||||
" return result.group(2).strip()\n",
|
||||
"\n",
|
||||
"def extract_stars(stars):\n",
|
||||
" result = re.search(r'(Stars:|Star:)(.*)',stars)\n",
|
||||
" if result:\n",
|
||||
" return result.group(2).strip()\n",
|
||||
"\n",
|
||||
"df['Stars'] = df['STARS'].apply(lambda d : extract_director(d))\n",
|
||||
"df['Directors'] = df['STARS'].apply(lambda s : extract_stars(s))\n",
|
||||
"df['Stars'].dropna()\n",
|
||||
"df['Directors'].dropna()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Year</th>\n",
|
||||
" <th>Count</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2020–</td>\n",
|
||||
" <td>898</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2020</td>\n",
|
||||
" <td>742</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2021–</td>\n",
|
||||
" <td>661</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2019</td>\n",
|
||||
" <td>657</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2019–</td>\n",
|
||||
" <td>553</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Year Count\n",
|
||||
"0 2020– 898\n",
|
||||
"1 2020 742\n",
|
||||
"2 2021– 661\n",
|
||||
"3 2019 657\n",
|
||||
"4 2019– 553"
|
||||
]
|
||||
},
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df['Year'] = df['YEAR'].str.extract(r'([0-9]{4}–.*|[0-9]{4})')\n",
|
||||
"df['Year'] = df['Year'].str.strip().replace(\")\",\"\")\n",
|
||||
"\n",
|
||||
"def extract_year(year):\n",
|
||||
" if year[-3:] == '– )':\n",
|
||||
" return year.replace('– )',\"–\")\n",
|
||||
" else:\n",
|
||||
" return year.replace(')',\"\")\n",
|
||||
"\n",
|
||||
"df['Year'] = df['Year'].fillna('Unknown')\n",
|
||||
"df['Year'] = df['Year'].apply(lambda y: extract_year(y))\n",
|
||||
" \n",
|
||||
"year_count = df[df['Year'] != 'Unknown']['Year'].value_counts().reset_index().rename(columns = {'Year':'Count','index':'Year'})\n",
|
||||
"year_count.head()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
108876
P1/imdb/movies.csv
Normal file
108876
P1/imdb/movies.csv
Normal file
File diff suppressed because it is too large
Load Diff
128
P1/tmdb/main.ipynb
Normal file
128
P1/tmdb/main.ipynb
Normal file
@ -0,0 +1,128 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"movies = pd.read_csv('tmdb_5000_movies.csv')\n",
|
||||
"credits = pd.read_csv('tmdb_5000_credits.csv')\n",
|
||||
"df = pd.merge(movies,credits,left_on=['id','title'],right_on=['movie_id','title'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"pandas.core.series.Series"
|
||||
]
|
||||
},
|
||||
"execution_count": 39,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"def load_json_columns(col):\n",
|
||||
" col = col.apply(json.loads)\n",
|
||||
"load_json_columns(df['genres'])\n",
|
||||
"# df['genres'] = df['genres'].apply(json.loads)\n",
|
||||
"# df['keywords'] = df['keywords'].apply(json.loads)\n",
|
||||
"# df['production_companies'] = df['production_companies'].apply(json.loads)\n",
|
||||
"# df['production_countries'] = df['production_countries'].apply(json.loads)\n",
|
||||
"# df['cast'] = df['cast'].apply(json.loads)\n",
|
||||
"# df['crew'] = df['crew'].apply(json.loads)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[{'id': 1463, 'name': 'culture clash'},\n",
|
||||
" {'id': 2964, 'name': 'future'},\n",
|
||||
" {'id': 3386, 'name': 'space war'},\n",
|
||||
" {'id': 3388, 'name': 'space colony'},\n",
|
||||
" {'id': 3679, 'name': 'society'},\n",
|
||||
" {'id': 3801, 'name': 'space travel'},\n",
|
||||
" {'id': 9685, 'name': 'futuristic'},\n",
|
||||
" {'id': 9840, 'name': 'romance'},\n",
|
||||
" {'id': 9882, 'name': 'space'},\n",
|
||||
" {'id': 9951, 'name': 'alien'},\n",
|
||||
" {'id': 10148, 'name': 'tribe'},\n",
|
||||
" {'id': 10158, 'name': 'alien planet'},\n",
|
||||
" {'id': 10987, 'name': 'cgi'},\n",
|
||||
" {'id': 11399, 'name': 'marine'},\n",
|
||||
" {'id': 13065, 'name': 'soldier'},\n",
|
||||
" {'id': 14643, 'name': 'battle'},\n",
|
||||
" {'id': 14720, 'name': 'love affair'},\n",
|
||||
" {'id': 165431, 'name': 'anti war'},\n",
|
||||
" {'id': 193554, 'name': 'power relations'},\n",
|
||||
" {'id': 206690, 'name': 'mind and soul'},\n",
|
||||
" {'id': 209714, 'name': '3d'}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 38,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df['keywords'][0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
4804
P1/tmdb/tmdb_5000_credits.csv
Normal file
4804
P1/tmdb/tmdb_5000_credits.csv
Normal file
File diff suppressed because one or more lines are too long
4804
P1/tmdb/tmdb_5000_movies.csv
Normal file
4804
P1/tmdb/tmdb_5000_movies.csv
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user