Update repo

2023-01-19 21:22:01 +01:00 · 2023-01-19 21:22:01 +01:00 · 18257d0941
commit 18257d0941
parent a31ff34818
32 changed files with 7708249 additions and 0 deletions
--- a/D/.idea/.gitignore
+++ b/D/.idea/.gitignore
@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
--- a/D/.idea/D.iml
+++ b/D/.idea/D.iml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
--- a/D/.idea/inspectionProfiles/profiles_settings.xml
+++ b/D/.idea/inspectionProfiles/profiles_settings.xml
@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/D/.idea/misc.xml
+++ b/D/.idea/misc.xml
@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (D)" project-jdk-type="Python SDK" />
+</project>
--- a/D/.idea/modules.xml
+++ b/D/.idea/modules.xml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/D.iml" filepath="$PROJECT_DIR$/.idea/D.iml" />
+    </modules>
+  </component>
+</project>
--- a/D/.idea/vcs.xml
+++ b/D/.idea/vcs.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
+  </component>
+</project>
--- a/D/pycache/calclex.cpython-310.pyc
+++ b/D/pycache/calclex.cpython-310.pyc
--- a/D/pycache/parsetab.cpython-310.pyc
+++ b/D/pycache/parsetab.cpython-310.pyc
--- a/D/calclex.py
+++ b/D/calclex.py
@ -0,0 +1,76 @@
+from ply import lex
+
+# Token type names; the parser module imports this tuple, and each name has a
+# matching t_<NAME> rule function in this file.
+tokens = (
+    'NUMBER',
+    'OPERATE',
+    'SIZE',
+    'KIND',
+    'COLOR',
+    'MATERIAL'
+)
+
+
+# Lexer rule for the operation keyword. The raw string below is the token
+# regex (PLY reads it from the docstring slot), so it must stay the first
+# statement. The matched text ('Buy' or 'Sell') is kept as the token value.
+# NOTE(review): the spaces around `|` only work because PLY compiles rules
+# with re.VERBOSE by default — confirm if lex() flags are ever changed.
+def t_OPERATE(t):
+    r'Buy | Sell'
+    return t
+
+
+# Lexer rule for a run of decimal digits; the token value is converted from
+# the matched text to an int.
+def t_NUMBER(t):
+    r'\d+'
+    t.value = int(t.value)
+    return t
+
+
+# Lexer rule for a size word; the token value becomes its numeric code
+# (tiny=1, small=2, big=3, large=4). Any other text is left unchanged.
+def t_SIZE(t):
+    r'tiny | small | big | large'
+    size_codes = {'tiny': 1, 'small': 2, 'big': 3, 'large': 4}
+    t.value = size_codes.get(t.value, t.value)
+    return t
+
+
+# Lexer rule for a colour word; the token value becomes its numeric code
+# (black=1, white=2, red=3, green=4, blue=5). Any other text is left unchanged.
+def t_COLOR(t):
+    r'(black | white | red | green | blue)'
+    color_codes = {'black': 1, 'white': 2, 'red': 3, 'green': 4, 'blue': 5}
+    t.value = color_codes.get(t.value, t.value)
+    return t
+
+
+# Lexer rule for a material word; the token value becomes its numeric code
+# (metal=1, plastic=2). Any other text is left unchanged.
+def t_MATERIAL(t):
+    r'metal | plastic'
+    material_codes = {'metal': 1, 'plastic': 2}
+    t.value = material_codes.get(t.value, t.value)
+    return t
+
+
+# Lexer rule for the article kind, singular or plural; the token value becomes
+# 1 when the matched text starts with 'b' (box/boxes) and 2 otherwise.
+def t_KIND(t):
+    r'box(es)? | ring(s)?'
+    t.value = 1 if t.value[0] == 'b' else 2
+    return t
+
+
+# Lexer error hook: report the first character of the unmatched input and skip
+# it so that lexing continues with the rest of the line.
+def t_error(t):
+    # The closing quote was missing from the message, leaving it unbalanced.
+    print("Illegal character '%s'" % t.value[0])
+    t.lexer.skip(1)
+
+
+# Characters skipped between tokens: space and tab.
+t_ignore = ' \t'
+# Build the lexer from the t_* rules defined in this module.
+lexer = lex.lex()
--- a/D/parser.out
+++ b/D/parser.out
@ -0,0 +1,147 @@
+Created by PLY version 3.11 (http://www.dabeaz.com/ply)
+
+Grammar
+
+Rule 0     S' -> command
+Rule 1     command -> operate NUMBER article
+Rule 2     article -> attribute article
+Rule 3     attribute -> COLOR
+Rule 4     attribute -> MATERIAL
+Rule 5     attribute -> SIZE
+Rule 6     article -> KIND
+Rule 7     operate -> OPERATE
+
+Terminals, with rules where they appear
+
+COLOR                : 3
+KIND                 : 6
+MATERIAL             : 4
+NUMBER               : 1
+OPERATE              : 7
+SIZE                 : 5
+error                : 
+
+Nonterminals, with rules where they appear
+
+article              : 1 2
+attribute            : 2
+command              : 0
+operate              : 1
+
+Parsing method: LALR
+
+state 0
+
+    (0) S' -> . command
+    (1) command -> . operate NUMBER article
+    (7) operate -> . OPERATE
+
+    OPERATE         shift and go to state 3
+
+    command                        shift and go to state 1
+    operate                        shift and go to state 2
+
+state 1
+
+    (0) S' -> command .
+
+
+
+state 2
+
+    (1) command -> operate . NUMBER article
+
+    NUMBER          shift and go to state 4
+
+
+state 3
+
+    (7) operate -> OPERATE .
+
+    NUMBER          reduce using rule 7 (operate -> OPERATE .)
+
+
+state 4
+
+    (1) command -> operate NUMBER . article
+    (2) article -> . attribute article
+    (6) article -> . KIND
+    (3) attribute -> . COLOR
+    (4) attribute -> . MATERIAL
+    (5) attribute -> . SIZE
+
+    KIND            shift and go to state 7
+    COLOR           shift and go to state 8
+    MATERIAL        shift and go to state 9
+    SIZE            shift and go to state 10
+
+    article                        shift and go to state 5
+    attribute                      shift and go to state 6
+
+state 5
+
+    (1) command -> operate NUMBER article .
+
+    $end            reduce using rule 1 (command -> operate NUMBER article .)
+
+
+state 6
+
+    (2) article -> attribute . article
+    (2) article -> . attribute article
+    (6) article -> . KIND
+    (3) attribute -> . COLOR
+    (4) attribute -> . MATERIAL
+    (5) attribute -> . SIZE
+
+    KIND            shift and go to state 7
+    COLOR           shift and go to state 8
+    MATERIAL        shift and go to state 9
+    SIZE            shift and go to state 10
+
+    attribute                      shift and go to state 6
+    article                        shift and go to state 11
+
+state 7
+
+    (6) article -> KIND .
+
+    $end            reduce using rule 6 (article -> KIND .)
+
+
+state 8
+
+    (3) attribute -> COLOR .
+
+    KIND            reduce using rule 3 (attribute -> COLOR .)
+    COLOR           reduce using rule 3 (attribute -> COLOR .)
+    MATERIAL        reduce using rule 3 (attribute -> COLOR .)
+    SIZE            reduce using rule 3 (attribute -> COLOR .)
+
+
+state 9
+
+    (4) attribute -> MATERIAL .
+
+    KIND            reduce using rule 4 (attribute -> MATERIAL .)
+    COLOR           reduce using rule 4 (attribute -> MATERIAL .)
+    MATERIAL        reduce using rule 4 (attribute -> MATERIAL .)
+    SIZE            reduce using rule 4 (attribute -> MATERIAL .)
+
+
+state 10
+
+    (5) attribute -> SIZE .
+
+    KIND            reduce using rule 5 (attribute -> SIZE .)
+    COLOR           reduce using rule 5 (attribute -> SIZE .)
+    MATERIAL        reduce using rule 5 (attribute -> SIZE .)
+    SIZE            reduce using rule 5 (attribute -> SIZE .)
+
+
+state 11
+
+    (2) article -> attribute article .
+
+    $end            reduce using rule 2 (article -> attribute article .)
+
--- a/D/parsetab.py
+++ b/D/parsetab.py
@ -0,0 +1,37 @@
+
+# parsetab.py
+# This file is automatically generated. Do not edit.
+# pylint: disable=W,C,R
+_tabversion = '3.10'
+
+_lr_method = 'LALR'
+
+_lr_signature = 'COLOR KIND MATERIAL NUMBER OPERATE SIZEcommand : operate NUMBER articlearticle : attribute articleattribute : COLORattribute : MATERIALattribute : SIZEarticle : KINDoperate : OPERATE'
+    
+_lr_action_items = {'OPERATE':([0,],[3,]),'$end':([1,5,7,11,],[0,-1,-6,-2,]),'NUMBER':([2,3,],[4,-7,]),'KIND':([4,6,8,9,10,],[7,7,-3,-4,-5,]),'COLOR':([4,6,8,9,10,],[8,8,-3,-4,-5,]),'MATERIAL':([4,6,8,9,10,],[9,9,-3,-4,-5,]),'SIZE':([4,6,8,9,10,],[10,10,-3,-4,-5,]),}
+
+_lr_action = {}
+for _k, _v in _lr_action_items.items():
+   for _x,_y in zip(_v[0],_v[1]):
+      if not _x in _lr_action:  _lr_action[_x] = {}
+      _lr_action[_x][_k] = _y
+del _lr_action_items
+
+_lr_goto_items = {'command':([0,],[1,]),'operate':([0,],[2,]),'article':([4,6,],[5,11,]),'attribute':([4,6,],[6,6,]),}
+
+_lr_goto = {}
+for _k, _v in _lr_goto_items.items():
+   for _x, _y in zip(_v[0], _v[1]):
+       if not _x in _lr_goto: _lr_goto[_x] = {}
+       _lr_goto[_x][_k] = _y
+del _lr_goto_items
+_lr_productions = [
+  ("S' -> command","S'",1,None,None,None),
+  ('command -> operate NUMBER article','command',3,'p_command','yacc.py',6),
+  ('article -> attribute article','article',2,'p_article_attribute','yacc.py',22),
+  ('attribute -> COLOR','attribute',1,'p_attribute_color','yacc.py',27),
+  ('attribute -> MATERIAL','attribute',1,'p_attribute_material','yacc.py',32),
+  ('attribute -> SIZE','attribute',1,'p_attribute_size','yacc.py',37),
+  ('article -> KIND','article',1,'p_article_kind','yacc.py',42),
+  ('operate -> OPERATE','operate',1,'p_operate','yacc.py',46),
+]
--- a/D/yacc.py
+++ b/D/yacc.py
@ -0,0 +1,66 @@
+from ply import yacc
+from calclex import tokens
+
+
+# Grammar action for a whole command. p[1] is the operation keyword, p[2] the
+# quantity and p[3] the article's numeric index into the module-level stock
+# table `tab`. The string on the first line is the grammar rule read by PLY and
+# must stay the first statement.
+def p_command(p):
+    'command : operate NUMBER article'
+    index = p[3]
+
+    # Repeated attributes are summed into the index (e.g. "large large large
+    # ring" gives 3200), which can run past the end of `tab`; refuse the
+    # command instead of crashing with IndexError.
+    # NOTE(review): repeats that stay in range still alias another article
+    # (e.g. "small small box" == "large box") — consider rejecting duplicates
+    # in the grammar.
+    if not 0 <= index < len(tab):
+        print('I do not know such an article.')
+        return
+
+    if p[1] == 'Buy':
+        tab[index] += p[2]
+        print('OK. I am buying ' + str(p[2]) + ' new articles indexed as ' + str(index) + '.')
+        print('No of articles in shop: ' + str(tab[index]))
+    elif p[1] == 'Sell':
+        if p[2] > tab[index]:
+            print('I do not have as many articles as you want.')
+        else:
+            tab[index] -= p[2]
+            print('OK. I am selling ' + str(p[2]) + ' articles indexed as ' + str(index) + '.')
+            print('No of articles in shop: ' + str(tab[index]))
+
+
+# An article with one more attribute in front: add the attribute's weighted
+# code (p[1]) to the index of the rest of the article (p[2]).
+def p_article_attribute(p):
+    'article : attribute article'
+    p[0] = p[1] + p[2]
+
+
+# A colour attribute contributes its token value with weight 1.
+def p_attribute_color(p):
+    'attribute : COLOR'
+    p[0] = p[1]
+
+
+# A material attribute contributes its token value with weight 10.
+def p_attribute_material(p):
+    'attribute : MATERIAL'
+    p[0] = 10 * p[1]
+
+
+# A size attribute contributes its token value with weight 100.
+def p_attribute_size(p):
+    'attribute : SIZE'
+    p[0] = 100 * p[1]
+
+
+# The kind ends an article and contributes its token value with weight 1000.
+def p_article_kind(p):
+    'article : KIND'
+    p[0] = 1000 * p[1]
+
+
+# Pass the operation token's value through unchanged; p_command compares it
+# with 'Buy' and 'Sell'.
+def p_operate(p):
+    'operate : OPERATE'
+    p[0] = p[1]
+
+
+# Parser error hook: print a fixed message; the argument p is not inspected.
+def p_error(p):
+    print("Syntax error in input!")
+
+
+# Stock table indexed by the encoded article index computed by the grammar
+# actions; every count starts at zero.
+tab = [0] * 3000
+
+parser = yacc.yacc()
+
+# Read commands until the user types 'Bye' or stdin is closed.
+while True:
+    try:
+        s = input('What can I do for you? \n')
+    except EOFError:
+        # Input ended (e.g. piped input ran out): stop as if 'Bye' was typed.
+        break
+    if s == 'Bye':
+        break
+    parser.parse(s)
--- a/E/.idea/.gitignore
+++ b/E/.idea/.gitignore
@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
--- a/E/.idea/E.iml
+++ b/E/.idea/E.iml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
--- a/E/.idea/inspectionProfiles/profiles_settings.xml
+++ b/E/.idea/inspectionProfiles/profiles_settings.xml
@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
--- a/E/.idea/misc.xml
+++ b/E/.idea/misc.xml
@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
+</project>
--- a/E/.idea/modules.xml
+++ b/E/.idea/modules.xml
@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/E.iml" filepath="$PROJECT_DIR$/.idea/E.iml" />
+    </modules>
+  </component>
+</project>
--- a/E/.idea/vcs.xml
+++ b/E/.idea/vcs.xml
@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
+  </component>
+</project>
--- a/E/E.zip
+++ b/E/E.zip
--- a/E/PoliMorf-0.6.7.tab
+++ b/E/PoliMorf-0.6.7.tab
--- a/E/bpe.py
+++ b/E/bpe.py
@ -0,0 +1,50 @@
+import string
+
+
+def bpe(sentence: str, V: int) -> set:
+    """Build a byte-pair-encoding vocabulary from a single sentence.
+
+    Punctuation is removed, the text is lower-cased, spaces become the word
+    marker "<w>", and a marker is added at both ends. The most frequent
+    adjacent token pair is then merged repeatedly until the vocabulary holds
+    V tokens or nothing is left to merge.
+
+    Args:
+        sentence: Text to learn the vocabulary from.
+        V: Target vocabulary size.
+
+    Returns:
+        The set of tokens (single characters, "<w>" and merged pairs). It may
+        hold fewer than V tokens when the sentence is too short.
+    """
+    # remove punctuation
+    sentence = sentence.translate(str.maketrans("", "", string.punctuation)).lower()
+    characters = [i if i != " " else "<w>" for i in sentence]
+    characters.append("<w>")
+    characters.insert(0, "<w>")
+    vocabulary = set(characters)
+    while len(vocabulary) < V:
+        frequencies = get_frequencies(get_bigrams(characters))
+        if not frequencies:
+            # Everything has been merged into one token; max() on an empty
+            # mapping would raise ValueError, so stop with what we have.
+            break
+        most_freq_bigr = max(frequencies, key=frequencies.get)
+        upd_sentence_with_bigram(characters, most_freq_bigr)
+        vocabulary.add(most_freq_bigr)
+    return vocabulary
+
+
+
+def get_bigrams(characters: list) -> list:
+    """Return the concatenation of every pair of adjacent tokens, in order."""
+    return [left + right for left, right in zip(characters, characters[1:])]
+
+
+def get_frequencies(item: list) -> dict:
+    """Count how many times each element occurs, keyed in first-seen order."""
+    counts = {}
+    for entry in item:
+        counts[entry] = counts.get(entry, 0) + 1
+    return counts
+
+def upd_sentence_with_bigram(chars:list, bigram:str) -> list:
+    """Merge, in place, each adjacent token pair whose concatenation is bigram.
+
+    The list is scanned left to right; after a merge the scan moves past the
+    merged token, so it is not paired again with its right neighbour. The same
+    list object is returned.
+    """
+    pos = 0
+    while pos < len(chars) - 1:
+        if chars[pos] + chars[pos + 1] == bigram:
+            # Swap the two tokens for the single merged token.
+            chars[pos:pos + 2] = [bigram]
+        pos += 1
+    return chars
+
+# Interactive entry point: read a sentence and a target vocabulary size, then
+# print each learned token on its own line (set order, so not deterministic).
+usr_input = input('Podaj zdanie: ')
+V = int(input('Podaj wielkosc slownika: '))
+vocab = bpe(usr_input, V)
+for i in vocab: print(i)
--- a/E/maxmatch.py
+++ b/E/maxmatch.py
@ -0,0 +1,22 @@
+import pandas as pd
+
+
+def max_match(sentence: str, dictionary: list) -> list:
+    """Split an unspaced string into words with the greedy MaxMatch algorithm.
+
+    At each position the longest prefix of the remaining text that is in the
+    dictionary is taken as the next word. When no prefix of two or more
+    characters matches, a single character is emitted.
+
+    Args:
+        sentence: Text to segment.
+        dictionary: Collection of known words.
+
+    Returns:
+        The list of segments in order; empty for an empty sentence.
+    """
+    # One pass to build a set makes every later membership probe O(1) instead
+    # of a scan over the whole word list.
+    if isinstance(dictionary, (set, frozenset, dict)):
+        known_words = dictionary
+    else:
+        known_words = set(dictionary)
+
+    words = []
+    start = 0
+    sen_len = len(sentence)
+    while start < sen_len:
+        # Start from the WHOLE remaining text (the old loop began one character
+        # short, so a full-length word was never matched) and never probe the
+        # empty prefix, which could not make progress.
+        end = sen_len
+        while end > start + 1 and sentence[start:end] not in known_words:
+            end -= 1
+        words.append(sentence[start:end])
+        start = end
+    return words
+
+
+# Load the PoliMorf table (tab-separated, no header row) and use its first
+# column as the word list.
+# NOTE(review): presumably column 0 holds the inflected word forms — confirm
+# against the PoliMorf file format.
+pm = pd.read_csv("PoliMorf-0.6.7.tab", delimiter="\t", header=None)
+polish_dict = list(pm[0])
+# Segment a sample unspaced string and print the resulting word list.
+result = max_match("Alamakota", polish_dict)
+print(result)
--- a/G/G.py
+++ b/G/G.py
@ -0,0 +1,26 @@
+import spacy
+
+# One imperative command found in the input sentence.
+class Imp:
+    # Text of the imperative verb; set per instance by the script below.
+    action_s = ''
+    # Space-separated text of the tokens that follow the verb; built up with
+    # `+=`, which creates a per-instance attribute from this empty default.
+    object_s = ''
+
+# Read a sentence, print each token with its part of speech, then split the
+# sentence into imperative commands: each imperative verb is an action and the
+# tokens after it (up to the next imperative) form its object.
+nlp = spacy.load("pl_core_news_sm")
+inputed_text = input('Podaj zdanie: ')
+doc = nlp(inputed_text)
+for token in doc:
+    print(token.text, token.pos_)
+# NOTE(review): detection here uses the morphological Mood feature, while the
+# loop below uses the fine-grained tag 'IMPT' — confirm both always agree for
+# this model, or switch both to one predicate.
+contains_imp = any(token.morph.get('Mood') == ['Imp'] for token in doc)
+if not contains_imp:
+    print('Bye!')
+else:
+    imp_list = []
+    for token in doc:
+        if token.tag_ == 'IMPT':
+            # Each imperative verb starts a new command.
+            command = Imp()
+            command.action_s = token.text
+            imp_list.append(command)
+        elif imp_list:
+            imp_list[-1].object_s += token.text + " "
+        # Tokens before the first imperative belong to no command; indexing
+        # the still-empty list for them used to raise IndexError.
+    for imp in imp_list:
+        print('Action: '+imp.action_s+", Object: "+imp.object_s)
--- a/P1/Movielens/main.ipynb
+++ b/P1/Movielens/main.ipynb
@ -0,0 +1,375 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "import seaborn as sns\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MovieID</th>\n",
+       "      <th>MovieName</th>\n",
+       "      <th>Category</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Toy Story (1995)</td>\n",
+       "      <td>Animation|Children's|Comedy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Jumanji (1995)</td>\n",
+       "      <td>Adventure|Children's|Fantasy</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Grumpier Old Men (1995)</td>\n",
+       "      <td>Comedy|Romance</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>Waiting to Exhale (1995)</td>\n",
+       "      <td>Comedy|Drama</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>Father of the Bride Part II (1995)</td>\n",
+       "      <td>Comedy</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   MovieID                           MovieName                      Category\n",
+       "0        1                    Toy Story (1995)   Animation|Children's|Comedy\n",
+       "1        2                      Jumanji (1995)  Adventure|Children's|Fantasy\n",
+       "2        3             Grumpier Old Men (1995)                Comedy|Romance\n",
+       "3        4            Waiting to Exhale (1995)                  Comedy|Drama\n",
+       "4        5  Father of the Bride Part II (1995)                        Comedy"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_movie = pd.read_csv('movies.dat',sep='::',encoding='latin1',engine='python',names=['MovieID','MovieName','Category'])\n",
+    "df_movie.dropna(inplace=True)\n",
+    "df_movie.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ID</th>\n",
+       "      <th>MovieID</th>\n",
+       "      <th>Ratings</th>\n",
+       "      <th>TimeStamp</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1193</td>\n",
+       "      <td>5</td>\n",
+       "      <td>978300760</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>661</td>\n",
+       "      <td>3</td>\n",
+       "      <td>978302109</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>914</td>\n",
+       "      <td>3</td>\n",
+       "      <td>978301968</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>3408</td>\n",
+       "      <td>4</td>\n",
+       "      <td>978300275</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2355</td>\n",
+       "      <td>5</td>\n",
+       "      <td>978824291</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   ID  MovieID  Ratings  TimeStamp\n",
+       "0   1     1193        5  978300760\n",
+       "1   1      661        3  978302109\n",
+       "2   1      914        3  978301968\n",
+       "3   1     3408        4  978300275\n",
+       "4   1     2355        5  978824291"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_rating = pd.read_csv('ratings.dat',sep='::',encoding='latin1',engine='python',names=['ID','MovieID','Ratings','TimeStamp'])\n",
+    "df_rating.dropna(inplace=True)\n",
+    "df_rating.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MovieID</th>\n",
+       "      <th>MovieName</th>\n",
+       "      <th>Category</th>\n",
+       "      <th>ID</th>\n",
+       "      <th>Ratings</th>\n",
+       "      <th>TimeStamp</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Toy Story (1995)</td>\n",
+       "      <td>Animation|Children's|Comedy</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>978824268</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Toy Story (1995)</td>\n",
+       "      <td>Animation|Children's|Comedy</td>\n",
+       "      <td>6</td>\n",
+       "      <td>4</td>\n",
+       "      <td>978237008</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Toy Story (1995)</td>\n",
+       "      <td>Animation|Children's|Comedy</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4</td>\n",
+       "      <td>978233496</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Toy Story (1995)</td>\n",
+       "      <td>Animation|Children's|Comedy</td>\n",
+       "      <td>9</td>\n",
+       "      <td>5</td>\n",
+       "      <td>978225952</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Toy Story (1995)</td>\n",
+       "      <td>Animation|Children's|Comedy</td>\n",
+       "      <td>10</td>\n",
+       "      <td>5</td>\n",
+       "      <td>978226474</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   MovieID         MovieName                     Category  ID  Ratings  \\\n",
+       "0        1  Toy Story (1995)  Animation|Children's|Comedy   1        5   \n",
+       "1        1  Toy Story (1995)  Animation|Children's|Comedy   6        4   \n",
+       "2        1  Toy Story (1995)  Animation|Children's|Comedy   8        4   \n",
+       "3        1  Toy Story (1995)  Animation|Children's|Comedy   9        5   \n",
+       "4        1  Toy Story (1995)  Animation|Children's|Comedy  10        5   \n",
+       "\n",
+       "   TimeStamp  \n",
+       "0  978824268  \n",
+       "1  978237008  \n",
+       "2  978233496  \n",
+       "3  978225952  \n",
+       "4  978226474  "
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = pd.merge(df_movie,df_rating,left_on='MovieID',right_on='MovieID')\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "MovieID\n",
+       "1       8.293693\n",
+       "2       6.402282\n",
+       "3       6.033473\n",
+       "4       5.458824\n",
+       "5       6.013514\n",
+       "          ...   \n",
+       "3948    7.271462\n",
+       "3949    8.230263\n",
+       "3950    7.333333\n",
+       "3951    7.800000\n",
+       "3952    7.561856\n",
+       "Name: Ratings, Length: 3706, dtype: float64"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "groupByMovie = df.groupby('MovieID')\n",
+    "movieRatingsMean = groupByMovie['Ratings'].mean()*2\n",
+    "movieRatingsMean.columns = ['MovieID','Mean']\n",
+    "movieRatingsMean"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10 (default, Nov 14 2022, 12:59:47) \n[GCC 9.4.0]"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/P1/Movielens/movies.dat
+++ b/P1/Movielens/movies.dat
--- a/P1/Movielens/ratings.dat
+++ b/P1/Movielens/ratings.dat
--- a/P1/Movielens/users.dat
+++ b/P1/Movielens/users.dat
--- a/P1/imdb/main.ipynb
+++ b/P1/imdb/main.ipynb
@ -0,0 +1,484 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "import seaborn as sns\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MOVIES</th>\n",
+       "      <th>YEAR</th>\n",
+       "      <th>GENRE</th>\n",
+       "      <th>RATING</th>\n",
+       "      <th>ONE-LINE</th>\n",
+       "      <th>STARS</th>\n",
+       "      <th>VOTES</th>\n",
+       "      <th>RunTime</th>\n",
+       "      <th>Gross</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Blood Red Sky</td>\n",
+       "      <td>(2021)</td>\n",
+       "      <td>\\nAction, Horror, Thriller</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>\\nA woman with a mysterious illness is forced ...</td>\n",
+       "      <td>\\n    Director:\\nPeter Thorwarth\\n| \\n    Star...</td>\n",
+       "      <td>21,062</td>\n",
+       "      <td>121.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Masters of the Universe: Revelation</td>\n",
+       "      <td>(2021– )</td>\n",
+       "      <td>\\nAnimation, Action, Adventure</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>\\nThe war for Eternia begins again in what may...</td>\n",
+       "      <td>\\n            \\n    Stars:\\nChris Wood, \\nSara...</td>\n",
+       "      <td>17,870</td>\n",
+       "      <td>25.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>The Walking Dead</td>\n",
+       "      <td>(2010–2022)</td>\n",
+       "      <td>\\nDrama, Horror, Thriller</td>\n",
+       "      <td>8.2</td>\n",
+       "      <td>\\nSheriff Deputy Rick Grimes wakes up from a c...</td>\n",
+       "      <td>\\n            \\n    Stars:\\nAndrew Lincoln, \\n...</td>\n",
+       "      <td>885,805</td>\n",
+       "      <td>44.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Rick and Morty</td>\n",
+       "      <td>(2013– )</td>\n",
+       "      <td>\\nAnimation, Adventure, Comedy</td>\n",
+       "      <td>9.2</td>\n",
+       "      <td>\\nAn animated series that follows the exploits...</td>\n",
+       "      <td>\\n            \\n    Stars:\\nJustin Roiland, \\n...</td>\n",
+       "      <td>414,849</td>\n",
+       "      <td>23.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Army of Thieves</td>\n",
+       "      <td>(2021)</td>\n",
+       "      <td>\\nAction, Crime, Horror</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>\\nA prequel, set before the events of Army of ...</td>\n",
+       "      <td>\\n    Director:\\nMatthias Schweighöfer\\n| \\n  ...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                MOVIES         YEAR  \\\n",
+       "0                        Blood Red Sky       (2021)   \n",
+       "1  Masters of the Universe: Revelation     (2021– )   \n",
+       "2                     The Walking Dead  (2010–2022)   \n",
+       "3                       Rick and Morty     (2013– )   \n",
+       "4                      Army of Thieves       (2021)   \n",
+       "\n",
+       "                                        GENRE  RATING  \\\n",
+       "0      \\nAction, Horror, Thriller                 6.1   \n",
+       "1  \\nAnimation, Action, Adventure                 5.0   \n",
+       "2       \\nDrama, Horror, Thriller                 8.2   \n",
+       "3  \\nAnimation, Adventure, Comedy                 9.2   \n",
+       "4         \\nAction, Crime, Horror                 NaN   \n",
+       "\n",
+       "                                            ONE-LINE  \\\n",
+       "0  \\nA woman with a mysterious illness is forced ...   \n",
+       "1  \\nThe war for Eternia begins again in what may...   \n",
+       "2  \\nSheriff Deputy Rick Grimes wakes up from a c...   \n",
+       "3  \\nAn animated series that follows the exploits...   \n",
+       "4  \\nA prequel, set before the events of Army of ...   \n",
+       "\n",
+       "                                               STARS    VOTES  RunTime Gross  \n",
+       "0  \\n    Director:\\nPeter Thorwarth\\n| \\n    Star...   21,062    121.0   NaN  \n",
+       "1  \\n            \\n    Stars:\\nChris Wood, \\nSara...   17,870     25.0   NaN  \n",
+       "2  \\n            \\n    Stars:\\nAndrew Lincoln, \\n...  885,805     44.0   NaN  \n",
+       "3  \\n            \\n    Stars:\\nJustin Roiland, \\n...  414,849     23.0   NaN  \n",
+       "4  \\n    Director:\\nMatthias Schweighöfer\\n| \\n  ...      NaN      NaN   NaN  "
+      ]
+     },
+     "execution_count": 52,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = pd.read_csv('movies.csv',header=0)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>MOVIES</th>\n",
+       "      <th>YEAR</th>\n",
+       "      <th>GENRE</th>\n",
+       "      <th>RATING</th>\n",
+       "      <th>ONE-LINE</th>\n",
+       "      <th>STARS</th>\n",
+       "      <th>VOTES</th>\n",
+       "      <th>RunTime</th>\n",
+       "      <th>Gross</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Blood Red Sky</td>\n",
+       "      <td>(2021)</td>\n",
+       "      <td>Action, Horror, Thriller</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>A woman with a mysterious illness is forced in...</td>\n",
+       "      <td>Director:Peter Thorwarth|     Stars:Peri Baume...</td>\n",
+       "      <td>21,062</td>\n",
+       "      <td>121.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Masters of the Universe: Revelation</td>\n",
+       "      <td>(2021– )</td>\n",
+       "      <td>Animation, Action, Adventure</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>The war for Eternia begins again in what may b...</td>\n",
+       "      <td>Stars:Chris Wood, Sarah Michelle Gellar, Lena ...</td>\n",
+       "      <td>17,870</td>\n",
+       "      <td>25.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>The Walking Dead</td>\n",
+       "      <td>(2010–2022)</td>\n",
+       "      <td>Drama, Horror, Thriller</td>\n",
+       "      <td>8.2</td>\n",
+       "      <td>Sheriff Deputy Rick Grimes wakes up from a com...</td>\n",
+       "      <td>Stars:Andrew Lincoln, Norman Reedus, Melissa M...</td>\n",
+       "      <td>885,805</td>\n",
+       "      <td>44.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Rick and Morty</td>\n",
+       "      <td>(2013– )</td>\n",
+       "      <td>Animation, Adventure, Comedy</td>\n",
+       "      <td>9.2</td>\n",
+       "      <td>An animated series that follows the exploits o...</td>\n",
+       "      <td>Stars:Justin Roiland, Chris Parnell, Spencer G...</td>\n",
+       "      <td>414,849</td>\n",
+       "      <td>23.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Army of Thieves</td>\n",
+       "      <td>(2021)</td>\n",
+       "      <td>Action, Crime, Horror</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>A prequel, set before the events of Army of th...</td>\n",
+       "      <td>Director:Matthias Schweighöfer|     Stars:Matt...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                MOVIES         YEAR  \\\n",
+       "0                        Blood Red Sky       (2021)   \n",
+       "1  Masters of the Universe: Revelation     (2021– )   \n",
+       "2                     The Walking Dead  (2010–2022)   \n",
+       "3                       Rick and Morty     (2013– )   \n",
+       "4                      Army of Thieves       (2021)   \n",
+       "\n",
+       "                          GENRE  RATING  \\\n",
+       "0      Action, Horror, Thriller     6.1   \n",
+       "1  Animation, Action, Adventure     5.0   \n",
+       "2       Drama, Horror, Thriller     8.2   \n",
+       "3  Animation, Adventure, Comedy     9.2   \n",
+       "4         Action, Crime, Horror     NaN   \n",
+       "\n",
+       "                                            ONE-LINE  \\\n",
+       "0  A woman with a mysterious illness is forced in...   \n",
+       "1  The war for Eternia begins again in what may b...   \n",
+       "2  Sheriff Deputy Rick Grimes wakes up from a com...   \n",
+       "3  An animated series that follows the exploits o...   \n",
+       "4  A prequel, set before the events of Army of th...   \n",
+       "\n",
+       "                                               STARS    VOTES  RunTime Gross  \n",
+       "0  Director:Peter Thorwarth|     Stars:Peri Baume...   21,062    121.0   NaN  \n",
+       "1  Stars:Chris Wood, Sarah Michelle Gellar, Lena ...   17,870     25.0   NaN  \n",
+       "2  Stars:Andrew Lincoln, Norman Reedus, Melissa M...  885,805     44.0   NaN  \n",
+       "3  Stars:Justin Roiland, Chris Parnell, Spencer G...  414,849     23.0   NaN  \n",
+       "4  Director:Matthias Schweighöfer|     Stars:Matt...      NaN      NaN   NaN  "
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df['GENRE'] = df['GENRE'].str.replace('\\n','')\n",
+    "df['ONE-LINE'] = df['ONE-LINE'].str.replace('\\n','')\n",
+    "df['STARS'] = df['STARS'].str.replace('\\n','')\n",
+    "\n",
+    "df['GENRE'] = df['GENRE'].str.strip()\n",
+    "df['ONE-LINE'] = df['ONE-LINE'].str.strip()\n",
+    "df['STARS'] = df['STARS'].str.strip()\n",
+    "\n",
+    "# df['YEAR'] = df['YEAR'].str.strip('()')\n",
+    "\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0       Peri Baumeister, Carl Anton Koch, Alexander Sc...\n",
+       "1       Chris Wood, Sarah Michelle Gellar, Lena Headey...\n",
+       "2       Andrew Lincoln, Norman Reedus, Melissa McBride...\n",
+       "3       Justin Roiland, Chris Parnell, Spencer Grammer...\n",
+       "4       Matthias Schweighöfer, Nathalie Emmanuel, Ruby...\n",
+       "                              ...                        \n",
+       "9993    Felix Klare, Romina Küper, Anna Maria Mühe, Ro...\n",
+       "9994    Morgan Taylor Campbell, Chris Cope, Iñaki Godo...\n",
+       "9996                                         Prince Harry\n",
+       "9997    Morgan Taylor Campbell, Iñaki Godoy, Rhianna J...\n",
+       "9998    Morgan Taylor Campbell, Jennifer Cheon Garcia,...\n",
+       "Name: Directors, Length: 9206, dtype: object"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import re\n",
+    "\n",
+    "def extract_director(direct):\n",
+    "    result = re.search(r'(Director:|Directors:)(.*)\\|',direct)\n",
+    "    if result:\n",
+    "        return result.group(2).strip()\n",
+    "\n",
+    "def extract_stars(stars):\n",
+    "    result = re.search(r'(Stars:|Star:)(.*)',stars)\n",
+    "    if result:\n",
+    "        return result.group(2).strip()\n",
+    "\n",
+    "df['Directors'] = df['STARS'].apply(lambda d : extract_director(d))\n",
+    "df['Stars'] = df['STARS'].apply(lambda s : extract_stars(s))\n",
+    "df['Stars'].dropna()\n",
+    "df['Directors'].dropna()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Year</th>\n",
+       "      <th>Count</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2020–</td>\n",
+       "      <td>898</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2020</td>\n",
+       "      <td>742</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2021–</td>\n",
+       "      <td>661</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2019</td>\n",
+       "      <td>657</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2019–</td>\n",
+       "      <td>553</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    Year  Count\n",
+       "0  2020–    898\n",
+       "1   2020    742\n",
+       "2  2021–    661\n",
+       "3   2019    657\n",
+       "4  2019–    553"
+      ]
+     },
+     "execution_count": 55,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df['Year'] = df['YEAR'].str.extract(r'([0-9]{4}–.*|[0-9]{4})')\n",
+    "df['Year'] = df['Year'].str.strip()\n",
+    "\n",
+    "def extract_year(year):\n",
+    "    if year[-3:] == '– )':\n",
+    "        return year.replace('– )',\"–\")\n",
+    "    else:\n",
+    "        return year.replace(')',\"\")\n",
+    "\n",
+    "df['Year'] = df['Year'].fillna('Unknown')\n",
+    "df['Year'] = df['Year'].apply(lambda y: extract_year(y))\n",
+    "    \n",
+    "year_count = df[df['Year'] != 'Unknown']['Year'].value_counts().rename_axis('Year').reset_index(name='Count')\n",
+    "year_count.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/P1/imdb/movies.csv
+++ b/P1/imdb/movies.csv
--- a/P1/tmdb/main.ipynb
+++ b/P1/tmdb/main.ipynb
@ -0,0 +1,128 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "movies = pd.read_csv('tmdb_5000_movies.csv')\n",
+    "credits = pd.read_csv('tmdb_5000_credits.csv')\n",
+    "df = pd.merge(movies,credits,left_on=['id','title'],right_on=['movie_id','title'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "pandas.core.series.Series"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import json\n",
+    "def load_json_columns(col):\n",
+    "    return col.apply(json.loads)\n",
+    "df['genres'] = load_json_columns(df['genres'])\n",
+    "# df['genres'] = df['genres'].apply(json.loads)\n",
+    "# df['keywords'] = df['keywords'].apply(json.loads)\n",
+    "# df['production_companies'] = df['production_companies'].apply(json.loads)\n",
+    "# df['production_countries'] = df['production_countries'].apply(json.loads)\n",
+    "# df['cast'] = df['cast'].apply(json.loads)\n",
+    "# df['crew'] = df['crew'].apply(json.loads)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'id': 1463, 'name': 'culture clash'},\n",
+       " {'id': 2964, 'name': 'future'},\n",
+       " {'id': 3386, 'name': 'space war'},\n",
+       " {'id': 3388, 'name': 'space colony'},\n",
+       " {'id': 3679, 'name': 'society'},\n",
+       " {'id': 3801, 'name': 'space travel'},\n",
+       " {'id': 9685, 'name': 'futuristic'},\n",
+       " {'id': 9840, 'name': 'romance'},\n",
+       " {'id': 9882, 'name': 'space'},\n",
+       " {'id': 9951, 'name': 'alien'},\n",
+       " {'id': 10148, 'name': 'tribe'},\n",
+       " {'id': 10158, 'name': 'alien planet'},\n",
+       " {'id': 10987, 'name': 'cgi'},\n",
+       " {'id': 11399, 'name': 'marine'},\n",
+       " {'id': 13065, 'name': 'soldier'},\n",
+       " {'id': 14643, 'name': 'battle'},\n",
+       " {'id': 14720, 'name': 'love affair'},\n",
+       " {'id': 165431, 'name': 'anti war'},\n",
+       " {'id': 193554, 'name': 'power relations'},\n",
+       " {'id': 206690, 'name': 'mind and soul'},\n",
+       " {'id': 209714, 'name': '3d'}]"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df['keywords'][0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/P1/tmdb/tmdb_5000_credits.csv
+++ b/P1/tmdb/tmdb_5000_credits.csv
--- a/P1/tmdb/tmdb_5000_movies.csv
+++ b/P1/tmdb/tmdb_5000_movies.csv