From 1efd08287c01936e27c7e93e8f1ff739a9d82192 Mon Sep 17 00:00:00 2001 From: liza Date: Sun, 13 Jun 2021 23:11:09 +0200 Subject: [PATCH] =?UTF-8?q?pobieranie=20informacji=20o=20wydarzeniach=20do?= =?UTF-8?q?=20plik=C3=B3w=20json;=20problemy=20z=20kodowanie?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- event_0.json | 1 + event_1.json | 1 + geckodriver.log | 145 +++++++++++++++++++++++++++++++++++++++++++++++ get_events_FB.py | 93 ++++++++++++++++++++++++++++-- 4 files changed, 235 insertions(+), 5 deletions(-) create mode 100644 event_0.json create mode 100644 event_1.json create mode 100644 geckodriver.log diff --git a/event_0.json b/event_0.json new file mode 100644 index 0000000..db58504 --- /dev/null +++ b/event_0.json @@ -0,0 +1 @@ +{"startDate": "2021-06-16T22:15:00+0200\"", "name": "Darmowy webinar \"Jak odzwyczai\u0107 dziecko korzystania z urz\u0105dze\u0144 mobilnych?\"\"", "description": "Ju\u017c 16.06.2021 (\u015broda) o godzinie 22:15 wyj\u0105tkowy webinar\n \"Jak odzwyczai\u0107 dziecko korzystania z urz\u0105dze\u0144 mobilnych?\"\n\nPodczas webinaru:\n Dowiedz\u0105 si\u0119 Pa\u0144stwo co zrobi\u0107, gdy dziecko sp\u0119dza sw\u00f3j ca\u0142y wolny czas przed tabletem, telefonem czy komputerem,\n\n Otrzymaj\u0105 Pa\u0144stwo wskaz\u00f3wki jak odzwyczai\u0107 dziecko korzystania z urz\u0105dze\u0144 mobilnych\n\nGo\u015bciem specjalnym b\u0119dzie mgr Sonia Michalik, psycholog, coach, familiolog\n\nSzanowni Pa\u0144stwo\u2757\ufe0f\n\nMusicie by\u0107 z nami \n\nZapisuj\u0105c si\u0119 na webinar - otrzymuj\u0105 Pa\u0144stwo darmowego e-booka !\n\u27a1\ufe0f https://webinar.misjarodzic.com/jak-odzwyczaic-dziecko-korzystania-zurzadzen-mobilnych/\n\nSpotkajmy si\u0119 na BEZP\u0141ATNYM webinarze z cyklu Misja Rodzic TV \u2764\n\nDo zobaczenia ! 
\"", "organizer": " Sonia Michalik - psycholog dzieci\u0119cy", "online": "true", "price": "za darmo", "category": ["Film"]} \ No newline at end of file diff --git a/event_1.json b/event_1.json new file mode 100644 index 0000000..a657927 --- /dev/null +++ b/event_1.json @@ -0,0 +1 @@ +{"startDate": "2021-06-13T10:30:00+0200\"", "name": "Rodzinna joga na \u015bwie\u017cym powietrzu\"", "description": "Joga rodzinna, czyli co? Czego si\u0119 mo\u017cesz spodziewa\u0107?... Postawimy las drzew, b\u0119dziemy si\u0119 rusza\u0107 jak koty i psy, zrobimy or\u0142a cie\u0144 i wiele innych pozycji b\u0119dzie du\u017co ruchu i wsp\u00f3lnej zabawy z dzie\u0107mi! Przyjd\u017a i sprawd\u017a czy i Wam si\u0119 spodoba!\n\n\nZapisy pod numerem 780044816 (mo\u017cna SMSem). Op\u0142ata za zaj\u0119cia:\nKa\u017cda \u0107wicz\u0105ca osoba (r\u00f3wnie\u017c dzieci) - 10 z\u0142:)\n...przelewem na konto:\nFundacja Pozna\u0144czycy\nSantander Bank Polska\n511090136200000001 34828100\n\n\nNale\u017cy mie\u0107 mat\u0119 lub karimat\u0119 (je\u015bli nie ma si\u0119 maty). Warto zabezpieczy\u0107 si\u0119 przed kleszczami \nWej\u015bcie na teren od ul. Smardzewskiej. W razie deszczu przeniesiemy si\u0119 do sali w \nMinor Studio Aktywno\u015bci ul. Podchor\u0105\u017cych 15. 
\n\"", "organizer": " Grunwaldzki Park Spo\u0142eczny i Kalejdoskop jogi", "online": "false", "place": ["Projekt schron"], "price": "za darmo", "category": ["Wellness"]} \ No newline at end of file diff --git a/geckodriver.log b/geckodriver.log new file mode 100644 index 0000000..22d7067 --- /dev/null +++ b/geckodriver.log @@ -0,0 +1,145 @@ +1623615934569 geckodriver INFO Listening on 127.0.0.1:51621 +1623615937594 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofilewC24Bi" +1623615937968 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofilewC24Bi\\search.json.mozlz4", (void 0))) +1623615941225 Marionette INFO Listening on port 51629 +1623615941332 Marionette WARN TLS certificate errors will be ignored for this session +JavaScript error: , line 0: NS_ERROR_NOT_AVAILABLE + +###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1623617250054 Marionette INFO Stopped listening on port 51629 + "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofile5TtyBH" +1623616342641 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofile5TtyBH\\search.json.mozlz4", (void 0))) +1623616346488 Marionette INFO Listening on port 51774 +1623616346554 Marionette WARN TLS certificate errors will be ignored for this session +JavaScript error: , line 0: NS_ERROR_NOT_AVAILABLE + +###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + + +###!!! 
[Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +1623617253590 Marionette INFO Stopped listening on port 51774 +ionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileznducQ\\search.json.mozlz4", (void 0))) +1623616524743 Marionette INFO Listening on port 51888 +1623616525165 Marionette WARN TLS certificate errors will be ignored for this session +1623617252176 Marionette INFO Stopped listening on port 51888 + +###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileXHB4nx" +1623616606356 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileXHB4nx\\search.json.mozlz4", (void 0))) +1623616609691 Marionette INFO Listening on port 51983 +1623616610176 Marionette WARN TLS certificate errors will be ignored for this session +1623617251535 Marionette INFO Stopped listening on port 51983 +3616613285 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileRUFZHJ" +1623616613765 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" 
(new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileRUFZHJ\\search.json.mozlz4", (void 0))) +1623616617502 Marionette INFO Listening on port 52032 +1623616617558 Marionette WARN TLS certificate errors will be ignored for this session +1623617252912 Marionette INFO Stopped listening on port 52032 +3616696590 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileBkXZdk" +1623616696991 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileBkXZdk\\search.json.mozlz4", (void 0))) +1623616700059 Marionette INFO Listening on port 52134 +1623616700285 Marionette WARN TLS certificate errors will be ignored for this session +1623617255094 Marionette INFO Stopped listening on port 52134 + +###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileCwGwMr" +1623616703806 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" 
(new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileCwGwMr\\search.json.mozlz4", (void 0))) +1623616707040 Marionette INFO Listening on port 52183 +1623616707514 Marionette WARN TLS certificate errors will be ignored for this session +1623617254544 Marionette INFO Stopped listening on port 52183 +3616808792 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileRB7Oze" +1623616809185 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileRB7Oze\\search.json.mozlz4", (void 0))) +1623616812643 Marionette INFO Listening on port 52299 +1623616812972 Marionette WARN TLS certificate errors will be ignored for this session +1623617254111 Marionette INFO Stopped listening on port 52299 + +###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileEi5C8p" +1623616816552 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileEi5C8p\\search.json.mozlz4", (void 0))) +1623616820266 Marionette INFO Listening on port 52346 +1623616820314 Marionette WARN TLS certificate errors will be ignored for this session +1623616827560 Marionette WARN Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'. 
+1623617259152 Marionette INFO Stopped listening on port 52346 +3616918402 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileJqHI1M" +1623616918830 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileJqHI1M\\search.json.mozlz4", (void 0))) +1623616923012 Marionette INFO Listening on port 52469 +1623616923243 Marionette WARN TLS certificate errors will be ignored for this session +1623617258029 Marionette INFO Stopped listening on port 52469 + +###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileXwHSYv" +1623617046289 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileXwHSYv\\search.json.mozlz4", (void 0))) +1623617049816 Marionette INFO Listening on port 52588 +1623617050051 Marionette WARN TLS certificate errors will be ignored for this session +1623617257263 Marionette INFO Stopped listening on port 52588 + +###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileKbS8gv" +1623617112887 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" 
(new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileKbS8gv\\search.json.mozlz4", (void 0))) +1623617116429 Marionette INFO Listening on port 52698 +1623617116549 Marionette WARN TLS certificate errors will be ignored for this session +1623617256628 Marionette INFO Stopped listening on port 52698 + +###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost + +"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileZOTKaF" +1623617241870 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileZOTKaF\\search.json.mozlz4", (void 0))) +1623617245668 Marionette INFO Listening on port 52818 +1623617246118 Marionette WARN TLS certificate errors will be ignored for this session +JavaScript error: , line 0: NS_ERROR_NOT_AVAILABLE +1:52909 +1623617456437 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofile4YionA" +1623617456926 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" 
(new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofile4YionA\\search.json.mozlz4", (void 0))) +1623617460489 Marionette INFO Listening on port 52917 +1623617460656 Marionette WARN TLS certificate errors will be ignored for this session +JavaScript error: , line 0: NS_ERROR_NOT_AVAILABLE +1:53052 +1623617999467 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileT6wXn9" +1623617999841 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileT6wXn9\\search.json.mozlz4", (void 0))) +1623618003246 Marionette INFO Listening on port 53060 +1623618003654 Marionette WARN TLS certificate errors will be ignored for this session +1623618092602 geckodriver INFO Listening on 127.0.0.1:53166 +1623618095645 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofilearl8EF" +1623618097038 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" 
(new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofilearl8EF\\search.json.mozlz4", (void 0))) +1623618101719 Marionette INFO Listening on port 53175 +1623618102195 Marionette WARN TLS certificate errors will be ignored for this session +1623618176645 geckodriver INFO Listening on 127.0.0.1:53314 +1623618179652 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofilepKMkE6" +1623618180145 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofilepKMkE6\\search.json.mozlz4", (void 0))) +1623618183698 Marionette INFO Listening on port 53323 +1623618183838 Marionette WARN TLS certificate errors will be ignored for this session +1623618235629 geckodriver INFO Listening on 127.0.0.1:53416 +1623618238667 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileBH6nMe" +1623618239447 Marionette INFO Marionette enabled +console.warn: SearchSettings: "get: No settings file exists, new profile?" 
# get data
canonical_url = 'https://www.facebook.com'

# Category labels compared verbatim against span texts of the event page, so
# they must be spelled exactly as the (Polish) Facebook UI renders them.
kategorii = [
    "Cele dobroczynne", "Dom", "Film", "Fitness", "Gry", "Impreza",
    "Jedzenie", "Komedia", "Literatura", "Muzyka", "Napoje", "Ogrodnictwo",
    "Rękodzieło", "Sieci kontaktów", "Sport", "Sztuka", "Taniec", "Teatr",
    "Wellness", "Wydarzenia religijne", "Zakupy", "Zdrowie",
]

# Top-level JSON-LD keys copied into the output record.
LD_JSON_FIELDS = ("name", "startDate", "description", "endDate")

# Characters outside the Basic Multilingual Plane (where most emoji live).
_NON_BMP = re.compile(r'[\U00010000-\U0010FFFF]')


def strip_emoji(value):
    """Return *value* without characters outside the Basic Multilingual Plane.

    The earlier implementation removed surrogate-pair escapes ("removes
    emoji"); this keeps that behaviour on properly decoded text.
    """
    return _NON_BMP.sub('', value)


def parse_event_ld_json(raw_blocks):
    """Extract the event fields from JSON-LD ``<script>`` bodies.

    Args:
        raw_blocks: iterable of script contents (``str`` or ``None``).

    Returns:
        dict with those of ``LD_JSON_FIELDS`` that are present as strings at
        the top level of a JSON-LD object. Nested objects (e.g. the
        ``location`` with its own ``name``) are ignored, and the first
        occurrence of a key wins. Blocks that are empty or not valid JSON are
        skipped.
    """
    fields = {}
    for raw in raw_blocks:
        if not raw:
            continue
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            continue
        # A JSON-LD script may hold one object or a list of objects.
        candidates = parsed if isinstance(parsed, list) else [parsed]
        for candidate in candidates:
            if not isinstance(candidate, dict):
                continue
            for key, value in candidate.items():
                if (key in LD_JSON_FIELDS and isinstance(value, str)
                        and key not in fields):
                    fields[key] = strip_emoji(value)
    return fields


def collect_event_spans(span_texts):
    """Return the span texts that make up the event summary.

    Collection starts at the first text containing "użytkowników
    odpowiedziało" (inclusive) and stops at the first text containing
    "Wybierz się ze znajomymi" or "Jak wziąć udział". Empty texts and texts
    without any ASCII letter are dropped.

    Args:
        span_texts: iterable of strings; it is consumed lazily, so a
            generator of ``element.text`` stops querying the browser as soon
            as the end marker is reached.
    """
    collected = []
    inside_summary = False
    for span_text in span_texts:
        if "użytkowników odpowiedziało" in span_text:
            inside_summary = True
        elif ("Wybierz się ze znajomymi" in span_text
              or "Jak wziąć udział" in span_text):
            break

        if inside_summary and span_text and re.search(r'[A-Za-z]', span_text):
            collected.append(span_text)
    return collected


def describe_event(text, categories):
    """Map the positional summary texts onto output fields.

    Positions follow the order produced by ``collect_event_spans``:
    ``text[1]`` organizer line, ``text[2]`` place / "online" marker,
    ``text[3]`` price line, ``text[4]`` ticket details. Missing positions are
    skipped instead of raising ``IndexError``.

    Args:
        text: list of summary strings.
        categories: collection of category labels matched exactly.

    Returns:
        dict with any of ``organizer``, ``online``, ``place``, ``price`` and
        (whenever *text* is non-empty) ``category``.
    """
    details = {}
    if not text:
        return details

    if len(text) > 1 and "Wydarzenie" in text[1]:
        details['organizer'] = text[1].replace("Wydarzenie", "")

    if len(text) > 2:
        if 'online' in text[2] or 'Online' in text[2]:
            details['online'] = "true"
        else:
            details['online'] = "false"
            details['place'] = [text[2]]
            for t in text:
                # Street lines and "<number>, Poznań" lines refine the place.
                if "ul." in t or re.match(r"\d+, Poznań", t):
                    details['place'].append(t)

    if len(text) > 3:
        if 'za darmo' in text[3]:
            details['price'] = 'za darmo'
        elif 'Bilety' in text[3]:
            details['price'] = ['bilet']
            if len(text) > 4:
                details['price'].append(text[4])

    details["category"] = [t for t in text if t in categories]
    return details


def scrape_events(browser, links):
    """Visit every event link and dump its data to ``event_<n>.json``.

    Args:
        browser: Selenium WebDriver used to render the event page.
        links: iterable of site-relative event paths.
    """
    for number, link in enumerate(links):
        url = canonical_url + link
        browser.get(url)
        time.sleep(3)  # crude wait for the page to finish rendering

        # Structured data is read from the plain HTTP response of the page.
        res = requests.get(url, timeout=30)
        soup = BeautifulSoup(res.content, 'html.parser')
        scripts = soup.select("script[type='application/ld+json']")
        data = parse_event_ld_json(tag.string for tag in scripts)

        # Everything else is scraped from the rendered page; each element's
        # text is read once because every access is a WebDriver round trip.
        elements = browser.find_elements_by_tag_name('span')
        text = collect_event_spans(e.text for e in elements)
        data.update(describe_event(text, kategorii))

        file = "event_" + str(number) + ".json"
        with open(file, "w", encoding="utf-8") as outfile:
            json.dump(data, outfile, ensure_ascii=False)


# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    scrape_events(browser, links)