pobieranie informacji o wydarzeniach do plików json; problemy z kodowaniem

This commit is contained in:
liza 2021-06-13 23:11:09 +02:00
parent ce56eae03e
commit 1efd08287c
4 changed files with 235 additions and 5 deletions

1
event_0.json Normal file
View File

@ -0,0 +1 @@
{"startDate": "2021-06-16T22:15:00+0200\"", "name": "Darmowy webinar \"Jak odzwyczai\u0107 dziecko korzystania z urz\u0105dze\u0144 mobilnych?\"\"", "description": "Ju\u017c 16.06.2021 (\u015broda) o godzinie 22:15 wyj\u0105tkowy webinar\n \"Jak odzwyczai\u0107 dziecko korzystania z urz\u0105dze\u0144 mobilnych?\"\n\nPodczas webinaru:\n Dowiedz\u0105 si\u0119 Pa\u0144stwo co zrobi\u0107, gdy dziecko sp\u0119dza sw\u00f3j ca\u0142y wolny czas przed tabletem, telefonem czy komputerem,\n\n Otrzymaj\u0105 Pa\u0144stwo wskaz\u00f3wki jak odzwyczai\u0107 dziecko korzystania z urz\u0105dze\u0144 mobilnych\n\nGo\u015bciem specjalnym b\u0119dzie mgr Sonia Michalik, psycholog, coach, familiolog\n\nSzanowni Pa\u0144stwo\u2757\ufe0f\n\nMusicie by\u0107 z nami \n\nZapisuj\u0105c si\u0119 na webinar - otrzymuj\u0105 Pa\u0144stwo darmowego e-booka !\n\u27a1\ufe0f https://webinar.misjarodzic.com/jak-odzwyczaic-dziecko-korzystania-zurzadzen-mobilnych/\n\nSpotkajmy si\u0119 na BEZP\u0141ATNYM webinarze z cyklu Misja Rodzic TV \u2764\n\nDo zobaczenia ! \"", "organizer": " Sonia Michalik - psycholog dzieci\u0119cy", "online": "true", "price": "za darmo", "category": ["Film"]}

1
event_1.json Normal file
View File

@ -0,0 +1 @@
{"startDate": "2021-06-13T10:30:00+0200\"", "name": "Rodzinna joga na \u015bwie\u017cym powietrzu\"", "description": "Joga rodzinna, czyli co? Czego si\u0119 mo\u017cesz spodziewa\u0107?... Postawimy las drzew, b\u0119dziemy si\u0119 rusza\u0107 jak koty i psy, zrobimy or\u0142a cie\u0144 i wiele innych pozycji b\u0119dzie du\u017co ruchu i wsp\u00f3lnej zabawy z dzie\u0107mi! Przyjd\u017a i sprawd\u017a czy i Wam si\u0119 spodoba!\n\n\nZapisy pod numerem 780044816 (mo\u017cna SMSem). Op\u0142ata za zaj\u0119cia:\nKa\u017cda \u0107wicz\u0105ca osoba (r\u00f3wnie\u017c dzieci) - 10 z\u0142:)\n...przelewem na konto:\nFundacja Pozna\u0144czycy\nSantander Bank Polska\n511090136200000001 34828100\n\n\nNale\u017cy mie\u0107 mat\u0119 lub karimat\u0119 (je\u015bli nie ma si\u0119 maty). Warto zabezpieczy\u0107 si\u0119 przed kleszczami \nWej\u015bcie na teren od ul. Smardzewskiej. W razie deszczu przeniesiemy si\u0119 do sali w \nMinor Studio Aktywno\u015bci ul. Podchor\u0105\u017cych 15. \n\"", "organizer": " Grunwaldzki Park Spo\u0142eczny i Kalejdoskop jogi", "online": "false", "place": ["Projekt schron"], "price": "za darmo", "category": ["Wellness"]}

145
geckodriver.log Normal file
View File

@ -0,0 +1,145 @@
1623615934569 geckodriver INFO Listening on 127.0.0.1:51621
1623615937594 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofilewC24Bi"
1623615937968 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofilewC24Bi\\search.json.mozlz4", (void 0)))
1623615941225 Marionette INFO Listening on port 51629
1623615941332 Marionette WARN TLS certificate errors will be ignored for this session
JavaScript error: , line 0: NS_ERROR_NOT_AVAILABLE
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1623617250054 Marionette INFO Stopped listening on port 51629
"C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofile5TtyBH"
1623616342641 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofile5TtyBH\\search.json.mozlz4", (void 0)))
1623616346488 Marionette INFO Listening on port 51774
1623616346554 Marionette WARN TLS certificate errors will be ignored for this session
JavaScript error: , line 0: NS_ERROR_NOT_AVAILABLE
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
1623617253590 Marionette INFO Stopped listening on port 51774
ionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileznducQ\\search.json.mozlz4", (void 0)))
1623616524743 Marionette INFO Listening on port 51888
1623616525165 Marionette WARN TLS certificate errors will be ignored for this session
1623617252176 Marionette INFO Stopped listening on port 51888
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileXHB4nx"
1623616606356 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileXHB4nx\\search.json.mozlz4", (void 0)))
1623616609691 Marionette INFO Listening on port 51983
1623616610176 Marionette WARN TLS certificate errors will be ignored for this session
1623617251535 Marionette INFO Stopped listening on port 51983
3616613285 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileRUFZHJ"
1623616613765 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileRUFZHJ\\search.json.mozlz4", (void 0)))
1623616617502 Marionette INFO Listening on port 52032
1623616617558 Marionette WARN TLS certificate errors will be ignored for this session
1623617252912 Marionette INFO Stopped listening on port 52032
3616696590 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileBkXZdk"
1623616696991 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileBkXZdk\\search.json.mozlz4", (void 0)))
1623616700059 Marionette INFO Listening on port 52134
1623616700285 Marionette WARN TLS certificate errors will be ignored for this session
1623617255094 Marionette INFO Stopped listening on port 52134
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileCwGwMr"
1623616703806 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileCwGwMr\\search.json.mozlz4", (void 0)))
1623616707040 Marionette INFO Listening on port 52183
1623616707514 Marionette WARN TLS certificate errors will be ignored for this session
1623617254544 Marionette INFO Stopped listening on port 52183
3616808792 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileRB7Oze"
1623616809185 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileRB7Oze\\search.json.mozlz4", (void 0)))
1623616812643 Marionette INFO Listening on port 52299
1623616812972 Marionette WARN TLS certificate errors will be ignored for this session
1623617254111 Marionette INFO Stopped listening on port 52299
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileEi5C8p"
1623616816552 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileEi5C8p\\search.json.mozlz4", (void 0)))
1623616820266 Marionette INFO Listening on port 52346
1623616820314 Marionette WARN TLS certificate errors will be ignored for this session
1623616827560 Marionette WARN Ignoring event 'DOMContentLoaded' because document has an invalid readyState of 'complete'.
1623617259152 Marionette INFO Stopped listening on port 52346
3616918402 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileJqHI1M"
1623616918830 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileJqHI1M\\search.json.mozlz4", (void 0)))
1623616923012 Marionette INFO Listening on port 52469
1623616923243 Marionette WARN TLS certificate errors will be ignored for this session
1623617258029 Marionette INFO Stopped listening on port 52469
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileXwHSYv"
1623617046289 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileXwHSYv\\search.json.mozlz4", (void 0)))
1623617049816 Marionette INFO Listening on port 52588
1623617050051 Marionette WARN TLS certificate errors will be ignored for this session
1623617257263 Marionette INFO Stopped listening on port 52588
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileKbS8gv"
1623617112887 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileKbS8gv\\search.json.mozlz4", (void 0)))
1623617116429 Marionette INFO Listening on port 52698
1623617116549 Marionette WARN TLS certificate errors will be ignored for this session
1623617256628 Marionette INFO Stopped listening on port 52698
###!!! [Child][RunMessage] Error: Channel closing: too late to send/recv, messages will be lost
"--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileZOTKaF"
1623617241870 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileZOTKaF\\search.json.mozlz4", (void 0)))
1623617245668 Marionette INFO Listening on port 52818
1623617246118 Marionette WARN TLS certificate errors will be ignored for this session
JavaScript error: , line 0: NS_ERROR_NOT_AVAILABLE
1:52909
1623617456437 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofile4YionA"
1623617456926 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofile4YionA\\search.json.mozlz4", (void 0)))
1623617460489 Marionette INFO Listening on port 52917
1623617460656 Marionette WARN TLS certificate errors will be ignored for this session
JavaScript error: , line 0: NS_ERROR_NOT_AVAILABLE
1:53052
1623617999467 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileT6wXn9"
1623617999841 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileT6wXn9\\search.json.mozlz4", (void 0)))
1623618003246 Marionette INFO Listening on port 53060
1623618003654 Marionette WARN TLS certificate errors will be ignored for this session
1623618092602 geckodriver INFO Listening on 127.0.0.1:53166
1623618095645 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofilearl8EF"
1623618097038 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofilearl8EF\\search.json.mozlz4", (void 0)))
1623618101719 Marionette INFO Listening on port 53175
1623618102195 Marionette WARN TLS certificate errors will be ignored for this session
1623618176645 geckodriver INFO Listening on 127.0.0.1:53314
1623618179652 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofilepKMkE6"
1623618180145 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofilepKMkE6\\search.json.mozlz4", (void 0)))
1623618183698 Marionette INFO Listening on port 53323
1623618183838 Marionette WARN TLS certificate errors will be ignored for this session
1623618235629 geckodriver INFO Listening on 127.0.0.1:53416
1623618238667 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileBH6nMe"
1623618239447 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileBH6nMe\\search.json.mozlz4", (void 0)))
1623618242899 Marionette INFO Listening on port 53424
1623618243002 Marionette WARN TLS certificate errors will be ignored for this session
1623618332284 geckodriver INFO Listening on 127.0.0.1:53513
1623618335309 mozrunner::runner INFO Running command: "C:\\Program Files\\Mozilla Firefox\\firefox.exe" "--marionette" "-foreground" "-no-remote" "-profile" "C:\\Users\\YELYZA~1\\AppData\\Local\\Temp\\rust_mozprofileNFe5nV"
1623618335722 Marionette INFO Marionette enabled
console.warn: SearchSettings: "get: No settings file exists, new profile?" (new NotFoundError("Could not open the file at C:\\Users\\Yelyzaveta\\AppData\\Local\\Temp\\rust_mozprofileNFe5nV\\search.json.mozlz4", (void 0)))
1623618338965 Marionette INFO Listening on port 53522
1623618339024 Marionette WARN TLS certificate errors will be ignored for this session

View File

@ -2,8 +2,11 @@
import sys import sys
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains from selenium.webdriver.common.action_chains import ActionChains
import requests
from bs4 import BeautifulSoup
import time import time
import re import re
import json
#login #login
url = 'https://www.facebook.com/login/' url = 'https://www.facebook.com/login/'
@ -36,11 +39,11 @@ browser.get(url)
time.sleep(3) time.sleep(3)
#scrolling #scrolling
i = 0 # i = 0
for _ in range(5): # for _ in range(5):
browser.execute_script("window.scrollTo(0, " + str(900+i) + ");") # browser.execute_script("window.scrollTo(0, " + str(900+i) + ");")
i += 500 # i += 500
time.sleep(3) # time.sleep(3)
#get_events #get_events
html_page = browser.page_source html_page = browser.page_source
@ -53,3 +56,83 @@ links = []
for r in result: for r in result:
links.append(r.split(" ")[0][:-1]) links.append(r.split(" ")[0][:-1])
print(links) print(links)
#get data
canonical_url = 'https://www.facebook.com'
# Labels that are treated as event categories when they appear verbatim among
# the scraped <span> texts.
# NOTE(review): "Literature" looks like it should be the Polish "Literatura";
# confirm against the label the page actually renders before changing it.
kategorii = ["Cele dobroczynne", "Dom", "Film", "Fitness", "Gry", "Impreza", "Jedzenie", "Komedia", "Literature", "Muzyka", "Napoje", "Ogrodnictwo", "Rękodzieło", "Sieci kontaktów", "Sport", "Sztuka", "Taniec", "Teatr", "Wellness", "Wydarzenia religijne", "Zakupy", "Zdrowie"]

# Top-level JSON-LD keys copied into the output record.
_LD_JSON_KEYS = ("name", "startDate", "description", "endDate")
# Characters outside the Basic Multilingual Plane (most emoji) plus any
# unpaired surrogates; both are stripped from scraped text before saving.
_NON_BMP_RE = re.compile(r'[\ud800-\udfff\U00010000-\U0010ffff]')
# A run of digits followed by ", Poznań" at the start of a label.
_NUMBER_POZNAN_RE = re.compile(r'\d+, Poznań')


def _ld_json_fields(html_page):
    """Return the name/date/description fields found in the page's JSON-LD.

    Every ``<script type="application/ld+json">`` element is parsed with
    ``json.loads``. Only top-level string values whose key is listed in
    ``_LD_JSON_KEYS`` are kept, so a nested object such as ``location`` cannot
    overwrite the event's own ``name``. Emoji are removed from the values.
    """
    fields = {}
    soup = BeautifulSoup(html_page, 'html.parser')
    for tag in soup.select("script[type='application/ld+json']"):
        try:
            payload = json.loads(tag.string or "")
        except ValueError:
            # Best-effort scraping: skip a script element that is not valid JSON.
            continue
        if not isinstance(payload, dict):
            continue
        for key, value in payload.items():
            if key in _LD_JSON_KEYS and isinstance(value, str):
                fields[key] = _NON_BMP_RE.sub('', value)
    return fields


def _collect_span_labels(driver):
    """Return the texts of the <span> elements between two page markers.

    Collection starts at the first span containing "użytkowników odpowiedziało"
    (that span is included) and stops at the first span containing
    "Wybierz się ze znajomymi" or "Jak wziąć udział". Empty texts and texts
    without any ASCII letter are skipped.
    """
    labels = []
    collecting = False
    # find_elements(by, value) is available in both Selenium 3 and Selenium 4.
    for element in driver.find_elements("tag name", "span"):
        label = element.text  # read once; every access is a driver round trip
        if "użytkowników odpowiedziało" in label:
            collecting = True
        elif "Wybierz się ze znajomymi" in label or "Jak wziąć udział" in label:
            break
        if collecting and label != "" and re.search(r'[A-Za-z]', label):
            labels.append(label)
    return labels


def _fields_from_labels(labels, categories):
    """Derive organizer, online/place, price and category from span labels.

    The meaning of each position (1: organizer, 2: location, 3: price,
    4: ticket details) follows the order in which the script has always read
    them; a position is only used when the list is long enough to contain it.
    Returns an empty dict when ``labels`` is empty.
    """
    fields = {}
    if not labels:
        return fields
    if len(labels) > 1 and "Wydarzenie" in labels[1]:
        fields['organizer'] = labels[1].replace("Wydarzenie", "")
    if len(labels) > 2:
        if 'online' in labels[2] or 'Online' in labels[2]:
            fields['online'] = "true"
        else:
            fields['online'] = "false"
            places = [labels[2]]
            for label in labels:
                if "ul." in label or _NUMBER_POZNAN_RE.match(label):
                    places.append(label)
            fields['place'] = places
    if len(labels) > 3:
        if 'za darmo' in labels[3]:
            fields['price'] = 'za darmo'
        elif 'Bilety' in labels[3]:
            price = ['bilet']
            if len(labels) > 4:
                price.append(labels[4])
            fields['price'] = price
    fields["category"] = [label for label in labels if label in categories]
    return fields


number = 0
for link in links:
    url = canonical_url + link
    browser.get(url)
    time.sleep(3)  # give the page time to render before reading its spans
    # The structured data is read from a separate plain HTTP request rather
    # than from the Selenium session.
    res = requests.get(url, timeout=30)
    data = _ld_json_fields(res.content)
    data.update(_fields_from_labels(_collect_span_labels(browser), kategorii))
    file = "event_" + str(number) + ".json"
    # Write UTF-8 explicitly so the output does not depend on the platform's
    # default encoding, and keep Polish characters readable in the file.
    with open(file, "w", encoding="utf-8") as outfile:
        json.dump(data, outfile, ensure_ascii=False)
    number += 1