code refactorings and improvements
This commit is contained in:
parent
b16f29ef6d
commit
f54e01581c
@ -1,38 +0,0 @@
|
|||||||
import requests
|
|
||||||
from string import Template
|
|
||||||
from random import choice
|
|
||||||
|
|
||||||
|
|
||||||
class DuckDuckGo(object):
    """Early client stub for DuckDuckGo's HTML search endpoint.

    Only the request plumbing exists; ``body`` and ``links`` are placeholders.
    """

    def __init__(self, proxies=None, language=''):
        """Store the proxy list, the default language and the URL template.

        :param proxies: optional list of proxy entries; each entry is indexed
            as ``entry[0]`` (proxy address) and ``entry[1]`` (protocol key).
        :param language: stored on the instance; not read elsewhere in this
            class.
        """
        self.proxies = [] if proxies is None else proxies
        self.language = language
        self.query = Template('https://duckduckgo.com/html/?q=$query&kl=$lang')

    def _get(self, query, language):
        """Fetch the result page for ``query`` and return the response.

        A random proxy from ``self.proxies`` is used when any are configured;
        otherwise the request is sent directly.
        """
        # Build the link up front so that both branches can use it.
        link = self.query.substitute(query=query, lang=language)
        if self.proxies:
            proxy = choice(self.proxies)
            ip_port = proxy[0]
            protocol = proxy[1]
            return requests.get(link, proxies={protocol: ip_port})
        return requests.get(link)

    def body(self, query, language):
        """Placeholder: not implemented yet, returns ``None``."""
        pass

    def links(self, query, language):
        """Placeholder: not implemented yet, returns ``None``."""
        pass
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
BIN
chromedriver
BIN
chromedriver
Binary file not shown.
@ -1,7 +1,3 @@
|
|||||||
pip-tools
|
pip-tools
|
||||||
jedi
|
|
||||||
rope
|
|
||||||
importmagic
|
|
||||||
autopep8
|
|
||||||
yapf
|
|
||||||
ipdb
|
ipdb
|
||||||
|
pytest
|
||||||
|
@ -4,25 +4,21 @@
|
|||||||
#
|
#
|
||||||
# pip-compile --output-file dev-requirements.txt dev-requirements.in
|
# pip-compile --output-file dev-requirements.txt dev-requirements.in
|
||||||
#
|
#
|
||||||
autopep8==1.3.1
|
|
||||||
click==6.7 # via pip-tools
|
click==6.7 # via pip-tools
|
||||||
decorator==4.0.11 # via ipython, traitlets
|
decorator==4.0.11 # via ipython, traitlets
|
||||||
first==2.0.1 # via pip-tools
|
first==2.0.1 # via pip-tools
|
||||||
importmagic==0.1.7
|
|
||||||
ipdb==0.10.2
|
ipdb==0.10.2
|
||||||
ipython-genutils==0.2.0 # via traitlets
|
ipython-genutils==0.2.0 # via traitlets
|
||||||
ipython==5.3.0 # via ipdb
|
ipython==5.3.0 # via ipdb
|
||||||
jedi==0.10.2
|
|
||||||
pexpect==4.2.1 # via ipython
|
pexpect==4.2.1 # via ipython
|
||||||
pickleshare==0.7.4 # via ipython
|
pickleshare==0.7.4 # via ipython
|
||||||
pip-tools==1.9.0
|
pip-tools==1.9.0
|
||||||
prompt-toolkit==1.0.14 # via ipython
|
prompt-toolkit==1.0.14 # via ipython
|
||||||
ptyprocess==0.5.1 # via pexpect
|
ptyprocess==0.5.1 # via pexpect
|
||||||
pycodestyle==2.3.1 # via autopep8
|
py==1.4.34 # via pytest
|
||||||
pygments==2.2.0 # via ipython
|
pygments==2.2.0 # via ipython
|
||||||
rope==0.10.5
|
pytest==3.1.2
|
||||||
simplegeneric==0.8.1 # via ipython
|
simplegeneric==0.8.1 # via ipython
|
||||||
six==1.10.0 # via pip-tools, prompt-toolkit, traitlets
|
six==1.10.0 # via pip-tools, prompt-toolkit, traitlets
|
||||||
traitlets==4.3.2 # via ipython
|
traitlets==4.3.2 # via ipython
|
||||||
wcwidth==0.1.7 # via prompt-toolkit
|
wcwidth==0.1.7 # via prompt-toolkit
|
||||||
yapf==0.16.1
|
|
||||||
|
@ -1,63 +0,0 @@
|
|||||||
import requests
|
|
||||||
from string import Template
|
|
||||||
from random import choice
|
|
||||||
from proxy import Proxy
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from bs4.dammit import EncodingDetector
|
|
||||||
|
|
||||||
|
|
||||||
class DuckDuckGo(object):
    """Client for DuckDuckGo's HTML search page, optionally using proxies.

    The language code given to the constructor is appended to the ``kl=``
    parameter of the query URL template.
    """

    def __init__(self, proxies=None, language=''):
        """Create the proxy wrapper and the search URL template.

        :param proxies: optional initial proxies passed on to ``Proxy``.
        :param language: value appended after ``kl=`` in the search URL.
        """
        self.proxy_obj = Proxy() if proxies is None else Proxy(proxies)
        self.query = Template('https://duckduckgo.com/html/?q=$query&kl=' +
                              language)

    def _get(self, query):
        """POST the search for ``query`` and return the response.

        Spaces in ``query`` are replaced by ``+``. When the proxy pool is not
        empty, a random proxy is used for the request.
        """
        query = query.replace(' ', '+')
        link = self.query.substitute(query=query)
        if self.proxy_obj.proxies:
            # requests expects a scheme -> proxy mapping, not the raw pool
            # entry, so convert it first.
            # NOTE(review): assumes Proxy.random() returns an (ip, port)
            # pair, as _proxy_to_dict expects -- confirm against proxy.py.
            proxy = self._proxy_to_dict(self.proxy_obj.random())
            print(proxy)
            return requests.post(link, proxies=proxy)
        return requests.post(link)

    def _proxy_to_dict(self, proxy):
        """Return a requests-style proxies mapping for an (ip, port) pair."""
        proxy_string = str(proxy[0]) + ':' + str(proxy[1])
        return {"http": proxy_string, "https": proxy_string}

    def download_proxies(self):
        """Ask the proxy wrapper to download proxies."""
        self.proxy_obj.download()

    def _soup(self, query):
        """Run the search and return the result page as ``BeautifulSoup``.

        The encoding declared inside the document is preferred; otherwise the
        HTTP encoding is used, but only when the content-type header mentions
        a charset.
        """
        resp = self._get(query)
        content_type = resp.headers.get('content-type', '').lower()
        http_encoding = resp.encoding if 'charset' in content_type else None
        html_encoding = EncodingDetector.find_declared_encoding(
            resp.content, is_html=True)
        encoding = html_encoding or http_encoding
        return BeautifulSoup(resp.content, 'lxml', from_encoding=encoding)

    def html(self, query):
        """Return the prettified HTML of the result page for ``query``."""
        soup = self._soup(query)
        return soup.prettify()

    def links(self, query):
        """Return the ``href`` of every ``a.result__snippet`` on the page."""
        soup = self._soup(query)
        return [
            link.get('href')
            for link in soup.find_all('a', class_='result__snippet')
        ]
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Run a sample Polish-language search and print the links it returns."""
    searcher = DuckDuckGo(language='pl-pl')
    print(searcher.links('koscioly polska'))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
@ -1,68 +0,0 @@
|
|||||||
import dill
|
|
||||||
from duckduckgo import DuckDuckGo
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
import time
|
|
||||||
import random
|
|
||||||
|
|
||||||
tsv = ''
|
|
||||||
urls = ''
|
|
||||||
|
|
||||||
|
|
||||||
def check(parish, duck):
    """Confirm a parish URL by finding its host among the search results.

    On the first result whose network location equals that of ``parish.url``,
    the host is appended to the module-level ``urls`` string, a tab-separated
    record is appended to the module-level ``tsv`` string, and ``True`` is
    returned. ``False`` is returned when no result matches.
    """
    global urls
    global tsv
    for candidate in _urls(parish, duck):
        parish_root_url = urlparse(parish.url).netloc
        if parish_root_url != urlparse(candidate).netloc:
            continue
        urls += parish_root_url + '\n'
        record = '\t'.join([
            parish.name,
            parish.city,
            parish.street,
            parish.postal_code,
            parish_root_url,
            parish.meta_url,
            parish.gps,
        ])
        tsv += record + '\n'
        print('added')
        # TODO: save links to txt file, one per line
        # TODO: wget -r -i file all links
        # TODO: save parishes to jsonline format
        return True  # mark as ok url
    return False
|
|
||||||
|
|
||||||
|
|
||||||
def _urls(parish, duck):
|
|
||||||
query = parish.name + ' ' + parish.street + ' ' + parish.postal_code
|
|
||||||
links = duck.links(query)
|
|
||||||
time.sleep(1)
|
|
||||||
while not links:
|
|
||||||
print('retry')
|
|
||||||
random.randint(3, 10)
|
|
||||||
time.sleep(10)
|
|
||||||
links = duck.links(query)
|
|
||||||
return links
|
|
||||||
|
|
||||||
|
|
||||||
def find_url(parish, duck=None):
    """Print and return the search result links for ``parish``.

    :param parish: parish record passed on to ``_urls``.
    :param duck: search client to use; when omitted, a Polish-language
        ``DuckDuckGo`` client is created.
    :returns: the list of links found.
    """
    # _urls requires a search client; the previous call omitted it and
    # raised TypeError before reaching the (now removed) debugger breakpoint.
    if duck is None:
        duck = DuckDuckGo(language='pl-pl')
    links = _urls(parish, duck)
    print(links)
    return links
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Check every parish that has a URL and write the collected results.

    Loads the parish list from ``./parishes.dill``, downloads proxies, runs
    ``check`` on each parish with a non-empty ``url``, then writes the
    module-level ``urls`` and ``tsv`` strings to ``urls.txt`` and
    ``parishes.tsv``.
    """
    # NOTE: dill, like pickle, executes code on load; only use trusted files.
    with open('./parishes.dill', 'rb') as f:
        parishes = dill.load(f)

    duck = DuckDuckGo(language='pl-pl')
    print('Downloading proxies')
    duck.download_proxies()
    total = len(parishes)
    for i, parish in enumerate(parishes):
        # Scale to a real percentage; the bare fraction was labelled '%'.
        print(str(i * 100 / total) + '% done. Nr: ' + str(i))
        if parish.url:
            check(parish, duck)
    with open('urls.txt', 'w') as f:
        f.write(urls)
    with open('parishes.tsv', 'w') as f:
        f.write(tsv)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
@ -1,88 +0,0 @@
|
|||||||
import requests
|
|
||||||
# from bs4 import BeautifulSoup
|
|
||||||
import re
|
|
||||||
from collections import namedtuple
|
|
||||||
import pickle
|
|
||||||
import time
|
|
||||||
|
|
||||||
|
|
||||||
# Defined at module level so that pickle can look the class up by name when
# the scraped records are dumped.
Parish = namedtuple('Parish', [
    'meta_url', 'url', 'name', 'city', 'street', 'postal_code', 'gps'
])


class ParishScraper(object):
    """Scrape parish records from numbered colaska.pl parish pages."""

    def __init__(self):
        """Set the URL prefix to which page numbers are appended."""
        self.website_prefix = 'http://colaska.pl/index/parafia/id/'

    def _scrap(self):
        """Fetch pages 1..10999 and return the parsed ``Parish`` records.

        A page answering with HTTP 500 is retried with a growing pause.
        Pages whose final URL does not contain ``id`` and pages that cannot
        be parsed are skipped.
        """
        max_sleep = 300
        parishes = []
        for page_nr in range(1, 11000):
            page_url = self.website_prefix + str(page_nr)
            page = requests.get(page_url)
            sleep_time = 2
            while page.status_code == 500:
                print('Status code 500 error')
                # Double (and cap) the pause; squaring it reached 65536 s
                # after only four failures.
                sleep_time = min(sleep_time * 2, max_sleep)
                print('Waiting ' + str(sleep_time) + ' sec')
                time.sleep(sleep_time)
                page = requests.get(page_url)
            if 'id' in page.url:
                parish = self._retrieve_info(page)
                if parish is None:
                    continue
                print(parish)
                print('\n')
                parishes.append(parish)
        return parishes

    def _retrieve_info(self, page):
        """Parse one parish page into a ``Parish`` record.

        :param page: response-like object; ``encoding`` is set to UTF-8 and
            ``text`` and ``url`` are read.
        :returns: a ``Parish`` namedtuple, or ``None`` when the header block
            or the GPS attribute cannot be found.
        """
        page.encoding = 'utf-8'
        html_doc = page.text
        meta_url = page.url
        print(meta_url)

        head = (r'pHead rel">[\w\W]*?<p class="title">(.*?)</p>'
                r'[\w\W]*?class="city">(.*?)</span>[\w\W]*?')
        # First try the address paragraph holding "street<br />postal code".
        search_result = re.search(head + r'<p>(.*?)<br />(.*?)</p>', html_doc)
        if search_result is not None:
            street = search_result.group(3)
            postal_code = search_result.group(4)
        else:
            # Fall back to a paragraph holding the postal code only.
            search_result = re.search(head + r'<p>(.*?)</p>', html_doc)
            if search_result is None:
                print('Could not parse parish page: ' + meta_url)
                return None
            street = ''
            postal_code = search_result.group(3)

        name = search_result.group(1)
        city = search_result.group(2)

        url_search = re.search(r'link mt10"><a href="(.*?)">', html_doc)
        url = '' if url_search is None else url_search.group(1)

        gps_search = re.search(r'id="tabsmaps" gps="(.*?)"><span', html_doc)
        if gps_search is None:
            print('Could not parse parish page: ' + meta_url)
            return None
        gps = gps_search.group(1)

        return Parish(meta_url, url, name, city, street, postal_code, gps)

    def scrap_and_save(self):
        """Scrape all pages and pickle the records to ``parishes.pickle``."""
        parishes = self._scrap()
        with open('parishes.pickle', 'wb') as f:
            pickle.dump(parishes, f, pickle.HIGHEST_PROTOCOL)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Scrape every parish page and save the result to disk."""
    ParishScraper().scrap_and_save()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
BIN
parishes.pickle
Normal file
BIN
parishes.pickle
Normal file
Binary file not shown.
10100
parishes.tsv
10100
parishes.tsv
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,4 @@
|
|||||||
requests
|
requests
|
||||||
dill
|
|
||||||
beautifulsoup4
|
beautifulsoup4
|
||||||
lxml
|
|
||||||
selenium
|
selenium
|
||||||
|
lxml
|
||||||
|
@ -5,7 +5,6 @@
|
|||||||
# pip-compile --output-file requirements.txt requirements.in
|
# pip-compile --output-file requirements.txt requirements.in
|
||||||
#
|
#
|
||||||
beautifulsoup4==4.6.0
|
beautifulsoup4==4.6.0
|
||||||
dill==0.2.6
|
|
||||||
lxml==3.8.0
|
lxml==3.8.0
|
||||||
requests==2.13.0
|
requests==2.13.0
|
||||||
selenium==3.4.3
|
selenium==3.4.3
|
||||||
|
16
scraper/convert.py
Normal file
16
scraper/convert.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
import pickle
|
||||||
|
|
||||||
|
# Convert the pickled parish records (dicts) into a tab-separated file and
# print how many of them carry a URL.

# NOTE: unpickling executes code from the file; only load trusted data.
# The file imports pickle only, so load with pickle (dill is not imported).
with open('parishes.pickle', 'rb') as f:
    parishes = pickle.load(f)

# Column order of the TSV output.
columns = ('name', 'url', 'city', 'street', 'postal_code', 'meta_url', 'gps')

rows = []
i = 0
for parish in parishes:
    rows.append('\t'.join(parish[column] for column in columns) + '\n')
    # Records are dicts, so the URL is read by key, not by attribute.
    if parish['url']:
        i += 1
tsv = ''.join(rows)
print(i)
with open('parishes.tsv', 'w') as f:
    f.write(tsv)
|
106
scraper/duckduckgo.py
Normal file
106
scraper/duckduckgo.py
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
import requests
|
||||||
|
from string import Template
|
||||||
|
from random import choice
|
||||||
|
from proxy import Proxy
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from bs4.dammit import EncodingDetector
|
||||||
|
|
||||||
|
|
||||||
|
class DuckDuckGo(object):
    """Client for DuckDuckGo's HTML search page with a rotating proxy pool.

    Proxies that fail are removed from the pool; proxies that answered are
    remembered as "golden" and put back into the pool once too many failures
    have accumulated.
    """

    def __init__(self, proxies=None, language='', external_download=True):
        """Create the proxy wrapper, URL template and failure bookkeeping.

        :param proxies: optional initial proxies passed on to ``Proxy``.
        :param language: value appended after ``kl=`` in the search URL.
        :param external_download: stored on the instance; not read anywhere
            in this class.
        """
        self.proxy_obj = Proxy() if proxies is None else Proxy(proxies)
        self.query = Template('https://duckduckgo.com/html/?q=$query&kl=' +
                              language)
        # Attribute name (sic) kept for backward compatibility.
        self.falitures = 0
        self.golden_proxies = []
        self.external_download = external_download
        self.download = False

    def _get(self, query):
        """POST the search for ``query`` and return the response.

        Spaces are replaced by ``+``. The proxy pool is used when it is not
        empty; otherwise the request is sent directly.
        """
        query = query.replace(' ', '+')
        link = self.query.substitute(query=query)
        if self.proxy_obj.proxies:
            return self._request(link)
        return requests.post(link)

    def _request(self, link):
        """POST ``link`` through random proxies until one of them answers.

        :raises RuntimeError: when the pool is empty and downloading new
            proxies yields none.
        """
        while True:
            # Pick the proxy only after failure handling had a chance to
            # replenish the pool, and never draw from an empty pool.
            if not self.proxy_obj.proxies:
                self.download_proxies()
                if not self.proxy_obj.proxies:
                    raise RuntimeError('No proxies available')
            proxy = self.proxy_obj.random()
            proxy_dict = self._proxy_to_dict(proxy)
            try:
                resp = requests.post(link, proxies=proxy_dict, timeout=2)
            except Exception:
                # Deliberately broad: public proxies fail in many ways. Unlike
                # a bare ``except``, this lets Ctrl-C / SystemExit end the loop.
                self._register_failure(proxy)
                continue
            print(proxy_dict)
            self.golden_proxies.append(proxy)
            return resp

    def _register_failure(self, proxy):
        """Drop a failed proxy and refresh the pool after too many failures."""
        print('Nr of falitures: ' + str(self.falitures) + ' Proxies: '
              + str(len(self.proxy_obj.proxies)) + ' Golden proxies: '
              + str(len(self.golden_proxies)))
        self.proxy_obj.proxies.remove(proxy)
        self.falitures += 1
        if self.falitures > 0.95 * len(self.proxy_obj.proxies):
            # NOTE(review): the flag used to be reset to False and then
            # immediately overwritten with True; alternating between
            # "download" and "skip" looks like the intent -- confirm.
            if self.download:
                self.download_proxies()
                self.download = False
            else:
                self.download = True
            self.falitures = 0
            self.proxy_obj.proxies.extend(self.golden_proxies)
            del self.golden_proxies[:]

    def _proxy_to_dict(self, proxy):
        """Return a requests-style proxies mapping for an (ip, port) pair.

        Both traffic kinds are routed to the same ``http://`` proxy URL: the
        scheme in the value describes how to reach the proxy itself.
        NOTE(review): assumes the pooled proxies are plain HTTP proxies.
        """
        proxy_url = 'http://' + str(proxy[0]) + ':' + str(proxy[1])
        return {
            "http": proxy_url,
            "https": proxy_url
        }

    def download_proxies(self, limit=0):
        """Ask the proxy wrapper to download proxies, passing ``limit`` on."""
        self.proxy_obj.download(limit)

    def _soup(self, query):
        """Run the search and return the result page as ``BeautifulSoup``.

        The encoding declared inside the document is preferred; otherwise the
        HTTP encoding is used, but only when the content-type header mentions
        a charset.
        """
        resp = self._get(query)
        content_type = resp.headers.get('content-type', '').lower()
        http_encoding = resp.encoding if 'charset' in content_type else None
        html_encoding = EncodingDetector.find_declared_encoding(
            resp.content, is_html=True)
        encoding = html_encoding or http_encoding
        return BeautifulSoup(resp.content, 'lxml', from_encoding=encoding)

    def html(self, query):
        """Return the prettified HTML of the result page for ``query``."""
        soup = self._soup(query)
        return soup.prettify()

    def links(self, query):
        """Return the ``href`` of every ``a.result__snippet`` on the page."""
        soup = self._soup(query)
        return [
            link.get('href')
            for link in soup.find_all('a', class_='result__snippet')
        ]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Manual smoke test: fetch a "my ip" search through one random proxy.

    Downloads a limited proxy list, picks a random proxy and prints both the
    proxy mapping and the raw response body.
    """
    duck = DuckDuckGo(language='pl-pl')
    duck.download_proxies(1)
    proxy = duck.proxy_obj.random()
    proxy = duck._proxy_to_dict(proxy)
    print(proxy)
    link = 'https://duckduckgo.com/?q=my+ip&t=canonical&atb=v67-1&ia=answer'
    # NOTE(review): verify=False disables TLS certificate checks; acceptable
    # only for this throw-away proxy experiment.
    resp = requests.get(link, proxies=proxy, verify=False)
    print(resp.content)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
19
scraper/parishduck.py
Normal file
19
scraper/parishduck.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
from duckduckgo import DuckDuckGo
|
||||||
|
|
||||||
|
|
||||||
|
class ParishDuck():
    """Look up candidate website links for a parish via a search client."""

    def __init__(self):
        """Nothing to initialise; the class keeps no state."""
        pass

    def urls(self, parish, duck, sleep_range=(1, 3)):
        """Return the search result links for ``parish``, retrying while empty.

        The query is ``"<name> <street> <postal_code>"``.

        :param parish: mapping with ``name``, ``street`` and ``postal_code``.
        :param duck: object whose ``links(query)`` returns a list of links.
        :param sleep_range: inclusive ``(low, high)`` bounds, in seconds, for
            the random pause before each retry.
        :returns: the first non-empty list of links.
        """
        # The module imports neither of these, so bring them in here.
        import random
        import time

        query = parish['name'] + ' ' + parish['street'] + ' ' + parish['postal_code']
        links = duck.links(query)
        while not links:
            sleep_time = random.randint(*sleep_range)
            print('retry, sleeping ' + str(sleep_time) + 's')
            time.sleep(sleep_time)
            links = duck.links(query)
        return links
|
@ -2,7 +2,9 @@ import requests
|
|||||||
import re
|
import re
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
import time
|
import time
|
||||||
import dill
|
import pickle
|
||||||
|
from lxml import html
|
||||||
|
import html as python_html
|
||||||
|
|
||||||
|
|
||||||
class ParishScraper(object):
|
class ParishScraper(object):
|
||||||
@ -47,43 +49,45 @@ class ParishScraper(object):
|
|||||||
meta_url = page.url
|
meta_url = page.url
|
||||||
print(meta_url)
|
print(meta_url)
|
||||||
try:
|
try:
|
||||||
search_result = re.search(
|
html_tree = html.document_fromstring(html_doc)
|
||||||
'pHead rel">[\w\W]*?<p class="title">(.*?)</p>[\w\W]*?class="city">(.*?)</span>[\w\W]*?<p>(.*?)<br />(.*?)</p>',
|
url = html_tree.xpath('//p[@class="link mt10"]/a/@href')
|
||||||
html_doc)
|
url = url[0] if url else ''
|
||||||
if search_result is None:
|
name = html_tree.xpath(
|
||||||
search_result = re.search(
|
'//div[@class="pHead rel"]/p[@class="title"]/text()')[0]
|
||||||
'pHead rel">[\w\W]*?<p class="title">(.*?)</p>[\w\W]*?class="city">(.*?)</span>[\w\W]*?<p>(.*?)</p>',
|
city = html_tree.xpath('//span[@class="city"]/text()')[0]
|
||||||
html_doc)
|
street_and_postal_code = html_tree.xpath(
|
||||||
street = ''
|
'//span[@class="city"]/following-sibling::p[1]')[0]
|
||||||
postal_code = search_result.group(3)
|
street_and_postal_code_string = html.tostring(
|
||||||
|
street_and_postal_code).decode('utf-8')
|
||||||
|
street_and_postal_code_string = python_html.unescape(
|
||||||
|
street_and_postal_code_string)
|
||||||
|
if '<br>' in street_and_postal_code_string:
|
||||||
|
search_result = re.search('<p>(.*?)<br>(.*?)</p>',
|
||||||
|
street_and_postal_code_string)
|
||||||
|
street = search_result.group(1)
|
||||||
|
postal_code = search_result.group(2)
|
||||||
else:
|
else:
|
||||||
street = search_result.group(3)
|
postal_code = street_and_postal_code.text_content()
|
||||||
postal_code = search_result.group(4)
|
street = ''
|
||||||
|
gps = html_tree.xpath('//@gps')[0].replace(' ', '')
|
||||||
name = search_result.group(1)
|
parish = {
|
||||||
city = search_result.group(2)
|
'name': name,
|
||||||
|
'city': city,
|
||||||
url_search = re.search('link mt10"><a href="(.*?)">', html_doc)
|
'url': url,
|
||||||
url = '' if url_search is None else url_search.group(1)
|
'meta_url': meta_url,
|
||||||
|
'street': street,
|
||||||
gps = re.search('id="tabsmaps" gps="(.*?)"><span',
|
'postal_code': postal_code,
|
||||||
html_doc).group(1)
|
'gps': gps
|
||||||
Parish = namedtuple('Parish', [
|
}
|
||||||
'meta_url', 'url', 'name', 'city', 'street', 'postal_code',
|
except:
|
||||||
'gps'
|
|
||||||
])
|
|
||||||
|
|
||||||
parish = Parish(meta_url, url, name, city, street, postal_code,
|
|
||||||
gps)
|
|
||||||
except AttributeError:
|
|
||||||
import ipdb
|
import ipdb
|
||||||
ipdb.set_trace()
|
ipdb.set_trace()
|
||||||
return parish
|
return parish
|
||||||
|
|
||||||
def scrap_and_save(self):
|
def scrap_and_save(self):
|
||||||
parishes = self._scrap()
|
parishes = self._scrap()
|
||||||
with open('parishes.dill', 'wb') as f:
|
with open('parishes.pickle', 'wb') as f:
|
||||||
dill.dump(parishes, f, dill.HIGHEST_PROTOCOL)
|
pickle.dump(parishes, f, pickle.HIGHEST_PROTOCOL)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
|||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
|
from selenium import common
|
||||||
import re
|
import re
|
||||||
import random
|
import random
|
||||||
|
|
||||||
@ -8,12 +9,19 @@ import random
|
|||||||
class Proxy():
|
class Proxy():
|
||||||
def __init__(self, proxies=None):
|
def __init__(self, proxies=None):
|
||||||
"docstring"
|
"docstring"
|
||||||
self.proxies = [] if proxies is None else proxies
|
#self.proxies = [] if proxies is None else proxies
|
||||||
|
self.proxies = proxies or []
|
||||||
|
|
||||||
def download(self):
|
def download(self, limit=0):
|
||||||
driver = webdriver.Chrome('./chromedriver')
|
print('Transparent proxies')
|
||||||
|
self._download('Transparent', limit)
|
||||||
|
print('Elite proxies')
|
||||||
|
self._download('elite', limit)
|
||||||
|
|
||||||
|
def _download(self, type, limit=0):
|
||||||
|
driver = webdriver.PhantomJS('./phantomjs')
|
||||||
driver.maximize_window()
|
driver.maximize_window()
|
||||||
driver.get('http://www.gatherproxy.com/proxylist/anonymity/?t=elite')
|
driver.get('http://www.gatherproxy.com/proxylist/anonymity/?t=' + type)
|
||||||
full_list_button = driver.find_element_by_xpath(
|
full_list_button = driver.find_element_by_xpath(
|
||||||
'//input[@type="submit" and @value="Show Full List"]')
|
'//input[@type="submit" and @value="Show Full List"]')
|
||||||
full_list_button.click()
|
full_list_button.click()
|
||||||
@ -22,9 +30,17 @@ class Proxy():
|
|||||||
'<a href="#(.*?)" class="inactive" onclick="gp.pageClick',
|
'<a href="#(.*?)" class="inactive" onclick="gp.pageClick',
|
||||||
driver.page_source):
|
driver.page_source):
|
||||||
pass
|
pass
|
||||||
|
if limit == 0:
|
||||||
pages_nr = int(match.group(1))
|
pages_nr = int(match.group(1))
|
||||||
|
else:
|
||||||
|
pages_nr = limit
|
||||||
for i in range(1, pages_nr + 1):
|
for i in range(1, pages_nr + 1):
|
||||||
self._get_proxies(driver.page_source)
|
self._get_proxies(driver.page_source)
|
||||||
|
try:
|
||||||
|
driver.execute_script('gp.pageClick(' + str(i) + ')')
|
||||||
|
except common.exceptions.WebDriverException:
|
||||||
|
import ipdb
|
||||||
|
ipdb.set_trace()
|
||||||
driver.execute_script('gp.pageClick(' + str(i) + ')')
|
driver.execute_script('gp.pageClick(' + str(i) + ')')
|
||||||
print(i)
|
print(i)
|
||||||
|
|
||||||
@ -42,4 +58,4 @@ class Proxy():
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
p = Proxy()
|
p = Proxy()
|
||||||
p.download()
|
p.download()
|
||||||
print(p.random())
|
proxy = p.random()
|
108
scraper/urlschecker.py
Normal file
108
scraper/urlschecker.py
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
import pickle
|
||||||
|
from duckduckgo import DuckDuckGo
|
||||||
|
from parishduck import ParishDuck
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
import requests
|
||||||
|
|
||||||
|
urls_append_filename = 'urls_checked_a.txt'
|
||||||
|
parishes_append_filename = 'parishes_checked_a.txt'
|
||||||
|
|
||||||
|
|
||||||
|
class ParishUrlChecker():
    """Confirm parish website URLs by matching them against search results."""

    def __init__(self):
        """Start with empty result buffers and zeroed counters."""
        self.tsv = ''
        self.urls = ''
        self.added = 0
        self.tried_urls = 0

    def check(self, parish, duck):
        """Return ``True`` if the parish URL shows up in the search results.

        Both the parish URL and each result link are resolved to their final
        URL and compared for equality. On a match the URL and a tab-separated
        record are added to ``self.urls`` / ``self.tsv`` and appended to the
        two "append" files.

        :param parish: mapping with ``name``, ``url``, ``city``, ``street``,
            ``postal_code``, ``meta_url`` and ``gps``.
        :param duck: search client passed on to ``ParishDuck.urls``.
        """
        self.tried_urls += 1
        # Resolve the parish URL first: if it is dead there is no point in
        # spending a search query on it.
        parish_url = self._get_true_url(parish['url'])
        if not parish_url:
            return False
        links = ParishDuck().urls(parish, duck)
        for link in links:
            if parish_url != self._get_true_url(link):
                continue
            t_parish_url = parish_url + '\n'
            self.urls += t_parish_url
            t_tsv = '\t'.join([
                parish['name'],
                parish_url,
                parish['city'],
                parish['street'],
                parish['postal_code'],
                parish['meta_url'],
                parish['gps'],
            ]) + '\n'
            self.tsv += t_tsv
            # Append right away so partial results are kept during the run.
            with open(urls_append_filename, 'a') as file:
                file.write(t_parish_url)
            with open(parishes_append_filename, 'a') as file:
                file.write(t_tsv)
            self.added += 1
            print('Added: ' + parish_url)
            # TODO: save links to txt file, one per line
            # TODO: wget -r -i file all links
            # TODO: not wget, but spider
            # TODO: save parishes to jsonline format?
            return True  # mark as ok url
        return False

    def _convert_url(self, url):
        """Strip a trailing slash and leading ``http://``/``https://``/``www.``."""
        if url.endswith('/'):
            url = url[:-1]
        for prefix in ('http://', 'https://', 'www.'):
            if url.startswith(prefix):
                url = url[len(prefix):]
        return url

    def _get_true_url(self, url):
        """Return the final URL after redirects, or ``''`` on failure.

        ``http://`` is prepended when ``url`` has no HTTP(S) scheme. The
        request is attempted up to five times with a 3 second timeout.
        """
        if not url:
            # e.g. an anchor without href, or a parish without a URL
            return ''
        # Test the prefix: a scheme appearing later in the string (such as
        # in a query parameter) does not make the URL absolute.
        if not url.startswith(('http://', 'https://')):
            url = 'http://' + url
        for _ in range(5):
            try:
                return requests.get(url, timeout=3).url
            except Exception:
                # Best effort over arbitrary scraped URLs; unlike a bare
                # ``except`` this still lets Ctrl-C stop the run.
                continue
        print('Falied url: ' + url)
        return ''
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Check every parish URL from ``./parishes.pickle`` and save the results.

    The two "append" files are truncated first, proxies are downloaded, and
    every parish with a non-empty ``url`` is checked. Progress and the hit
    rate are printed after each parish. Finally the accumulated URLs and TSV
    records are written to ``urls_checked.txt`` and ``parishes_checked.tsv``.
    """
    # Truncate the incremental output files left over from a previous run.
    with open(urls_append_filename, 'w') as file:
        pass
    with open(parishes_append_filename, 'w') as file:
        pass

    duck = DuckDuckGo(language='pl-pl')
    print('Downloading proxies')
    duck.download_proxies()
    urls_checker = ParishUrlChecker()
    # NOTE: unpickling executes code from the file; only load trusted data.
    with open('./parishes.pickle', 'rb') as f:
        parishes = pickle.load(f)

    total = len(parishes)
    for i, parish in enumerate(parishes, start=1):
        if parish['url']:
            urls_checker.check(parish, duck)
        else:
            print('none')
        hit_rate = (urls_checker.added /
                    (urls_checker.tried_urls or 1)) * 100
        print(
            str(i * 100 / total) + '% done. Nr: ' + str(i) +
            ' Performance: ' + str(urls_checker.added) + '/' +
            str(urls_checker.tried_urls) + ' ' + str(hit_rate) + '%')
    # The results live on the checker instance; there are no module-level
    # ``urls`` / ``tsv`` names in this file.
    with open('urls_checked.txt', 'w') as f:
        f.write(urls_checker.urls)
    with open('parishes_checked.tsv', 'w') as f:
        f.write(urls_checker.tsv)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
20
spider.py
Normal file
20
spider.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
from scrapy.selector import HtmlXPathSelector
|
||||||
|
from scrapy.spider import BaseSpider
|
||||||
|
from scrapy.http import Request
|
||||||
|
|
||||||
|
DOMAIN = 'example.com'
|
||||||
|
URL = 'http://%s' % DOMAIN
|
||||||
|
|
||||||
|
|
||||||
|
class MySpider(BaseSpider):
    """Spider that starts at ``URL`` and follows every link it finds."""

    name = DOMAIN
    allowed_domains = [DOMAIN]
    start_urls = [URL]

    def parse(self, response):
        """Print every link on the page and yield a request for each one.

        Links that do not start with ``http://`` or ``https://`` are resolved
        against the URL of the page they were found on.
        """
        try:
            from urllib.parse import urljoin
        except ImportError:  # Python 2
            from urlparse import urljoin

        hxs = HtmlXPathSelector(response)
        for url in hxs.select('//a/@href').extract():
            if not (url.startswith('http://') or url.startswith('https://')):
                # Plain concatenation with the site root produced broken
                # links for hrefs without a leading slash.
                url = urljoin(response.url, url)
            # Function-call form works on Python 2 and Python 3 alike.
            print(url)
            yield Request(url, callback=self.parse)
|
16
tests/convert.py
Normal file
16
tests/convert.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
import pickle
|
||||||
|
|
||||||
|
# Dump the pickled parish records as tab-separated lines and report how many
# of them have a URL.
with open('parishes.pickle', 'rb') as source:
    parishes = pickle.load(source)

lines = []
i = 0
for record in parishes:
    lines.append(
        record['name'] + '\t' + record['url'] + '\t' + record['city'] + '\t' +
        record['street'] + '\t' + record['postal_code'] + '\t' +
        record['meta_url'] + '\t' + record['gps'] + '\n')
    i += 1 if record['url'] else 0
tsv = ''.join(lines)
print(i)
with open('parishes.tsv', 'w') as target:
    target.write(tsv)
|
334
tests/parish_186.html
Normal file
334
tests/parish_186.html
Normal file
@ -0,0 +1,334 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||||
|
<head>
|
||||||
|
<link rel="shortcut icon" href="http://colaska.pl/favicon.ico" type="image/x-icon" />
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
|
<title>colaska.pl - mapa parafii, kościoły, śluby, chrzty, pogrzeby. Msze on-line- Parafia pod wezwaniem Niepokalanego Poczęcia Najświętszej Maryi Panny (Będzin)</title>
|
||||||
|
<meta name="description" content="Lista parafii polskich wraz z cennikiem usług. Msze on-line" />
|
||||||
|
<meta name="keywords" content="parafie, msze św, księża, usługi, chrzest, pogrzeb, wesele, zaświadczenia, bierzmowanie, msze online, msze on-line" />
|
||||||
|
<meta name="classification" content="global,all" />
|
||||||
|
<meta name="robots" content="all,index,follow" />
|
||||||
|
<link href="/css/style.css" rel="stylesheet" type="text/css" />
|
||||||
|
<link href="/css/prettyPhoto.css" rel="stylesheet" type="text/css" />
|
||||||
|
<link href="/css/jquery.popup.css" rel="stylesheet" type="text/css" />
|
||||||
|
<script type="text/javascript" src="/js/jquery.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/jquery.popup.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/jquery.toggleformtext.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/script.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/easyTooltip.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/prettyPhoto.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/jquery.blink.js"></script>
|
||||||
|
|
||||||
|
<script type="text/javascript">
|
||||||
|
if (top.location==document.location){
|
||||||
|
} else {
|
||||||
|
parent.location= 'http://kasuj.pl';
|
||||||
|
}
|
||||||
|
var _gaq = _gaq || [];
|
||||||
|
_gaq.push(['_setAccount', 'UA-23764225-1']);
|
||||||
|
_gaq.push(['_trackPageview']);
|
||||||
|
|
||||||
|
(function() {
|
||||||
|
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
||||||
|
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
||||||
|
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
||||||
|
})();
|
||||||
|
|
||||||
|
</script>
|
||||||
|
<script src="http://maps.google.com/maps/api/js?sensor=false" type="text/javascript"></script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="header">
|
||||||
|
|
||||||
|
<form id="search" method="get" action="/index/szukaj/">
|
||||||
|
<input type="text" name="phrase" class="input topSearch" title="miejscowość, kod lub nazwa parafii" size="25"/><input type="submit" class="button imgV" value="szukaj"/>
|
||||||
|
<div id="searchSelector"></div>
|
||||||
|
<div id="suggestions" class="suggestionsBox radius" style="display: none;">
|
||||||
|
<div id="suggestionsList" class="suggestionList"></div>
|
||||||
|
</div>
|
||||||
|
<input type="hidden" name="dest" id="destSelector" value="p" />
|
||||||
|
<div id="searchSelect">
|
||||||
|
<span class="block pb5 pt5 jsDestSelect pl10" name="p">szukaj w bazie danych parafii</span>
|
||||||
|
<!--<span class="block pb5 pt5 jsDestSelect pl10" name="f">szukaj w wątkach forum dyskusyjnego</span>-->
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<ul class="menuTop radius">
|
||||||
|
<li><a href="/" >Strona główna</a></li>
|
||||||
|
<!--<li><a href="/forum" >Forum</a></li>
|
||||||
|
<li><a href="/forum/hot" >Gorące tematy</a></li>-->
|
||||||
|
<li><a href="/index/press" >Piszą o nas</a></li>
|
||||||
|
<li><a href="/authfront/login" >Logowanie</a></li>
|
||||||
|
<li><a href="/profile/register" >Rejestracja</a></li>
|
||||||
|
<li><a href="/index/page/name/pomoc" >Jak pomóc?</a></li>
|
||||||
|
<li class="hit"><a href="/msze" >Msze online</a></li>
|
||||||
|
</ul>
|
||||||
|
<div class="logo pointer" onclick="window.location='/'"><h1>colaska.pl</h1></div>
|
||||||
|
</div>
|
||||||
|
<div id="rrec">
|
||||||
|
<script async src="//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
|
||||||
|
<!-- Colaska duża -->
|
||||||
|
<ins class="adsbygoogle"
|
||||||
|
style="display:inline-block;width:728px;height:90px"
|
||||||
|
data-ad-client="ca-pub-9503012770174245"
|
||||||
|
data-ad-slot="5302778274"></ins>
|
||||||
|
<script>
|
||||||
|
(adsbygoogle = window.adsbygoogle || []).push({});
|
||||||
|
</script>
|
||||||
|
</div>
|
||||||
|
<div id="main">
|
||||||
|
<div id="otolBaner"><a href="http://otolista.pl" alt="Układanie list prezentów na każdą okazję. Edycja, wysyłka do wskazanych odbiorców. Rezerwacja prezentów z listy."><img src="/gfx/otolistaFixed.png" /></a></div>
|
||||||
|
<div class="left">
|
||||||
|
<!--
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2 class="rel hot">Gorący temat</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<p class="pb10"><a href="/forum/post/id/692"><strong>Kościół w oficjalnym związku</strong><span class="block">Kościół a polityka<br />
|
||||||
|
Tema...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/148"><strong>Chcesz czy nie zapłacić musisz</strong><span class="block">Szczególny dzień dla rodziców,...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/247"><strong>Przepraszamy, zamknięte</strong><span class="block">Większość kościołów jest zamkn...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/15"><strong>Dać czy nie dać</strong><span class="block">Prozaiczna rzecz - taca w kośc...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/1767"><strong>Reko-lekcje czy re-kolekcje?</strong><span class="block">Reko-lekcje czy re-kolekcje?<b...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/544"><strong>Biznes na wysokościach</strong><span class="block">W dobie techniki jaką mamy prz...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/291"><strong>Pan i władca na tronie w Toruniu</strong><span class="block">No właśnie. Podobnie jak nasza...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/193"><strong>Raz a dobrze, bo tanio nie jest</strong><span class="block">Ważne tematy: sala, samochód,...</span></a></p>
|
||||||
|
<div class="mt10 tRight">
|
||||||
|
<a href="/forum/hot" class="button pt5 pb5 pl10 pr10 mr5">zobacz więcej</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
-->
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2 class="rel">Mapy parafii</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<form action="/index/region/" id="formRegion" method="get" >
|
||||||
|
<select class="input" onchange="submit()" name="wojid">
|
||||||
|
<option value="0">Wybierz województwo</option>
|
||||||
|
<option value="1" >Dolnośląskie (729)</option>
|
||||||
|
<option value="2" >Kujawsko-pomorskie (546)</option>
|
||||||
|
<option value="3" >Lubelskie (603)</option>
|
||||||
|
<option value="4" >Lubuskie (248)</option>
|
||||||
|
<option value="5" >Lódzkie (576)</option>
|
||||||
|
<option value="6" >Małopolskie (952)</option>
|
||||||
|
<option value="7" >Mazowieckie (1052)</option>
|
||||||
|
<option value="8" >Opolskie (408)</option>
|
||||||
|
<option value="9" >Podkarpackie (823)</option>
|
||||||
|
<option value="10" >Podlaskie (297)</option>
|
||||||
|
<option value="11" >Pomorskie (538)</option>
|
||||||
|
<option value="12" selected>Śląskie (1040)</option>
|
||||||
|
<option value="13" >Świętokrzyskie (405)</option>
|
||||||
|
<option value="14" >Warmińsko-mazurskie (507)</option>
|
||||||
|
<option value="15" >Wielkopolskie (936)</option>
|
||||||
|
<option value="16" >Zachodniopomorskie (438)</option>
|
||||||
|
</select>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2 class="rel">Poinformuj</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<p class="">
|
||||||
|
<span class="pointer sendInfo">Powiadom swoich znajomych</span>
|
||||||
|
</p>
|
||||||
|
<div class="hr"></div>
|
||||||
|
<div class="">
|
||||||
|
<iframe src="http://www.facebook.com/plugins/likebox.php?href=http%3A%2F%2Fwww.facebook.com%2Fpages%2Fcolaskapl%2F183674431692242&width=210&colorscheme=light&show_faces=false&border_color=%23D0DBE8&stream=false&header=true&height=62" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:210px; height:62px;" allowTransparency="true"></iframe>
|
||||||
|
<!--
|
||||||
|
<div class="hr"></div>
|
||||||
|
<div id="fb-root"></div><script src="http://connect.facebook.net/en_US/all.js#xfbml=1"></script><fb:like href="http://colaska.pl" send="true" width="210" show_faces="false" font="verdana"></fb:like>
|
||||||
|
-->
|
||||||
|
</div>
|
||||||
|
<div class="hr"></div>
|
||||||
|
<div class="pb10">
|
||||||
|
<div class="tlMain b">
|
||||||
|
<script type="text/javascript" src="http://apis.google.com/js/plusone.js"></script>
|
||||||
|
<g:plusone></g:plusone>
|
||||||
|
</div>
|
||||||
|
<div class="tlMain">
|
||||||
|
<a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script>
|
||||||
|
</div>
|
||||||
|
<br class="clear"/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<!-- <div class="box small radius">
|
||||||
|
<h2 class="rel">Statystyki</h2>
|
||||||
|
<div class="p10">
|
||||||
|
Parafii w bazie: <strong></strong><br />
|
||||||
|
Parafii z opisem: <strong></strong><br />
|
||||||
|
Parafii ze zdjęciem: <strong></strong><br />
|
||||||
|
Zapisanych cen: <strong></strong><br />
|
||||||
|
Ocenionych parafii: <strong></strong><br />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
-->
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2>Świeczka oraz reklama</h2>
|
||||||
|
<div class="p10 center">
|
||||||
|
|
||||||
|
|
||||||
|
<a rel="prettyPhoto" href="/gfx/runCandle.png"><img src="/gfx/candle_b.png" class="pb20" alt="Wejdź na stronę parafii, kliknij zakładkę ŚWIECZKI." title="Wejdź na stronę parafii, kliknij zakładkę ŚWIECZKI."/></a><br />
|
||||||
|
<script type="text/javascript"><!--
|
||||||
|
google_ad_client = "ca-pub-9503012770174245";
|
||||||
|
/* colaska */
|
||||||
|
google_ad_slot = "3980761610";
|
||||||
|
google_ad_width = 200;
|
||||||
|
google_ad_height = 200;
|
||||||
|
//-->
|
||||||
|
</script>
|
||||||
|
<script type="text/javascript"
|
||||||
|
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
|
||||||
|
</script><br /><br />
|
||||||
|
<script type="text/javascript" id="AdTaily_Widget" src="http://static.adtaily.pl/widget.js#UMUB0WcRqxL5eIG"></script>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div class="right">
|
||||||
|
<div class="box big radius">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Szczegóły parafii</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<p class="smallF"><a href="/">Strona główna</a> » <a href="/index/region/wojid/12/page/1">Śląskie</a> » Będzin » Niepokalanego Poczęcia Najświętszej Maryi Panny<br /><br /></p>
|
||||||
|
<div class="pHead rel">
|
||||||
|
<p class="title"><a href="/files/pictures/186/1.jpg" alt="colaska.pl" title="Parafia pod wezwaniem Niepokalanego Poczęcia Najświętszej Maryi Panny" rel="prettyPhoto[pp_gal]"><img src="/files/pictures/186/m1.jpg" class="mainImage radius"/></a>Parafia pod wezwaniem Niepokalanego Poczęcia Najświętszej Maryi Panny</p>
|
||||||
|
<span class="city">Będzin</span>
|
||||||
|
|
||||||
|
|
||||||
|
<p>ul. Pokoju 28<br />42-504 Będzin-Łagisza</p>
|
||||||
|
<p>(32)2675572</p>
|
||||||
|
<div class="voteIcon radius" id="186">
|
||||||
|
<p class="inf f14 strong">Oceń parafię</p>
|
||||||
|
<p class="votePlus rel"><span class="cGreen">Polecam</span><span class="cx">1</span></p>
|
||||||
|
<p class="voteMinus rel"><span class="cRed">Nie polecam</span><span class="cx">0</span></p>
|
||||||
|
<span class="com"></span>
|
||||||
|
</div>
|
||||||
|
<a href="/index/parafia/id/186/tabs/fifth" class="candleIcon"><img src="/gfx/runCandleSmall.png" /></a>
|
||||||
|
<br class="clear"/>
|
||||||
|
</div>
|
||||||
|
<div class="priceInfo rel radius">
|
||||||
|
<div class="head" name="box1" id="tabsprice">Koszty usług</div>
|
||||||
|
<!--<div class="head second nonActive" name="box2" id="tabscommnet">Komentarze</div>-->
|
||||||
|
<div class="head second nonActive" name="box3" id="tabsinfo">Informacje</div>
|
||||||
|
<div class="head third nonActive map" name="box4" id="tabsmaps" gps=" 19.140243530273438,50.354281540838365"><span class="">Lokalizacja</span></div>
|
||||||
|
<div class="head fourth nonActive" name="box5" id="tabscandle">Świeczki</div>
|
||||||
|
|
||||||
|
<div id="box1" class="boxElement">
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Ślub</p>
|
||||||
|
<p class="desc">Koszt usługi czyli ceremonia zaślubin w kościele. Łącznie z ew. ubraniem kościoła, kosztami kościelnego, organisty itp. Czyli łączny koszt "pakietu".</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">1300</span> zł </div>
|
||||||
|
<div class="userPrice" id="186" name="1">Twoja cena: <br /><input type="text" maxlength="4" id="price_1" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Chrzest</p>
|
||||||
|
<p class="desc">Koszt ceremonii chrztu, bez kosztów zaświadczeń z innych parafii.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">100</span> zł </div>
|
||||||
|
<div class="userPrice" id="186" name="2">Twoja cena: <br /><input type="text" maxlength="4" id="price_2" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Zaświadczenie</p>
|
||||||
|
<p class="desc">Cena zaświadczenia potrzebna do tego by być chrzestnym czy wziąć ślub.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">10</span> zł </div>
|
||||||
|
<div class="userPrice" id="186" name="3">Twoja cena: <br /><input type="text" maxlength="4" id="price_3" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Pogrzeb</p>
|
||||||
|
<p class="desc">Łączny wymagany przez proboszcza koszt. W skład usługi wchodzi msza pogrzebowa, miejsce na cmentarzu, organista czy msze żałobne. Wszystko w ramach jednorazowej opłaty.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="186" name="4">Twoja cena: <br /><input type="text" maxlength="4" id="price_4" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Msza</p>
|
||||||
|
<p class="desc">Kwota za jaką zazwyczaj zamawia się mszę. Bez znaczenia czy jest to dzień powszedni, niedziela czy święto.
|
||||||
|
Koszt jednej mszy.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">50</span> zł </div>
|
||||||
|
<div class="userPrice" id="186" name="5">Twoja cena: <br /><input type="text" maxlength="4" id="price_5" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Wypominki</p>
|
||||||
|
<p class="desc">Opłata za "jedną duszę" która ma być wymieniona w wypominkach czy to rocznych, czy półrocznych, czy jednorazowych.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="186" name="6">Twoja cena: <br /><input type="text" maxlength="4" id="price_6" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div id="box2" class="none boxElement">
|
||||||
|
<div class="p10 mt30">
|
||||||
|
<div class="info">
|
||||||
|
Tej parafii jeszcze nikt nie skomentował.<br />Bądź pierwszy. Kliknij <a href="/forum/threads/id/14" class="strong">tutaj</a> aby przejść do forum.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div id="box3" class="none boxElement p10">
|
||||||
|
|
||||||
|
<br />
|
||||||
|
Data powołania parafii-1924 rok<br />
|
||||||
|
<br />
|
||||||
|
<p class="p10 strong cGreen">Uzupełnij dane o parafii. Zbieraj punkty.</p>
|
||||||
|
<p class="p10 strong cRed">Aby wysłać formularz musisz być zalogowany.</p>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="box4" class="boxElement none">
|
||||||
|
<div class="p10 mt30">
|
||||||
|
<p class="mt20 mb20">
|
||||||
|
Nie ta lokalizacja? Kliknij na mapie aby wskazać poprawne miejsce. Nowo utworzony punkt można również przenieść za pomocą myszki. Po wybraniu nowej lokalizacji kliknij pod mapą "wyślij nową lokalizację".
|
||||||
|
</p>
|
||||||
|
<div id="mapka" style="height:500px;">
|
||||||
|
</div>
|
||||||
|
<div id="newPos" rel="186" class="none mt20 mb20" name="">Zaznaczono nową pozycję punktu: <span class="jsAddPosition spanLink ml10">wyślij nową lokalizację</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div id="box5" class="boxElement none">
|
||||||
|
<div class="p10 mt30">
|
||||||
|
|
||||||
|
<br class="clear"/>
|
||||||
|
<p class="pb20 pt20">Aby zapalić świeczkę w tej parafii na 30 dni wyślij sms o treści <br /><span class="f13"><strong>TC.URGG.186.TWOJA_INTENCJA</strong></span> na numer <span class="f13"><strong>72068</strong></span><br /> (zamiast TWOJA_INTENCJA wpisz osobę lub osoby za które chcesz zapalić świeczkę)<br />Koszt wysłania SMS wynosi 2,46zł (z VAT).</p>
|
||||||
|
<p class="pb20 pt20">Usługa dostępna w sieciach Orange, Era, Plus, Play.<br />
|
||||||
|
Serwis SMS obsługuje Dotpay.pl. <br />
|
||||||
|
Opłaty za wysłanie wiadomości SMS 2,46zł z VAT.<br />
|
||||||
|
Wszelkie pytania proszę wysyłać do właściciela serwisu na adres email <a href="mailto:pomoc@colaska.pl">pomoc@colaska.pl</a><br />
|
||||||
|
Regulamin usługi http://www.dotpay.pl/regulaminsms.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style="text-align:center;margin:20px 0px;">
|
||||||
|
<!-- Pula: 300x250 //-->
|
||||||
|
<script type="text/javascript">
|
||||||
|
document.write(unescape('%3Cscript type="text/javascript" src="' + document.location.protocol +
|
||||||
|
'//ec.bankier.pl/show2/MjQ0MzIsMjI1NzMsMzcwNDg/' + Math.random() + '/'+ '"%3E%3C/script%3E'));
|
||||||
|
</script>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div> </div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<br class="clear"/>
|
||||||
|
<div class="infoSuggest">
|
||||||
|
<div class="click">
|
||||||
|
Twój wkład
|
||||||
|
</div>
|
||||||
|
<div class="textSugest f12">
|
||||||
|
<span class="block f13 cBlue center strong pb10">Twoje zdjęcia na portalu?</span>
|
||||||
|
To możliwe, prześlij nam zdjęcie dowolnego kościoła. Zamieścimy je na stronie parafii z informacją kto zdjęcie nadesłał (opcja).<br /><br />
|
||||||
|
Wystarczy wysłać zdjęcie na adres <a href="mailto:pomoc@colaska.pl" class="cRed">pomoc@colaska.pl</a> lub skorzystać z formularza na stronie parafii w zakładce "Informacje".
|
||||||
|
<span class="tRight block pt10">Dziękujemy za pomoc</span>
|
||||||
|
</div>
|
||||||
|
</div><!-- .infoSuggest -->
|
||||||
|
<div id="links" class="radius"><a href="http://otolista.pl"><img src="/links/otolista_pl.png" alt="otolista.pl"/></a><a href="http://wieloletnie.pl"><img src="/links/wieloletnie_pl.png" alt="wieloletnie.pl"/></a><a href="http://androidapp.pl"><img src="/links/androidapp_pl.png" alt="androidapp.pl"/></a></div>
|
||||||
|
|
||||||
|
<div id="footer" style="padding-bottom:10px;"><a href="/">Strona główna</a> | <!--<a href="/forum">Forum</a> |--> <a href="/index/page/name/kontakt">Kontakt</a><span style="padding-left:100px">© 2017 <a href="http://netcomplete.pl" rel="external">Net Complete</a>. All Rights Reserved</span></div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
336
tests/parish_2765.html
Normal file
336
tests/parish_2765.html
Normal file
@ -0,0 +1,336 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||||
|
<head>
|
||||||
|
<link rel="shortcut icon" href="http://colaska.pl/favicon.ico" type="image/x-icon" />
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
|
<title>colaska.pl - mapa parafii, kościoły, śluby, chrzty, pogrzeby. Msze on-line- Parafia pod wezwaniem Św. Rocha (Jasieniec)</title>
|
||||||
|
<meta name="description" content="Lista parafii polskich wraz z cennikiem usług. Msze on-line" />
|
||||||
|
<meta name="keywords" content="parafie, msze św, księża, usługi, chrzest, pogrzeb, wesele, zaświadczenia, bierzmowanie, msze online, msze on-line" />
|
||||||
|
<meta name="classification" content="global,all" />
|
||||||
|
<meta name="robots" content="all,index,follow" />
|
||||||
|
<link href="/css/style.css" rel="stylesheet" type="text/css" />
|
||||||
|
<link href="/css/prettyPhoto.css" rel="stylesheet" type="text/css" />
|
||||||
|
<link href="/css/jquery.popup.css" rel="stylesheet" type="text/css" />
|
||||||
|
<script type="text/javascript" src="/js/jquery.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/jquery.popup.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/jquery.toggleformtext.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/script.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/easyTooltip.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/prettyPhoto.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/jquery.blink.js"></script>
|
||||||
|
|
||||||
|
<script type="text/javascript">
|
||||||
|
if (top.location==document.location){
|
||||||
|
} else {
|
||||||
|
parent.location= 'http://kasuj.pl';
|
||||||
|
}
|
||||||
|
var _gaq = _gaq || [];
|
||||||
|
_gaq.push(['_setAccount', 'UA-23764225-1']);
|
||||||
|
_gaq.push(['_trackPageview']);
|
||||||
|
|
||||||
|
(function() {
|
||||||
|
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
||||||
|
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
||||||
|
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
||||||
|
})();
|
||||||
|
|
||||||
|
</script>
|
||||||
|
<script src="http://maps.google.com/maps/api/js?sensor=false" type="text/javascript"></script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="header">
|
||||||
|
|
||||||
|
<form id="search" method="get" action="/index/szukaj/">
|
||||||
|
<input type="text" name="phrase" class="input topSearch" title="miejscowość, kod lub nazwa parafii" size="25"/><input type="submit" class="button imgV" value="szukaj"/>
|
||||||
|
<div id="searchSelector"></div>
|
||||||
|
<div id="suggestions" class="suggestionsBox radius" style="display: none;">
|
||||||
|
<div id="suggestionsList" class="suggestionList"></div>
|
||||||
|
</div>
|
||||||
|
<input type="hidden" name="dest" id="destSelector" value="p" />
|
||||||
|
<div id="searchSelect">
|
||||||
|
<span class="block pb5 pt5 jsDestSelect pl10" name="p">szukaj w bazie danych parafii</span>
|
||||||
|
<!--<span class="block pb5 pt5 jsDestSelect pl10" name="f">szukaj w wątkach forum dyskusyjnego</span>-->
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<ul class="menuTop radius">
|
||||||
|
<li><a href="/" >Strona główna</a></li>
|
||||||
|
<!--<li><a href="/forum" >Forum</a></li>
|
||||||
|
<li><a href="/forum/hot" >Gorące tematy</a></li>-->
|
||||||
|
<li><a href="/index/press" >Piszą o nas</a></li>
|
||||||
|
<li><a href="/authfront/login" >Logowanie</a></li>
|
||||||
|
<li><a href="/profile/register" >Rejestracja</a></li>
|
||||||
|
<li><a href="/index/page/name/pomoc" >Jak pomóc?</a></li>
|
||||||
|
<li class="hit"><a href="/msze" >Msze online</a></li>
|
||||||
|
</ul>
|
||||||
|
<div class="logo pointer" onclick="window.location='/'"><h1>colaska.pl</h1></div>
|
||||||
|
</div>
|
||||||
|
<div id="rrec">
|
||||||
|
<script async src="//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
|
||||||
|
<!-- Colaska duża -->
|
||||||
|
<ins class="adsbygoogle"
|
||||||
|
style="display:inline-block;width:728px;height:90px"
|
||||||
|
data-ad-client="ca-pub-9503012770174245"
|
||||||
|
data-ad-slot="5302778274"></ins>
|
||||||
|
<script>
|
||||||
|
(adsbygoogle = window.adsbygoogle || []).push({});
|
||||||
|
</script>
|
||||||
|
</div>
|
||||||
|
<div id="main">
|
||||||
|
<div id="otolBaner"><a href="http://otolista.pl" alt="Układanie list prezentów na każdą okazję. Edycja, wysyłka do wskazanych odbiorców. Rezerwacja prezentów z listy."><img src="/gfx/otolistaFixed.png" /></a></div>
|
||||||
|
<div class="left">
|
||||||
|
<!--
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2 class="rel hot">Gorący temat</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<p class="pb10"><a href="/forum/post/id/193"><strong>Raz a dobrze, bo tanio nie jest</strong><span class="block">Ważne tematy: sala, samochód,...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/247"><strong>Przepraszamy, zamknięte</strong><span class="block">Większość kościołów jest zamkn...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/405"><strong>Śmierć i podatki</strong><span class="block">Podobno to są jedyne pewne rze...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/544"><strong>Biznes na wysokościach</strong><span class="block">W dobie techniki jaką mamy prz...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/484"><strong>Do szkoły marsz</strong><span class="block">a tam już czeka na nas ksiądz...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/1767"><strong>Reko-lekcje czy re-kolekcje?</strong><span class="block">Reko-lekcje czy re-kolekcje?<b...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/372"><strong>Kościół poprawia orientację</strong><span class="block">Dużo w Polsce jest kościołów....</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/291"><strong>Pan i władca na tronie w Toruniu</strong><span class="block">No właśnie. Podobnie jak nasza...</span></a></p>
|
||||||
|
<div class="mt10 tRight">
|
||||||
|
<a href="/forum/hot" class="button pt5 pb5 pl10 pr10 mr5">zobacz więcej</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
-->
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2 class="rel">Mapy parafii</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<form action="/index/region/" id="formRegion" method="get" >
|
||||||
|
<select class="input" onchange="submit()" name="wojid">
|
||||||
|
<option value="0">Wybierz województwo</option>
|
||||||
|
<option value="1" >Dolnośląskie (729)</option>
|
||||||
|
<option value="2" >Kujawsko-pomorskie (546)</option>
|
||||||
|
<option value="3" >Lubelskie (603)</option>
|
||||||
|
<option value="4" >Lubuskie (248)</option>
|
||||||
|
<option value="5" >Lódzkie (576)</option>
|
||||||
|
<option value="6" >Małopolskie (952)</option>
|
||||||
|
<option value="7" selected>Mazowieckie (1052)</option>
|
||||||
|
<option value="8" >Opolskie (408)</option>
|
||||||
|
<option value="9" >Podkarpackie (823)</option>
|
||||||
|
<option value="10" >Podlaskie (297)</option>
|
||||||
|
<option value="11" >Pomorskie (538)</option>
|
||||||
|
<option value="12" >Śląskie (1040)</option>
|
||||||
|
<option value="13" >Świętokrzyskie (405)</option>
|
||||||
|
<option value="14" >Warmińsko-mazurskie (507)</option>
|
||||||
|
<option value="15" >Wielkopolskie (936)</option>
|
||||||
|
<option value="16" >Zachodniopomorskie (438)</option>
|
||||||
|
</select>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2 class="rel">Poinformuj</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<p class="">
|
||||||
|
<span class="pointer sendInfo">Powiadom swoich znajomych</span>
|
||||||
|
</p>
|
||||||
|
<div class="hr"></div>
|
||||||
|
<div class="">
|
||||||
|
<iframe src="http://www.facebook.com/plugins/likebox.php?href=http%3A%2F%2Fwww.facebook.com%2Fpages%2Fcolaskapl%2F183674431692242&width=210&colorscheme=light&show_faces=false&border_color=%23D0DBE8&stream=false&header=true&height=62" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:210px; height:62px;" allowTransparency="true"></iframe>
|
||||||
|
<!--
|
||||||
|
<div class="hr"></div>
|
||||||
|
<div id="fb-root"></div><script src="http://connect.facebook.net/en_US/all.js#xfbml=1"></script><fb:like href="http://colaska.pl" send="true" width="210" show_faces="false" font="verdana"></fb:like>
|
||||||
|
-->
|
||||||
|
</div>
|
||||||
|
<div class="hr"></div>
|
||||||
|
<div class="pb10">
|
||||||
|
<div class="tlMain b">
|
||||||
|
<script type="text/javascript" src="http://apis.google.com/js/plusone.js"></script>
|
||||||
|
<g:plusone></g:plusone>
|
||||||
|
</div>
|
||||||
|
<div class="tlMain">
|
||||||
|
<a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script>
|
||||||
|
</div>
|
||||||
|
<br class="clear"/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<!-- <div class="box small radius">
|
||||||
|
<h2 class="rel">Statystyki</h2>
|
||||||
|
<div class="p10">
|
||||||
|
Parafii w bazie: <strong></strong><br />
|
||||||
|
Parafii z opisem: <strong></strong><br />
|
||||||
|
Parafii ze zdjęciem: <strong></strong><br />
|
||||||
|
Zapisanych cen: <strong></strong><br />
|
||||||
|
Ocenionych parafii: <strong></strong><br />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
-->
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2>Świeczka oraz reklama</h2>
|
||||||
|
<div class="p10 center">
|
||||||
|
|
||||||
|
|
||||||
|
<a rel="prettyPhoto" href="/gfx/runCandle.png"><img src="/gfx/candle_b.png" class="pb20" alt="Wejdź na stronę parafii, kliknij zakładkę ŚWIECZKI." title="Wejdź na stronę parafii, kliknij zakładkę ŚWIECZKI."/></a><br />
|
||||||
|
<script type="text/javascript"><!--
|
||||||
|
google_ad_client = "ca-pub-9503012770174245";
|
||||||
|
/* colaska */
|
||||||
|
google_ad_slot = "3980761610";
|
||||||
|
google_ad_width = 200;
|
||||||
|
google_ad_height = 200;
|
||||||
|
//-->
|
||||||
|
</script>
|
||||||
|
<script type="text/javascript"
|
||||||
|
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
|
||||||
|
</script><br /><br />
|
||||||
|
<script type="text/javascript" id="AdTaily_Widget" src="http://static.adtaily.pl/widget.js#UMUB0WcRqxL5eIG"></script>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div class="right">
|
||||||
|
<div class="box big radius">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Szczegóły parafii</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<p class="smallF"><a href="/">Strona główna</a> » <a href="/index/region/wojid/7/page/1">Mazowieckie</a> » Jasieniec » Św. Rocha<br /><br /></p>
|
||||||
|
<div class="pHead rel">
|
||||||
|
<p class="title"><a href="/files/pictures/2765/1.jpg" alt="colaska.pl" title="Parafia pod wezwaniem Św. Rocha" rel="prettyPhoto[pp_gal]"><img src="/files/pictures/2765/m1.jpg" class="mainImage radius"/></a>Parafia pod wezwaniem Św. Rocha</p>
|
||||||
|
<span class="city">Jasieniec</span>
|
||||||
|
|
||||||
|
|
||||||
|
<p>ul. Warecka 37<br />05-604 Jasieniec k/Grójca</p>
|
||||||
|
<p>(48) 661 3512</p>
|
||||||
|
<div class="voteIcon radius" id="2765">
|
||||||
|
<p class="inf f14 strong">Oceń parafię</p>
|
||||||
|
<p class="votePlus rel"><span class="cGreen">Polecam</span><span class="cx">0</span></p>
|
||||||
|
<p class="voteMinus rel"><span class="cRed">Nie polecam</span><span class="cx">0</span></p>
|
||||||
|
<span class="com"></span>
|
||||||
|
</div>
|
||||||
|
<a href="/index/parafia/id/2765/tabs/fifth" class="candleIcon"><img src="/gfx/runCandleSmall.png" /></a>
|
||||||
|
<br class="clear"/>
|
||||||
|
<p class="link mt10"><a href="http://www.parafia-jasieniec.pl">http://www.parafia-jasieniec.pl</a></p>
|
||||||
|
</div>
|
||||||
|
<div class="priceInfo rel radius">
|
||||||
|
<div class="head" name="box1" id="tabsprice">Koszty usług</div>
|
||||||
|
<!--<div class="head second nonActive" name="box2" id="tabscommnet">Komentarze</div>-->
|
||||||
|
<div class="head second nonActive" name="box3" id="tabsinfo">Informacje</div>
|
||||||
|
<div class="head third nonActive map" name="box4" id="tabsmaps" gps=","><span class="blink">Lokalizacja</span></div>
|
||||||
|
<div class="head fourth nonActive" name="box5" id="tabscandle">Świeczki</div>
|
||||||
|
|
||||||
|
<div id="box1" class="boxElement">
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Ślub</p>
|
||||||
|
<p class="desc">Koszt usługi czyli ceremonia zaślubin w kościele. Łącznie z ew. ubraniem kościoła, kosztami kościelnego, organisty itp. Czyli łączny koszt "pakietu".</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="2765" name="1">Twoja cena: <br /><input type="text" maxlength="4" id="price_1" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Chrzest</p>
|
||||||
|
<p class="desc">Koszt ceremonii chrztu, bez kosztów zaświadczeń z innych parafii.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="2765" name="2">Twoja cena: <br /><input type="text" maxlength="4" id="price_2" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Zaświadczenie</p>
|
||||||
|
<p class="desc">Cena zaświadczenia potrzebna do tego by być chrzestnym czy wziąć ślub.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="2765" name="3">Twoja cena: <br /><input type="text" maxlength="4" id="price_3" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Pogrzeb</p>
|
||||||
|
<p class="desc">Łączny wymagany przez proboszcza koszt. W skład usługi wchodzi msza pogrzebowa, miejsce na cmentarzu, organista czy msze żałobne. Wszystko w ramach jednorazowej opłaty.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="2765" name="4">Twoja cena: <br /><input type="text" maxlength="4" id="price_4" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Msza</p>
|
||||||
|
<p class="desc">Kwota za jaką zazwyczaj zamawia się mszę. Bez znaczenia czy jest to dzień powszedni, niedziela czy święto.
|
||||||
|
Koszt jednej mszy.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="2765" name="5">Twoja cena: <br /><input type="text" maxlength="4" id="price_5" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Wypominki</p>
|
||||||
|
<p class="desc">Opłata za "jedną duszę" która ma być wymieniona w wypominkach czy to rocznych, czy półrocznych, czy jednorazowych.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="2765" name="6">Twoja cena: <br /><input type="text" maxlength="4" id="price_6" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div id="box2" class="none boxElement">
|
||||||
|
<div class="p10 mt30">
|
||||||
|
<div class="info">
|
||||||
|
Tej parafii jeszcze nikt nie skomentował.<br />Bądź pierwszy. Kliknij <a href="/forum/threads/id/9" class="strong">tutaj</a> aby przejść do forum.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div id="box3" class="none boxElement p10">
|
||||||
|
|
||||||
|
<div class="mt30">
|
||||||
|
<div class="info">
|
||||||
|
Brak dodatkowych informacji na temat parafii.<br />Jeśli chcesz uzupełnić informacje o tej parafii wyślij je na adres <a href="mailto:parafie@colaska.pl" class="strong">parafie@colaska.pl</a>.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<p class="p10 strong cGreen">Uzupełnij dane o parafii. Zbieraj punkty.</p>
|
||||||
|
<p class="p10 strong cRed">Aby wysłać formularz musisz być zalogowany.</p>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="box4" class="boxElement none">
|
||||||
|
<div class="p10 mt30">
|
||||||
|
<p class="mt20 mb20">
|
||||||
|
<strong>Brak lokalizacji.</strong><br /> Kliknij na mapie aby wskazać poprawne miejsce. Nowo utworzony punkt można również przenieść za pomocą myszki. Po wybraniu poprawnej lokalizacji kliknij pod mapą "wyślij nową lokalizację".
|
||||||
|
</p>
|
||||||
|
<div id="mapka" style="height:500px;">
|
||||||
|
</div>
|
||||||
|
<div id="newPos" rel="2765" class="none mt20 mb20" name="">Zaznaczono nową pozycję punktu: <span class="jsAddPosition spanLink ml10">wyślij nową lokalizację</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div id="box5" class="boxElement none">
|
||||||
|
<div class="p10 mt30">
|
||||||
|
|
||||||
|
<br class="clear"/>
|
||||||
|
<p class="pb20 pt20">Aby zapalić świeczkę w tej parafii na 30 dni wyślij sms o treści <br /><span class="f13"><strong>TC.URGG.2765.TWOJA_INTENCJA</strong></span> na numer <span class="f13"><strong>72068</strong></span><br /> (zamiast TWOJA_INTENCJA wpisz osobę lub osoby za które chcesz zapalić świeczkę)<br />Koszt wysłania SMS wynosi 2,46zł (z VAT).</p>
|
||||||
|
<p class="pb20 pt20">Usługa dostępna w sieciach Orange, Era, Plus, Play.<br />
|
||||||
|
Serwis SMS obsługuje Dotpay.pl. <br />
|
||||||
|
Opłaty za wysłanie wiadomości SMS 2,46zł z VAT.<br />
|
||||||
|
Wszelkie pytania proszę wysyłać do właściciela serwisu na adres email <a href="mailto:pomoc@colaska.pl">pomoc@colaska.pl</a><br />
|
||||||
|
Regulamin usługi http://www.dotpay.pl/regulaminsms.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style="text-align:center;margin:20px 0px;">
|
||||||
|
<!-- Pula: 300x250 //-->
|
||||||
|
<script type="text/javascript">
|
||||||
|
document.write(unescape('%3Cscript type="text/javascript" src="' + document.location.protocol +
|
||||||
|
'//ec.bankier.pl/show2/MjQ0MzIsMjI1NzMsMzcwNDg/' + Math.random() + '/'+ '"%3E%3C/script%3E'));
|
||||||
|
</script>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div> </div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<br class="clear"/>
|
||||||
|
<div class="infoSuggest">
|
||||||
|
<div class="click">
|
||||||
|
Twój wkład
|
||||||
|
</div>
|
||||||
|
<div class="textSugest f12">
|
||||||
|
<span class="block f13 cBlue center strong pb10">Twoje zdjęcia na portalu?</span>
|
||||||
|
To możliwe, prześlij nam zdjęcie dowolnego kościoła. Zamieścimy je na stronie parafii z informacją kto zdjęcie nadesłał (opcja).<br /><br />
|
||||||
|
Wystarczy wysłać zdjęcie na adres <a href="mailto:pomoc@colaska.pl" class="cRed">pomoc@colaska.pl</a> lub skorzystać z formularza na stronie parafii w zakładce "Informacje".
|
||||||
|
<span class="tRight block pt10">Dziękujemy za pomoc</span>
|
||||||
|
</div>
|
||||||
|
</div><!-- .infoSuggest -->
|
||||||
|
<div id="links" class="radius"><a href="http://otolista.pl"><img src="/links/otolista_pl.png" alt="otolista.pl"/></a><a href="http://wieloletnie.pl"><img src="/links/wieloletnie_pl.png" alt="wieloletnie.pl"/></a><a href="http://androidapp.pl"><img src="/links/androidapp_pl.png" alt="androidapp.pl"/></a></div>
|
||||||
|
|
||||||
|
<div id="footer" style="padding-bottom:10px;"><a href="/">Strona główna</a> | <!--<a href="/forum">Forum</a> |--> <a href="/index/page/name/kontakt">Kontakt</a><span style="padding-left:100px">© 2017 <a href="http://netcomplete.pl" rel="external">Net Complete</a>. All Rights Reserved</span></div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
336
tests/parish_4.html
Normal file
336
tests/parish_4.html
Normal file
@ -0,0 +1,336 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||||
|
<head>
|
||||||
|
<link rel="shortcut icon" href="http://colaska.pl/favicon.ico" type="image/x-icon" />
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||||
|
<title>colaska.pl - mapa parafii, kościoły, śluby, chrzty, pogrzeby. Msze on-line- Parafia pod wezwaniem św.Jana Chrzciciela (Adamów)</title>
|
||||||
|
<meta name="description" content="Lista parafii polskich wraz z cennikiem usług. Msze on-line" />
|
||||||
|
<meta name="keywords" content="parafie, msze św, księża, usługi, chrzest, pogrzeb, wesele, zaświadczenia, bierzmowanie, msze online, msze on-line" />
|
||||||
|
<meta name="classification" content="global,all" />
|
||||||
|
<meta name="robots" content="all,index,follow" />
|
||||||
|
<link href="/css/style.css" rel="stylesheet" type="text/css" />
|
||||||
|
<link href="/css/prettyPhoto.css" rel="stylesheet" type="text/css" />
|
||||||
|
<link href="/css/jquery.popup.css" rel="stylesheet" type="text/css" />
|
||||||
|
<script type="text/javascript" src="/js/jquery.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/jquery.popup.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/jquery.toggleformtext.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/script.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/easyTooltip.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/prettyPhoto.js"></script>
|
||||||
|
<script type="text/javascript" src="/js/jquery.blink.js"></script>
|
||||||
|
|
||||||
|
<script type="text/javascript">
|
||||||
|
if (top.location==document.location){
|
||||||
|
} else {
|
||||||
|
parent.location= 'http://kasuj.pl';
|
||||||
|
}
|
||||||
|
var _gaq = _gaq || [];
|
||||||
|
_gaq.push(['_setAccount', 'UA-23764225-1']);
|
||||||
|
_gaq.push(['_trackPageview']);
|
||||||
|
|
||||||
|
(function() {
|
||||||
|
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
||||||
|
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
||||||
|
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
||||||
|
})();
|
||||||
|
|
||||||
|
</script>
|
||||||
|
<script src="http://maps.google.com/maps/api/js?sensor=false" type="text/javascript"></script>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div id="header">
|
||||||
|
|
||||||
|
<form id="search" method="get" action="/index/szukaj/">
|
||||||
|
<input type="text" name="phrase" class="input topSearch" title="miejscowość, kod lub nazwa parafii" size="25"/><input type="submit" class="button imgV" value="szukaj"/>
|
||||||
|
<div id="searchSelector"></div>
|
||||||
|
<div id="suggestions" class="suggestionsBox radius" style="display: none;">
|
||||||
|
<div id="suggestionsList" class="suggestionList"></div>
|
||||||
|
</div>
|
||||||
|
<input type="hidden" name="dest" id="destSelector" value="p" />
|
||||||
|
<div id="searchSelect">
|
||||||
|
<span class="block pb5 pt5 jsDestSelect pl10" name="p">szukaj w bazie danych parafii</span>
|
||||||
|
<!--<span class="block pb5 pt5 jsDestSelect pl10" name="f">szukaj w wątkach forum dyskusyjnego</span>-->
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<ul class="menuTop radius">
|
||||||
|
<li><a href="/" >Strona główna</a></li>
|
||||||
|
<!--<li><a href="/forum" >Forum</a></li>
|
||||||
|
<li><a href="/forum/hot" >Gorące tematy</a></li>-->
|
||||||
|
<li><a href="/index/press" >Piszą o nas</a></li>
|
||||||
|
<li><a href="/authfront/login" >Logowanie</a></li>
|
||||||
|
<li><a href="/profile/register" >Rejestracja</a></li>
|
||||||
|
<li><a href="/index/page/name/pomoc" >Jak pomóc?</a></li>
|
||||||
|
<li class="hit"><a href="/msze" >Msze online</a></li>
|
||||||
|
</ul>
|
||||||
|
<div class="logo pointer" onclick="window.location='/'"><h1>colaska.pl</h1></div>
|
||||||
|
</div>
|
||||||
|
<div id="rrec">
|
||||||
|
<script async src="//pagead2.googlesyndication.com/pagead/js/adsbygoogle.js"></script>
|
||||||
|
<!-- Colaska duża -->
|
||||||
|
<ins class="adsbygoogle"
|
||||||
|
style="display:inline-block;width:728px;height:90px"
|
||||||
|
data-ad-client="ca-pub-9503012770174245"
|
||||||
|
data-ad-slot="5302778274"></ins>
|
||||||
|
<script>
|
||||||
|
(adsbygoogle = window.adsbygoogle || []).push({});
|
||||||
|
</script>
|
||||||
|
</div>
|
||||||
|
<div id="main">
|
||||||
|
<div id="otolBaner"><a href="http://otolista.pl" alt="Układanie list prezentów na każdą okazję. Edycja, wysyłka do wskazanych odbiorców. Rezerwacja prezentów z listy."><img src="/gfx/otolistaFixed.png" /></a></div>
|
||||||
|
<div class="left">
|
||||||
|
<!--
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2 class="rel hot">Gorący temat</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<p class="pb10"><a href="/forum/post/id/372"><strong>Kościół poprawia orientację</strong><span class="block">Dużo w Polsce jest kościołów....</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/89"><strong>Szukajcie a znajdziecie</strong><span class="block">Zawsze chętny do rozmowy, zaws...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/1182"><strong>Będzie nowy papież?</strong><span class="block">Wydawałoby się, że papież to n...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/561"><strong>Z czego składa się kościół?</strong><span class="block">Jest ołtarz, nawy boczne, chór...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/405"><strong>Śmierć i podatki</strong><span class="block">Podobno to są jedyne pewne rze...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/473"><strong>Słuchać czy przespać?</strong><span class="block">Może mieć wenę lub nie, może c...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/692"><strong>Kościół w oficjalnym związku</strong><span class="block">Kościół a polityka<br />
|
||||||
|
Tema...</span></a></p>
|
||||||
|
<p class="pb10"><a href="/forum/post/id/720"><strong>Pieniądz ma głos?</strong><span class="block">Wyjątkowością pracy księdza je...</span></a></p>
|
||||||
|
<div class="mt10 tRight">
|
||||||
|
<a href="/forum/hot" class="button pt5 pb5 pl10 pr10 mr5">zobacz więcej</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
-->
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2 class="rel">Mapy parafii</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<form action="/index/region/" id="formRegion" method="get" >
|
||||||
|
<select class="input" onchange="submit()" name="wojid">
|
||||||
|
<option value="0">Wybierz województwo</option>
|
||||||
|
<option value="1" >Dolnośląskie (729)</option>
|
||||||
|
<option value="2" >Kujawsko-pomorskie (546)</option>
|
||||||
|
<option value="3" >Lubelskie (603)</option>
|
||||||
|
<option value="4" >Lubuskie (248)</option>
|
||||||
|
<option value="5" >Lódzkie (576)</option>
|
||||||
|
<option value="6" >Małopolskie (952)</option>
|
||||||
|
<option value="7" selected>Mazowieckie (1052)</option>
|
||||||
|
<option value="8" >Opolskie (408)</option>
|
||||||
|
<option value="9" >Podkarpackie (823)</option>
|
||||||
|
<option value="10" >Podlaskie (297)</option>
|
||||||
|
<option value="11" >Pomorskie (538)</option>
|
||||||
|
<option value="12" >Śląskie (1040)</option>
|
||||||
|
<option value="13" >Świętokrzyskie (405)</option>
|
||||||
|
<option value="14" >Warmińsko-mazurskie (507)</option>
|
||||||
|
<option value="15" >Wielkopolskie (936)</option>
|
||||||
|
<option value="16" >Zachodniopomorskie (438)</option>
|
||||||
|
</select>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2 class="rel">Poinformuj</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<p class="">
|
||||||
|
<span class="pointer sendInfo">Powiadom swoich znajomych</span>
|
||||||
|
</p>
|
||||||
|
<div class="hr"></div>
|
||||||
|
<div class="">
|
||||||
|
<iframe src="http://www.facebook.com/plugins/likebox.php?href=http%3A%2F%2Fwww.facebook.com%2Fpages%2Fcolaskapl%2F183674431692242&width=210&colorscheme=light&show_faces=false&border_color=%23D0DBE8&stream=false&header=true&height=62" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:210px; height:62px;" allowTransparency="true"></iframe>
|
||||||
|
<!--
|
||||||
|
<div class="hr"></div>
|
||||||
|
<div id="fb-root"></div><script src="http://connect.facebook.net/en_US/all.js#xfbml=1"></script><fb:like href="http://colaska.pl" send="true" width="210" show_faces="false" font="verdana"></fb:like>
|
||||||
|
-->
|
||||||
|
</div>
|
||||||
|
<div class="hr"></div>
|
||||||
|
<div class="pb10">
|
||||||
|
<div class="tlMain b">
|
||||||
|
<script type="text/javascript" src="http://apis.google.com/js/plusone.js"></script>
|
||||||
|
<g:plusone></g:plusone>
|
||||||
|
</div>
|
||||||
|
<div class="tlMain">
|
||||||
|
<a href="http://twitter.com/share" class="twitter-share-button" data-count="horizontal">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script>
|
||||||
|
</div>
|
||||||
|
<br class="clear"/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<!-- <div class="box small radius">
|
||||||
|
<h2 class="rel">Statystyki</h2>
|
||||||
|
<div class="p10">
|
||||||
|
Parafii w bazie: <strong></strong><br />
|
||||||
|
Parafii z opisem: <strong></strong><br />
|
||||||
|
Parafii ze zdjęciem: <strong></strong><br />
|
||||||
|
Zapisanych cen: <strong></strong><br />
|
||||||
|
Ocenionych parafii: <strong></strong><br />
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
-->
|
||||||
|
<div class="box small radius">
|
||||||
|
<h2>Świeczka oraz reklama</h2>
|
||||||
|
<div class="p10 center">
|
||||||
|
|
||||||
|
|
||||||
|
<a rel="prettyPhoto" href="/gfx/runCandle.png"><img src="/gfx/candle_b.png" class="pb20" alt="Wejdź na stronę parafii, kliknij zakładkę ŚWIECZKI." title="Wejdź na stronę parafii, kliknij zakładkę ŚWIECZKI."/></a><br />
|
||||||
|
<script type="text/javascript"><!--
|
||||||
|
google_ad_client = "ca-pub-9503012770174245";
|
||||||
|
/* colaska */
|
||||||
|
google_ad_slot = "3980761610";
|
||||||
|
google_ad_width = 200;
|
||||||
|
google_ad_height = 200;
|
||||||
|
//-->
|
||||||
|
</script>
|
||||||
|
<script type="text/javascript"
|
||||||
|
src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
|
||||||
|
</script><br /><br />
|
||||||
|
<script type="text/javascript" id="AdTaily_Widget" src="http://static.adtaily.pl/widget.js#UMUB0WcRqxL5eIG"></script>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div class="right">
|
||||||
|
<div class="box big radius">
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<h2>Szczegóły parafii</h2>
|
||||||
|
<div class="p10">
|
||||||
|
<p class="smallF"><a href="/">Strona główna</a> » <a href="/index/region/wojid/7/page/1">Mazowieckie</a> » Adamów » św.Jana Chrzciciela<br /><br /></p>
|
||||||
|
<div class="pHead rel">
|
||||||
|
<p class="title">Parafia pod wezwaniem św.Jana Chrzciciela</p>
|
||||||
|
<span class="city">Adamów</span>
|
||||||
|
|
||||||
|
|
||||||
|
<p>27-300 Brody Iłżeckie</p>
|
||||||
|
<p>(41) 2716375 </p>
|
||||||
|
<div class="voteIcon radius" id="4">
|
||||||
|
<p class="inf f14 strong">Oceń parafię</p>
|
||||||
|
<p class="votePlus rel"><span class="cGreen">Polecam</span><span class="cx">0</span></p>
|
||||||
|
<p class="voteMinus rel"><span class="cRed">Nie polecam</span><span class="cx">0</span></p>
|
||||||
|
<span class="com"></span>
|
||||||
|
</div>
|
||||||
|
<a href="/index/parafia/id/4/tabs/fifth" class="candleIcon"><img src="/gfx/runCandleSmall.png" /></a>
|
||||||
|
<br class="clear"/>
|
||||||
|
</div>
|
||||||
|
<div class="priceInfo rel radius">
|
||||||
|
<div class="head" name="box1" id="tabsprice">Koszty usług</div>
|
||||||
|
<!--<div class="head second nonActive" name="box2" id="tabscommnet">Komentarze</div>-->
|
||||||
|
<div class="head second nonActive" name="box3" id="tabsinfo">Informacje</div>
|
||||||
|
<div class="head third nonActive map" name="box4" id="tabsmaps" gps=","><span class="blink">Lokalizacja</span></div>
|
||||||
|
<div class="head fourth nonActive" name="box5" id="tabscandle">Świeczki</div>
|
||||||
|
|
||||||
|
<div id="box1" class="boxElement">
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Ślub</p>
|
||||||
|
<p class="desc">Koszt usługi czyli ceremonia zaślubin w kościele. Łącznie z ew. ubraniem kościoła, kosztami kościelnego, organisty itp. Czyli łączny koszt "pakietu".</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="4" name="1">Twoja cena: <br /><input type="text" maxlength="4" id="price_1" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Chrzest</p>
|
||||||
|
<p class="desc">Koszt ceremonii chrztu, bez kosztów zaświadczeń z innych parafii.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="4" name="2">Twoja cena: <br /><input type="text" maxlength="4" id="price_2" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Zaświadczenie</p>
|
||||||
|
<p class="desc">Cena zaświadczenia potrzebna do tego by być chrzestnym czy wziąć ślub.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="4" name="3">Twoja cena: <br /><input type="text" maxlength="4" id="price_3" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Pogrzeb</p>
|
||||||
|
<p class="desc">Łączny wymagany przez proboszcza koszt. W skład usługi wchodzi msza pogrzebowa, miejsce na cmentarzu, organista czy msze żałobne. Wszystko w ramach jednorazowej opłaty.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="4" name="4">Twoja cena: <br /><input type="text" maxlength="4" id="price_4" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Msza</p>
|
||||||
|
<p class="desc">Kwota za jaką zazwyczaj zamawia się mszę. Bez znaczenia czy jest to dzień powszedni, niedziela czy święto.
|
||||||
|
Koszt jednej mszy.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="4" name="5">Twoja cena: <br /><input type="text" maxlength="4" id="price_5" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
<div class="element rel">
|
||||||
|
<p class="name">Wypominki</p>
|
||||||
|
<p class="desc">Opłata za "jedną duszę" która ma być wymieniona w wypominkach czy to rocznych, czy półrocznych, czy jednorazowych.</p>
|
||||||
|
<div class="votePrice">Średnia cena: <br /><span class="price">-</span> zł </div>
|
||||||
|
<div class="userPrice" id="4" name="6">Twoja cena: <br /><input type="text" maxlength="4" id="price_6" name="price" class="input small center bigFont cGreen strong"/> zł <span class="savePrice">zapisz</span></div><div class="infoBack cRed"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div id="box2" class="none boxElement">
|
||||||
|
<div class="p10 mt30">
|
||||||
|
<div class="info">
|
||||||
|
Tej parafii jeszcze nikt nie skomentował.<br />Bądź pierwszy. Kliknij <a href="/forum/threads/id/9" class="strong">tutaj</a> aby przejść do forum.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div id="box3" class="none boxElement p10">
|
||||||
|
|
||||||
|
<div class="mt30">
|
||||||
|
<div class="info">
|
||||||
|
Brak dodatkowych informacji na temat parafii.<br />Jeśli chcesz uzupełnić informacje o tej parafii wyślij je na adres <a href="mailto:parafie@colaska.pl" class="strong">parafie@colaska.pl</a>.
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<p class="p10 strong cGreen">Uzupełnij dane o parafii. Zbieraj punkty.</p>
|
||||||
|
<p class="p10 strong cRed">Aby wysłać formularz musisz być zalogowany.</p>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div id="box4" class="boxElement none">
|
||||||
|
<div class="p10 mt30">
|
||||||
|
<p class="mt20 mb20">
|
||||||
|
<strong>Brak lokalizacji.</strong><br /> Kliknij na mapie aby wskazać poprawne miejsce. Nowo utworzony punkt można również przenieść za pomocą myszki. Po wybraniu poprawnej lokalizacji kliknij pod mapą "wyślij nową lokalizację".
|
||||||
|
</p>
|
||||||
|
<div id="mapka" style="height:500px;">
|
||||||
|
</div>
|
||||||
|
<div id="newPos" rel="4" class="none mt20 mb20" name="">Zaznaczono nową pozycję punktu: <span class="jsAddPosition spanLink ml10">wyślij nową lokalizację</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div id="box5" class="boxElement none">
|
||||||
|
<div class="p10 mt30">
|
||||||
|
|
||||||
|
<br class="clear"/>
|
||||||
|
<p class="pb20 pt20">Aby zapalić świeczkę w tej parafii na 30 dni wyślij sms o treści <br /><span class="f13"><strong>TC.URGG.4.TWOJA_INTENCJA</strong></span> na numer <span class="f13"><strong>72068</strong></span><br /> (zamiast TWOJA_INTENCJA wpisz osobę lub osoby za które chcesz zapalić świeczkę)<br />Koszt wysłania SMS wynosi 2,46zł (z VAT).</p>
|
||||||
|
<p class="pb20 pt20">Usługa dostępna w sieciach Orange, Era, Plus, Play.<br />
|
||||||
|
Serwis SMS obsługuje Dotpay.pl. <br />
|
||||||
|
Opłaty za wysłanie wiadomości SMS 2,46zł z VAT.<br />
|
||||||
|
Wszelkie pytania proszę wysyłać do właściciela serwisu na adres email <a href="mailto:pomoc@colaska.pl">pomoc@colaska.pl</a><br />
|
||||||
|
Regulamin usługi http://www.dotpay.pl/regulaminsms.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style="text-align:center;margin:20px 0px;">
|
||||||
|
<!-- Pula: 300x250 //-->
|
||||||
|
<script type="text/javascript">
|
||||||
|
document.write(unescape('%3Cscript type="text/javascript" src="' + document.location.protocol +
|
||||||
|
'//ec.bankier.pl/show2/MjQ0MzIsMjI1NzMsMzcwNDg/' + Math.random() + '/'+ '"%3E%3C/script%3E'));
|
||||||
|
</script>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</div> </div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<br class="clear"/>
|
||||||
|
<div class="infoSuggest">
|
||||||
|
<div class="click">
|
||||||
|
Twój wkład
|
||||||
|
</div>
|
||||||
|
<div class="textSugest f12">
|
||||||
|
<span class="block f13 cBlue center strong pb10">Twoje zdjęcia na portalu?</span>
|
||||||
|
To możliwe, prześlij nam zdjęcie dowolnego kościoła. Zamieścimy je na stronie parafii z informacją kto zdjęcie nadesłał (opcja).<br /><br />
|
||||||
|
Wystarczy wysłać zdjęcie na adres <a href="mailto:pomoc@colaska.pl" class="cRed">pomoc@colaska.pl</a> lub skorzystać z formularza na stronie parafii w zakładce "Informacje".
|
||||||
|
<span class="tRight block pt10">Dziękujemy za pomoc</span>
|
||||||
|
</div>
|
||||||
|
</div><!-- .infoSuggest -->
|
||||||
|
<div id="links" class="radius"><a href="http://otolista.pl"><img src="/links/otolista_pl.png" alt="otolista.pl"/></a><a href="http://wieloletnie.pl"><img src="/links/wieloletnie_pl.png" alt="wieloletnie.pl"/></a><a href="http://androidapp.pl"><img src="/links/androidapp_pl.png" alt="androidapp.pl"/></a></div>
|
||||||
|
|
||||||
|
<div id="footer" style="padding-bottom:10px;"><a href="/">Strona główna</a> | <!--<a href="/forum">Forum</a> |--> <a href="/index/page/name/kontakt">Kontakt</a><span style="padding-left:100px">© 2017 <a href="http://netcomplete.pl" rel="external">Net Complete</a>. All Rights Reserved</span></div>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
62
tests/test_parishesinfo.py
Normal file
62
tests/test_parishesinfo.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
import unittest
|
||||||
|
import sys, os
|
||||||
|
#print('!!! Dawid: ' + os.path.realpath('.'))
|
||||||
|
#print(sys.path)
|
||||||
|
|
||||||
|
# sys.path.append(os.path.realpath('..'))
|
||||||
|
# sys.path.append(os.path.realpath('.'))
|
||||||
|
from scraper.parishesinfo import ParishScraper
|
||||||
|
|
||||||
|
|
||||||
|
class TestParishScraper(unittest.TestCase):
    """Offline tests for ``ParishScraper._retrieve_info``.

    Every test loads a saved parish page (``parish_<id>.html``, kept in the
    same directory as this module), hands it to the scraper and checks the
    fields of the returned mapping: ``name``, ``city``, ``url``,
    ``meta_url``, ``street``, ``postal_code`` and ``gps``.
    """

    # Fixtures are located relative to this module rather than the current
    # working directory, so the suite passes no matter where it is run from.
    FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__))

    class RequestMock(object):
        """Bare page object exposing only ``url`` and ``text``.

        NOTE(review): presumably mirrors the response object the scraper
        receives in production; only these two attributes are set here.
        """

        def __init__(self, url=None, text=None):
            self.url = url
            self.text = text

    def _scrape_info(self, filename, url):
        """Run ``ParishScraper._retrieve_info`` on a saved HTML fixture.

        :param filename: fixture file name, resolved against
            ``FIXTURES_DIR`` (an absolute path is used unchanged).
        :param url: value exposed as ``page.url`` to the scraper.
        :return: whatever ``_retrieve_info`` returns for that page.
        """
        scraper = ParishScraper()
        path = os.path.join(self.FIXTURES_DIR, filename)
        # The fixtures declare charset=utf-8 and contain Polish diacritics;
        # decode explicitly instead of relying on the locale default.
        with open(path, 'r', encoding='utf-8') as f:
            page = self.RequestMock(url=url, text=f.read())
        return scraper._retrieve_info(page)

    def test_retrieve_info_from_page_186(self):
        """Parish 186: street and GPS present, no website link."""
        url = 'http://colaska.pl/index/parafia/id/186'
        result = self._scrape_info('parish_186.html', url)
        self.assertEqual(
            'Parafia pod wezwaniem Niepokalanego Poczęcia Najświętszej Maryi Panny',
            result['name'])
        self.assertEqual('Będzin', result['city'])
        self.assertEqual('', result['url'])
        self.assertEqual(url, result['meta_url'])
        self.assertEqual('ul. Pokoju 28', result['street'])
        self.assertEqual('42-504 Będzin-Łagisza', result['postal_code'])
        self.assertEqual('19.140243530273438,50.354281540838365',
                         result['gps'])

    def test_retrieve_info_from_page_2765(self):
        """Parish 2765: website link and street present, GPS empty (",")."""
        url = 'http://colaska.pl/index/parafia/id/2765'
        result = self._scrape_info('parish_2765.html', url)
        self.assertEqual('Parafia pod wezwaniem Św. Rocha', result['name'])
        self.assertEqual('Jasieniec', result['city'])
        self.assertEqual('http://www.parafia-jasieniec.pl', result['url'])
        self.assertEqual(url, result['meta_url'])
        self.assertEqual('ul. Warecka 37', result['street'])
        self.assertEqual('05-604 Jasieniec k/Grójca', result['postal_code'])
        self.assertEqual(',', result['gps'])

    def test_retrieve_info_from_page_4(self):
        """Parish 4: only a postal-code line; no street, link or GPS."""
        url = 'http://colaska.pl/index/parafia/id/4'
        result = self._scrape_info('parish_4.html', url)
        self.assertEqual('Parafia pod wezwaniem św.Jana Chrzciciela',
                         result['name'])
        self.assertEqual('Adamów', result['city'])
        self.assertEqual('', result['url'])
        self.assertEqual(url, result['meta_url'])
        self.assertEqual('', result['street'])
        self.assertEqual('27-300 Brody Iłżeckie', result['postal_code'])
        self.assertEqual(',', result['gps'])
|
||||||
|
|
||||||
|
|
||||||
|
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
10098
tsv_parishes.tsv
Normal file
10098
tsv_parishes.tsv
Normal file
File diff suppressed because it is too large
Load Diff
2
urls.txt
2
urls.txt
@ -1,2 +0,0 @@
|
|||||||
www.adamowice.katowice.opoka.org.pl
|
|
||||||
www.albigowa.parafia.info.pl
|
|
42
urls_a.txt
Normal file
42
urls_a.txt
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
adamowice.katowice.opoka.org.pl
|
||||||
|
aleksandrow-ww.salezjanie.pl
|
||||||
|
archidiecezja.lodz.pl/~andrespol
|
||||||
|
andrychow.bielsko.opoka.org.pl
|
||||||
|
parafiababiak.cba.pl
|
||||||
|
barcino.koszalin.opoka.org.pl
|
||||||
|
swanna.barczewo.pl
|
||||||
|
barglow.diecezja.elk.pl
|
||||||
|
parafiabarlinek.pl
|
||||||
|
bonifacy.rel.pl
|
||||||
|
pocieszna-gorka.czestochowa.opoka.org.pl
|
||||||
|
brat-albert.pl
|
||||||
|
jadwiga.wiara.org.pl
|
||||||
|
parafia.bedzin.pl
|
||||||
|
parafia.starabiala.pl
|
||||||
|
parafia-honorata.pl
|
||||||
|
parafianabrzeskiej.pl
|
||||||
|
parafia-bialobrzegi.pl
|
||||||
|
nmp.bialogard.koszalin.opoka.org.pl
|
||||||
|
matkakosciola.pl
|
||||||
|
nsj.bialystok.pl
|
||||||
|
abobola.aplus.pl
|
||||||
|
jadwiga.bialystok.opoka.org.pl
|
||||||
|
swroch.bialystok.opoka.org.pl
|
||||||
|
katedrabialostocka.pl
|
||||||
|
karmelwbielsku.pl
|
||||||
|
bielsko.salwatorianie.pl
|
||||||
|
parafiacyglas.pl
|
||||||
|
aleksandrowice.bielsko.opoka.org.pl
|
||||||
|
aleksandrowice.bielsko.opoka.org.pl
|
||||||
|
pawelbielsko.pl
|
||||||
|
bierutow.archidiecezja.wroc.pl
|
||||||
|
parafiabiery.pl
|
||||||
|
parafia.biesiekierz.eu
|
||||||
|
kosciolek.parafia.info.pl
|
||||||
|
objawieniepanskie.waw.pl
|
||||||
|
swmarcin.ostnet.pl
|
||||||
|
adamowice.katowice.opoka.org.pl
|
||||||
|
aleksandrow-ww.salezjanie.pl
|
||||||
|
archidiecezja.lodz.pl/~andrespol
|
||||||
|
andrychow.bielsko.opoka.org.pl
|
||||||
|
parafiababiak.cba.pl
|
1
urls_checked_a.txt
Normal file
1
urls_checked_a.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
http://www.adamowice.katowice.opoka.org.pl/
|
Loading…
Reference in New Issue
Block a user