Compare commits
4 Commits
cc79bb9d9d
...
ba9e42b9c6
Author | SHA1 | Date | |
---|---|---|---|
ba9e42b9c6 | |||
6f3567982d | |||
d76914b4c5 | |||
4bb6619911 |
53
README.md
53
README.md
@ -2,18 +2,15 @@
|
||||
|
||||
## Description
|
||||
|
||||
This project is a web scraper designed to extract data from websites. It can be customized to scrape various types of data and save it in different formats.
|
||||
This project is a web scraper designed to extract data from websites.
|
||||
|
||||
## Features
|
||||
|
||||
- Extracts data from web pages
|
||||
<!-- - Supports multiple data formats (CSV, JSON, etc.)
|
||||
- Customizable scraping rules
|
||||
- Error handling and logging -->
|
||||
☑️ Extracts data from web pages
|
||||
|
||||
## Installation
|
||||
## Usage
|
||||
|
||||
### Using Docker
|
||||
### With Docker
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
@ -27,57 +24,51 @@ git clone https://git.wmi.amu.edu.pl/s500042/webscraper
|
||||
cd webscraper
|
||||
```
|
||||
|
||||
3. Build the Docker image and run it using script:
|
||||
- On Linux, ?Mac <!-- I haven't tested it yet -->
|
||||
3. Build the Docker image and run it using the `start.py` script:
|
||||
|
||||
```bash
|
||||
./start.sh
|
||||
python scripts/start.py
|
||||
```
|
||||
|
||||
- Windows 🤡
|
||||
On Mac, you'll have to use
|
||||
|
||||
```bash
|
||||
python start.py
|
||||
python3 scripts/start.py
|
||||
```
|
||||
|
||||
This one will work just fine on Linux, but on Mac, you'll have to use
|
||||
|
||||
```bash
|
||||
python3 start.py
|
||||
```
|
||||
4. Check `/app/dist/data.json` file to see the extracted data.
|
||||
|
||||
### Without Docker
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/yourusername/webscraper.git
|
||||
git clone https://git.wmi.amu.edu.pl/s500042/webscraper
|
||||
```
|
||||
|
||||
2. Navigate to the project directory:
|
||||
2. Install the required dependencies:
|
||||
|
||||
```bash
|
||||
cd webscraper/app
|
||||
```
|
||||
|
||||
3. Install the required dependencies:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
pip install -r app/requirements.txt
|
||||
```
|
||||
|
||||
If you're on Arch Linux, you'll need to create a virtual environment.
|
||||
Here is a [step-by-step guide](#) that will help you create it.
|
||||
|
||||
## Usage
|
||||
|
||||
1. Configure the scraper by editing the `config.json` file.
|
||||
2. Run the scraper:
|
||||
3. Run `run_with_no_docker.py` script:
|
||||
|
||||
```bash
|
||||
python scraper.py
|
||||
python scripts/run_with_no_docker.py
|
||||
```
|
||||
|
||||
On Mac, you'll need to use:
|
||||
|
||||
```bash
|
||||
python3 scripts/run_with_no_docker.py
|
||||
```
|
||||
|
||||
4. Check `/app/dist/data.json` file to see the extracted data.
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
||||
|
@ -1,33 +0,0 @@
|
||||
import subprocess
|
||||
|
||||
|
||||
def quitCondition(command: str) -> bool:
    """Tell whether *command* is one of the keywords that stop the program."""
    quit_keywords = ("q", "quit", "exit", "stop")
    return any(command == keyword for keyword in quit_keywords)
|
||||
|
||||
|
||||
def helpCondition(command: str) -> bool:
    """Tell whether *command* asks for the help listing (``h`` or ``help``)."""
    help_keywords = ("h", "help")
    return any(command == keyword for keyword in help_keywords)
|
||||
|
||||
|
||||
def clearCondition(command: str) -> bool:
    """Tell whether *command* is one of the clear-screen keywords."""
    clear_keywords = ("c", "clear", "cls")
    return any(command == keyword for keyword in clear_keywords)
|
||||
|
||||
|
||||
def systemCommand(command: str) -> str:
    """Run the text after the leading ``$`` inside the ``webscraper`` container.

    The first character of *command* (the ``$`` marker) is dropped and the
    remainder is split on whitespace and re-joined with single spaces before
    being appended to ``docker exec -it webscraper``.

    Returns the decoded standard output of the ``docker exec`` call, or a
    "command not found" message when nothing follows the marker.
    """
    words = command[1:].split()
    # str.split() never yields empty strings: a bare "$" produces an empty
    # list, so test for emptiness instead of indexing words[0] (IndexError).
    if not words:
        return "Command not found. Write 'h' for help."
    completed = subprocess.run(
        f'docker exec -it webscraper {" ".join(words)}',
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return completed.stdout.decode()
|
||||
|
||||
|
||||
def restartCondition(command: str) -> bool:
    """Tell whether *command* requests a restart (``r`` or ``restart``)."""
    restart_keywords = ("r", "restart")
    return any(command == keyword for keyword in restart_keywords)
|
||||
|
||||
|
||||
def runCondition(command: str) -> bool:
    """Tell whether *command* is the ``run`` keyword."""
    run_keywords = ("run",)
    return any(command == keyword for keyword in run_keywords)
|
30
scripts/get_path.py
Normal file
30
scripts/get_path.py
Normal file
@ -0,0 +1,30 @@
|
||||
import os
|
||||
from run_command import run_command
|
||||
|
||||
|
||||
def get_path():
    """Return the root directory of the webscraper checkout around the cwd.

    The current working directory is split into components (always joined
    back with ``/``):

    * If exactly one component is named ``webscraper``, the path up to and
      including that component is returned.
    * If several are, the deepest ancestor whose path contains
      ``webscraper`` and that has a ``scripts`` sub-directory is returned;
      when none qualifies, the string
      ``"This is not a valid webscraper project."`` is returned instead.

    Raises:
        ValueError: if no component of the working directory is named
            ``webscraper``.
    """
    # os.getcwd() gives the same information as shelling out to `pwd`
    # without a subprocess and without a trailing newline to strip.
    parts = os.getcwd().replace(os.sep, "/").split("/")

    if "webscraper" not in parts:
        # Previously this surfaced as an opaque "'webscraper' is not in list".
        raise ValueError(
            "The current directory is not inside a 'webscraper' directory."
        )

    if parts.count("webscraper") > 1:
        # Ambiguous nesting: walk from the deepest directory upwards and take
        # the first candidate that actually contains the scripts folder.
        for end in range(len(parts), 0, -1):
            candidate = "/".join(parts[:end])
            if "webscraper" in candidate and os.path.isdir(
                f"{candidate}/scripts"
            ):
                return candidate
        return "This is not a valid webscraper project."

    return "/".join(parts[: parts.index("webscraper") + 1])
|
||||
|
||||
|
||||
def run_main():
    """Print the value returned by ``get_path`` to standard output."""
    project_root = get_path()
    print(project_root)


if __name__ == "__main__":
    run_main()
|
13
scripts/run_command.py
Normal file
13
scripts/run_command.py
Normal file
@ -0,0 +1,13 @@
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def run_command(command: str) -> str:
    """Run *command* through the shell and return its decoded stdout.

    Both output streams of the child process are captured. When the command
    exits with a non-zero status, the command line and its captured stderr
    are written to this process's stderr and the interpreter exits with the
    same status, so the function only returns for successful commands.

    Raises:
        SystemExit: carrying the child's exit status when it is non-zero.
    """
    process = subprocess.run(
        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    if process.returncode != 0:
        # Diagnostics go to stderr so they never mix with output that a
        # caller prints or pipes from stdout.
        print(f"Error running command: {command}", file=sys.stderr)
        print(process.stderr.decode(errors="replace"), file=sys.stderr)
        sys.exit(process.returncode)
    # errors="replace": external tools are not guaranteed to emit valid
    # UTF-8, and a stray byte should not crash the wrapper.
    return process.stdout.decode(errors="replace")
|
17
scripts/run_with_no_docker.py
Normal file
17
scripts/run_with_no_docker.py
Normal file
@ -0,0 +1,17 @@
|
||||
import subprocess
|
||||
import os
|
||||
from run_command import run_command
|
||||
from get_path import get_path
|
||||
|
||||
|
||||
def run_main():
    """Run ``app/main.py`` on the host with ``python3`` and print its output.

    The project root is taken from ``get_path`` and the script is started
    through ``run_command``; whatever ``run_command`` returns is printed.
    """
    path = get_path()
    # No try/except here: subprocess.run() only raises CalledProcessError
    # when called with check=True, which run_command does not use, and
    # run_command reports a failing command and exits on its own.
    print(run_command(f"python3 {path}/app/main.py"))


if __name__ == "__main__":
    run_main()
|
@ -1,31 +1,25 @@
|
||||
import subprocess
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
def run_command(command: str) -> str:
    """Run *command* through the shell and return its decoded stdout.

    Both output streams of the child process are captured. When the command
    exits with a non-zero status, the command line and its captured stderr
    are written to this process's stderr and the interpreter exits with the
    same status, so the function only returns for successful commands.

    Raises:
        SystemExit: carrying the child's exit status when it is non-zero.
    """
    process = subprocess.run(
        command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    if process.returncode != 0:
        # Diagnostics go to stderr so they never mix with output that a
        # caller prints or pipes from stdout.
        print(f"Error running command: {command}", file=sys.stderr)
        print(process.stderr.decode(errors="replace"), file=sys.stderr)
        sys.exit(process.returncode)
    # errors="replace": external tools are not guaranteed to emit valid
    # UTF-8, and a stray byte should not crash the wrapper.
    return process.stdout.decode(errors="replace")
|
||||
from run_command import run_command
|
||||
|
||||
|
||||
def main():
|
||||
docker_compose_file = os.getenv("DOCKER_COMPOSE_FILE", "./app/docker-compose.yaml")
|
||||
docker_compose_file = os.getenv(
|
||||
"DOCKER_COMPOSE_FILE", "$WEBSCRAPER/app/docker-compose.yaml"
|
||||
)
|
||||
service_name = os.getenv("SERVICE_NAME", "webscraper")
|
||||
script_name = os.getenv("SCRIPT_NAME", "main.py")
|
||||
try:
|
||||
print("Starting Docker Compose services...\n")
|
||||
run_command(f"docker compose -f {docker_compose_file} up -d")
|
||||
|
||||
print("Starting Docker Compose services...\n")
|
||||
run_command(f"docker compose -f {docker_compose_file} up -d")
|
||||
print(run_command(f"docker exec {service_name} python {script_name}"))
|
||||
|
||||
print(run_command(f"docker exec {service_name} python {script_name}"))
|
||||
|
||||
print("Stopping and removing Docker Compose services...")
|
||||
run_command(f"docker compose -f {docker_compose_file} down")
|
||||
print("Stopping and removing Docker Compose services...")
|
||||
run_command(f"docker compose -f {docker_compose_file} down")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print("An error occurred while running the script.")
|
||||
print(e)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
57
scripts/threads/commands.py
Normal file
57
scripts/threads/commands.py
Normal file
@ -0,0 +1,57 @@
|
||||
from run_command import run_command
|
||||
|
||||
|
||||
def quitCondition(command: str) -> bool:
    """Tell whether *command* is one of the keywords that stop the program."""
    quit_keywords = ("q", "quit", "exit", "stop")
    return any(command == keyword for keyword in quit_keywords)
|
||||
|
||||
|
||||
def quitService(path: str):
    """Announce the shutdown and run ``docker compose ... down``.

    The compose file is ``<path>/app/docker-compose.yaml``. Always returns
    ``None``.
    """
    compose_file = f"{path}/app/docker-compose.yaml"
    print("Stopping and removing Docker Compose services...")
    run_command(f"docker compose -f {compose_file} down")
|
||||
|
||||
|
||||
def helpCondition(command: str) -> bool:
    """Tell whether *command* asks for the help listing (``h`` or ``help``)."""
    help_keywords = ("h", "help")
    return any(command == keyword for keyword in help_keywords)
|
||||
|
||||
|
||||
def clearCondition(command: str) -> bool:
    """Tell whether *command* is one of the clear-screen keywords."""
    clear_keywords = ("c", "clear", "cls")
    return any(command == keyword for keyword in clear_keywords)
|
||||
|
||||
|
||||
def systemCommand(command: str) -> "str | None":
    """Run the text after the leading ``$`` inside the ``webscraper`` container.

    The first character of *command* (the ``$`` marker) is dropped and the
    remainder is split on whitespace and re-joined with single spaces before
    being appended to ``docker exec -it webscraper``. The text returned by
    ``run_command`` is printed.

    Returns ``None`` after running a command. When nothing follows the
    marker, the "command not found" message is printed and also returned.
    """
    words = command[1:].split()
    # str.split() never yields empty strings: a bare "$" produces an empty
    # list, so test for emptiness instead of indexing words[0] (IndexError).
    if not words:
        message = "Command not found. Write 'h' for help."
        # Printed here as well, like the success path, so the user sees it
        # even when the caller ignores the return value.
        print(message)
        return message
    print(run_command(f'docker exec -it webscraper {" ".join(words)}'))
    return None
|
||||
|
||||
|
||||
def restartCondition(command: str) -> bool:
    """Tell whether *command* requests a restart (``r`` or ``restart``)."""
    restart_keywords = ("r", "restart")
    return any(command == keyword for keyword in restart_keywords)
|
||||
|
||||
|
||||
def restartService(path: str):
    """Run ``docker compose ... down`` followed by ``up -d``.

    The compose file is ``<path>/app/docker-compose.yaml``. A message is
    printed before and after the two commands. Always returns ``None``.
    """
    compose_file = f"{path}/app/docker-compose.yaml"
    print("Restarting Docker Compose services...")
    for action in ("down", "up -d"):
        run_command(f"docker compose -f {compose_file} {action}")
    print("Composed!")
|
||||
|
||||
|
||||
def runCondition(command: str) -> bool:
    """Tell whether *command* is the ``run`` keyword."""
    run_keywords = ("run",)
    return any(command == keyword for keyword in run_keywords)
|
||||
|
||||
|
||||
def runService():
    """Run ``python main.py`` in the ``webscraper`` container via docker exec.

    Prints a notice first, then prints the text returned by ``run_command``.
    Always returns ``None``.
    """
    print("Running main.py...")
    output = run_command("docker exec -it webscraper python main.py")
    print(output)
|
9
scripts/threads/help_list.py
Normal file
9
scripts/threads/help_list.py
Normal file
@ -0,0 +1,9 @@
|
||||
def help_list():
    """Return the help text: one line per prompt command with its aliases."""
    entries = (
        '["h", "help"], - for help.',
        '["q", "quit", "exit", "stop"], - to stop program.',
        '["c", "clear", "cls"], - to clear console.',
        '["r", "restart"], - to restart Docker Compose services.',
        '["run"], - to run main.py in docker container.',
        '["$..."], - to evaluate command in docker container.',
    )
    # The text opens and closes with a newline, one entry per line.
    return "\n" + "\n".join(entries) + "\n"
|
@ -1,65 +1,30 @@
|
||||
import commands
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
from threads.commands import *
|
||||
from run_command import run_command
|
||||
from get_path import get_path
|
||||
from threads.help_list import help_list
|
||||
|
||||
|
||||
def prompt():
|
||||
while True:
|
||||
command = input("> ")
|
||||
if commands.quitCondition(command):
|
||||
print("Stopping and removing Docker Compose services...")
|
||||
subprocess.run(
|
||||
"docker compose -f ../app/docker-compose.yaml down",
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
)
|
||||
if quitCondition(command):
|
||||
quitService(get_path())
|
||||
break
|
||||
if commands.helpCondition(command):
|
||||
print(
|
||||
"""
|
||||
["h", "help"], - for help.
|
||||
["q", "quit", "exit", "stop"], - to stop program.
|
||||
["c", "clear", "cls"], - to clear console.
|
||||
["r", "restart"], - to restart Docker Compose services.
|
||||
["run"], - to run main.py in docker container.
|
||||
["$..."], - to evaluate command in docker container.
|
||||
"""
|
||||
)
|
||||
if helpCondition(command):
|
||||
print(help_list())
|
||||
continue
|
||||
if commands.clearCondition(command):
|
||||
print("\n" * 100)
|
||||
if clearCondition(command):
|
||||
run_command("clear")
|
||||
continue
|
||||
if command.startswith("$"):
|
||||
print(commands.systemCommand(command))
|
||||
systemCommand(command)
|
||||
continue
|
||||
if commands.restartCondition(command):
|
||||
print("Restarting Docker Compose services...")
|
||||
subprocess.run(
|
||||
"docker compose -f ../app/docker-compose.yaml down",
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
)
|
||||
subprocess.run(
|
||||
"docker compose -f ../app/docker-compose.yaml up -d",
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
)
|
||||
print("Composed!")
|
||||
if restartCondition(command):
|
||||
restartService(get_path())
|
||||
continue
|
||||
if commands.runCondition(command):
|
||||
print("Running main.py...")
|
||||
print(
|
||||
subprocess.run(
|
||||
"docker exec -it webscraper python main.py",
|
||||
shell=True,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
).stdout.decode()
|
||||
)
|
||||
if runCondition(command):
|
||||
runService()
|
||||
continue
|
||||
if command == "":
|
||||
continue
|
||||
|
@ -1,26 +1,20 @@
|
||||
import subprocess
|
||||
import time
|
||||
import os
|
||||
import threading
|
||||
from threads.prompt import prompt
|
||||
|
||||
|
||||
def run_command(command: str) -> str:
    """Run *command* through the shell and return its decoded output.

    Returns the decoded stdout when the command exits with status 0.
    Otherwise a notice naming the command is printed and the decoded stderr
    is returned instead.
    """
    result = subprocess.run(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if result.returncode == 0:
        return result.stdout.decode()
    print(f"Error running command: {command}")
    return result.stderr.decode()
|
||||
|
||||
from run_command import run_command
|
||||
from get_path import get_path
|
||||
|
||||
thread = threading.Thread(target=prompt)
|
||||
|
||||
|
||||
def main():
|
||||
path = get_path()
|
||||
docker_compose_file = os.getenv(
|
||||
"DOCKER_COMPOSE_FILE", f"{path}/app/docker-compose.yaml"
|
||||
)
|
||||
print("Starting Docker Compose services...")
|
||||
run_command("docker compose -f ../app/docker-compose.yaml up -d")
|
||||
run_command(f"docker compose -f {docker_compose_file} up -d")
|
||||
print("Composed!\n")
|
||||
print("Running main.py...")
|
||||
print(run_command("docker exec -it webscraper python main.py"))
|
||||
@ -31,10 +25,9 @@ def main():
|
||||
print("\nWatching for changes...")
|
||||
thread.start()
|
||||
|
||||
path_to_watch = "/home/paprykdev/uni/webscraper/app"
|
||||
before = {
|
||||
f: os.stat(os.path.join(path_to_watch, f)).st_mtime
|
||||
for f in os.listdir(path_to_watch)
|
||||
f: os.stat(os.path.join(path, "app", f)).st_mtime
|
||||
for f in os.listdir(os.path.join(path, "app"))
|
||||
if f.endswith(".py")
|
||||
}
|
||||
|
||||
@ -43,8 +36,8 @@ def main():
|
||||
break
|
||||
time.sleep(1)
|
||||
after = {
|
||||
f: os.stat(os.path.join(path_to_watch, f)).st_mtime
|
||||
for f in os.listdir(path_to_watch)
|
||||
f: os.stat(os.path.join(path, "app", f)).st_mtime
|
||||
for f in os.listdir(os.path.join(path, "app"))
|
||||
if f.endswith(".py")
|
||||
}
|
||||
for f in before:
|
||||
|
Loading…
Reference in New Issue
Block a user