BeautifulSoup Installationsanleitung

BeautifulSoup installieren

@sudo apt-get install python3-bs4
@sudo apt-get install python3-lxml
@sudo apt-get install python3-html5lib

BeautifulSoup Script Beispiele

Datei Beispielscript.py erstellen, Inhalt einfügen und speichern. Das Skript wird dann folgendermaßen ausgeführt (in meinem Beispiel habe ich python3 auf meinem System installiert):
@python3 Beispielscript.py

Alle Webseiten werden ausgelesen inkl. Inhalte und im Terminal angezeigt

import time

import requests
from bs4 import BeautifulSoup

def get_current_date():
  """Return the current local date as text, e.g. 'Monday, January 01, 2024'.

  The layout is: full weekday name, full month name, zero-padded day of
  the month, four-digit year.
  """
  now = time.localtime()
  date_layout = "%A, %B %d, %Y"
  return time.strftime(date_layout, now)

def get_current_time():
  """Return a sentence stating the current local time on a 12-hour clock.

  Returns:
    A string such as 'The current time is 02:05 PM'.
  """
  # %I is the 12-hour clock and belongs with the AM/PM marker %p. The
  # previous %H (24-hour clock) combined with %p gave output like "14:05 PM".
  clock = time.strftime('%I:%M %p', time.localtime())
  return f"The current time is {clock}"

def scrape_website(url):
  """Download *url*, extract its text content and print it to the terminal.

  Args:
    url: Address of the page to fetch.

  Raises:
    requests.RequestException: If the request fails, times out, or the
      server answers with an HTTP error status.
  """
  # A timeout keeps the script from hanging forever on an unresponsive server.
  response = requests.get(url, timeout=10)
  # Fail loudly on 4xx/5xx responses instead of scraping an error page.
  response.raise_for_status()
  soup = BeautifulSoup(response.text, 'html.parser')

  # get_text() yields every text node exactly once. Joining the `.text` of
  # every tag returned by find_all() repeats nested text once per enclosing
  # tag, which duplicates most of the page several times over.
  text_content = soup.get_text(separator=" ", strip=True)

  print("Extracted Text Content:\n", text_content)

# Script entry point: runs only when the file is executed directly
# (e.g. `python3 Beispielscript.py`), not when it is imported as a module.
if __name__ == "__main__":
  start_date = get_current_date()
  start_time = get_current_time()

  print("Tool 'gettimedate/get_current_date' Output:\n", start_date)
  print("Tool 'gettimedate/get_current_time' Output:\n", start_time)

  # Specify the website URL to scrape
  url = "https://www.beispielwebseite.com"
  scrape_website(url)

Alle Webseiten werden ausgelesen inkl. Inhalte, im Terminal angezeigt und in einer Textdatei gespeichert.

import requests
from bs4 import BeautifulSoup
import time

def get_current_date():
  """Return the current local date as text, e.g. 'Monday, January 01, 2024'.

  The layout is: full weekday name, full month name, zero-padded day of
  the month, four-digit year.
  """
  now = time.localtime()
  date_layout = "%A, %B %d, %Y"
  return time.strftime(date_layout, now)

def get_current_time():
  """Return a sentence stating the current local time on a 12-hour clock.

  Returns:
    A string such as 'The current time is 02:05 PM'.
  """
  # %I is the 12-hour clock and belongs with the AM/PM marker %p. The
  # previous %H (24-hour clock) combined with %p gave output like "14:05 PM".
  clock = time.strftime('%I:%M %p', time.localtime())
  return f"The current time is {clock}"

def scrape_website(url, output_path="www.beispielwebseite.com.txt"):
  """Download *url*, print its text content and save it to a text file.

  Args:
    url: Address of the page to fetch.
    output_path: File the extracted text is written to. An existing file
      is overwritten. Defaults to the file name used by the original
      example.

  Raises:
    requests.RequestException: If the request fails, times out, or the
      server answers with an HTTP error status.
    OSError: If the output file cannot be written.
  """
  # A timeout keeps the script from hanging forever on an unresponsive server.
  response = requests.get(url, timeout=10)
  # Fail loudly on 4xx/5xx responses instead of saving an error page.
  response.raise_for_status()
  soup = BeautifulSoup(response.text, 'html.parser')

  # get_text() yields every text node exactly once. Joining the `.text` of
  # every tag returned by find_all() repeats nested text once per enclosing
  # tag, which duplicates most of the page several times over.
  text_content = soup.get_text(separator=" ", strip=True)

  print("Extracted Text Content:\n", text_content)

  # Web pages routinely contain characters outside the platform's default
  # encoding; writing UTF-8 explicitly avoids UnicodeEncodeError.
  with open(output_path, "w", encoding="utf-8") as file:
      file.write(text_content)

# Script entry point: runs only when the file is executed directly
# (e.g. `python3 Beispielscript.py`), not when it is imported as a module.
if __name__ == "__main__":
  start_date = get_current_date()
  start_time = get_current_time()

  print("Tool 'gettimedate/get_current_date' Output:\n", start_date)
  print("Tool 'gettimedate/get_current_time' Output:\n", start_time)

  # Specify the website URL to scrape
  url = "https://www.beispielwebseite.com"
  scrape_website(url)