Prompt
import requests
from bs4 import BeautifulSoup
# Base url of the website
base_url = 'https://www.example.com/product-page-'
# A list to store scraped data
data_list = []
# Loop over the page numbers you want to scrape
for page_number in range(1, 11): # Scraping 10 pages here
url = base_url + str(page_number)
# Request the page
response = requests.get(url)
# If the request is successful, the status code will be 200
if response.status_code == 200:
# Get the content of the response
page_content = response.content
# Create a Beautiful Soup object and specify the parser
soup = BeautifulSoup(page_content, 'html.parser')
# Let's say we want to scrape the product names and prices, and they
# are in divs with the class 'product'. Replace this with your actual scenario.
products = soup.find_all('div', class_='product')
# Loop over the product details
for product in products:
name = product.find('h2', class_='product-name').text # Replace with actual class name
price = product.find('span', class_='product-price').text # Replace with actual class name
# Append this data as a tuple to the data list.
data_list.append((name, price))
# Now you have data from multiple pages. Do whatever you want with it.
# For instance, you could write it to a CSV file.
import csv
with open('product_data.csv', 'w', newline='') as file:
writer = csv.writer(file)
# Write the headers
writer.writerow(['Product Name', 'Price'])
# Write the data
writer.writerows(data_list)
Answer
import requests
from bs4 import BeautifulSoup
import csv
base_url = 'https://www.example.com/product-page-'
data_list = []
for page_number in range(1, 11):
response = requests.get(f'{base_url}{page_number}')
if response.status_code == 200:
soup = BeautifulSoup(response.content, 'html.parser')
products = soup.find_all('div', class_='product')
for product in products:
name = product.find('h2', class_='product-name').text
price = product.find('span', class_='product-price').text
data_list.append((name, price))
with open('product_data.csv', 'w', newline='') as file:
writer = csv.writer(file)
writer.writerow(['Product Name', 'Price'])
writer.writerows(data_list)
- Simplified the code by removing unnecessary comments and extra whitespace.
- Used f-string to format the URL dynamically.
- Merged import statements to improve readability.
- Combined code related to CSV writing for better organization.
Unit Testing:
- Test if the correct URL is generated for each page number.
- Test if the data is correctly scraped and stored in the data_list.
- Test if the CSV file is created with the headers and data written correctly.
Description
A script that automates web scraping from multiple pages, extracts product names and prices, stores the data in a CSV file, and performs unit testing for validation.
More Code Simplifiers
Apache Flink Code SimplifierApache Pig Code SimplifierAzure Data Factory Code SimplifierC/C++ Code SimplifierCouchDB Code SimplifierDAX Code SimplifierExcel Code SimplifierFirebase Code SimplifierGoogle BigQuery Code SimplifierGoogle Sheets Code SimplifierGraphQL Code SimplifierHive Code SimplifierJava Code SimplifierJavaScript Code SimplifierJulia Code SimplifierLua Code SimplifierM (Power Query) Code SimplifierMATLAB Code SimplifierMongoDB Code SimplifierOracle Code SimplifierPostgreSQL Code SimplifierPower BI Code SimplifierPython Code SimplifierR Code SimplifierRedis Code SimplifierRegex Code SimplifierRuby Code SimplifierSAS Code SimplifierScala Code SimplifierShell Code SimplifierSPSS Code SimplifierSQL Code SimplifierSQLite Code SimplifierStata Code SimplifierTableau Code SimplifierVBA Code Simplifier