# 6.8 XML, XPath — solution
# Prompt: write an XPath for me
import requests
from lxml import html
def fetch_html(url, timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        ValueError: If the server answers with any status other than 200.
        requests.exceptions.RequestException: Propagated unchanged on
            connection failures or when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(f"Failed to fetch HTML content from {url}. Status code: {response.status_code}")

    # When a text response carries no charset in its Content-Type header,
    # requests falls back to ISO-8859-1, which garbles UTF-8 pages
    # (e.g. "£" becomes "Â£"). Use the encoding detected from the body instead.
    content_type = response.headers.get("Content-Type", "")
    if "charset" not in content_type.lower():
        response.encoding = response.apparent_encoding

    return response.text
def extract_information(html_content):
    """Extract book titles, prices and availability from a listing page.

    Args:
        html_content: HTML markup of the page to parse.

    Returns:
        A list of dictionaries with the keys ``'title'``, ``'price'`` and
        ``'availability'``, all whitespace-stripped. The three value lists
        are paired positionally, so the result is as long as the shortest
        of them.
    """
    tree = html.fromstring(html_content)

    def direct_text(xpath_expr):
        # One string per matched element. Selecting ".../text()" directly
        # yields one entry per text node, so an element that wraps a child
        # tag (such as an icon before the label) contributes several entries
        # and shifts every later pairing in the zip below.
        return [
            "".join(node.xpath("text()")).strip()
            for node in tree.xpath(xpath_expr)
        ]

    titles = tree.xpath('//h3/a/@title')
    prices = direct_text('//p[@class="price_color"]')
    availability = direct_text('//p[@class="instock availability"]')

    return [
        {
            'title': title.strip(),
            'price': price,
            'availability': avail,
        }
        for title, price, avail in zip(titles, prices, availability)
    ]
def main():
    """Fetch the book catalogue front page and print one record per line."""
    page_source = fetch_html("http://books.toscrape.com/")
    # Each record is a dict produced by extract_information; print them as-is.
    for record in extract_information(page_source):
        print(record)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()