# לדלג לתוכן
#
# 6.8 xml, xpath פתרון
#
# עשה לי XPath

import requests
from lxml import html

def fetch_html(url, *, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            unchanged to ``requests.get``. Without a timeout a stalled
            server would block the call indefinitely.

    Returns:
        The decoded response body.

    Raises:
        ValueError: If the server answers with any status other than 200.
        requests.exceptions.RequestException: On connection failures or
            when the timeout expires (raised by ``requests.get``).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(f"Failed to fetch HTML content from {url}. Status code: {response.status_code}")

    # When Content-Type carries no charset, requests decodes text/* bodies
    # as ISO-8859-1, which garbles non-ASCII characters (e.g. currency
    # symbols) on UTF-8 pages. Prefer the encoding detected from the body.
    content_type = response.headers.get("Content-Type", "")
    if "charset" not in content_type.lower():
        detected = response.apparent_encoding
        if detected:
            response.encoding = detected
    return response.text

def extract_information(html_content):
    """Parse a book-listing page and return one record per book.

    Titles come from the ``title`` attribute of ``<h3><a>`` links, prices
    from ``<p class="price_color">`` and availability from
    ``<p class="instock availability">``. The three sequences are paired
    positionally in document order.

    Args:
        html_content: HTML markup to parse with ``lxml.html.fromstring``.

    Returns:
        A list of dicts with the keys ``'title'``, ``'price'`` and
        ``'availability'``; every value is a whitespace-stripped string.
        If the three selections differ in length, the result is truncated
        to the shortest one.
    """
    tree = html.fromstring(html_content)

    # Select elements, not text() nodes. XPath text() yields every text-node
    # child, so a <p> that also holds a child element (e.g. an icon tag)
    # produces several strings; that makes the list longer than the others
    # and shifts the zip pairing. One element == one value keeps them aligned.
    title_links = tree.xpath('//h3/a[@title]')
    price_nodes = tree.xpath('//p[@class="price_color"]')
    availability_nodes = tree.xpath('//p[@class="instock availability"]')

    return [
        {
            'title': link.get('title').strip(),
            'price': price_node.text_content().strip(),
            'availability': availability_node.text_content().strip(),
        }
        for link, price_node, availability_node in zip(
            title_links, price_nodes, availability_nodes
        )
    ]

def main():
    """Fetch the book catalogue front page and print each extracted record."""
    page_source = fetch_html('http://books.toscrape.com/')
    records = extract_information(page_source)

    # One dict per line on stdout.
    for record in records:
        print(record)

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()