Я пытаюсь с помощью XPath получить значение атрибута data-content_published_date, в котором хранится дата публикации статьи, но по какой-то причине у меня это не получается!
Вот как я пытался:
//div[@id='page-segment-values']/@data-content_published_date
А вот тег div:
<div id = "page-segment-values" class = "">
<div class = "keyvals" data-content_author_name = "Eli Meixler" data-content_cms_id = "5461956" data-content_headline = "British Academic" data-content_modified_date = "" data-content_published_date = "2018-11-22T08:15:43.000Z" data-content_shown_on_platform = "own" data-content_type = "article" data-path = "/5461956/british-academic-sentenced-spying-united-arab-emirates/" data-referrer = "" data-search = "" data-content_is_post = "post" data-title = "A British Academic Has Been Sentenced to Life in Prison on Espionage Charges in the United Arab Emirates" data-affiliate_link_count = "0" data-content_cms_category = "World" data-content_cms_tags = "onetime|overnight|United Arab Emirates" data-content_cms_terms = "World,onetime,overnight,United Arab Emirates" data-time_inc_brand = "time.com" data-time_inc_application = "front end" data-content_syndicated = "false" data-content_syndicated_brand = "" data-content_syndicated_url = "" data-content_nlp_sentiment_label = "negative" data-content_nlp_sentiment_score = "-0.2" data-content_nlp_sentiment_magnitude = "5.9" data-content_nlp_entities = "Matthew Hedges" data-content_nlp_payload = "{"entities":[{"type":"PERSON","text":"Matthew Hedges","relevance":0.57477295,"disambiguation":{}},{"type":"WORK_OF_ART","text":"British Academic Sentenced to Life on Spying Charges","relevance":0.02937225,"disambiguation":{}},{"type":"LOCATION","text":"British","relevance":0.029201617,"disambiguation":{"mid":"/m/07ssc","wikipedia_url":"https://en.wikipedia.org/wiki/United_Kingdom"}},{"type":"PERSON","text":"academic","relevance":0.020707963,"disambiguation":{}},{"type":"OTHER","text":"life","relevance":0.018888554,"disambiguation":{}},{"type":"OTHER","text":"spying charges","relevance":0.013851213,"disambiguation":{}},{"type":"LOCATION","text":"prison","relevance":0.0133453375,"disambiguation":{}},{"type":"LOCATION","text":"United Arab 
Emirates","relevance":0.011457251,"disambiguation":{"wikipedia_url":"https://en.wikipedia.org/wiki/United_Arab_Emirates","mid":"/m/0j1z8"}},{"type":"OTHER","text":"threats","relevance":0.009783207,"disambiguation":{}},{"type":"EVENT","text":"blowback","relevance":0.009783207,"disambiguation":{}}],"categories":[{"label":"/Law & Government/Public Safety/Crime & Justice","score":0.64}],"docSentiment":{"magnitude":5.9,"score":-0.2,"label":"negative"},"language":"en"}" data-content_nlp_categories = "/Law & Government/Public Safety/Crime & Justice"></div>
</div>
Кто-нибудь знает, почему я не могу получить доступ к его значению? И как его получить?
Вы пропустили вложенный элемент div, поэтому пытаетесь получить значение data-content_published_date у внешнего div, у которого этого атрибута нет.
Правильный запрос XPath должен быть таким:
//div[@id='page-segment-values']/div/@data-content_published_date
Или, еще точнее:
//div[@id='page-segment-values']/div[@class='keyvals']/@data-content_published_date
Используйте внутренний элемент <div> и нужный вам атрибут:
//div[@class='keyvals']/@data-content_published_date