# frozen_string_literal: true

module Onebox
  module Engine
    # Onebox engine for PubMed article URLs (www.ncbi.nlm.nih.gov/pubmed/<id>).
    #
    # Downloads the article's XML report and exposes the fields returned by
    # #data: title, authors, journal, abstract, date, link and pmid.
    class PubmedOnebox
      include Engine
      include LayoutSupport

      # The dots of the host name are escaped so that only the literal host
      # matches; an unescaped "." would also accept look-alike hosts such as
      # "www-ncbi-nlm-nih.gov". \A anchors at the start of the string instead
      # of at the start of any line. One optional extra slash before "pubmed"
      # is tolerated, and #match accepts the same set of paths.
      matches_regexp(%r{\Ahttps?://(?:\w+\.)?(www\.ncbi\.nlm\.nih)\.gov//?pubmed/\d+})

      private

      # Parsed XML report of the article, memoized in @xml.
      #
      # Requests "<@url>?report=xml&format=text", collects the text of every
      # <pre> element of the response and re-parses that text wrapped in a
      # single <root> element.
      #
      # NOTE(review): URI#open is provided by open-uri, which is assumed to be
      # required elsewhere in the project.
      #
      # @return [Nokogiri::XML::Document]
      def xml
        return @xml if defined?(@xml)

        report_url = URI.join(@url, "?report=xml&format=text")
        # Block form: open-uri closes the handle as soon as parsing is done.
        page = report_url.open { |io| Nokogiri.XML(io) }
        pre_text = page.xpath("//pre").text
        @xml = Nokogiri.XML("<root>#{pre_text}</root>")
      end

      # Human-readable author list, e.g. "J Smith", "J Smith and A Doe" or
      # "J Smith, A Doe and B Roe".
      #
      # Names are built per <Author> element so that an author missing its
      # <Initials> or <LastName> can neither shift the remaining names nor
      # raise on a nil operand.
      #
      # @return [String] empty when the report lists no authors
      def authors
        names = xml.css("Author").map { |author| author_name(author) }.reject(&:empty?)
        *leading, final = names
        return final.to_s if leading.empty?

        "#{leading.join(", ")} and #{final}"
      end

      # Display name of one <Author> node: "<Initials> <LastName>" using
      # whichever of the two is present, otherwise the <CollectiveName> text.
      #
      # @param author [#at_css] an <Author> element
      # @return [String] empty when none of the elements is present
      def author_name(author)
        personal = %w[Initials LastName].map { |tag| child_text(author, tag) }.reject(&:empty?)
        return personal.join(" ") unless personal.empty?

        child_text(author, "CollectiveName")
      end

      # Stripped text of the first +tag+ element below +node+, or "" when
      # there is none.
      def child_text(node, tag)
        element = node.at_css(tag)
        element ? element.content.strip : ""
      end

      # Publication date assembled from the children of <PubDate>.
      #
      # Every child's text is split on whitespace (whitespace-only nodes thus
      # contribute nothing) and the tokens are sorted in reverse lexical
      # order, which puts a month name such as "Jul" before the year.
      #
      # NOTE(review): a <Day> token sorts after the year ("Jan 2015 12");
      # the ordering is intentionally left as it was.
      #
      # @return [String]
      def date
        xml.css("PubDate").children.map(&:content).flat_map(&:split).sort.reverse.join(" ")
      end

      # Values describing the article.
      # NOTE(review): presumably consumed by the layout from LayoutSupport;
      # confirm against that module.
      #
      # @return [Hash{Symbol => String}]
      def data
        {
          title: xml.css("ArticleTitle").text,
          authors: authors,
          journal: xml.css("Title").text,
          abstract: xml.css("AbstractText").text,
          date: date,
          link: @url,
          pmid: match[:pmid],
        }
      end

      # Match of @url against the PubMed article path; the numeric id is
      # available as the named capture :pmid. Accepts the same optional extra
      # slash before "pubmed" as the engine's matches_regexp, so any URL
      # handled by this engine yields a match.
      #
      # @return [MatchData, nil]
      def match
        @match ||= @url.match(%r{www\.ncbi\.nlm\.nih\.gov//?pubmed/(?<pmid>[0-9]+)})
      end
    end
  end
end