G

Untitled

unlisted
Guest May 09, 2025 29 days 27
Clone
Python paste1.py 105 lines (89 loc) | 4.02 KB
1
# by m36-intj: builds BibTeX entries from a list of PubMed article PMIDs, linking to the PMC full-text article when one is available
2
# The PMID list file has one entry per line; lines starting with % are comments:
3
#%comment
4
#12345678
5
#23415251
6
#....
7
8
9
from pymed import PubMed
10
pubmed = PubMed()
11
from Bio import Entrez
12
from Bio import Medline
13
14
def test():
    """Convert the PMID list file into BibTeX entries.

    Reads PMIDs (one per line) from the hard-coded ``pmids`` file, looks each
    one up through the module-level ``pubmed`` client, prints the details to
    stdout and appends a BibTeX entry to the hard-coded ``bibtex.txt`` file.
    Lines that are blank or start with ``%`` are skipped.
    """
    output_path = '/storage/emulated/0/Download/python/bibtex.txt'
    pmids_path = '/storage/emulated/0/Download/python/pmids'

    # Context managers guarantee both files are flushed and closed, even if a
    # lookup raises half-way through the list.
    with open(output_path, "w") as output_file, open(pmids_path, "r") as pmids:
        for pmid in pmids:
            pmid = pmid.strip()
            # Skip comments and blank lines; a blank line would otherwise be
            # sent as the meaningless query "[PMID]".
            if not pmid or pmid.startswith("%"):
                continue

            results = pubmed.query(f"{pmid}[PMID]", max_results=1)
            article = next(results, None)
            if not article:
                print(f"\nPMID {pmid} not found.")
                continue

            print(f"PMID: {pmid}")
            print(f"Title: {article.title}")

            # Take the first entry if several IDs are listed; fall back to the
            # PMID we queried with when the field is empty.
            id_parts = str(article.pubmed_id).split()
            clean_pmid = id_parts[0] if id_parts else pmid

            # Guard against records without authors, and against name parts
            # that are present but None (presumably possible for collective
            # authors -- verify against pymed).
            authors = article.authors or []
            first_author = authors[0] if authors else {}
            first_name = first_author.get('firstname') or ''
            last_name = first_author.get('lastname') or ''
            first_author_name = f"{first_name} {last_name}".strip()
            print(f"First Author: {first_author_name}")

            url = get_pmc(clean_pmid)
            print(f"URL: {url}")

            # Extract year (handles datetime.date, string, or missing dates)
            pub_date = article.publication_date
            if hasattr(pub_date, 'year'):  # datetime.date object
                year = pub_date.year
            elif pub_date:  # string such as "2023-05-15"
                year = str(pub_date).split('-')[0]
            else:  # no publication date on the record
                year = ""
            print(f"Publication Year: {year}")

            abbrev = get_journal_abbreviation(clean_pmid)
            print(f"ISO Abbreviation: {abbrev}")  # e.g., "Nat. Biotechnol."

            bibtex_entry(output_file, first_author_name, url, article.title, abbrev, year)
            print("")
50
51
# Example:
52
def get_pmc(pmid):
    """Return the preferred article URL for a PubMed ID.

    Uses NCBI ELink to look up the PMC record linked to *pmid*.

    Args:
        pmid: PubMed ID of the article.

    Returns:
        The PMC article URL when a ``pubmed_pmc`` link exists; otherwise
        (including when the lookup fails) the plain PubMed URL for *pmid*.
    """
    # Required by NCBI. NOTE(review): this address looks like an obfuscated
    # placeholder -- replace it with a real contact e-mail.
    Entrez.email = "[email protected]"
    try:
        # Restrict to the "pubmed_pmc" link (the article's own PMC copy).
        # Without this, the first link set can be e.g. "pubmed_pmc_refs",
        # i.e. PMC articles that merely cite this one.
        handle = Entrez.elink(dbfrom="pubmed", db="pmc", id=pmid, linkname="pubmed_pmc")
        try:
            record = Entrez.read(handle)
        finally:
            handle.close()
        # NOTE: no delay is inserted between requests; add one here if NCBI
        # rate limits become a problem.

        for link_set in record:
            for link_db in link_set.get("LinkSetDb", []):
                if link_db.get("LinkName") == "pubmed_pmc" and link_db.get("Link"):
                    pmcid = link_db["Link"][0]["Id"]
                    return f"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC{pmcid}/"
    except Exception as e:
        # Best effort: report the problem and fall back to the PubMed link.
        print(f"Error fetching PMCID for PMID {pmid}: {str(e)}")
    # If no PMC article is available, return the base PubMed link to the article.
    return f"https://pubmed.ncbi.nlm.nih.gov/{pmid}"
68
69
70
def get_journal_abbreviation(pmid):
    """Return the journal name recorded for a PubMed ID.

    Fetches the record in MEDLINE text format and returns the first non-empty
    field among ``TA``, ``JT`` and ``SO``, in that order.

    Args:
        pmid: PubMed ID of the article.

    Returns:
        The journal string, or ``"Journal name unavailable"`` when none of the
        fields is present or the lookup fails.
    """
    # Required by NCBI. NOTE(review): this address looks like an obfuscated
    # placeholder -- replace it with a real contact e-mail.
    Entrez.email = "[email protected]"
    try:
        handle = Entrez.efetch(db="pubmed", id=pmid, rettype="medline", retmode="text")
        try:
            record = Medline.read(handle)
        finally:
            # Always release the network handle, even if parsing fails.
            handle.close()

        # Try each candidate field in order of preference.
        for field in ('TA', 'JT', 'SO'):
            value = record.get(field)
            if value:
                return value

        return "Journal name unavailable"

    except Exception as e:
        # Best effort: report the problem and return the placeholder.
        print(f"Error processing PMID {pmid}: {str(e)}")
        return "Journal name unavailable"
89
90
#@Article{hallmarks_aging,
91
# Author = "Carlos Lopez-Otin and others",
92
# Title = "\href{https://pubmed.ncbi.nlm.nih.gov/36599349/}{Hallmarks of aging: An expanding universe}",
93
#Journal = "Cell Metabolism",
94
# Year = 2023,
95
#}
96
def bibtex_entry(text_file, author, url, title, journal, year, key=None):
    """Write one ``@Article`` BibTeX entry to *text_file*.

    Args:
        text_file: Writable text file object the entry is printed to.
        author: First author's name; " and others" is appended.
        url: Link wrapped around the title with ``\\href``.
        title: Article title.
        journal: Journal name; a trailing "." is appended.
        year: Publication year (any value printable with ``str``).
        key: Optional citation key. When omitted, a key is derived from the
            author's last name, the year and the first title word, so that
            entries in the same file do not all share one key.
    """
    if key is None:
        key = _make_citation_key(author, title, year)
    print(f"@Article {{{key},", file=text_file)
    print(f"Author = \"{author} and others\",", file=text_file)
    print(f"Title = \"\\href{{{url}}}{{{title}}}\",", file=text_file)
    print(f"Journal = \"{journal}.\",", file=text_file)
    print(f"Year = \"{year}\",\n}}\n", file=text_file)


def _make_citation_key(author, title, year):
    """Build an ASCII citation key such as ``lopezotin2023hallmarks``."""
    import re
    import unicodedata

    def ascii_alnum(text):
        # Fold accents to plain ASCII, then keep only letters and digits.
        folded = unicodedata.normalize("NFKD", str(text))
        folded = folded.encode("ascii", "ignore").decode("ascii")
        return re.sub(r"[^A-Za-z0-9]", "", folded).lower()

    name_parts = str(author).split()
    surname = ascii_alnum(name_parts[-1]) if name_parts else ""
    title_words = (ascii_alnum(word) for word in str(title).split())
    # Skip leading articles so the key carries a meaningful title word.
    first_word = next(
        (word for word in title_words if word and word not in {"a", "an", "the"}),
        "",
    )
    # Fall back to the generic key only when nothing usable was supplied.
    return f"{surname}{ascii_alnum(year)}{first_word}" or "key"
103
104
105
# Run the conversion only when executed as a script, so importing this module
# does not trigger network requests or file writes.
if __name__ == "__main__":
    test()