PubMed to BibTeX converter
unlisted
May 11, 2025
23 days
11
"""Convert a list of PubMed IDs (PMIDs) into BibTeX entries.

V2.0, by m36-intj.

Makes BibTeX entries from a list of PubMed article PMIDs and links each
title to the PMC article when a PMC link is available.

Changes in this version:
- Fixed the problem with a sometimes wrong publication date: Biopython is
  now used instead of pymed to get the publication date.
- Same for the title: Biopython is now used for titles.
- Added Unicode-to-LaTeX translation of author names.
- Titles are converted to LaTeX format with additional regex rules.

Input list file format (one PMID per line, lines starting with % are
comments):

    %comment
    12345678
    23415251
    ....
"""

import re
from contextlib import closing

from Bio import Entrez
from Bio import Medline
from pylatexenc.latexencode import unicode_to_latex
from pylatexenc.latexencode import (
    RULE_REGEX,
    UnicodeToLatexConversionRule,
    UnicodeToLatexEncoder,
)
from pymed import PubMed

pubmed = PubMed()

# Required by NCBI.
# NOTE(review): this value is not a valid e-mail address (it looks like a
# placeholder) - replace it with a real contact address before running.
Entrez.email = "[email protected]"

# Default locations of the PMID list and of the generated BibTeX file.
PMID_LIST_PATH = "/storage/emulated/0/Download/python/pmids"
BIBTEX_OUTPUT_PATH = "/storage/emulated/0/Download/python/bibtex.txt"

# Returned by get_journal_abbreviation() when no journal field is found.
JOURNAL_UNAVAILABLE = "Journal name unavailable"

# Written as the author when an article carries no usable first author.
UNKNOWN_AUTHOR = "Unknown"

# Encoder for titles: the regex rules run before pylatexenc's defaults, so
# e.g. <i>text</i> becomes \textit{text}. Built once and reused for every
# title instead of being rebuilt on each call.
_TITLE_ENCODER = UnicodeToLatexEncoder(
    conversion_rules=[
        UnicodeToLatexConversionRule(
            rule_type=RULE_REGEX,
            rule=[
                (re.compile(r"<i>"), r"\\textit{"),
                (re.compile(r"</i>"), r"}"),
            ],
        ),
        "defaults",
    ]
)


def _query_article(pmid):
    """Return the first pymed result for *pmid*, or None if there is none."""
    try:
        return next(pubmed.query(f"{pmid}[PMID]", max_results=1), None)
    except Exception as e:
        print(f"Error querying PMID {pmid}: {e}")
        return None


def _first_author_name(article):
    """Return the LaTeX-encoded "firstname lastname" of the first author.

    Falls back to UNKNOWN_AUTHOR when the article has no authors or the
    first author has neither a first nor a last name.
    """
    authors = getattr(article, "authors", None) or []
    if not authors:
        return UNKNOWN_AUTHOR
    first_author = authors[0]
    # The values may be None, so filter on truthiness instead of relying on
    # the .get() default (which only applies when the key is absent).
    parts = (first_author.get("firstname"), first_author.get("lastname"))
    name = " ".join(part for part in parts if part).strip()
    if not name:
        return UNKNOWN_AUTHOR
    # Translate to LaTeX syntax.
    return unicode_to_latex(name)


def fetch_year(pmid):
    """Return the publication year of *pmid* as a string, or None.

    The year is the first whitespace-separated token of the "PubDate" field
    of the Entrez summary. Any error is printed and reported as None.
    """
    try:
        with closing(Entrez.esummary(db="pubmed", id=pmid)) as handle:
            record = Entrez.read(handle)
        date_parts = record[0]["PubDate"].split()
        return date_parts[0] if date_parts else None
    except Exception as e:
        print(f"Error fetching publication year for PMID {pmid}: {e}")
        return None


def test(pmid_path=PMID_LIST_PATH, output_path=BIBTEX_OUTPUT_PATH):
    """Read PMIDs from *pmid_path* and write BibTeX entries to *output_path*.

    Blank lines and lines starting with "%" are skipped. For every other
    line the article is looked up and, when title and year can be
    determined, one entry is appended to the output file. Progress is
    printed to stdout.

    Args:
        pmid_path: Text file with one PMID per line.
        output_path: File that receives the BibTeX entries (overwritten).
    """
    # Open the input first so a missing list does not truncate the output.
    with open(pmid_path, "r", encoding="utf-8") as pmids:
        with open(output_path, "w", encoding="utf-8") as output_file:
            for line in pmids:
                pmid = line.strip()
                if not pmid or pmid.startswith("%"):
                    continue

                article = _query_article(pmid)
                if not article:
                    print(f"\nPMID {pmid} not found.")
                    continue

                print(f"PMID: {pmid}")
                title = fetch_title(pmid)
                if title is None:
                    print(f"No title for PMID {pmid}; entry skipped.\n")
                    continue
                title = special_convert(title)
                print(f"Title: {title}")

                # Takes the first entry if multiple ids exist.
                id_tokens = str(article.pubmed_id or pmid).split()
                clean_pmid = id_tokens[0] if id_tokens else pmid

                first_author_name = _first_author_name(article)
                print(f"First Author: {first_author_name}")

                url = get_pmc(clean_pmid)
                print(f"URL: {url}")

                year = fetch_year(clean_pmid)
                if year is None:
                    print(f"No year for PMID {pmid}; entry skipped.\n")
                    continue
                print(f"Publication year: {year}")

                # Get the ISO abbreviation for the journal name.
                abbrev = get_journal_abbreviation(clean_pmid)
                print(f"ISO Abbreviation: {abbrev}")  # e.g., "Nat. Biotechnol."

                # Make a BibTeX entry.
                bibtex_entry(
                    output_file, first_author_name, url, title, abbrev, year
                )
                print("")


def get_pmc(pmid):
    """Return the PMC article URL for *pmid*, or its PubMed URL.

    The PMC URL is built from the first id linked from PubMed to the "pmc"
    database. When there is no such link, or the lookup fails (the error is
    printed), the PubMed URL of the article is returned instead.
    """
    try:
        with closing(
            Entrez.elink(dbfrom="pubmed", db="pmc", id=pmid)
        ) as handle:
            record = Entrez.read(handle)
        if record and "LinkSetDb" in record[0] and record[0]["LinkSetDb"]:
            pmcid = record[0]["LinkSetDb"][0]["Link"][0]["Id"]
            return f"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC{pmcid}/"
    except Exception as e:
        print(f"Error fetching PMCID for PMID {pmid}: {str(e)}")
    return f"https://pubmed.ncbi.nlm.nih.gov/{pmid}"


def get_journal_abbreviation(pmid):
    """Return the journal name of *pmid* from its Medline record.

    The Medline fields "TA", "JT" and "SO" are tried in that order and the
    first one present is returned. If none is present, or the lookup fails
    (the error is printed), "Journal name unavailable" is returned.
    """
    try:
        # Fetch the record in Medline format.
        with closing(
            Entrez.efetch(
                db="pubmed", id=pmid, rettype="medline", retmode="text"
            )
        ) as handle:
            record = Medline.read(handle)

        # Check all possible journal name fields, most preferred first.
        for field in ("TA", "JT", "SO"):
            if field in record:
                return record[field]
        return JOURNAL_UNAVAILABLE
    except Exception as e:
        print(f"Error processing PMID {pmid}: {str(e)}")
        return JOURNAL_UNAVAILABLE


def fetch_title(pmid):
    """Fetch the article title for a given PMID.

    Returns the "ArticleTitle" of the first "PubmedArticle" in the fetched
    XML record, or None when the record has no such entry or the lookup
    fails (the error is printed).
    """
    try:
        # Fetch XML data for the PMID.
        with closing(
            Entrez.efetch(db="pubmed", id=pmid, retmode="xml")
        ) as handle:
            record = Entrez.read(handle)

        # Extract the title from the nested XML structure.
        if "PubmedArticle" in record:
            article = record["PubmedArticle"][0]
            return article["MedlineCitation"]["Article"]["ArticleTitle"]
    except Exception as e:
        print(f"Error fetching title for PMID {pmid}: {e}")
    return None


# This converts strings to LaTeX format; there might be special rules which
# are needed, e.g. <i>text</i> -> \textit{text}.
def special_convert(string):
    """Return *string* converted to LaTeX, honouring the special rules.

    The regex rules (<i> -> \\textit{ and </i> -> }) are applied before the
    default Unicode-to-LaTeX conversion.
    """
    return _TITLE_ENCODER.unicode_to_latex(string)


# Example of a finished entry:
# @Article{hallmarks_aging,
#   Author = "Carlos Lopez-Otin and others",
#   Title = "\href{https://pubmed.ncbi.nlm.nih.gov/36599349/}{Hallmarks of aging: An expanding universe}",
#   Journal = "Cell Metabolism",
#   Year = 2023,
# }
def bibtex_entry(text_file, author, url, title, journal, year):
    """Write one BibTeX @Article entry, followed by a blank line.

    Args:
        text_file: Writable text file object receiving the entry.
        author: First author; " and others" is appended.
        url: Link target wrapped around the title with \\href.
        title: Article title, already in LaTeX syntax.
        journal: Journal name; a trailing period is ensured.
        year: Publication year, written unquoted.
    """
    # Journal abbreviations are written with a final period; do not add a
    # second one when the name already ends with it.
    journal_text = journal if journal.endswith(".") else f"{journal}."
    # NOTE(review): every entry is written with the literal cite key "key";
    # presumably the keys are meant to be edited by hand afterwards.
    entry_lines = (
        "@Article {key,",
        f'Author = "{author} and others",',
        f'Title = "\\href{{{url}}}{{{title}}}",',
        f'Journal = "{journal_text}",',
        f"Year = {year},",
        "}",
        "",
    )
    print("\n".join(entry_lines), file=text_file)


# Run the conversion with the default paths, as the original script does.
test()