spring library

2021-04-08 20:52:17 -07:00
parent 6e92164bc3
commit 980e493cf7
5 changed files with 95 additions and 57 deletions
--- a/7
+++ b/7
@@ -33,11 +33,12 @@ fi

 if [ -z "$1" ]; then
    #FZF_DEFAULT_COMMAND=rg -i --files --glob "!.git/*"
-    fzf --delimiter : --preview 'less {1}' \
+    # fzf --delimiter : --preview 'less {1}' \
+    fzf --delimiter : --preview 'bat --color=always --style=numbers --line-range=:500 {}' \
        --preview-window=up:70% --bind "enter:execute-silent(gvim {1} &)" 

 else
-
-    rg $1 | fzf --delimiter : --preview 'less {1}' \
+    # {1} = text before the first ':' (the path the enter binding opens); {} would hand bat the whole line
+    rg "$1" | fzf --delimiter : --preview 'bat --color=always --style=numbers --line-range=:500 {1}' \
        --preview-window=up:70% --bind "enter:execute-silent(gvim {1} &)" 
 fi
--- a/pdf2bib.sh
+++ b/pdf2bib.sh
@@ -31,23 +31,24 @@ echo "using $bibdFileOut"

 #try to extract doi from pdf and retrieve a pubmed id
 #for 'DOI:' syntax
-# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi:" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#doi:(.+)#\1#")
+# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi:" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|doi:(.+)|\1|")

-doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi:? ?/?10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.*doi:? ?/?(10.+)#\1#")
+# search pages 1 through 10 of the pdf for a doi string
+doi=$(pdftotext -q -f 1 -l 10 "$fn" - | grep -iE "doi:? ?/?10\." --max-count=1 | tr '[:upper:]' '[:lower:]' | sed -E "s|.*doi:? ?/?(10.+)|\1|")


 #for 'https://doi.org' syntax
 if [ -z "$doi" ]; then
-  doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi\.org/10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.+doi\.org/(10.+)#\1#")
+  doi=$(pdftotext -q -f 1 -l 1 "$fn" - | grep -iE "doi\.org/10\." --max-count=1 | tr '[:upper:]' '[:lower:]' | sed -E "s|.+doi\.org/(10.+)|\1|")
 fi

 # for 'https://doi.org' syntax
 # if [ -z "$doi" ]; then
-  # doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi.org/" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.+doi\.org\/(.+)#\1#")
+  # doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi.org/" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|.+doi\.org\/(.+)|\1|")
 # fi
 # 
 # if [ -z "$doi" ]; then
-# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi ?" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#doi ?(.+)#\1#")
+# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi ?" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|doi ?(.+)|\1|")
 # fi

 if [ -z "$doi" ]; then
@@ -57,7 +58,7 @@ fi


 ## TODO: dedupe this with sdoi.sh
-uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s#<Id>([0-9]+)</Id>#\1#")
+uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")

 if [ -z "$uid" ]; then
  echo "pubmed id not found"
@@ -70,13 +71,13 @@ xsltproc --novalid $styleSheet $uid.xml > $uid.bib

 #extract some strings to make a nice filename for the pdf
 key="LastName"; 
-author=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+author=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")

 key="MedlineTA"; 
-journal=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+journal=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")

 key1="PubDate"; 
-key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s#\W*<$key2>(.+)</$key2>\W*#\1#")
+key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s|\W*<$key2>(.+)</$key2>\W*|\1|")

 fn2=${author}_${journal}$year-$uid.pdf

--- a/pubmed2bibtex.xsl
+++ b/pubmed2bibtex.xsl
@@ -34,12 +34,12 @@
 <xsl:value-of select="MedlineCitation/Article/Journal/JournalIssue/PubDate/Year"/>
 <!-- <xsl:text>_</xsl:text><xsl:value-of select="MedlineCitation/PMID" /> -->
 <xsl:apply-templates select="MedlineCitation/Article"/>
-<xsl:apply-templates select="MedlineCitation/KeywordList"/>
-<!-- <xsl:apply-templates select="MedlineCitation/MeshHeadingList"/> -->
+<!-- <xsl:apply-templates select="MedlineCitation/KeywordList"/> -->
+<xsl:apply-templates select="MedlineCitation/MeshHeadingList"/>
 <xsl:apply-templates select="PubmedData/ArticleIdList/ArticleId"/>
+<!-- url = {https://www.ncbi.nlm.nih.gov/pubmed/</xsl:text><xsl:value-of select="MedlineCitation/PMID"/><xsl:text>}, -->
 <xsl:text>,
-  url = {https://www.ncbi.nlm.nih.gov/pubmed/</xsl:text><xsl:value-of select="MedlineCitation/PMID"/><xsl:text>},
-  file = {}</xsl:text>
+  url = {}</xsl:text>
 <xsl:if test="string-length(MedlineCitation/MedlineJournalInfo/NlmUniqueID) > 0"><xsl:text>,
  nlmuniqueid = {</xsl:text><xsl:value-of select="MedlineCitation/MedlineJournalInfo/NlmUniqueID" /><xsl:text>}</xsl:text></xsl:if>
 <xsl:text>
@@ -108,7 +108,7 @@
 <xsl:apply-templates select="ForeName"/>
 </xsl:template>

-
+<!--
 <xsl:template match="KeywordList">
 <xsl:text>,
  keywords = {</xsl:text>
@@ -120,16 +120,18 @@
 </xsl:for-each>
 <xsl:text>}</xsl:text>
 </xsl:template>
+-->

-<!--
 <xsl:template match="MeshHeadingList">
 <xsl:text>,
-  mesh = {</xsl:text>
-<xsl:for-each select="MeshHeading">
+  keywords = {</xsl:text>
+<xsl:for-each select="/PubmedArticleSet/PubmedArticle/MedlineCitation/KeywordList/Keyword">
+<xsl:value-of select="."/><xsl:text>; </xsl:text>
+</xsl:for-each>
+<xsl:for-each select="/PubmedArticleSet/PubmedArticle/MedlineCitation/MeshHeadingList/MeshHeading">
 <xsl:value-of select="DescriptorName"/><xsl:text>; </xsl:text>
 </xsl:for-each>
 <xsl:text>}</xsl:text>
 </xsl:template>
-->

 </xsl:stylesheet>
--- a/6
+++ b/6
@@ -12,8 +12,11 @@ if [ "$1" == "-h" ] ; then
         depends: 
          fzf
          git grep
+          sed tail
+          bat or less
          pandoc
          pandoc-citeproc
+          echo
          wl-copy

         defaults:
@@ -42,7 +45,8 @@ fi
 # str=$(cat $fn | fzf)

 # str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'nl {1} --body-numbering=a' --preview-window=:up:70%:+{2}-5)
-str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'less {1}' --preview-window=:up:70%:+{2}-5)
+# str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'less {1}' --preview-window=:up:70%:+{2}-5)
+str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'bat --color=always --style=numbers --line-range=:500 {1}' --preview-window=:up:70%:+{2}-5)


 # extract citation key from the fzf string
--- a/sdoi.sh
+++ b/sdoi.sh
@@ -27,57 +27,87 @@ fn=$2

 set -e #exit if an error

-uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s#<Id>([0-9]+)</Id>#\1#")
+uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "<Id>[0-9]+</Id>" | sed -E "s|<Id>([0-9]+)</Id>|\1|")
+function main {  # entry point; invoked on the last line so every helper is defined before it runs
+tmpBib=$(mktemp --suffix=.bib)

 if [ -z "$uid" ]; then
-  echo "pubmed id not found"
-  exit 1
+  fetchBib_doiDotOrg
+else
+  fetchBib_pubmed
 fi

-#request pubmed xml and transform into bibtex
-curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $uid.xml
-xsltproc --novalid $styleSheet $uid.xml > $uid.bib
+if [ -s "$tmpBib" ]; then
+  import_bib
+else
+  echo "sorry, doi not found.."
+  clean_up 1
+fi

+}
+# import_bib: append the fetched entry to the database; when a pdf ($fn) was given, rename and move it first
+function import_bib {
 #decide whether to process and move an associated pdf or just exit
 if [ -z "$fn" ]; then
-
-  if [[ -z $(rg $uid $bibdFileOut) ]]; then
-    #import bibtex
-    echo "importing $uid.bib"
-    cat $uid.bib >> $bibdFileOut
+    append_bibfile
+    clean_up
  else
-    echo "$uid already found in $bibdFileOut, exiting"
+    extract_name
+    append_pdf
+    append_bibfile
+    clean_up
  fi
-  #clean up
-  rm $uid.xml $uid.bib
-  exit 1
+}

-else
+function fetchBib_pubmed {
+  #request pubmed xml and transform into bibtex
+  curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > "$tmpBib.xml"
+  xsltproc --novalid "$styleSheet" "$tmpBib.xml" > "$tmpBib"
+}
+# fetchBib_doiDotOrg: fallback that asks doi.org for BibTeX; -f plus "|| true" leaves $tmpBib empty on an HTTP error
+function fetchBib_doiDotOrg {
+  echo "pubmed id not found, trying doi.org.."
+  curl -fLH 'Accept: application/x-bibtex' "https://doi.org/$doi" >> "$tmpBib" || true
+  if [ -s "$tmpBib" ]; then echo -e "\n" >> "$tmpBib"; fi
+}
+# extract_name: NOTE(review) reads $tmpBib.xml, which only fetchBib_pubmed writes - confirm the doi.org path with a pdf
+function extract_name {
  #extract some strings to make a nice filename for the pdf
  key="LastName"; 
-  author=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+  author=$(grep $key --max-count=1 "$tmpBib.xml" | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")

  key="MedlineTA"; 
-  journal=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)</$key>\W*#\1#" | tr -d " ")
+  journal=$(grep $key --max-count=1 "$tmpBib.xml" | sed -E "s|\W*<$key>(.+)</$key>\W*|\1|" | tr -d " ")

  key1="PubDate"; 
-  key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s#\W*<$key2>(.+)</$key2>\W*#\1#")
+  key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" "$tmpBib.xml" | grep $key2 | sed -E "s|\W*<$key2>(.+)</$key2>\W*|\1|")

+}
+# append_bibfile: add $tmpBib to $bibdFileOut unless the doi is already present there
+function append_bibfile {
+  #import bibtex
+  #skip the import when the doi already appears in the db (-F: match the doi literally, not as a regex)
+  if ! rg -qF -- "$doi" "$bibdFileOut"; then
+    echo "importing $tmpBib"
+    cat "$tmpBib" >> "$bibdFileOut"
+  else
+    echo "$doi already found in $bibdFileOut, exiting"
+  fi
+}
+
+# append_pdf: NOTE(review) the sed below rewrites a "file = {...}" field - confirm the stylesheet still emits one
+function append_pdf {
  fn2=${author}_${journal}$year-$uid.pdf
-
  #move pdf file to papers repository, add file name to bibtex file field
  mv $fn $pdfPathOut/$fn2
  echo "moved to $pdfPathOut/$fn2"
-  sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" $uid.bib
+  sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" "$tmpBib"
+}

-  if [[ -z $(rg $uid $bibdFileOut) ]]; then
-    #import bibtex
-    echo "importing $uid.bib"
-    cat $uid.bib >> $bibdFileOut
-  else
-    echo "$uid already found in $bibdFileOut, exiting"
-  fi

+function clean_up {
  #clean up
-  rm $uid.xml $uid.bib
-fi
+  rm -f -- "$tmpBib" "$tmpBib.xml"
+  exit "${1:-0}"  # exit status comes from the caller; 0 (success) when omitted
+}
+main "$@"