diff --git a/f b/f index 43c78f5..39bda17 100755 --- a/f +++ b/f @@ -33,11 +33,12 @@ fi if [ -z "$1" ]; then #FZF_DEFAULT_COMMAND=rg -i --files --glob "!.git/*" - fzf --delimiter : --preview 'less {1}' \ + # fzf --delimiter : --preview 'less {1}' \ + fzf --delimiter : --preview 'bat --color=always --style=numbers --line-range=:500 {}' \ --preview-window=up:70% --bind "enter:execute-silent(gvim {1} &)" else - - rg $1 | fzf --delimiter : --preview 'less {1}' \ + # rg $1 | fzf --delimiter : --preview 'less {1}' \ + rg $1 | fzf --delimiter : --preview 'bat --color=always --style=numbers --line-range=:500 {}' \ --preview-window=up:70% --bind "enter:execute-silent(gvim {1} &)" fi diff --git a/pdf2bib.sh b/pdf2bib.sh index 8e8dcdc..c86ae35 100755 --- a/pdf2bib.sh +++ b/pdf2bib.sh @@ -31,23 +31,24 @@ echo "using $bibdFileOut" #try to extract doi from pdf and retrieve a pubmed id #for 'DOI:' syntax -# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi:" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#doi:(.+)#\1#") +# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi:" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|doi:(.+)|\1|") -doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi:? ?/?10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.*doi:? ?/?(10.+)#\1#") +# search for doi string between first page last page 10 +doi=$(pdftotext -q -f 1 -l 10 $fn - | grep -iE "doi:? ?/?10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|.*doi:? ?/?(10.+)|\1|") #for 'https://doi.org' syntax if [ -z "$doi" ]; then - doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi\.org/10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.+doi\.org/(10.+)#\1#") + doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi\.org/10\." --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|.+doi\.org/(10.+)|\1|") fi # for 'https://doi.org' syntax # if [ -z "$doi" ]; then - # doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi.org/" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#.+doi\.org\/(.+)#\1#") + # doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -i "doi.org/" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|.+doi\.org\/(.+)|\1|") # fi # # if [ -z "$doi" ]; then -# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi ?" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s#doi ?(.+)#\1#") +# doi=$(pdftotext -q -f 1 -l 1 $fn - | grep -iE "doi ?" --max-count=1 | tr [:upper:] [:lower:] | sed -E "s|doi ?(.+)|\1|") # fi if [ -z "$doi" ]; then @@ -57,7 +58,7 @@ fi ## TODO: dedupe this with sdoi.sh -uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "[0-9]+" | sed -E "s#([0-9]+)#\1#") +uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "[0-9]+" | sed -E "s|([0-9]+)|\1|") if [ -z "$uid" ]; then echo "pubmed id not found" @@ -70,13 +71,13 @@ xsltproc --novalid $styleSheet $uid.xml > $uid.bib #extract some strings to make a nice filename for the pdf key="LastName"; -author=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)\W*#\1#" | tr -d " ") +author=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)\W*|\1|" | tr -d " ") key="MedlineTA"; -journal=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)\W*#\1#" | tr -d " ") +journal=$(grep $key --max-count=1 $uid.xml | sed -E "s|\W*<$key>(.+)\W*|\1|" | tr -d " ") key1="PubDate"; -key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s#\W*<$key2>(.+)\W*#\1#") +key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s|\W*<$key2>(.+)\W*|\1|") fn2=${author}_${journal}$year-$uid.pdf diff --git a/pubmed2bibtex.xsl b/pubmed2bibtex.xsl index c92a286..0181361 100644 --- a/pubmed2bibtex.xsl +++ b/pubmed2bibtex.xsl @@ -34,12 +34,12 @@ - - + + + , - url = {https://www.ncbi.nlm.nih.gov/pubmed/}, - file = {} + url = {} , nlmuniqueid = {} @@ -108,7 +108,7 @@ - + - diff --git a/sbib b/sbib index dbf73ab..664d6fb 100755 --- a/sbib +++ b/sbib @@ -12,8 +12,11 @@ if [ "$1" == "-h" ] ; then depends: fzf git grep + sed tail + bat or less pandoc pandoc-citeproc + echo wl-copy defaults: @@ -42,7 +45,8 @@ fi # str=$(cat $fn | fzf) # str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'nl {1} --body-numbering=a' --preview-window=:up:70%:+{2}-5) -str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'less {1}' --preview-window=:up:70%:+{2}-5) +# str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'less {1}' --preview-window=:up:70%:+{2}-5) +str=$(git grep -E --line-number $sPattern $bibdFile | fzf --delimiter : --preview 'bat --color=always --style=numbers --line-range=:500 {1}' --preview-window=:up:70%:+{2}-5) # extract citation key from the fzf string diff --git a/sdoi.sh b/sdoi.sh index 79388b0..4fc15ab 100755 --- a/sdoi.sh +++ b/sdoi.sh @@ -27,57 +27,87 @@ fn=$2 set -e #exit if an error -uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "[0-9]+" | sed -E "s#([0-9]+)#\1#") +uid=$(curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=$doi&field=doi&retmode=xml" | grep -E "[0-9]+" | sed -E "s|([0-9]+)|\1|") + +tmpBib=$(mktemp -p --suffix=.bib) if [ -z "$uid" ]; then - echo "pubmed id not found" - exit 1 + fetchBib_doiDotOrg +else + fetchBib_pubmed fi -#request pubmed xml and transform into bibtex -curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $uid.xml -xsltproc --novalid $styleSheet $uid.xml > $uid.bib - -#decide whether to process and move an associated pdf or just exit -if [ -z "$fn" ]; then - - if [[ -z $(rg $uid $bibdFileOut) ]]; then - #import bibtex - echo "importing $uid.bib" - cat $uid.bib >> $bibdFileOut - else - echo "$uid already found in $bibdFileOut, exiting" - fi - #clean up - rm $uid.xml $uid.bib - exit 1 - +if [ -s "$tmpBib" ]; then + import_bib else + echo "sorry, doi not found.." + clean_up +fi + + + +function import_bib { + #decide whether to process and move an associated pdf or just exit + if [ -z "$fn" ]; then + append_bibfile + clean_up + else + extract_name + append_pdf + append_bibfile + clean_up + fi +} + +function fetchBib_pubmed { + #request pubmed xml and transform into bibtex + curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$uid&retmode=xml" > $tmpBib.xml + xsltproc --novalid $styleSheet $tmpBib.xml > $tmpBib +} + +function fetchBib_doiDotOrg { + echo "pubmed id not found, trying doi.org.." + curl -LH 'Accept: application/x-bibtex' "http//dx.doi.org/"$doi >> $tmpBib + echo -e "\n" >> $tmpBib +} + +function extract_name { #extract some strings to make a nice filename for the pdf key="LastName"; - author=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)\W*#\1#" | tr -d " ") + author=$(grep $key --max-count=1 $tmpBib.xml | sed -E "s|\W*<$key>(.+)\W*|\1|" | tr -d " ") key="MedlineTA"; - journal=$(grep $key --max-count=1 $uid.xml | sed -E "s#\W*<$key>(.+)\W*#\1#" | tr -d " ") + journal=$(grep $key --max-count=1 $tmpBib.xml | sed -E "s|\W*<$key>(.+)\W*|\1|" | tr -d " ") key1="PubDate"; - key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $uid.xml | grep $key2 | sed -E "s#\W*<$key2>(.+)\W*#\1#") + key2="Year"; year=$(awk "/<$key1>/,/<\/$key1>/" $tmpBib.xml | grep $key2 | sed -E "s|\W*<$key2>(.+)\W*|\1|") +} + +function append_bibfile { + #import bibtex + #first grep for a uid (doi) in case its already in db + if [[ -z $(rg $doi $bibdFileOut) ]]; then + echo "importing $tmpBib" + cat $tmpBib >> $bibdFileOut + else + echo "$doi already found in $bibdFileOut, exiting" + fi +} + + +function append_pdf { fn2=${author}_${journal}$year-$uid.pdf - #move pdf file to papers repository, add file name to bibtex file field mv $fn $pdfPathOut/$fn2 echo "moved to $pdfPathOut/$fn2" - sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" $uid.bib + sed -i -E "s|(\W*file = \{).*(\}.*)|\1$relPath/$fn2\2|" $tmpBib +} - if [[ -z $(rg $uid $bibdFileOut) ]]; then - #import bibtex - echo "importing $uid.bib" - cat $uid.bib >> $bibdFileOut - else - echo "$uid already found in $bibdFileOut, exiting" - fi +function clean_up { #clean up - rm $uid.xml $uid.bib -fi + rm -f $tmpBib $tmpBib.xml + exit 1 +} +