#!/bin/bash

# ===========================================================================
#
#                            PUBLIC DOMAIN NOTICE
#            National Center for Biotechnology Information (NCBI)
#
#  This software/database is a "United States Government Work" under the
#  terms of the United States Copyright Act.  It was written as part of
#  the author's official duties as a United States Government employee and
#  thus cannot be copyrighted.  This software/database is freely available
#  to the public for use. The National Library of Medicine and the U.S.
#  Government do not place any restriction on its use or reproduction.
#  We would, however, appreciate having the NCBI and the author cited in
#  any work or product based on this material.
#
#  Although all reasonable efforts have been taken to ensure the accuracy
#  and reliability of the software and data, the NLM and the U.S.
#  Government do not and cannot warrant the performance or results that
#  may be obtained by using this software or data. The NLM and the U.S.
#  Government disclaim all warranties, express or implied, including
#  warranties of performance, merchantability or fitness for any particular
#  purpose.
#
# ===========================================================================
#
# File Name:  esearch
#
# Author:  Jonathan Kans, Aaron Ucko
#
# Version Creation Date:   06/15/2020
#
# ==========================================================================

# directory that contains this script
pth=$( dirname "$0" )

# turn a relative location into an absolute one (a leading slash means
# the path is already absolute and is left untouched)
if [ "${pth#/}" = "$pth" ]
then
  pth=$(cd "$pth" && pwd)
fi

# append the script directory to PATH unless it is already listed
case ":$PATH:" in
  *:"$pth":* ) ;;
  * ) export PATH="$PATH:$pth" ;;
esac

# handle common flags - dot command is equivalent of "source"

if [ -f "$pth"/ecommon.sh ]
then
  . "$pth"/ecommon.sh
else
  echo "ERROR: Unable to find '$pth/ecommon.sh' file" >&2
  exit 1
fi

# initialize specific flags

# mode switches
internal=false
isFilter=false

# query text and the flags that select a -query variant
query=""
spell=false transl=false tstack=false

# result ordering
sort=""

# initialize shortcuts

# publication-oriented shortcuts
pub="" titl="" jour="" rlsd=""

# sequence-oriented shortcuts
ctry="" feat="" locn="" mol="" orgn=""
sorc="" divn="" kywd="" prps=""

# status / type shortcuts
stts="" type=""

# variant class shortcut
clss=""

# kind / pathway shortcuts
kind="" ptwy=""

# check for deprecated arguments

# If any deprecated flag appears anywhere on the command line, hand the
# complete, unmodified argument list to ecollect and stop here.
for elm in "$@"
do
  if [ "$elm" = "-count" ] || [ "$elm" = "-subset" ] || [ "$elm" = "-uids" ]
  then
    # forward entire request to ecollect
    ecollect "$@"
    exit 0
  fi
done

# read command-line arguments

# Walk the command line one flag at a time. Flags that take a value are
# first passed to CheckForArgumentValue with the remaining argument count
# (presumably it reports a missing value - defined in ecommon.sh), then the
# flag and its value are shifted off. Anything else starting with a hyphen
# is offered to ParseCommonArgs, which reports via $argsConsumed how many
# arguments it handled.
while [ $# -gt 0 ]
do
  tag="$1"
  rem="$#"
  case "$tag" in
    -internal )
      internal=true
      shift
      ;;
    -newmode | -oldmode )
      # accepted for compatibility, no effect here
      shift
      ;;
    -db )
      CheckForArgumentValue "$tag" "$rem"
      shift
      db="$1"
      shift
      ;;
    -query )
      shift
      if [ $# -gt 0 ]
      then
        # only one query string may be supplied
        if [ -z "$query" ]
        then
          query="$1"
          shift
        else
          DisplayError "Multiple -query arguments"
          exit 1
        fi
      else
        DisplayError "Missing -query argument"
        exit 1
      fi
      ;;
    -filter )
      # efilter is implemented as esearch -filter "$@"
      isFilter=true
      shift
      ;;
    # -query VARIANTS
    -spell )
      spell=true
      shift
      if [ $# -gt 0 ] && [ -z "$query" ]
      then
        case "$1" in
          -* )
            # do not advance, the next flag is handled on the next pass of the while loop
            ;;
          * )
            # query string is immediately after -spell flag (undocumented)
            query="$1"
            shift
            ;;
        esac
      fi
      ;;
    -translate | -transl | -translation )
      shift
      transl=true
      if [ $# -gt 0 ]
      then
        if [ -z "$query" ]
        then
          case "$1" in
            -* )
              # just set flag if next argument starts with hyphen
              ;;
            * )
              query="$1"
              shift
              ;;
          esac
        else
          DisplayError "Multiple -query arguments"
          exit 1
        fi
      else
        DisplayError "Missing -query argument"
        exit 1
      fi
      ;;
    -component | -components | -stack )
      shift
      tstack=true
      if [ $# -gt 0 ]
      then
        if [ -z "$query" ]
        then
          case "$1" in
            -* )
              # just set flag if next argument starts with hyphen
              ;;
            * )
              query="$1"
              shift
              ;;
          esac
        else
          DisplayError "Multiple -query arguments"
          exit 1
        fi
      else
        DisplayError "Missing -query argument"
        exit 1
      fi
      ;;
    # SORT
    -sort )
      CheckForArgumentValue "$tag" "$rem"
      shift
      sort="$1"
      shift
      ;;
    # DATE RESTRICTION
    -days | -reldate )
      CheckForArgumentValue "$tag" "$rem"
      shift
      # value is evaluated as a shell arithmetic expression
      reldate=$(( $1 ))
      shift
      ;;
    -mindate )
      CheckForArgumentValue "$tag" "$rem"
      shift
      mindate="$1"
      shift
      ;;
    -maxdate )
      CheckForArgumentValue "$tag" "$rem"
      shift
      maxdate="$1"
      shift
      ;;
    -datetype )
      CheckForArgumentValue "$tag" "$rem"
      shift
      datetype="$1"
      shift
      ;;
    # FILTERS
    -pub )
      CheckForArgumentValue "$tag" "$rem"
      shift
      pub="$1"
      shift
      ;;
    -title )
      CheckForArgumentValue "$tag" "$rem"
      shift
      titl="$1"
      shift
      ;;
    -journal )
      CheckForArgumentValue "$tag" "$rem"
      shift
      jour="$1"
      shift
      ;;
    -released )
      CheckForArgumentValue "$tag" "$rem"
      shift
      rlsd="$1"
      shift
      ;;
    -country | -geo_loc_name )
      CheckForArgumentValue "$tag" "$rem"
      shift
      ctry="$1"
      shift
      ;;
    -feature )
      CheckForArgumentValue "$tag" "$rem"
      shift
      feat="$1"
      shift
      ;;
    -location )
      CheckForArgumentValue "$tag" "$rem"
      shift
      locn="$1"
      shift
      ;;
    -molecule )
      CheckForArgumentValue "$tag" "$rem"
      shift
      mol="$1"
      shift
      ;;
    -organism )
      CheckForArgumentValue "$tag" "$rem"
      shift
      orgn="$1"
      shift
      ;;
    -source )
      CheckForArgumentValue "$tag" "$rem"
      shift
      sorc="$1"
      shift
      ;;
    -division )
      CheckForArgumentValue "$tag" "$rem"
      shift
      divn="$1"
      shift
      ;;
    -keyword )
      CheckForArgumentValue "$tag" "$rem"
      shift
      kywd="$1"
      shift
      ;;
    -purpose )
      CheckForArgumentValue "$tag" "$rem"
      shift
      prps="$1"
      shift
      ;;
    -status )
      CheckForArgumentValue "$tag" "$rem"
      shift
      stts="$1"
      shift
      ;;
    -type )
      CheckForArgumentValue "$tag" "$rem"
      shift
      type="$1"
      shift
      ;;
    -class )
      CheckForArgumentValue "$tag" "$rem"
      shift
      clss="$1"
      shift
      ;;
    -kind )
      CheckForArgumentValue "$tag" "$rem"
      shift
      kind="$1"
      shift
      ;;
    -pathway )
      CheckForArgumentValue "$tag" "$rem"
      shift
      ptwy="$1"
      shift
      ;;
    # HELP
    -h | -help | --help | help )
      newVersion=$( NewerEntrezDirectVersion )
      # efilter shares this script, so pick the matching banner and help file
      if [ "$isFilter" = true ]
      then
        helpName="efilter"
      else
        helpName="esearch"
      fi
      echo "$helpName $version"
      echo ""
      # the upgrade notice is shown both before and after the help text
      if [ -n "$newVersion" ]
      then
        DisplayNote "EDirect version ${newVersion} is now available"
        echo "" >&2
      fi
      cat "$pth/help/${helpName}-help.txt"
      echo ""
      if [ -n "$newVersion" ]
      then
        DisplayNote "EDirect version ${newVersion} is now available"
        echo "" >&2
      fi
      exit 0
      ;;
    # OTHER COMMON ARGUMENTS
    -* )
      ParseCommonArgs "$@"
      if [ "$argsConsumed" -gt 0 ]
      then
        shift "$argsConsumed"
      else
        DisplayError "Unrecognized option $1"
        exit 1
      fi
      ;;
    * )
      # stray non-flag word: report it and keep going
      DisplayError "Unrecognized argument $1"
      shift
      ;;
  esac
done

FinishSetup

# check for ENTREZ_DIRECT message from stdin (for combining independent queries)

ParseStdin

# prefer -db argument over WebEnv value

if [ -n "$db" ]
then
  dbase="$db"
fi

# nothing was supplied that could be searched directly
if [ -z "$ids$rest$qury$query$input" ]
then
  needHistory=true
fi

# check for missing database argument

if [ -z "$dbase" ]
then
  DisplayError "Missing -db argument"
  exit 1
fi

# normalize to lower-case (e.g., SRA -> sra); done before the alias test
# below so that mixed-case spellings such as "Nucleotide" are mapped too

dbase=$( echo "$dbase" | tr '[:upper:]' '[:lower:]' )

# "nucleotide" is treated as an alias of "nuccore"

if [ "$dbase" = "nucleotide" ]
then
  dbase="nuccore"
fi

# take optional days and datetype arguments from message

# For each date setting, a value given on the command line wins; otherwise
# the corresponding value carried in the incoming message (the *x variable)
# is adopted when it is non-empty.
[ -n "$reldate" ]  || [ -z "$reldatex" ]  || reldate="$reldatex"
[ -n "$mindate" ]  || [ -z "$mindatex" ]  || mindate="$mindatex"
[ -n "$maxdate" ]  || [ -z "$maxdatex" ]  || maxdate="$maxdatex"
[ -n "$datetype" ] || [ -z "$datetypex" ] || datetype="$datetypex"

# -title argument can be used in lieu of an explicit -query

# AddTitleArg QUERY TITLE
# Prints QUERY extended with one "word [TITL]" clause per distinct title
# word (case-insensitively de-duplicated), all joined with " AND ".
# The words are produced by the word-at-a-time and filter-stop-words tools.
# QUERY is printed unchanged when no words survive.
AddTitleArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=$(
    echo "$arg" |
    word-at-a-time |
    filter-stop-words |
    sort -f |
    uniq -i |
    sed -e 's/$/ [TITL]/' |
    tr '\n' ',' |
    sed -e 's/,$//g' -e 's/,/ AND /g'
  )

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

if [ -n "$titl" ]
then
  query=$( AddTitleArg "$query" "$titl" )
fi

# remove internal apostrophes - otherwise can lead to discrepancy between expected and observed count

# drop every apostrophe, whether written literally or as &apos;
[ -z "$query" ] || query=$( echo "$query" | sed -e "s/\&apos;/'/g; s/'//g" )

# check for missing esearch query argument

if [ "$isFilter" = false ] && [ -z "$query" ]
then
  DisplayError "Missing -query argument"
  exit 1
fi

# convert labels in query to history access numbers

# ConvertLabels QUERY
# Reads Key/Val pairs extracted by xtract from the Label records in $labels
# and, for each pair, rewrites every "(#Key)" in QUERY to "(#Val)".
# The loop prints the query after each substitution; only the final line,
# which has all substitutions applied, is kept by tail.
# Prints nothing when xtract yields no pairs.
# NOTE(review): Key and Val are interpolated into the sed expression as-is,
# so they are assumed to contain no sed metacharacters - confirm.
ConvertLabels() {

  qry="$1"
  echo "$labels" |
  xtract -pattern Label -element Key Val |
  while read key val
  do
    # do successive conversions one at a time
    qry=$( echo "$qry" | sed -e "s/(#${key})/(#${val})/g" )
    echo "$qry"
  done |
  # only report last line with all conversions done
  tail -n 1
}

# apply label conversion only when both a query and labels are present,
# and keep the original query if the conversion produced nothing
if [ -n "$query" ] && [ -n "$labels" ]
then
  conv=$( ConvertLabels "$query" )
  if [ -n "$conv" ]
  then
    query="$conv"
  fi
fi

# shortcut functions

# AddPubArg QUERY OPTIONS
# Splits OPTIONS into words with word-at-a-time and appends the clause for
# each recognized -pub keyword to QUERY, then prints the result.
# "published" is attached with NOT, every other keyword with AND.
# An unrecognized keyword is reported through DisplayError and ends the
# enclosing subshell with status 1.
AddPubArg() {

  qry="$1"
  arg="$2"

  # the first clause is joined with AND only when a query already exists
  sep="${qry:+ AND }"

  echo "$arg" |
  word-at-a-time |
  (
    while read itm
    do
      trm=""
      case "$itm" in
        abstract )                   trm="has abstract [FILT]" ;;
        clinical | trial )           trm="clinical trial [FILT]" ;;
        english )                    trm="english [FILT]" ;;
        free )                       trm="freetext [FILT]" ;;
        historical )                 trm="historical article  [FILT]" ;;
        journal )                    trm="journal article [FILT]" ;;
        last_month | "last month" )  trm="published last month [FILT]" ;;
        last_week | "last week" )    trm="published last week [FILT]" ;;
        last_year | "last year" )    trm="published last year [FILT]" ;;
        medline )                    trm="medline [FILT]" ;;
        preprint )                   trm="ahead of print [FILT]" ;;
        published )
          # everything that is NOT ahead of print
          trm="ahead of print [FILT]"
          sep=" NOT "
          ;;
        retracted )                  trm="retracted publication [PTYP]" ;;
        retraction )                 trm="retraction of publication [PTYP]" ;;
        review )                     trm="review [FILT]" ;;
        structured )                 trm="hasstructuredabstract [WORD]" ;;
        * )
          DisplayError "Unrecognized -pub option $itm"
          exit 1
          ;;
      esac
      [ -z "$trm" ] || qry="${qry}${sep}${trm}"
      # later clauses default back to AND
      sep=" AND "
    done
    echo "$qry"
  )
}

# AddReleasedArg QUERY PERIOD
# Prints QUERY extended with the publication-period filter for PERIOD.
# "prev_years" is expressed as NOT "published last year".
# An unrecognized PERIOD is reported through DisplayError, then exit 1.
AddReleasedArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  case "$arg" in
    last_month | "last month" ) trm="published last month [FILT]" ;;
    last_week | "last week" )   trm="published last week [FILT]" ;;
    last_year | "last year" )   trm="published last year [FILT]" ;;
    prev_years | "prev years" )
      trm="published last year [FILT]"
      sep=" NOT "
      ;;
    * )
      DisplayError "Unrecognized -released option $arg"
      exit 1
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddJournalArg QUERY JOURNAL
# Prints QUERY extended with "JOURNAL [JOUR]", joined by " AND " when
# QUERY is not empty.
AddJournalArg() {

  qry="$1"
  arg="$2"

  sep="${qry:+ AND }"

  qry="${qry}${sep}${arg} [JOUR]"

  echo "$qry"
}

# AddCountryArg QUERY COUNTRY
# Prints QUERY extended with "country COUNTRY [TEXT]", joined by " AND "
# when QUERY is not empty.
AddCountryArg() {

  qry="$1"
  arg="$2"

  sep="${qry:+ AND }"

  qry="${qry}${sep}country ${arg} [TEXT]"

  echo "$qry"
}

# AddFeatureArg QUERY FEATURES
# Prints QUERY extended with one "name [FKEY]" clause per feature name.
# FEATURES is lower-cased and every character outside letters, digits,
# underscore, period, apostrophe and hyphen acts as a separator.
AddFeatureArg() {

  qry="$1"
  arg="$2"

  # the first clause is joined with AND only when a query already exists
  sep="${qry:+ AND }"

  # fmt -w 1 leaves one word per line for the read loop
  echo "$arg" |
  sed "s/[^a-zA-Z0-9_.'-]/ /g; s/^ *//" |
  tr 'A-Z' 'a-z' |
  fmt -w 1 |
  (
    while read itm
    do
      [ -z "$itm" ] || qry="${qry}${sep}${itm} [FKEY]"
      sep=" AND "
    done
    echo "$qry"
  )
}

# AddLocationArg QUERY LOCATION
# Prints QUERY extended with the organelle/plasmid filter for LOCATION.
# An unrecognized LOCATION is reported through DisplayError, then exit 1.
AddLocationArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  case "$arg" in
    mitochondria | mitochondrial | mitochondrion )
      trm="mitochondrion [FILT]"
      ;;
    chloroplast | plasmid | plastid )
      # the filter name is the option itself
      trm="$arg [FILT]"
      ;;
    * )
      DisplayError "Unrecognized -location option $arg"
      exit 1
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddMoleculeArg QUERY MOLECULE
# Prints QUERY extended with "biomol MOLECULE [PROP]" for one of the
# accepted molecule types.
# An unrecognized MOLECULE is reported through DisplayError, then exit 1.
AddMoleculeArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  case "$arg" in
    genomic | mrna | trna | rrna | ncrna )
      # the property value is the option itself
      trm="biomol $arg [PROP]"
      ;;
    * )
      DisplayError "Unrecognized -molecule option $arg"
      exit 1
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddOrganismArg QUERY ORGANISM
# Prints QUERY extended with an organism restriction. Well-known group
# names and their synonyms map to a fixed filter or organism term; any
# other value is passed through as "ORGANISM [ORGN]".
AddOrganismArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  case "$arg" in
    animal | animals | metazoa )                                 trm="animals [FILT]" ;;
    archaea | archaeal | archaebacteria | archaebacterial )      trm="archaea [FILT]" ;;
    bacteria | bacterial | bacterium | eubacteria | eubacterial ) trm="bacteria [FILT]" ;;
    eukaryota | eukaryote | eukaryotes )                         trm="eukaryota [ORGN]" ;;
    fungal | fungi | fungus )                                    trm="fungi [FILT]" ;;
    human | humans | man )                                       trm="human [ORGN]" ;;
    insect | insecta | insects )                                 trm="insecta [ORGN]" ;;
    mammal | mammalia | mammalian | mammals )                    trm="mammals [FILT]" ;;
    metaphyta | plant | plants )                                 trm="plants [FILT]" ;;
    prokaryota | prokaryote | prokaryotes )                      trm="prokaryota [ORGN]" ;;
    protist | protista | protists )                              trm="protists [FILT]" ;;
    rodent | rodentia | rodents )                                trm="rodents [ORGN]" ;;
    viral | virus | viruses )                                    trm="viruses [FILT]" ;;
    * )
      # allow any organism
      trm="$arg [ORGN]"
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddSourceArg QUERY SOURCE
# Prints QUERY extended with the source-database restriction for SOURCE.
# An unrecognized SOURCE is reported through DisplayError, then exit 1.
AddSourceArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  case "$arg" in
    ddbj | embl | genbank | pdb | pir | refseq )
      # the srcdb value is the option itself
      trm="srcdb $arg [PROP]"
      ;;
    insd )      trm="srcdb ddbj/embl/genbank [PROP]" ;;
    "select" )  trm="refseq select [FILT]" ;;
    swissprot ) trm="srcdb swiss prot [PROP]" ;;
    tpa )       trm="srcdb tpa ddbj/embl/genbank [PROP]" ;;
    * )
      DisplayError "Unrecognized -source option $arg"
      exit 1
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddDivisionArg QUERY DIVISIONS
# DIVISIONS is a comma- and/or whitespace-separated list of division
# codes. Prints QUERY extended with a parenthesized group in which the
# "gbdiv CODE [PROP]" clauses are joined by " OR ".
# An unrecognized code is reported through DisplayError, then exit 1.
AddDivisionArg() {

  qry="$1"
  arg="$2"

  sep=""

  if [ -n "$qry" ]
  then
    sep=" AND "
  fi

  # Split into an array rather than through an unquoted expansion, so that
  # a value such as "p*" is rejected as written instead of being matched
  # against file names in the current directory. read returns non-zero at
  # end of input with -d '', which is expected and ignored.
  local items
  IFS=$' \t\n' read -r -d '' -a items <<< "${arg//,/ }"

  divs=""
  seps=""
  for itm in "${items[@]}"
  do
    trm=""
    case "$itm" in
      bct | con | env | est | gss | htc | htg | inv | mam | pat | phg | pln | pri | rod | sts | syn | una | vrl | vrt )
        trm="gbdiv $itm [PROP]"
        ;;
      * )
        DisplayError "Unrecognized -division option $itm"
        exit 1
        ;;
    esac
    if [ -n "$trm" ]
    then
      divs="${divs}${seps}${trm}"
      seps=" OR "
    fi
  done
  if [ -n "$divs" ]
  then
    qry="${qry}${sep} ( ${divs} )"
  fi
  echo "$qry"
}

# AddKeywordArg QUERY KEYWORD
# Prints QUERY extended with the keyword clause for KEYWORD; "purpose" is
# the only accepted value.
# Any other KEYWORD is reported through DisplayError, then exit 1.
AddKeywordArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  if [ "$arg" = "purpose" ]
  then
    trm="purpose* [KYWD]"
  else
    DisplayError "Unrecognized -keyword option $arg"
    exit 1
  fi

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddPurposeArg QUERY PURPOSE
# Prints QUERY extended with the sampling-purpose keyword clause for
# PURPOSE ("baseline" or "targeted").
# Any other PURPOSE is reported through DisplayError, then exit 1.
AddPurposeArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  case "$arg" in
    baseline ) trm="( purposeofsampling baselinesurveillance [KYWD] OR purpose of sequencing baselinesurveillance [KYWD] )" ;;
    targeted ) trm="purposeofsampling targetedefforts [KYWD]" ;;
    * )
      DisplayError "Unrecognized -purpose option $arg"
      exit 1
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddStatusArg QUERY STATUS DATABASE
# Prints QUERY extended with the status property for STATUS. The accepted
# values depend on DATABASE: gene takes alive/live/living, assembly takes
# latest/replaced.
# Any other database/status pair is reported through DisplayError, then
# exit 1.
AddStatusArg() {

  qry="$1"
  arg="$2"
  dbs="$3"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  # match on the database:status pair in a single step
  case "$dbs:$arg" in
    gene:alive | gene:live | gene:living )
      trm="alive [PROP]"
      ;;
    assembly:latest | assembly:replaced )
      # the property name is the option itself
      trm="$arg [PROP]"
      ;;
    * )
      DisplayError "Unrecognized -status option $arg"
      exit 1
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddTypeArg QUERY TYPE
# Prints QUERY extended with the gene-type property for TYPE ("coding" or
# "pseudo").
# Any other TYPE is reported through DisplayError, then exit 1.
AddTypeArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  case "$arg" in
    coding ) trm="genetype protein coding [PROP]" ;;
    pseudo ) trm="genetype pseudo [PROP]" ;;
    * )
      DisplayError "Unrecognized -type option $arg"
      exit 1
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddClassArg QUERY CLASS
# Prints QUERY extended with the variant function-class clause for CLASS.
# An unrecognized CLASS is reported through DisplayError, then exit 1.
AddClassArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  case "$arg" in
    acceptor )   trm="splice acceptor variant [FXN]" ;;
    donor )      trm="splice donor variant [FXN]" ;;
    coding )     trm="coding sequence variant [FXN]" ;;
    frameshift ) trm="frameshift variant [FXN]" ;;
    indel )      trm="inframe indel [FXN]" ;;
    intron )     trm="intron variant [FXN]" ;;
    missense )   trm="missense variant [FXN]" ;;
    nonsense )   trm="terminator codon variant [FXN]" ;;
    synonymous ) trm="synonymous variant [FXN]" ;;
    * )
      DisplayError "Unrecognized -class option $arg"
      exit 1
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddKindArg QUERY KIND
# Prints QUERY extended with the record-type clause for KIND; "pathway" is
# the only accepted value.
# Any other KIND is reported through DisplayError, then exit 1.
AddKindArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  if [ "$arg" = "pathway" ]
  then
    trm="pathway [TYPE]"
  else
    DisplayError "Unrecognized -kind option $arg"
    exit 1
  fi

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# AddPathwayArg QUERY SOURCE
# Prints QUERY extended with the pathway-source filter for SOURCE
# ("reactome" or "wikipathways").
# Any other SOURCE is reported through DisplayError, then exit 1.
AddPathwayArg() {

  qry="$1"
  arg="$2"

  # join with AND only when there is already a query to extend
  sep="${qry:+ AND }"

  trm=""
  case "$arg" in
    reactome | wikipathways )
      # the filter value is the option itself
      trm="src $arg [FILT]"
      ;;
    * )
      DisplayError "Unrecognized -pathway option $arg"
      exit 1
      ;;
  esac

  [ -z "$trm" ] || qry="${qry}${sep}${trm}"

  echo "$qry"
}

# warn on mismatch between filter argument and database

# Sequence shortcuts are always allowed for the sequence databases. For
# -db gene they are allowed only when -organism is present. In every other
# situation, using any of them is an error.
case "$dbase" in
  nucleotide | nuccore | protein )
    ;;
  * )
    if [ "$dbase" != "gene" ] || [ -z "$orgn" ]
    then
      # a non-empty concatenation means at least one shortcut was given
      if [ -n "${ctry}${feat}${locn}${mol}${orgn}${sorc}${divn}${kywd}${prps}" ]
      then
        DisplayError "Unexpected use of sequence filter argument"
        exit 1
      fi
    fi
    ;;
esac

# spell check each query word

# With -spell, ask espell.fcgi for a corrected query and adopt it when one
# is returned; otherwise the query is left as typed.
if [ "$spell" = true ]
then
  spl=$(
    RunWithCommonArgs nquire -url "$base" espell.fcgi -db "$dbase" -term "$query" |
    xtract -pattern eSpellResult -element CorrectedQuery
  )
  [ -z "$spl" ] || query="$spl"
fi

# add shortcuts to query

# Each shortcut helper runs inside a command substitution, so the "exit 1"
# it performs on an unrecognized value only ends that subshell and leaves
# $query empty. The "|| exit 1" after every call stops the script as well,
# instead of continuing with a query that has lost its earlier terms.

if [ -n "$pub" ]
then
  query=$( AddPubArg "$query" "$pub" ) || exit 1
fi

if [ -n "$rlsd" ]
then
  query=$( AddReleasedArg "$query" "$rlsd" ) || exit 1
fi

if [ -n "$jour" ]
then
  query=$( AddJournalArg "$query" "$jour" ) || exit 1
fi

if [ -n "$ctry" ]
then
  query=$( AddCountryArg "$query" "$ctry" ) || exit 1
fi

if [ -n "$feat" ]
then
  query=$( AddFeatureArg "$query" "$feat" ) || exit 1
fi

if [ -n "$locn" ]
then
  query=$( AddLocationArg "$query" "$locn" ) || exit 1
fi

if [ -n "$mol" ]
then
  query=$( AddMoleculeArg "$query" "$mol" ) || exit 1
fi

if [ -n "$orgn" ]
then
  query=$( AddOrganismArg "$query" "$orgn" ) || exit 1
fi

if [ -n "$sorc" ]
then
  query=$( AddSourceArg "$query" "$sorc" ) || exit 1
fi

if [ -n "$divn" ]
then
  query=$( AddDivisionArg "$query" "$divn" ) || exit 1
fi

if [ -n "$kywd" ]
then
  query=$( AddKeywordArg "$query" "$kywd" ) || exit 1
fi

if [ -n "$prps" ]
then
  query=$( AddPurposeArg "$query" "$prps" ) || exit 1
fi

if [ -n "$stts" ]
then
  # accepted status values depend on the database
  query=$( AddStatusArg "$query" "$stts" "$dbase" ) || exit 1
fi

if [ -n "$type" ]
then
  query=$( AddTypeArg "$query" "$type" ) || exit 1
fi

if [ -n "$clss" ]
then
  query=$( AddClassArg "$query" "$clss" ) || exit 1
fi

if [ -n "$kind" ]
then
  query=$( AddKindArg "$query" "$kind" ) || exit 1
fi

if [ -n "$ptwy" ]
then
  query=$( AddPathwayArg "$query" "$ptwy" ) || exit 1
fi

# remove leading, trailing, and multiple spaces

# trim both ends and collapse runs of spaces to a single space
if [ -n "$query" ]
then
  query=$( echo "$query" | sed -e 's/^ *//g' -e 's/ *$//g' -e 's/  */ /g' )
fi

# for -db assembly, [ACCN] field tags are rewritten to [ASAC]

if [ "$dbase" = "assembly" ]
then
  query=$( echo "$query" | sed -e 's/\[ACCN\]/[ASAC]/g' )
fi

# esearch (as opposed to efilter) always requires a query string

if [ "$isFilter" = false ] && [ -z "$query" ]
then
  # warn on insufficient arguments
  DisplayError "Missing -query argument"
  exit 1
fi

# efilter needs at least one of: -query, -days, or both -mindate and -maxdate

ok=false
if [ -n "$query" ] || [ -n "$reldate" ] || { [ -n "$mindate" ] && [ -n "$maxdate" ]; }
then
  ok=true
fi

if [ "$ok" != true ]
then
  DisplayError "Missing -query argument"
  exit 1
fi

# no key if combining independent queries

# esearch starts a fresh search, so any query key that was read in is
# discarded; efilter keeps it
if [ "$isFilter" = false ]
then
  qry_key=""
fi

# adjustments on search variables

# the capitalized spelling of the relevance sort order is lower-cased
case "$sort" in
  Relevance ) sort="relevance" ;;
esac

# normalize date arguments

FixDateConstraints

# protect embedded 'and', 'or', and 'not' terms in single token filter,
# properties, and organism fields, in select biological databases, since
# lower-case Boolean operators will be replaced with AND according to:
#   https://www.nlm.nih.gov/pubs/techbull/ja97/ja97_pubmed.html
# although only 'or' and 'not' actually cause misinterpretation of:
#   -db biosample -query "package metagenome or environmental version 1 0 [PROP]"

# changed to replace all internal spaces with underscore, except leaving one
# space before field bracket, and added assembly to list of databases given
# this special processing of FILT, PROP, and ORGN controlled vocabularies

# ProtectWithUnderscores TERM
# Prints TERM with its internal spaces replaced by underscores, while the
# field bracket stays separated from the text by spaces. The bracket
# spacing step is applied twice, so two spaces precede the bracket in the
# output.
ProtectWithUnderscores() {

  echo "$1" |
  sed -e 's/ \[/\[/g' \
      -e 's/ /_/g' \
      -e 's/\[/ \[/g' \
      -e 's/\[/ \[/g' \
      -e 's/_ \[/ \[/g'
}

# ProcessEntrezQuery QUERY
# Breaks QUERY into pieces at parentheses and at upper-case AND / OR / NOT
# operators, using the vertical bar as a temporary delimiter, and prints
# one piece per line. Pieces ending in a filter, properties or organism
# field tag (short or long form, any case) are passed through
# ProtectWithUnderscores; every other non-empty piece is printed with its
# spacing normalized.
ProcessEntrezQuery() {

  echo "$1" |
  sed -e 's/(/ | ( | /g' \
      -e 's/)/ | ) | /g' \
      -e "s/ AND / | AND | /g" \
      -e "s/ OR / | OR | /g" \
      -e "s/ NOT / | NOT | /g" |
  tr '|' '\n' |
  while read item
  do
    item=$( echo "$item" | sed -e 's/^ *//g; s/ *$//g; s/  */ /g' )
    # lower-cased copy is used only for matching the field tag
    opt=$( echo "$item" | tr '[:upper:]' '[:lower:]' )
    case "$opt" in
      "" )
        ;;
      *"[filt]" | *"[filter]" | *"[prop]" | *"[properties]" | *"[orgn]" | *"[organism]" )
        ProtectWithUnderscores "$item"
        ;;
      * )
        echo "$item"
        ;;
    esac
  done
}

# Apply the underscore protection only for the listed biological databases.
case "$dbase" in
  nuc* | prot* | gene | genome | taxonomy | assembly | clinvar | cdd | sra | ipg | bio* )
    # skip if query contains an embedded vertical bar, reserved for splitting in ProcessEntrezQuery
    if [ "${query#*\|}" = "$query" ]
    then
      query=$(
        ProcessEntrezQuery "$query" |
        tr '\n' ' ' |
        sed -e 's/^ *//g; s/ *$//g; s/  */ /g'
      )
    fi
    ;;
esac

# if "$query" argument and "$qry_key" history, parentheses around $query ensures "AND" on any internal expression

if [ -n "$qry_key" ] && [ -n "$query" ]
then
  query="( $query )"
fi

# helper function adds search-specific arguments (if set)

RunWithSearchArgs() {

  # Runs the command given in "$@" through RunWithCommonArgs, wrapped in a
  # chain of AddIfNotEmpty calls for the history (WebEnv, query_key), sort,
  # date restriction and term settings.
  # NOTE(review): AddIfNotEmpty is defined outside this file; presumably it
  # appends "-flag value" to the wrapped command only when the value is
  # non-empty - confirm against ecommon.sh.
  AddIfNotEmpty -WebEnv "$web_env" \
  AddIfNotEmpty -query_key "$qry_key" \
  AddIfNotEmpty -sort "$sort" \
  AddIfNotEmpty -reldate "$reldate" \
  AddIfNotEmpty -mindate "$mindate" \
  AddIfNotEmpty -maxdate "$maxdate" \
  AddIfNotEmpty -datetype "$datetype" \
  AddIfNotEmpty -term "$query" \
  RunWithCommonArgs "$@"
}

# With logging on, announce on stderr which of the two tools is running.
if [ "$log" = true ]
then
  case "$isFilter" in
    true ) printf "EFilter\n" >&2 ;;
    * )    printf "ESearch\n" >&2 ;;
  esac
fi

# specialized query variants

# -translate: print the query translation reported by esearch.fcgi, then stop
if [ "$transl" = true ]
then
  RunWithSearchArgs nquire -url "$base" esearch.fcgi -db "$dbase" |
  xtract -pattern eSearchResult -element QueryTranslation

  exit 0
fi

# -stack: print the individual terms of the translated query, one per line
if [ "$tstack" = true ]
then
  if [ "$dbase" = "pubmed" ]
  then
    # For pubmed the terms are recovered from the QueryTranslation text:
    # parentheses are blanked and the text is split at each Boolean operator.
    # awk does the splitting because "\n" in a sed replacement is not
    # portable (BSD sed emits a literal "n"). The three substitutions run in
    # the same order as before.
    RunWithSearchArgs nquire -url "$base" esearch.fcgi -db "$dbase" |
    xtract -mixed -pattern eSearchResult -element QueryTranslation |
    sed -e 's/(/ /g' -e 's/)/ /g' |
    awk '{ gsub(/ AND /, "\n"); gsub(/ OR /, "\n"); gsub(/ NOT /, "\n"); print }' |
    grep '.'
  else
    # other databases: list the Term of each TermSet in the TranslationStack
    RunWithSearchArgs nquire -url "$base" esearch.fcgi -db "$dbase" |
    xtract -pattern eSearchResult -group TranslationStack -block TermSet -tab "\n" -element Term
  fi

  exit 0
fi

# future PubMed SOLR server query

RunWithFilterArgs() {

  # Same wrapping as RunWithSearchArgs but without the WebEnv, query_key
  # and sort settings: only the date restriction and the term are passed
  # on. FilterSOLR supplies -WebEnv and -query_key itself.
  # NOTE(review): AddIfNotEmpty is defined outside this file; presumably it
  # appends "-flag value" only when the value is non-empty - confirm.
  AddIfNotEmpty -reldate "$reldate" \
  AddIfNotEmpty -mindate "$mindate" \
  AddIfNotEmpty -maxdate "$maxdate" \
  AddIfNotEmpty -datetype "$datetype" \
  AddIfNotEmpty -term "$query" \
  RunWithCommonArgs "$@"
}

# FilterSOLR
# Filters the UIDs delivered by GetUIDs against the current query and date
# restrictions, and prints the surviving UIDs numerically sorted with
# duplicates removed. Each batch of UIDs is posted to epost.fcgi and the
# resulting history entry is then searched with esearch.fcgi.
# NOTE(review): GetUIDs and join-into-groups-of are defined elsewhere;
# presumably each line read holds up to 1000 comma-separated UIDs - confirm.
FilterSOLR() {

  tranquil=true

  GetUIDs |
  join-into-groups-of 1000 |
  while read uids
  do
    # one progress dot per batch when logging
    if [ "$log" = true ]
    then
      printf "." >&2
    fi

    # commas in the batch become spaces before posting
    uids=$( echo "$uids" | tr ',' ' ' )
    res=$( RunWithCommonArgs nquire -url "$base" epost.fcgi -db "$dbase" -id "$uids" )
    if [ -n "$res" ]
    then
      # these assignments happen inside the pipeline's subshell, so they
      # are not visible to the caller after the loop ends
      err=""
      wenv=""
      qkey=""
      ParseMessage "$res" ePostResult wenv WebEnv qkey QueryKey
      # stdin is redirected from /dev/null so the search command cannot
      # consume the batches still waiting on the loop's input
      sch=$( RunWithFilterArgs nquire -url "$base" esearch.fcgi -db "$dbase" \
             -WebEnv "$wenv" -query_key "$qkey" -usehistory y -retmax 9999 < /dev/null )
      if [ -n "$sch" ]
      then
        # one matching UID per line
        echo "$sch" | xtract -pattern eSearchResult -block IdList -sep "\n" -element Id
      fi
    fi
  done |
  sort -n | uniq
}

# ( will also be used to handle <Id> fields in message for any database )
# efilter on a message that carries explicit UIDs: filter them with
# FilterSOLR and write a new ENTREZ_DIRECT message listing the survivors.
if [ -n "$rest" ] && [ "$isFilter" = true ]
then

  uids=$( FilterSOLR )

  # end the line of progress dots written during filtering
  if [ "$log" = true ]
  then
    printf "\n" >&2
  fi

  # number of surviving UIDs, left empty when nothing passed the filter
  nm=""
  if [ -n "$uids" ]
  then
    nm=$( echo "$uids" | wc -l | tr -d ' ' )
  fi

  seconds_end=$(date "+%s")
  seconds_elapsed=$((seconds_end - seconds_start))

  echo "<ENTREZ_DIRECT>"
  [ -z "$dbase" ] || echo "  <Db>${dbase}</Db>"
  # an empty count is reported as zero
  echo "  <Count>${nm:-0}</Count>"
  if [ -n "$stp" ]
  then
    stp=$(( stp + 1 ))
    echo "  <Step>${stp}</Step>"
  fi
  [ -z "$err" ] || echo "  <Error>${err}</Error>"
  if [ "$timer" = true ] && [ -n "$seconds_elapsed" ]
  then
    echo "  <Elapsed>${seconds_elapsed}</Elapsed>"
  fi

  # instantiate UIDs within ENTREZ_DIRECT message
  echo "$uids" |
  grep '.' |
  while read uid
  do
    echo "  <Id>${uid}</Id>"
  done

  echo "</ENTREZ_DIRECT>"

  exit 0
fi

# efilter on pubmed: combine the incoming query with the filter query, get
# the new count from ecollect, and write an ENTREZ_DIRECT message that
# carries the combined query and date settings.
if [ "$dbase" = "pubmed" ] && [ "$isFilter" = true ]
then

  # AND the filter onto the earlier query; parentheses keep the earlier
  # expression intact
  if [ -n "$query" ] && [ -n "$qury" ]
  then
    qury=$( echo "(${qury}) AND ${query}" )
  fi

  num=$( ecollect -db pubmed -count "${qury}" -reldate "$reldate" -mindate "$mindate" -maxdate "$maxdate" -datetype "$datetype" -tranquil < /dev/null )

  # elapsed time was never computed in this branch, so -timer could not
  # report it; calculate it the same way the other output branches do
  seconds_end=$(date "+%s")
  seconds_elapsed=$((seconds_end - seconds_start))

  echo "<ENTREZ_DIRECT>"
  if [ -n "$dbase" ]
  then
    echo "  <Db>${dbase}</Db>"
  fi
  echo "  <Query>${qury}</Query>"
  if [ -n "$mindate" ]
  then
    echo "  <MinDate>${mindate}</MinDate>"
  fi
  if [ -n "$maxdate" ]
  then
    echo "  <MaxDate>${maxdate}</MaxDate>"
  fi
  if [ -n "$reldate" ]
  then
    echo "  <RelDate>${reldate}</RelDate>"
  fi
  if [ -n "$datetype" ]
  then
    echo "  <DateType>${datetype}</DateType>"
  fi
  if [ -n "$num" ]
  then
    echo "  <Count>${num}</Count>"
  else
    echo "  <Count>0</Count>"
  fi
  if [ -n "$stp" ]
  then
    stp=$(( stp + 1 ))
    echo "  <Step>${stp}</Step>"
  fi
  if [ -n "$err" ]
  then
    echo "  <Error>${err}</Error>"
  fi
  if [ "$timer" = true ] && [ -n "$seconds_elapsed" ]
  then
    echo "  <Elapsed>${seconds_elapsed}</Elapsed>"
  fi

  echo "</ENTREZ_DIRECT>"

  exit 0
fi

# esearch on pubmed: fetch only the count (-retmax 0, no history) and write
# an ENTREZ_DIRECT message that carries the query text itself.
if [ "$dbase" = "pubmed" ]
then
  num="0"
  # count is accurate even for SOLR-based PubMed server
  tranquil=true
  res=$( RunWithSearchArgs nquire -url "$base" esearch.fcgi -retmax 0 -db "$dbase" )
  if [ -n "$res" ]
  then
    # drop the translation elements, remove empty lines, and turn the first
    # tab on each line into two spaces before extracting the count
    res=$(
      echo "$res" |
      sed -e 's|<QueryTranslation>.*</QueryTranslation>||' |
      sed -e 's|<TranslationSet>.*</TranslationSet>||' |
      sed -e 's|<TranslationStack>.*</TranslationStack>||' |
      grep '.' | sed -e "s|$(printf '\t')|  |"
    )
    num=$( echo "$res" | xtract -pattern eSearchResult -element Count )

    # decodeXML, then encodeXML (but do not reencode double-quote as &quot;)
    # Every occurrence is converted (the substitutions are global), so a
    # query with several &, < or > characters still yields well-formed XML.
    # &amp; is decoded last and & is encoded first, so that text such as
    # "&amp;gt;" survives the round trip unchanged.
    query=$( echo "$query" | sed -e "s/\&gt;/>/g; s/\&lt;/</g; s/\&apos;/'/g" -e 's/\&quot;/"/g' -e 's/\&amp;/\&/g' )
    query=$( echo "$query" | sed -e "s/\&/\&amp;/g; s/>/\&gt;/g; s/</\&lt;/g; s/'/\&apos;/g" )

    # WriteEDirect with Query field

    seconds_end=$(date "+%s")
    seconds_elapsed=$((seconds_end - seconds_start))

    echo "<ENTREZ_DIRECT>"
    echo "  <Db>${dbase}</Db>"
    echo "  <Query>${query}</Query>"
    if [ -n "$mindate" ]
    then
      echo "  <MinDate>${mindate}</MinDate>"
    fi
    if [ -n "$maxdate" ]
    then
      echo "  <MaxDate>${maxdate}</MaxDate>"
    fi
    if [ -n "$reldate" ]
    then
      echo "  <RelDate>${reldate}</RelDate>"
    fi
    if [ -n "$datetype" ]
    then
      echo "  <DateType>${datetype}</DateType>"
    fi
    if [ -n "$num" ]
    then
      echo "  <Count>${num}</Count>"
    else
      echo "  <Count>0</Count>"
    fi
    if [ -n "$stp" ]
    then
      stp=$(( stp + 1 ))
      echo "  <Step>${stp}</Step>"
    fi
    if [ -n "$err" ]
    then
      echo "  <Error>${err}</Error>"
    fi
    # pass the quick / debug / log settings on, whether they were given on
    # the command line or arrived as "Y" in the *x variables
    if [ "$quick" = true ] || [ "$quickx" = "Y" ]
    then
      echo "  <Quick>Y</Quick>"
    fi
    if [ "$debug" = true ] || [ "$debugx" = "Y" ]
    then
      echo "  <Debug>Y</Debug>"
    fi
    if [ "$log" = true ] || [ "$logx" = "Y" ]
    then
      echo "  <Log>Y</Log>"
    fi
    if [ "$timer" = true ] && [ -n "$seconds_elapsed" ]
    then
      echo "  <Elapsed>${seconds_elapsed}</Elapsed>"
    fi
    echo "</ENTREZ_DIRECT>"
  fi

  exit 0
fi

# normal query

# All remaining cases: run the search with the history server enabled
# (-usehistory y) and without retrieving any UIDs (-retmax 0).
err=""
num=""
res=$( RunWithSearchArgs nquire -url "$base" esearch.fcgi -retmax 0 -usehistory y -db "$dbase" )

if [ -n "$res" ]
then
  # remove the TranslationStack element before parsing
  res=$( echo "$res" | sed -e 's|<TranslationStack>.*</TranslationStack>||' )
  # NOTE(review): ParseMessage is defined elsewhere; presumably it stores the
  # WebEnv, QueryKey and Count element values in the named variables - confirm
  ParseMessage "$res" eSearchResult web_env WebEnv qry_key QueryKey num Count
fi

# emit the ENTREZ_DIRECT message; this runs even when the reply was empty
WriteEDirect "$dbase" "$web_env" "$qry_key" "$num" "$stp" "$err"

exit 0

