#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 
# The Pig command script
#
# Environment Variables
#
#     JAVA_HOME                The java implementation to use.    Required.
#
#     PIG_CLASSPATH Extra Java CLASSPATH entries.
#
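#     HADOOP_HOME/HADOOP_PREFIX     Hadoop installation directory (HADOOP_PREFIX as of 0.20.205).
#                                        Its bin/hadoop is used when no hadoop executable is on the
#                                        PATH; HADOOP_PREFIX is checked before HADOOP_HOME.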
#
#     HADOOP_CONF_DIR     Hadoop conf dir
#
#     PIG_HEAPSIZE    The maximum amount of heap to use, in MB. 
#                                        Default is 1000.
#
#     PIG_OPTS            Extra Java runtime options.
#
#     PIG_CONF_DIR    Alternate conf dir. Default is ${PIG_HOME}/conf.
#
#     HBASE_CONF_DIR - Optionally, the HBase configuration to run against
#                      when using HBaseStorage

# Remember whether we are running under Cygwin; the flag is consulted later
# when paths have to be translated for the JVM.
case "$(uname)" in
    CYGWIN*) cygwin=true ;;
    *)       cygwin=false ;;
esac
# filter command line parameter
#
# pig_filter_args ARG...
#   Separates the switches consumed by this launcher from the arguments that
#   are handed on to Pig.  Results are left in global variables:
#     debug            "true" when -secretDebugCmd was given, else "false"
#     includeHCatalog  "true" when -useHCatalog was given, else empty
#     additionalJars   value of a -Dpig.additional.jars=... argument that
#                      appears AFTER -useHCatalog (last one wins), else empty
#     remaining        array with every other argument, in original order
#     addJarString     the literal prefix "-Dpig.additional.jars="
#   A -Dpig.additional.jars=... argument seen before -useHCatalog is not
#   intercepted; it stays in "remaining".
pig_filter_args() {
    local arg
    debug=false
    remaining=()
    includeHCatalog=""
    addJarString="-Dpig.additional.jars="
    additionalJars=""
    for arg in "$@"; do
        if [[ "$arg" == "-secretDebugCmd" ]]; then
            debug=true
        elif [[ "$arg" == "-useHCatalog" ]]; then
            # if need to use hcatalog, we need to add the hcatalog and hive jars
            # to the classpath and also include the hive configuration xml file
            # for pig to work correctly with hcatalog
            # because of PIG-2532, including the jars in the classpath is
            # sufficient to ensure that they are registered as well
            includeHCatalog=true
        elif [[ "$includeHCatalog" == "true" && "$arg" == "$addJarString"* ]]; then
            # Strip the prefix with parameter expansion: no subprocesses, and
            # the value is kept verbatim (whitespace, glob characters).
            additionalJars=${arg#"$addJarString"}
        else
            remaining+=("$arg")
        fi
    done
}
pig_filter_args "$@"

# Path this script was started through ($0 may be a softlink).
this="${BASH_SOURCE-$0}"

# Make it absolute: enter the script's directory with symlinks resolved
# (cd -P / pwd -P) and re-attach the script's file name.
bin="$(cd -P -- "$(dirname -- "$this")" >/dev/null && pwd -P)"
script="$(basename -- "$this")"
this="${bin}/${script}"

# The Pig installation root is the parent of the script's directory.
PIG_HOME="$(dirname "$this")/.."
export PIG_HOME

# Pick the configuration directory unless the caller already set PIG_CONF_DIR:
#   1. ${PIG_HOME}/conf, when it contains pig.properties
#   2. /etc/pig, when that directory exists (rpm/deb package layout)
# Paths are quoted so an installation directory containing spaces works.
if [[ -z "$PIG_CONF_DIR" && -f "${PIG_HOME}/conf/pig.properties" ]]; then
    PIG_CONF_DIR=${PIG_HOME}/conf
fi

if [[ -z "$PIG_CONF_DIR" && -d /etc/pig ]]; then
    # if installed with rpm/deb package
    PIG_CONF_DIR="/etc/pig"
fi

# Source the optional environment file.  The -n guard keeps an empty
# PIG_CONF_DIR from turning the path into /pig-env.sh.
if [[ -n "$PIG_CONF_DIR" && -f "${PIG_CONF_DIR}/pig-env.sh" ]]; then
    . "${PIG_CONF_DIR}/pig-env.sh"
fi

# some Java parameters
#
# JAVA_HOME is mandatory; abort with a message on stderr when it is missing.
if [[ -z "$JAVA_HOME" ]]; then
    echo "Error: JAVA_HOME is not set." >&2
    exit 1
fi

# JVM binary and default maximum heap size.
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m

# check envvars which might override default args
# PIG_HEAPSIZE is a number of megabytes and replaces the default heap limit.
if [[ -n "$PIG_HEAPSIZE" ]]; then
    JAVA_HEAP_MAX="-Xmx${PIG_HEAPSIZE}m"
fi

# CLASSPATH initially contains $PIG_CONF_DIR, followed by the JDK tools jar.
CLASSPATH="${PIG_CONF_DIR}"
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar

# pig_add_hcat_jars DIR PATTERN [EXCLUDE]
#   Appends every existing file in DIR whose name matches the glob PATTERN to
#   the colon-separated global ADDITIONAL_CLASSPATHS.  When EXCLUDE is given,
#   files whose base name contains that string are skipped.  Nothing is
#   appended when no file qualifies (so no empty classpath entry is created);
#   a warning is written to stderr instead.
pig_add_hcat_jars() {
  local dir=$1 pattern=$2 exclude=${3:-}
  local jar found=false
  # Empty IFS: $pattern below is expanded as a glob but never word-split.
  local IFS=''
  for jar in "$dir"/$pattern; do
    # Without nullglob an unmatched pattern stays literal; skip it.
    [[ -e "$jar" ]] || continue
    if [[ -n "$exclude" && "${jar##*/}" == *"$exclude"* ]]; then
      continue
    fi
    ADDITIONAL_CLASSPATHS="${ADDITIONAL_CLASSPATHS:+${ADDITIONAL_CLASSPATHS}:}${jar}"
    found=true
  done
  if [[ "$found" != "true" ]]; then
    echo "Warning: no jar matching $dir/$pattern" >&2
  fi
}

if [[ "$includeHCatalog" == "true" ]]; then
  # need to provide the hcatalog jar file path as well as
  # the location of the hive jars on which hcatalog depends
  if [[ -z "$HIVE_HOME" ]]; then
    if [[ -d /usr/lib/hive ]]; then
      HIVE_HOME=/usr/lib/hive
    else
      echo "Please initialize HIVE_HOME" >&2
      exit 255   # same status the former out-of-range "exit -1" produced
    fi
  fi
  if [[ -z "$HCAT_HOME" ]]; then
    if [[ -d /usr/lib/hcatalog ]]; then
      HCAT_HOME=/usr/lib/hcatalog
    else
      echo "Please initialize HCAT_HOME" >&2
      exit 255   # same status the former out-of-range "exit -1" produced
    fi
  fi

  # Collect the jars in a fixed order: Hive dependencies first, then HCatalog.
  ADDITIONAL_CLASSPATHS=""
  for hiveJarPattern in \
      'hive-metastore-*.jar' \
      'libthrift-*.jar' \
      'hive-exec-*.jar' \
      'libfb303-*.jar' \
      'jdo2-api-*-ec.jar' \
      'slf4j-api-*.jar' \
      'hive-hbase-handler-*.jar'; do
    pig_add_hcat_jars "$HIVE_HOME/lib" "$hiveJarPattern"
  done
  # The hcatalog-*.jar glob also matches the server jar; leave that one out.
  pig_add_hcat_jars "$HCAT_HOME/share/hcatalog" 'hcatalog-*.jar' server
  pig_add_hcat_jars "$HCAT_HOME/lib" 'hbase-storage-handler-*.jar'

  # Jars requested on the command line via -Dpig.additional.jars=... go last.
  if [[ -n "$additionalJars" ]]; then
    ADDITIONAL_CLASSPATHS="${ADDITIONAL_CLASSPATHS:+${ADDITIONAL_CLASSPATHS}:}${additionalJars}"
  fi
  # The Hive conf directory is added so the Hive configuration xml is visible.
  CLASSPATH="${CLASSPATH}${ADDITIONAL_CLASSPATHS:+:${ADDITIONAL_CLASSPATHS}}:$HIVE_HOME/conf"
fi

# Append the optional extras, each only when non-empty:
# first the user-specified PIG_CLASSPATH, then HADOOP_CONF_DIR.
if [[ -n "$PIG_CLASSPATH" ]]; then
    CLASSPATH+=":${PIG_CLASSPATH}"
fi

if [[ -n "$HADOOP_CONF_DIR" ]]; then
    CLASSPATH+=":${HADOOP_CONF_DIR}"
fi

# so that filenames w/ spaces are handled correctly in loops below
# (IFS stays empty until the "unset IFS" further down in this script)
IFS=

# extglob enables the !(...) pattern used when locating pig.jar;
# nullglob makes a glob without matches expand to nothing.
shopt -s extglob
shopt -s nullglob

# Every jar shipped in $PIG_HOME/lib goes on the classpath.
for f in "$PIG_HOME"/lib/*.jar; do
    CLASSPATH=${CLASSPATH}:$f
done

# Jython: a jar in lib/ is already on the classpath thanks to the loop above.
# Otherwise fall back to the ivy build directory.  Matches are joined with ':'
# so that more than one jar still forms a valid classpath.
JYTHON_JAR=""
for f in "$PIG_HOME"/lib/jython*.jar; do
    JYTHON_JAR="${JYTHON_JAR:+${JYTHON_JAR}:}$f"
done
if [[ -z "$JYTHON_JAR" ]]; then
    for f in "$PIG_HOME"/build/ivy/lib/Pig/jython*.jar; do
        JYTHON_JAR="${JYTHON_JAR:+${JYTHON_JAR}:}$f"
    done
    if [[ -n "$JYTHON_JAR" ]]; then
        CLASSPATH=${CLASSPATH}:$JYTHON_JAR
    fi
fi

# JRuby: same lookup order as Jython.
JRUBY_JAR=""
for f in "$PIG_HOME"/lib/jruby-complete-*.jar; do
    JRUBY_JAR="${JRUBY_JAR:+${JRUBY_JAR}:}$f"
done
if [[ -z "$JRUBY_JAR" ]]; then
    for f in "$PIG_HOME"/build/ivy/lib/Pig/jruby-complete-*.jar; do
        JRUBY_JAR="${JRUBY_JAR:+${JRUBY_JAR}:}$f"
    done
    if [[ -n "$JRUBY_JAR" ]]; then
        CLASSPATH=${CLASSPATH}:$JRUBY_JAR
    fi
fi

# Jars from the share/pig/lib directory.
for f in "$PIG_HOME"/share/pig/lib/*.jar; do
    CLASSPATH=${CLASSPATH}:$f
done

# For Hadoop 0.23.0+
#
#if [ -d "${PIG_HOME}/share/hadoop/common" ]; then
#    for f in ${PIG_HOME}/share/hadoop/common/hadoop*.jar; do
#        CLASSPATH=${CLASSPATH}:$f;
#    done
#fi
#
#if [ -d "${PIG_HOME}/share/hadoop/hdfs" ]; then
#    for f in ${PIG_HOME}/share/hadoop/hdfs/hadoop*.jar; do
#        CLASSPATH=${CLASSPATH}:$f;
#    done
#fi
#
#if [ -d "${PIG_HOME}/share/hadoop/mapreduce" ]; then
#    for f in ${PIG_HOME}/share/hadoop/mapreduce/hadoop*.jar; do
#        CLASSPATH=${CLASSPATH}:$f;
#    done
#fi

# Locate the hadoop launcher.  The first hit wins:
#   1. a hadoop executable on the PATH
#   2. $HADOOP_PREFIX/bin/hadoop
#   3. $HADOOP_HOME/bin/hadoop
#   4. /usr/bin/hadoop (rpm/deb package)
# A HADOOP_BIN value that is already set is kept when nothing is on the PATH.
# "type -P" is the bash builtin PATH lookup: no dependency on an external
# "which" and nothing printed when hadoop is absent.
if hadoopOnPath=$(type -P hadoop); then
    HADOOP_BIN=$hadoopOnPath
fi

if [[ -z "$HADOOP_BIN" && -n "$HADOOP_PREFIX" ]]; then
    if [[ -f "$HADOOP_PREFIX/bin/hadoop" ]]; then
        HADOOP_BIN=$HADOOP_PREFIX/bin/hadoop
    fi
fi

if [[ -z "$HADOOP_BIN" && -n "$HADOOP_HOME" && -d "$HADOOP_HOME" ]]; then
    if [[ -f "$HADOOP_HOME/bin/hadoop" ]]; then
        HADOOP_BIN=$HADOOP_HOME/bin/hadoop
    fi
fi

if [[ -z "$HADOOP_BIN" ]]; then
    # if installed with rpm/deb package
    if [[ -f /usr/bin/hadoop ]]; then
        HADOOP_BIN=/usr/bin/hadoop
    fi
fi

# if using HBase, likely want to include HBase config
# HBASE_CONF_DIR defaults to /etc/hbase; when the directory exists it is put
# in FRONT of the classpath.
HBASE_CONF_DIR=${HBASE_CONF_DIR:-/etc/hbase}
if [[ -d "$HBASE_CONF_DIR" ]]; then
    CLASSPATH=$HBASE_CONF_DIR:$CLASSPATH
fi

# Hadoop configuration directory inside the Pig installation, if present.
if [[ -d "${PIG_HOME}/etc/hadoop" ]]; then
    CLASSPATH=${CLASSPATH}:${PIG_HOME}/etc/hadoop
fi

# locate ZooKeeper, then HBase
# The directory part is quoted so only the file-name part acts as a glob;
# the -e test skips a pattern that matched nothing.
for f in "${PIG_HOME}"/share/zookeeper/zookeeper-*.jar \
         "${PIG_HOME}"/share/hbase/hbase-*.jar; do
    [[ -e "$f" ]] || continue
    CLASSPATH=${CLASSPATH}:$f
done

# Logging defaults: use $PIG_HOME/logs and pig.log when the caller left
# PIG_LOG_DIR / PIG_LOGFILE unset or empty.
: "${PIG_LOG_DIR:=$PIG_HOME/logs}"
: "${PIG_LOGFILE:=pig.log}"

# Under Cygwin, run the classpath and the directories handed to the JVM
# through cygpath.
if $cygwin; then
    CLASSPATH=$(cygpath -p -w "$CLASSPATH")
    PIG_HOME=$(cygpath -d "$PIG_HOME")
    PIG_LOG_DIR=$(cygpath -d "$PIG_LOG_DIR")
fi
 
# restore ordinary behaviour
unset IFS

# pig_merge_additional_jars
#   Folds every -Dpig.additional.jars=VALUE option found in PIG_OPTS into the
#   global ADDITIONAL_CLASSPATHS (user values first, in the order given) and
#   leaves exactly one merged -Dpig.additional.jars option at the end of
#   PIG_OPTS.  When PIG_OPTS carries no non-empty value it is only appended to.
#   PIG_OPTS is split on whitespace with "read -a", so its words are never
#   subjected to pathname expansion.
pig_merge_additional_jars() {
  local word userJars="" otherOpts=""
  local -a words
  # read returns non-zero at end of input; the array is filled regardless.
  IFS=$' \t\n' read -r -d '' -a words <<< "$PIG_OPTS"
  for word in "${words[@]}"; do
    if [[ "$word" == -Dpig.additional.jars=* ]]; then
      word=${word#-Dpig.additional.jars=}
      if [[ -n "$word" ]]; then
        userJars="${userJars:+${userJars}:}${word}"
      fi
    else
      otherOpts="${otherOpts:+${otherOpts} }${word}"
    fi
  done
  if [[ -n "$userJars" ]]; then
    ADDITIONAL_CLASSPATHS="${userJars}${ADDITIONAL_CLASSPATHS:+:${ADDITIONAL_CLASSPATHS}}"
    PIG_OPTS=$otherOpts
  fi
  PIG_OPTS="$PIG_OPTS -Dpig.additional.jars=$ADDITIONAL_CLASSPATHS"
}

# Tell Pig where its log directory, log file and installation live.
PIG_OPTS="$PIG_OPTS -Dpig.log.dir=$PIG_LOG_DIR"
PIG_OPTS="$PIG_OPTS -Dpig.log.file=$PIG_LOGFILE"
PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_HOME"
if [[ "$includeHCatalog" == "true" ]]; then
  pig_merge_additional_jars
fi

# run it
#
# Two launch modes:
#   * a hadoop launcher was found (HADOOP_BIN): run the "withouthadoop" Pig jar
#     through "hadoop jar", passing the classpath and JVM options via
#     HADOOP_CLASSPATH / HADOOP_OPTS;
#   * otherwise: start the JVM directly on the self-contained pig.jar.
# With -secretDebugCmd (debug=true) the command is only printed.
# Jar paths are quoted so an installation directory containing spaces works,
# and when a glob matches several jars the first match is used.
if [[ -n "$HADOOP_BIN" ]]; then
    if [[ "$debug" == "true" ]]; then
        echo "Find hadoop at $HADOOP_BIN"
    fi

    # Candidates in priority order: unversioned jar, versioned jar in
    # PIG_HOME, then the deb/rpm location under share/pig.
    PIG_JAR=""
    for candidate in "$PIG_HOME/pig-withouthadoop.jar" \
                     "$PIG_HOME"/pig-?.*withouthadoop.jar \
                     "$PIG_HOME"/share/pig/pig-*withouthadoop.jar; do
        if [[ -f "$candidate" ]]; then
            PIG_JAR=$candidate
            break
        fi
    done

    if [[ -n "$PIG_JAR" ]]; then
        CLASSPATH=${CLASSPATH}:$PIG_JAR
    else
        echo "Cannot locate pig-withouthadoop.jar. do 'ant jar-withouthadoop', and try again" >&2
        exit 1
    fi

    export HADOOP_CLASSPATH=$CLASSPATH:$HADOOP_CLASSPATH
    export HADOOP_OPTS="$JAVA_HEAP_MAX $PIG_OPTS $HADOOP_OPTS"
    if [[ "$debug" == "true" ]]; then
        echo "dry run:"
        echo "HADOOP_CLASSPATH: $HADOOP_CLASSPATH"
        echo "HADOOP_OPTS: $HADOOP_OPTS"
        echo "$HADOOP_BIN" jar "$PIG_JAR" "${remaining[@]}"
        echo
    else
        exec "$HADOOP_BIN" jar "$PIG_JAR" "${remaining[@]}"
    fi
else
    # fall back to use fat pig.jar
    if [[ "$debug" == "true" ]]; then
        echo "Cannot find local hadoop installation, using bundled hadoop 20.2"
    fi

    # pig.jar first, then a versioned pig-N.*.jar that is not a
    # "withouthadoop" build.
    PIG_JAR=""
    for candidate in "$PIG_HOME/pig.jar" "$PIG_HOME"/pig-?.*.jar; do
        [[ -f "$candidate" ]] || continue
        [[ "$candidate" == *withouthadoop.jar ]] && continue
        PIG_JAR=$candidate
        break
    done

    if [[ -n "$PIG_JAR" ]]; then
        CLASSPATH="${CLASSPATH}:$PIG_JAR"
    else
        echo "Cannot locate pig.jar. do 'ant jar', and try again" >&2
        exit 1
    fi

    CLASS=org.apache.pig.Main
    # JAVA_HEAP_MAX and PIG_OPTS are deliberately unquoted: each holds a
    # whitespace-separated list of JVM options.
    if [[ "$debug" == "true" ]]; then
        echo "dry run:"
        echo "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS "${remaining[@]}"
        echo
    else
        exec "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS "${remaining[@]}"
    fi
fi
# Only reached after a dry run (exec replaces the shell otherwise).
shopt -u nullglob
shopt -u extglob
