rm bootstrap

This commit is contained in:
o0beaner 2018-12-12 02:32:50 -06:00
parent 48397395a1
commit a211b5f66e
26 changed files with 2017 additions and 0 deletions

Binary file not shown.

View File

@ -0,0 +1,154 @@
#!/bin/bash
set -x
install_jupyter=false
# Build and install Vim from source with Lua scripting support.
# Builds LuaJIT first (needed by --enable-luainterp), then a console-only
# Vim (huge feature set, no GUI/X11), installs both under /usr/local, and
# repoints /usr/bin/vi at the new binary.
# NOTE(review): luajit is cloned over plain http — consider https.
# Side effects: leaves cwd in /tmp; removes /tmp/vim afterwards but not
# /tmp/luajit-2.0.
build_vim() {
cd /tmp
git clone http://luajit.org/git/luajit-2.0.git
cd luajit-2.0
make
sudo make install
cd /tmp
git clone https://github.com/vim/vim.git
cd vim
./configure \
--with-features=huge \
--enable-cscope \
--enable-pythoninterp \
--enable-luainterp \
--enable-multibyte \
--enable-fontset \
--disable-gui \
--without-x \
--disable-netbeans \
--enable-largefile
make
sudo make install
# Replace any existing vi with a symlink to the freshly built vim.
if [ -e /usr/bin/vi ]; then
sudo rm /usr/bin/vi
fi
sudo ln -s /usr/local/bin/vim /usr/bin/vi
rm -rf /tmp/vim
}
# Install baseline yum packages, pip + awscli, the user's dotfiles, zsh as
# the login shell, and a nightly "suntracker" cron job.
# Globals read: util_path, s3_utils (assigned by the top-level code).
provision_packages() {
sudo yum groupinstall -y "Development Tools"
sudo yum install -y \
tmux \
wget \
htop \
mlocate \
git \
rake \
zsh \
jq \
at \
bind-utils \
strace \
lua \
lua-devel \
ncurses \
ncurses-devel \
gmp \
gmp-devel \
ctags \
tcl-devel \
perl \
perl-devel \
perl-ExtUtils-ParseXS \
perl-ExtUtils-CBuilder \
perl-ExtUtils-Embed
wget https://bootstrap.pypa.io/get-pip.py
sudo python2.7 ./get-pip.py
# Preserve PATH through sudo so the pip installed above is found.
sudo env "PATH=$PATH" pip install awscli
cd ~
wget https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh
chmod +x install.sh
./install.sh
# Reset then restore exec permission before forcing the login shell.
sudo chmod 644 /usr/bin/chsh
sudo chmod +x /usr/bin/chsh
sudo /usr/bin/chsh -s /bin/zsh $USER
sudo updatedb
cd "$util_path"
wget --no-check-certificate "$s3_utils/suntracker.sh"
chmod +x "$util_path/suntracker.sh"
# NOTE(review): re-running this function appends a duplicate cron entry.
(crontab -l ; echo "0 3 * * * $util_path/suntracker.sh") | crontab -
"$util_path/suntracker.sh"
touch ~/.zsh.prompts
# BUG FIX: plain mkdir errors out when the directory already exists.
mkdir -p ~/.zsh.after/
echo "prompt agnoster" > ~/.zsh.after/prompt.zsh
}
# Install the AWS Systems Manager (SSM) agent from the public S3 RPM.
# Side effect: leaves the working directory at /tmp.
install_ssm() {
cd /tmp
sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm
}
# Download the Zeppelin-S3 configuration helper and submit it to the EMR
# cluster as a custom (command-runner) step.
# NOTE(review): $cluster_id is never assigned anywhere in this script, so
# add-steps runs with an empty --cluster-id — confirm the caller exports
# it, or source it from ~/.cluster_id as the sibling bootstrap does.
s3ify_zeppelin() {
cd $util_path
wget --no-check-certificate $s3_utils/configure_zeppelin_s3.sh
chmod +x $util_path/configure_zeppelin_s3.sh
aws emr add-steps --cluster-id $cluster_id --steps Type=CUSTOM_JAR,Name="Configure Zeppelin for S3",Jar="command-runner.jar",Args=[$util_path/configure_zeppelin_s3.sh]
}
# Fetch and run the AWS big-data-blog Jupyter/JupyterHub installer with a
# fixed kernel set (R, Julia, Toree, Torch, Ruby), DS/ML package bundles,
# notebooks stored on S3 via s3fs, Jupyter on :8002 and JupyterHub :8001.
# NOTE(review): this function shares its name with the $install_jupyter
# flag variable; bash keeps function and variable namespaces separate,
# so it works, but it is confusing to read.
install_jupyter() {
cd $util_path
wget --no-check-certificate https://s3.amazonaws.com/aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/install-jupyter-emr5.sh
chmod +x $util_path/install-jupyter-emr5.sh
$util_path/install-jupyter-emr5.sh \
--r \
--julia \
--toree \
--torch \
--ruby \
--ds-packages \
--ml-packages \
--python-packages ggplot nilearn \
--port 8002 \
--password jupyter \
--jupyterhub \
--jupyterhub-port 8001 \
--cached-install \
--notebook-dir s3://ty-emr/XRR/jupyter/notebooks/ \
--copy-samples \
--s3fs
}
# get input parameters — only --jupyter is recognized.
while [ $# -gt 0 ]; do
case "$1" in
--jupyter)
install_jupyter=true
;;
-*)
# NOTE(review): error_msg is not defined anywhere in this script, so
# this arm fails with "command not found"; kept for parity with the
# sibling bootstrap scripts that do define it.
error_msg "unrecognized option: $1"
;;
*)
break
;;
esac
shift
done
s3_utils='https://s3.amazonaws.com/ty-emr/XRR/utils'
# NOTE(review): this flag is never consulted — build_vim runs below
# unconditionally.
build_vim=false
# BUG FIX: tilde does not expand inside quotes; the original created a
# literal directory named "~" in the cwd. Use $HOME instead.
util_path="$HOME/.utils"
mkdir -p "$util_path"
# Run the independent provisioning stages concurrently.
provision_packages &
build_vim &
install_ssm &
is_master=false
# Only EMR master nodes have isMaster=true in instance.json.
if grep isMaster /mnt/var/lib/info/instance.json | grep -q true; then
s3ify_zeppelin &
if [ "$install_jupyter" == true ]; then
install_jupyter &
fi
fi

View File

@ -0,0 +1,274 @@
#!/bin/bash
# Bootstrap: log everything to bootstrap.txt, detect the platform and
# distro, and decide whether Vim needs to be rebuilt with Lua support.
logfile=bootstrap.txt
exec > $logfile 2>&1
set -x
install_jupyter=false
# Parse command-line flags; only --jupyter is recognized.
while [ $# -gt 0 ]; do
case "$1" in
--jupyter) install_jupyter=true ;;
-*) error_msg "unrecognized option: $1" ;;
*) break ;;
esac
shift
done
platform=$(uname)
s3_utils='https://s3.amazonaws.com/ty-emr/XRR/utils'
build_vim=false
# Pick a per-user utility directory (root keeps it under /root).
if [ "$USER" == "root" ]; then
util_path="/root/.utils"
home="/root"
else
case "$platform" in
Darwin) users_dir="Users" ;;
Linux) users_dir="home" ;;
esac
util_path="/$users_dir/$USER/.utils"
fi
# Rebuild Vim from source only when the installed one lacks +lua.
vim_check=$(vim --version)
if [[ $vim_check != *"+lua"* ]]; then
build_vim=true
fi
mkdir -p $util_path
# Lower-case the release text so the distro matches below are
# case-insensitive.
release=$(cat /etc/*release* | tr '[:upper:]' '[:lower:]')
# Distro-specific provisioning: RHEL-family / Debian / Arch on the main
# path, SmartOS handled separately in the else branch.
if [[ $release != *"smartos"* ]]; then
# Presumably matches ID_LIKE="rhel fedora" on Amazon Linux / CentOS
# release files — verify on new distro versions.
if [[ $release == *"rhel fedora"* ]]; then
echo "Looks like we're running on something that is kinda like RHEL..."
sudo yum groupinstall -y "Development Tools"
sudo yum install -y \
tmux \
wget \
htop \
mlocate \
git \
rake \
zsh \
jq \
at \
bind-utils \
strace \
lua \
lua-devel \
ncurses \
ncurses-devel \
gmp \
gmp-devel \
ctags \
tcl-devel \
perl \
perl-devel \
perl-ExtUtils-ParseXS \
perl-ExtUtils-CBuilder \
perl-ExtUtils-Embed
# Build Vim (+lua, console-only) from source when the installed one
# lacks Lua support.
if [[ $build_vim == true ]]; then
cd /tmp
git clone http://luajit.org/git/luajit-2.0.git
cd luajit-2.0
make
sudo make install
cd /tmp
git clone https://github.com/vim/vim.git
cd vim
./configure \
--with-features=huge \
--enable-cscope \
--enable-pythoninterp \
--enable-luainterp \
--enable-multibyte \
--enable-fontset \
--disable-gui \
--without-x \
--disable-netbeans \
--enable-largefile
make
sudo make install
if [ -e /usr/bin/vi ]; then
sudo rm /usr/bin/vi
fi
sudo ln -s /usr/local/bin/vim /usr/bin/vi
rm -rf /tmp/vim
fi
fi
if [[ $release == *"debian"* ]]; then
echo "Looks like we're running on a Debian based system!"
sudo apt-get update
sudo apt-get install -y \
tmux \
htop \
wget \
mlocate \
git \
rake \
zsh \
jq \
at \
dnsutils \
strace \
libncurses5-dev \
libncursesw5-dev \
python-dev \
ruby-dev \
lua5.1 \
lua5.1-dev \
luajit \
libluajit-5.1 \
libperl-dev \
build-essential
if [[ $build_vim == true ]]; then
# Lua header/lib symlinks that vim's configure expects on Debian.
sudo ln -sf /usr/include/lua5.1 /usr/include/lua5.1/include
sudo ln -sf /usr/lib/x86_64-linux-gnu/liblua5.1.so /usr/local/lib/liblua.so
cd /tmp
git clone https://github.com/vim/vim.git
cd vim
./configure \
--with-features=huge \
--enable-cscope \
--enable-pythoninterp=yes \
--enable-rubyinterp=yes \
--with-python-config-dir=/usr/lib/python2.7/config-x86_64-linux-gnu \
--enable-multibyte \
--enable-fontset \
--disable-gui \
--disable-netbeans \
--enable-luainterp=yes \
--with-luajit \
--with-lua-prefix=/usr/include/lua5.1 \
--enable-largefile
make
sudo make install
if [ -e /usr/bin/vi ]; then
sudo rm /usr/bin/vi
fi
sudo ln -s /usr/local/bin/vim /usr/bin/vi
rm -rf /tmp/vim
fi
fi
# BUG FIX: $release was lower-cased above, so the original pattern
# *"Arch Linux"* could never match; compare against the lowercase form.
if [[ $release == *"arch linux"* ]]; then
echo "Looks like we're running on Arch!"
# BUG FIX: "vim\" had no space before its continuation backslash, which
# glued it to the next list entry.
yaourt -S --noconfirm \
gnu-netcat \
cron \
tmux \
htop \
wget \
mlocate \
git \
rake \
zsh \
jq \
at \
vim \
bind-tools \
strace \
ncurses \
ctags
fi
wget https://bootstrap.pypa.io/get-pip.py
sudo python2.7 ./get-pip.py
# Preserve PATH through sudo so the pip just installed is found.
sudo env "PATH=$PATH" pip install awscli
# Fetch the dotfiles installer and run its text as the invoking user.
su -c "$(curl -fksSL https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh)" $USER
sudo chmod 644 /usr/bin/chsh
sudo chmod +x /usr/bin/chsh
sudo /usr/bin/chsh -s /bin/zsh $USER
sudo updatedb
cd $util_path
wget --no-check-certificate $s3_utils/suntracker.sh
chmod +x $util_path/suntracker.sh
(crontab -l ; echo "0 3 * * * $util_path/suntracker.sh") | crontab -
$util_path/suntracker.sh
else
# SmartOS: install the pkgsrc bootstrap and relocate /root to /usbkey so
# it survives reprovisioning.
BOOTSTRAP_TAR="bootstrap-2017Q1-x86_64.tar.gz"
curl -Ok https://pkgsrc.joyent.com/packages/SmartOS/bootstrap/${BOOTSTRAP_TAR}
# NOTE(review): "boots*" also matches bootstrap.txt, the live logfile
# opened by exec at the top of this script — confirm that is intended.
tar -zxpf ${BOOTSTRAP_TAR} -C /
rm -f boots*
PATH=/opt/local/sbin:/opt/local/bin:$PATH
MANPATH=/opt/local/man:$MANPATH
pkgin -y in jq tmux git ruby22-rake zsh at || true
# BUG FIX: mkdir -p so a re-run does not fail on the existing directory.
mkdir -p /usbkey/root
mv /root/.[!.]* /usbkey/root
cd /
rm -rf /root
ln -s /usbkey/root /root
su -c "$(curl -fksSL https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh)" $USER
# bash on SmartOS stays the login shell; hand off to zsh for both
# interactive and -c invocations.
echo 'if [ -n "$BASH_EXECUTION_STRING" ]; then' >> ~/.bashrc
echo ' export SHELL=/opt/local/bin/zsh' >> ~/.bashrc
echo ' exec "$SHELL" -c "$BASH_EXECUTION_STRING"' >> ~/.bashrc
echo 'fi' >> ~/.bashrc
echo 'SHELL=/opt/local/bin/zsh; exec "$SHELL"' >> ~/.bashrc
fi
# AWS Instance customization
# cloud-init is only present on AWS images, so use it as the "am I on
# EC2?" marker before doing AWS-specific setup.
if [ -e /usr/bin/cloud-init ]; then
# Install SSM Agent
cd /tmp
sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm
# Am I running EMR? EMR instances carry aws:elasticmapreduce:* tags.
instance=$(aws ec2 describe-instances --instance-ids $(curl -s 169.254.169.254/latest/meta-data/instance-id))
tags=$(echo $instance | jq -r '.Reservations[0].Instances[0].Tags[]')
cluster_id=$(echo $tags | jq -r '. | select(.Key=="aws:elasticmapreduce:job-flow-id") | .Value')
if [ -n "$cluster_id" ]; then
echo "$cluster_id" > ~/.cluster_id
role=$(echo $tags | jq -r '. | select(.Key=="aws:elasticmapreduce:instance-group-role") | .Value')
# The Zeppelin-S3 step only makes sense on the master node.
if [ "$role" == "MASTER" ]; then
# ToDo: Incorporate Hue?
cd $util_path
wget --no-check-certificate $s3_utils/configure_zeppelin_s3.sh
chmod +x $util_path/configure_zeppelin_s3.sh
aws emr add-steps --cluster-id $cluster_id --steps Type=CUSTOM_JAR,Name="Configure Zeppelin for S3",Jar="command-runner.jar",Args=[$util_path/configure_zeppelin_s3.sh]
fi
# install jupyter
# BUG FIX: quote the flag so the [ ] test cannot break if it is empty.
if [ "$install_jupyter" == true ]; then
cd $util_path
wget --no-check-certificate https://s3.amazonaws.com/aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/install-jupyter-emr5.sh
chmod +x $util_path/install-jupyter-emr5.sh
$util_path/install-jupyter-emr5.sh \
--r \
--julia \
--toree \
--torch \
--ruby \
--ds-packages \
--ml-packages \
--python-packages ggplot nilearn \
--port 8002 \
--password jupyter \
--jupyterhub \
--jupyterhub-port 8001 \
--cached-install \
--notebook-dir s3://ty-emr/XRR/jupyter/notebooks/ \
--copy-samples \
--s3fs
fi
fi
fi
touch ~/.zsh.prompts
# BUG FIX: mkdir -p so a re-run does not fail on the existing directory.
mkdir -p ~/.zsh.after/
echo "prompt agnoster" > ~/.zsh.after/prompt.zsh

View File

@ -0,0 +1,22 @@
#!/bin/bash
# Small test harness: install basics on Debian, then re-run the dotfiles
# installer from scratch. Logs to test.txt.
set -x
logfile=test.txt
exec > $logfile 2>&1
sudo apt-get update
# BUG FIX: the original list ended "zsh \" — the line continuation made
# apt-get receive "rm -rf .yadr/" as extra package arguments, and the rm
# itself never ran.
sudo apt-get install -y \
tmux \
htop \
wget \
git \
rake \
zsh
rm -rf .yadr/
HOME=/root
wget https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh
chmod +x ./install.sh
# NOTE(review): install.sh is downloaded above but the line below fetches
# and runs it again via curl — confirm which copy is intended.
su -c "$(curl -fksSL https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh)" $USER

BIN
aws/emr/bootstrap/certs.zip Normal file

Binary file not shown.

View File

@ -0,0 +1,70 @@
#!/bin/bash
# EMR bootstrap: append per-daemon JVM options (--<daemon>-opts=VALUE) to
# hbase-user-env.sh so HBase/ZooKeeper daemons pick them up at startup.
set -e
# first validate the arguments
REPLACE_FILE=false
for i in "$@" ; do
case $i in
--*-opts*)
if ! echo $i | grep -E -- '--[a-zA-Z-]+?-opts=.+' > /dev/null 2>&1 ; then
echo "Couldn't parse option $i expected --cmd-opts=-XX:+UseG1GC where cmd is hadoop-master or some such and -XX:+UseG1GC is the option to pass to the JVM" 1>&2
exit 1
fi
;;
--help)
set +x
echo "Usage: "
echo "--<daemon>-opts"
echo "  Set additional Java options for the specified daemon."
echo " "
echo "--replace"
echo "  Replace the existing hbase-user-env.sh file if it exists."
echo " "
echo "<daemon> is one of:"
echo "  hbase-master, hbase-regionserver, zookeeper"
echo " "
echo " "
echo "Example Usage:"
echo "  --hbase-master-opts=-Xmx2048 --zookeeper-opts=-XX:GCTimeRatio=19"
exit 1
;;
--replace)
REPLACE_FILE=true
;;
*)
echo "Unknown option $i" 1>&2
exit 1
;;
esac
done
set -x
# BUG FIX: under set -e a plain mkdir aborts the script when the
# directory already exists; -p makes the bootstrap idempotent.
mkdir -p /home/hadoop/conf
# Prefer the hbase-specific conf dir when it exists.
HBASE_ENV_FILE=/home/hadoop/conf/hbase-user-env.sh
if [ -d "/home/hadoop/hbase/conf" ] ; then
HBASE_ENV_FILE=/home/hadoop/hbase/conf/hbase-user-env.sh
fi
if [ "$REPLACE_FILE" == "true" ] ; then
rm -f "$HBASE_ENV_FILE"
fi
# Make sure the env file starts with a shebang exactly once.
if [ -e $HBASE_ENV_FILE ] ; then
[[ ! -n $(grep "#\\!/bin/bash" $HBASE_ENV_FILE ) ]] && echo "#!/bin/bash" >> $HBASE_ENV_FILE
else
echo "#!/bin/bash" >> $HBASE_ENV_FILE
fi
for i in "$@" ; do
case $i in
--*-opts*)
# "--hbase-master-opts=-Xmx2g" -> HBASE_MASTER_OPTS="-Xmx2g"
# NOTE(review): the non-greedy ".*?" is not valid POSIX ERE — GNU sed
# treats it greedily; it works here only because "-opts=" occurs once.
OPTS_CMD=$(echo $i | sed -r 's|--(.*?)-opts=.*|\1|' | tr 'a-z-' 'A-Z_')_OPTS
OPTS_VALUE=$(echo $i | sed -r 's|--.*?-opts=(.*)|\1|')
cat >> $HBASE_ENV_FILE <<EOF
$OPTS_CMD="$OPTS_VALUE"
EOF
;;
esac
done

View File

@ -0,0 +1,15 @@
#!/bin/bash
# Kerberize this node: register the host principal in the local KDC,
# create a user principal with an HDFS home, and enable GSSAPI over SSH.
#Add a principal to the KDC for the master node, using the master node's returned host name
sudo kadmin.local -q "ktadd -k /etc/krb5.keytab host/`hostname -f`"
#Assign plain language variables for clarity
name=tyler
# SECURITY NOTE(review): initial password is hardcoded in the repo; the
# +needchange flag below presumably forces a reset at first login —
# confirm, and prefer injecting the password from the environment.
password=derpderp
sudo kadmin.local -q "addprinc -pw $password +needchange $name"
# Provision the user's HDFS home directory.
hdfs dfs -mkdir /user/$name
hdfs dfs -chown $name:$name /user/$name
# Enable GSSAPI authentication for SSH and restart SSH service
sudo sed -i 's/^.*GSSAPIAuthentication.*$/GSSAPIAuthentication yes/' /etc/ssh/sshd_config
sudo sed -i 's/^.*GSSAPICleanupCredentials.*$/GSSAPICleanupCredentials yes/' /etc/ssh/sshd_config
sudo /etc/init.d/sshd restart

View File

@ -0,0 +1,3 @@
#!/bin/bash
# Create the interactive "tyler" user account on this node.
sudo adduser tyler

View File

@ -0,0 +1 @@
# Stage the custom EMRFS credentials provider jar onto EMRFS's auxlib
# path so EMR loads it on the classpath.
sudo aws s3 cp s3://ty-emr/XRR/bootstrap/MyAWSCredentialsProviderWithUri.jar /usr/share/aws/emr/emrfs/auxlib/

View File

@ -0,0 +1,210 @@
#!/bin/bash
# EMR DNS bootstrap: route cluster name resolution through a local
# dnsmasq so synthetic ip-x-x-x-x names resolve across all VPC CIDRs.
get_vpc_cidrs()
{
# Emit the VPC's IPv4 CIDR blocks from EC2 instance metadata.
# Globals read: $metadata, $mac_address (assigned at the bottom of
# this file before this function is called).
cidrs=$(curl -s $metadata/network/interfaces/macs/$mac_address/vpc-ipv4-cidr-blocks)
echo "$cidrs"
}
run_dnsmasq()
{
# Start a local dnsmasq that synthesizes ip-…  records for every
# (search domain x VPC CIDR) pair, replacing any previous instance.
all_domains="$(grep ^search $resolv_conf | cut -d' ' -f2- )"
# BUG FIX: the original ps|grep|awk pipeline can return several PIDs and
# the unquoted [ $pid ] test then errors out; pgrep -f is robust and
# excludes itself from the match.
pid=$(pgrep -f 'dnsmasq.*synth-domain')
if [ -n "$pid" ]; then
# $pid intentionally unquoted: word-splits when several PIDs matched.
sudo kill $pid
fi
for d in $all_domains; do
for c in $(get_vpc_cidrs); do
syn_domains="$syn_domains --synth-domain=$d,$c,ip- "
done
done
runmasq="sudo dnsmasq --listen-address=127.0.0.1 $syn_domains "
eval "$runmasq"
echo "started dnsmasq : $runmasq"
}
# Write a replacement /usr/bin/setup-dns that makes EMR's DNS setup
# VPC-aware (local dnsmasq with --synth-domain) and logs status.
rewrite_setup_dns()
{
tmpfile=$(mktemp /tmp/setupdnsXXXXXX)
# Quoted 'EOF': the generated script below is written verbatim; nothing
# is expanded at generation time.
cat > "$tmpfile" << 'EOF'
#!/bin/bash
#
# Set up DNS for EMR master/slave instance in VPC.
# This script also set up DNS in us-east-1 for non-VPC to handle ec2 instances,
# whose host name begin with domU, with invalid dns domain name (TT0055043598).
#
set -e
set -x
# BUG FIX: aliases are not expanded in non-interactive bash, so the
# original `alias curl=...` never took effect; a function wrapper does.
curl() { command curl --connect-timeout 2 -q -f --retry-delay 2 --retry 5 "$@"; }
resolv_conf="/etc/resolv.conf"
dhclient_conf="/etc/dhcp/dhclient.conf"
localhost="127.0.0.1"
metadata="http://169.254.169.254/latest/meta-data"
restart_network="false"
in_vpc="false"
mac_address="$(curl $metadata/mac/ | tr '[:upper:]' '[:lower:]')"
region="$(curl http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r .region)"
# wait for the network to come up before proceeding
if [ -e /usr/bin/nm-online ]; then
/usr/bin/nm-online
fi
get_default_domain()
{
if [ "$region" = "us-east-1" ]; then
echo 'ec2.internal'
else
echo "$region.compute.internal"
fi
}
get_first_nameserver_from_resolv_conf()
{
awk '$1 ~ /^nameserver/ { print $2 }' "$resolv_conf"
}
check_vpc()
{
# BUG FIX: the original wrote `if "$(curl ...)" | grep -q vpc`, which
# tried to *execute* the metadata text as a command; pipe curl directly.
if curl $metadata/network/interfaces/macs/$mac_address/ | grep -q vpc; then
in_vpc="true"
fi
}
get_vpc_cidrs()
{
cidrs=$(curl $metadata/network/interfaces/macs/$mac_address/vpc-ipv4-cidr-blocks)
echo "$cidrs"
}
append_line_to_dhclient_conf()
{
echo "$1" | tee -a "$dhclient_conf"
}
prepend_domain()
{
#sample line : prepend domain-name "ec2.internal ";
if grep -Eq "^prepend domain-name \"$1[:space:]+\";$" "$dhclient_conf"; then
return
else
append_line_to_dhclient_conf "prepend domain-name \"$1 \";"
restart_network="true"
fi
}
prepend_domain_server()
{
#sample line : prepend domain-name-servers 127.0.0.1;
if grep -Eq "^prepend domain-name-servers $1;$" "$dhclient_conf"; then
return
fi
append_line_to_dhclient_conf "prepend domain-name-servers $1;"
restart_network="true"
}
run_dnsmasq()
{
all_domains="$(grep ^search $resolv_conf | cut -d' ' -f2- )"
pid=$(ps -ef | grep dnsmasq | grep synth-domain | awk '{print $2}')
if [ $pid ]; then
kill $pid
fi
for d in $all_domains; do
for c in $(get_vpc_cidrs); do
syn_domains="$syn_domains --synth-domain=$d,$c,ip- "
done
done
runmasq="dnsmasq --listen-address=127.0.0.1 $syn_domains "
eval "$runmasq"
echo "started dnsmasq : $runmasq"
}
get_host_name()
{
echo "$(hostname -f)"
}
show_dns_status()
{
type="$1"
echo "------------ $type $resolv_conf ------------"
cat "$resolv_conf"
echo "------------ $type $dhclient_conf ------------"
cat "$dhclient_conf"
hostname="$(get_host_name)"
status="$?"
# BUG FIX: the original line lacked `echo` and tried to execute the
# status message as a command.
echo "'hostname -f' returns : $hostname"
return $status
}
restart_network_if_needed()
{
if "$restart_network"; then
echo "Updating DNS settings."
service network restart
restart_network="false"
fi
}
main()
{
show_dns_status "BeforeSetup"
old_domain="$(grep search $resolv_conf | cut -d' ' -f2-)"
default_domain="$(get_default_domain)"
check_vpc
if [ "$in_vpc" = "false" ]; then
# NON-VPC
if [ "$region" = "us-east-1" ]; then
if [[ "$old_domain" == "${default_domain}"* ]]; then
echo "$default_domain is already used in us-east-1."
else
echo "Making sure $default_domain is used in us-east-1."
prepend_domain $default_domain
fi
else
echo "Not in VPC, do nothing and exit."
fi
else
# VPC
first_nameserver="$(get_first_nameserver_from_resolv_conf)"
resolving_host_name="$(get_host_name)"
if [ "$1" = "rundnsmasq" -o -z "$resolving_host_name" ]; then
echo "Run dnsmasq"
run_dnsmasq
if [ "$first_nameserver" != "$localhost" ]; then
prepend_domain_server "$localhost"
fi
else
echo "Resolving hostname(${resolving_host_name}) successfully, do nothing and exit."
fi
fi
restart_network_if_needed
# BUG FIX: `return <command>` is invalid syntax; run the command and let
# its status become the function's return value.
show_dns_status "AfterSetup"
}
main "$@"
exit "$?"
EOF
sudo mv $tmpfile /usr/bin/setup-dns
# BUG FIX: mktemp creates the file 0600, so the installed script was not
# executable (or world-readable); restore normal binary permissions.
sudo chmod 755 /usr/bin/setup-dns
}
# One-shot guard: only patch DNS once per boot (flag lives in /tmp).
if [ ! -f /tmp/dns_flag ]; then
resolv_conf="/etc/resolv.conf"
metadata="http://169.254.169.254/latest/meta-data"
mac_address=$(curl -s $metadata/mac)
run_dnsmasq
rewrite_setup_dns
touch /tmp/dns_flag
# Restart the EMR instance controller so it re-resolves via dnsmasq.
pid="$(/bin/ps axwwo pid,cmd | awk '$12 ~ /aws157.instancecontroller.Main/ { print $1 }')"
# BUG FIX: `kill ""` is an error when the controller is not running;
# only kill when a PID was actually found.
if [ -n "$pid" ]; then
sudo kill "$pid"
fi
fi

View File

@ -0,0 +1,4 @@
#!/bin/sh
# Stage the Atlas Hive UDF jars from S3 onto Hive's library path so they
# are available to Hive queries on this node.
sudo aws s3 cp s3://ty-emr-pdx/job_input/seagate/lib/atlas-hive-udfs.jar /usr/lib/hive/lib/
sudo aws s3 cp s3://ty-emr-pdx/job_input/seagate/lib/updates.jar /usr/lib/hive/lib/

View File

@ -0,0 +1,4 @@
#!/bin/bash
set -e
# Dummy EMR step one: proves step execution by printing two fixed lines.
printf '%s\n' "Hallo!"
printf '%s\n' "Ich heisse dummkopf Stepf eins!"

View File

@ -0,0 +1,4 @@
#!/bin/bash
set -e
# Dummy EMR step two: proves step execution by printing two fixed lines.
printf '%s\n' "Hallo!"
printf '%s\n' "Ich heisse dummkopf Stepf zwei!"

View File

@ -0,0 +1,174 @@
#!/bin/bash
# EMR bootstrap variant: parse flags, locate the per-user utility dir,
# and decide whether Vim needs a rebuild with Lua support.
set -x
install_jupyter=false
# get input parameters — only --jupyter is recognized.
while [ $# -gt 0 ]; do
case "$1" in
--jupyter)
install_jupyter=true
;;
-*)
# NOTE(review): error_msg is not defined in this script — this arm
# fails with "command not found".
error_msg "unrecognized option: $1"
;;
*)
break
;;
esac
shift
done
s3_utils='https://s3.amazonaws.com/ty-emr/XRR/utils'
build_vim=false
# BUG FIX: $platform was referenced below but never assigned in this
# script (the sibling bootstrap sets it via uname), so both OS branches
# failed and util_path degenerated to "//$USER/.utils".
platform=$(uname)
if [ "$USER" == "root" ]; then
util_path="/root/.utils"
home="/root"
else
if [ "$platform" == "Darwin" ]; then
users_dir="Users"
fi
if [ "$platform" == "Linux" ]; then
users_dir="home"
fi
util_path="/$users_dir/$USER/.utils"
fi
# Rebuild Vim from source only when the installed one lacks +lua.
vim_check=$(vim --version)
if [[ $vim_check != *"+lua"* ]]; then
build_vim=true
fi
mkdir -p $util_path
# Lower-case the release text so distro matches are case-insensitive.
release=$(cat /etc/*release* | tr '[:upper:]' '[:lower:]')
# RHEL-family path — presumably matches ID_LIKE="rhel fedora" in
# /etc/os-release on Amazon Linux / CentOS; verify on newer releases.
if [[ $release == *"rhel fedora"* ]]; then
echo "Looks like we're running on something that is kinda like RHEL..."
sudo yum groupinstall -y "Development Tools"
sudo yum install -y \
tmux \
wget \
htop \
mlocate \
git \
rake \
zsh \
jq \
at \
bind-utils \
strace \
lua \
lua-devel \
ncurses \
ncurses-devel \
gmp \
gmp-devel \
ctags \
tcl-devel \
perl \
perl-devel \
perl-ExtUtils-ParseXS \
perl-ExtUtils-CBuilder \
perl-ExtUtils-Embed
# Build Vim (+lua, console-only) from source when the installed one
# lacks Lua support; LuaJIT is built first for --enable-luainterp.
if [[ $build_vim == true ]]; then
cd /tmp
git clone http://luajit.org/git/luajit-2.0.git
cd luajit-2.0
make
sudo make install
cd /tmp
git clone https://github.com/vim/vim.git
cd vim
./configure \
--with-features=huge \
--enable-cscope \
--enable-pythoninterp \
--enable-luainterp \
--enable-multibyte \
--enable-fontset \
--disable-gui \
--without-x \
--disable-netbeans \
--enable-largefile
make
sudo make install
# Repoint /usr/bin/vi at the freshly built vim.
if [ -e /usr/bin/vi ]; then
sudo rm /usr/bin/vi
fi
sudo ln -s /usr/local/bin/vim /usr/bin/vi
rm -rf /tmp/vim
fi
fi
# Bootstrap pip for python2.7, then install the AWS CLI.
wget https://bootstrap.pypa.io/get-pip.py
sudo python2.7 ./get-pip.py
# Preserve PATH through sudo so the pip just installed is found.
sudo env "PATH=$PATH" pip install awscli
wget "https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh"
chmod +x install.sh
./install.sh
# NOTE(review): the 644-then-+x chmod pair appears to reset chsh's
# permissions before use — intent unclear, confirm it is needed.
sudo chmod 644 /usr/bin/chsh
sudo chmod +x /usr/bin/chsh
sudo /usr/bin/chsh -s /bin/zsh $USER
sudo updatedb
# Install the nightly suntracker job and run it once now.
cd $util_path
wget --no-check-certificate $s3_utils/suntracker.sh
chmod +x $util_path/suntracker.sh
# NOTE(review): re-running this script appends a duplicate cron entry.
(crontab -l ; echo "0 3 * * * $util_path/suntracker.sh") | crontab -
$util_path/suntracker.sh
# AWS Instance customization
# cloud-init exists only on AWS images; use it to gate EC2-only setup.
if [ -e /usr/bin/cloud-init ]; then
# Install SSM Agent
cd /tmp
sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm
# Am I running EMR? EMR instances carry aws:elasticmapreduce:* tags.
instance=$(aws ec2 describe-instances --instance-ids $(curl -s 169.254.169.254/latest/meta-data/instance-id))
tags=$(echo $instance | jq -r '.Reservations[0].Instances[0].Tags[]')
cluster_id=$(echo $tags | jq -r '. | select(.Key=="aws:elasticmapreduce:job-flow-id") | .Value')
if [ -n "$cluster_id" ]; then
echo "$cluster_id" > ~/.cluster_id
role=$(echo $tags | jq -r '. | select(.Key=="aws:elasticmapreduce:instance-group-role") | .Value')
# The Zeppelin-S3 step only makes sense on the master node.
if [ "$role" == "MASTER" ]; then
# ToDo: Incorporate Hue?
cd $util_path
wget --no-check-certificate $s3_utils/configure_zeppelin_s3.sh
chmod +x $util_path/configure_zeppelin_s3.sh
aws emr add-steps --cluster-id $cluster_id --steps Type=CUSTOM_JAR,Name="Configure Zeppelin for S3",Jar="command-runner.jar",Args=[$util_path/configure_zeppelin_s3.sh]
fi
# install jupyter
# BUG FIX: quote the flag so the [ ] test cannot break if it is empty.
if [ "$install_jupyter" == true ]; then
cd $util_path
wget --no-check-certificate https://s3.amazonaws.com/aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/install-jupyter-emr5.sh
chmod +x $util_path/install-jupyter-emr5.sh
$util_path/install-jupyter-emr5.sh \
--r \
--julia \
--toree \
--torch \
--ruby \
--ds-packages \
--ml-packages \
--python-packages ggplot nilearn \
--port 8002 \
--password jupyter \
--jupyterhub \
--jupyterhub-port 8001 \
--cached-install \
--notebook-dir s3://ty-emr/XRR/jupyter/notebooks/ \
--copy-samples \
--s3fs
fi
fi
fi
# Finish zsh setup: default to the agnoster prompt.
touch ~/.zsh.prompts
# BUG FIX: mkdir -p so a re-run does not fail on the existing directory.
mkdir -p ~/.zsh.after/
echo "prompt agnoster" > ~/.zsh.after/prompt.zsh

View File

@ -0,0 +1,21 @@
[{
"Classification": "hue-ini",
"Properties": {},
"Configurations": [
{
"Classification": "aws",
"Properties": {},
"Configurations": [
{
"Classification": "aws_accounts",
"Properties": {},
"Configurations": [
{
"Classification": "default",
"Properties": {
"region": "us-east-1"
}
}]
}]
}]
}]

View File

@ -0,0 +1,46 @@
[
{
"Classification": "hue-ini",
"Properties": {},
"Configurations": [
{
"Classification": "desktop",
"Properties": {},
"Configurations": [
{
"Classification": "ldap",
"Properties": {
"create_users_on_login": "false"
},
"Configurations": [
{
"Classification": "ldap_servers",
"Properties": {},
"Configurations": [
{
"Classification": "blipsandchitz.local",
"Properties": {
"base_dn": "DC=blipsandchitz,DC=local",
"ldap_url": "ldap://10.0.8.254:389",
"search_bind_authentication": "true",
"bind_dn": "CN=hue,OU=ServiceAccounts,OU=UserAccounts,DC=blipsandchitz,DC=local",
"bind_password": "Badpassword1"
},
"Configurations": []
}
]
}
]
},
{
"Classification": "auth",
"Properties": {
"backend": "desktop.auth.backend.LdapBackend,desktop.auth.backend.AllowFirstUserDjangoBackend"
},
"Configurations": []
}
]
}
]
}
]

View File

@ -0,0 +1,63 @@
[
{
"Classification": "hue-ini",
"Properties": {},
"Configurations": [
{
"Classification": "desktop",
"Properties": {},
"Configurations": [
{
"Classification": "ldap",
"Properties": {
"create_users_on_login": "false"
},
"Configurations": [
{
"Classification": "ldap_servers",
"Properties": {},
"Configurations": [
{
"Classification": "blipsandchitz.local",
"Properties": {
"base_dn": "DC=blipsandchitz,DC=local",
"ldap_url": "ldap://10.0.8.254:389",
"search_bind_authentication": "true",
"bind_dn": "CN=hue,OU=ServiceAccounts,OU=UserAccounts,DC=blipsandchitz,DC=local",
"bind_password": "Badpassword1"
},
"Configurations": []
}
]
},
{
"Classification": "users",
"Properties": {
"user_filter": "objectclass=person",
"user_name_attr": "uid"
},
"Configurations": []
},
{
"Classification": "groups",
"Properties": {
"group_filter": "objectclass=groupOfUniqueNames",
"group_name_attr": "cn",
"group_member_attr": "uniqueMember"
},
"Configurations": []
}
]
},
{
"Classification": "auth",
"Properties": {
"backend": "desktop.auth.backend.LdapBackend,desktop.auth.backend.AllowFirstUserDjangoBackend"
},
"Configurations": []
}
]
}
]
}
]

View File

@ -0,0 +1,63 @@
[
{
"Classification": "hue-ini",
"Properties": {},
"Configurations": [
{
"Classification": "desktop",
"Properties": {},
"Configurations": [
{
"Classification": "ldap",
"Properties": {
"create_users_on_login": "false"
},
"Configurations": [
{
"Classification": "ldap_servers",
"Properties": {},
"Configurations": [
{
"Classification": "blipsandchitz.local",
"Properties": {
"base_dn": "DC=blipsandchitz,DC=local",
"ldap_url": "ldap://10.0.8.254:389",
"search_bind_authentication": "true",
"bind_dn": "CN=hue,OU=ServiceAccounts,OU=UserAccounts,DC=blipsandchitz,DC=local",
"bind_password": "Badpassword1"
},
"Configurations": []
}
]
},
{
"Classification": "users",
"Properties": {
"user_filter": "objectclass=person",
"user_name_attr": "uid"
},
"Configurations": []
},
{
"Classification": "groups",
"Properties": {
"group_filter": "objectclass=groupOfUniqueNames",
"group_name_attr": "cn",
"group_member_attr": "uniqueMember"
},
"Configurations": []
}
]
},
{
"Classification": "auth",
"Properties": {
"backend": "desktop.auth.backend.AllowFirstUserDjangoBackend"
},
"Configurations": []
}
]
}
]
}
]

View File

@ -0,0 +1,832 @@
#!/bin/bash
set -x -e
# AWS EMR bootstrap script
# for installing Jupyter notebook on AWS EMR 5+
#
# 2016-11-04 - Tom Zeng tomzeng@amazon.com, initial version
# 2016-11-20 - Tom Zeng, add JupyterHub
# 2016-12-01 - Tom Zeng, add s3 support and cached install
# 2016-12-03 - Tom Zeng, use puppet to install/run services
# 2016-12-06 - Tom Zeng, switch to s3fs for S3 support since s3nb is not fully working
# 2016-12-29 - Tom Zeng, add Dask and Dask.distributed
# 2017-04-18 - Tom Zeng, add BigDL support
# 2017-05-16 = Tom Zeng, add cached install for EMR 5.5, updated yum rpm cache and miniCRAN
# 2017-05-20 - Tom Zeng, add s3contents to replace s3nb which no longer works due to Jupyter update
# 2017-05-23 - Tom Zeng, fix the s3contents dummy last_modified field
# 2017-05-25 - Tom Zeng, turn off tensorflow, pip wheel install no longer working, will fix later
# 2017-06-09 - Tom Zeng, fix install issue for EMR 5.6 caused by kernel source package already installed
#
# Usage:
# --r - install the IRKernel for R (Sparklyr is installed with this option, but as of 2017-04-05 Sparklyr does not support Spark 2.x yet)
# --toree - install the Apache Toree kernel that supports Scala, PySpark, SQL, SparkR for Apache Spark
# --interpreters - specify Apache Toree interpreters, default is all: "Scala,SQL,PySpark,SparkR"
# --julia - install the IJulia kernel for Julia
# --bigdl - install Intel's BigDL Deep Learning framework
# --ruby - install the iRuby kernel for Ruby
# --torch - intall the iTorch kernel for Torch
# --javascript - install the JavaScript and CoffeeScript kernels (only works for JupyterHub for now)
# --dask - install Dask and Dask.distributed, with the scheduler on master instance and the workers on the slave instances
# --ds-packages - install the Python Data Science related packages (scikit-learn pandas numpy numexpr statsmodels seaborn)
# --ml-packages - install the Python Machine Learning related packages (theano keras tensorflow)
# --python-packages - install specific python packages e.g. "ggplot nilean"
# --port - set the port for Jupyter notebook, default is 8888
# --user - create a default user for Jupyterhub
# --password - set the password for Jupyter notebook and JupyterHub
# --localhost-only - restrict jupyter to listen on localhost only, default to listen on all ip addresses for the instance
# --jupyterhub - install JupyterHub
# --jupyterhub-port - set the port for JupyterHub, default is 8000
# --no-jupyter - if JupyterHub is installed, use this to disable Jupyter
# --notebook-dir - specify notebook folder, this could be a local directory or a S3 bucket
# --cached-install - use some cached dependency artifacts on s3 to speed up installation
# --ssl - enable ssl, make sure to use your own cert and key files to get rid of the warning
# --copy-samples - copy sample notebooks to samples sub folder under the notebook folder
# --spark-opts - user supplied Spark options to pass to SPARK_OPTS
# --s3fs - use s3fs instead of s3contents(default) for storing notebooks on s3, s3fs could cause slowness if the s3 bucket has lots of file
# --python3 - install python 3 packages and use python3
# check for master node
IS_MASTER=false
if grep isMaster /mnt/var/lib/info/instance.json | grep true;
then
IS_MASTER=true
fi
# error message
error_msg ()
{
echo 1>&2 "Error: $1"
}
# some defaults
RUBY_KERNEL=false
R_KERNEL=false
JULIA_KERNEL=false
TOREE_KERNEL=false
TORCH_KERNEL=false
DS_PACKAGES=false
ML_PACKAGES=false
PYTHON_PACKAGES=""
RUN_AS_STEP=false
NOTEBOOK_DIR=""
NOTEBOOK_DIR_S3=false
JUPYTER_PORT=8888
JUPYTER_PASSWORD=""
JUPYTER_LOCALHOST_ONLY=false
PYTHON3=false
GPU=false
CPU_GPU="cpu"
GPUU=""
JUPYTER_HUB=true
JUPYTER_HUB_PORT=8000
JUPYTER_HUB_IP="*"
JUPYTER_HUB_DEFAULT_USER="jupyter"
INTERPRETERS="Scala,SQL,PySpark,SparkR"
R_REPOS_LOCAL="file:////mnt/miniCRAN"
R_REPOS_REMOTE="http://cran.rstudio.com"
R_REPOS=$R_REPOS_LOCAL
USE_CACHED_DEPS=true
SSL=false
SSL_OPTS="--no-ssl"
COPY_SAMPES=false
USER_SPARK_OPTS=""
NOTEBOOK_DIR_S3_S3NB=false
NOTEBOOK_DIR_S3_S3CONTENTS=true
JS_KERNEL=false
NO_JUPYTER=false
INSTALL_DASK=false
INSTALL_PY3_PKGS=false
APACHE_SPARK_VERSION="2.2.0"
BIGDL=false
MXNET=false
DL4J=false
# get input parameters
while [ $# -gt 0 ]; do
case "$1" in
--r)
R_KERNEL=true
;;
--julia)
JULIA_KERNEL=true
;;
--toree)
TOREE_KERNEL=true
;;
--torch)
TORCH_KERNEL=true
;;
--javascript)
JS_KERNEL=true
;;
--ds-packages)
DS_PACKAGES=true
;;
--ml-packages)
ML_PACKAGES=true
;;
--python-packages)
shift
PYTHON_PACKAGES=$1
;;
--bigdl)
BIGDL=true
;;
--mxnet)
MXNET=true
;;
--dl4j)
DL4J=true
;;
--ruby)
RUBY_KERNEL=true
;;
--gpu)
GPU=true
CPU_GPU="gpu"
GPUU="_gpu"
;;
--run-as-step)
RUN_AS_STEP=true
;;
--port)
shift
JUPYTER_PORT=$1
;;
--user)
shift
JUPYTER_HUB_DEFAULT_USER=$1
;;
--password)
shift
JUPYTER_PASSWORD=$1
;;
--localhost-only)
JUPYTER_LOCALHOST_ONLY=true
JUPYTER_HUB_IP=""
;;
--jupyterhub)
JUPYTER_HUB=true
#PYTHON3=true
;;
--jupyterhub-port)
shift
JUPYTER_HUB_PORT=$1
;;
--notebook-dir)
shift
NOTEBOOK_DIR=$1
;;
--copy-samples)
COPY_SAMPLES=true
;;
--toree-interpreters)
shift
INTERPRETERS=$1
;;
--cached-install)
USE_CACHED_DEPS=true
R_REPOS=$R_REPOS_LOCAL
;;
--no-cached-install)
USE_CACHED_DEPS=false
R_REPOS=$R_REPOS_REMOTE
;;
--no-jupyter)
NO_JUPYTER=true
;;
--ssl)
SSL=true
;;
--dask)
INSTALL_DASK=true
;;
--python3)
INSTALL_PY3_PKGS=true
;;
--spark-opts)
shift
USER_SPARK_OPTS=$1
;;
--spark-version)
shift
APACHE_SPARK_VERSION=$1
;;
--s3fs)
#NOTEBOOK_DIR_S3_S3NB=false
NOTEBOOK_DIR_S3_S3CONTENTS=false
;;
#--s3nb) # this stopped working after Jupyter update in early 2017
# NOTEBOOK_DIR_S3_S3NB=true
# ;;
-*)
# do not exit out, just note failure
error_msg "unrecognized option: $1"
;;
*)
break;
;;
esac
shift
done
RELEASE=$(cat /etc/system-release)
REL_NUM=$(ruby -e "puts '$RELEASE'.split.last")
sudo mkdir -p /mnt/var/aws/emr
sudo cp -pr /var/aws/emr/packages /mnt/var/aws/emr/ && sudo rm -rf /var/aws/emr/packages && sudo mkdir /var/aws/emr/packages && sudo mount -o bind /mnt/var/aws/emr/packages /var/aws/emr/packages &
# move /usr/local and usr/share to /mnt/usr-moved/ to avoid running out of space on /
if [ ! -d /mnt/usr-moved ]; then
echo "move local start" >> /tmp/install_time.log
date >> /tmp/install_time.log
sudo mkdir /mnt/usr-moved
sudo mv /usr/local /mnt/usr-moved/ && sudo ln -s /mnt/usr-moved/local /usr/
echo "move local end, move share start" >> /tmp/install_time.log
date >> /tmp/install_time.log
sudo mv /usr/share /mnt/usr-moved/ && sudo ln -s /mnt/usr-moved/share /usr/
echo "move share end" >> /tmp/install_time.log
date >> /tmp/install_time.log
fi
export MAKE='make -j 8'
export NODE_PATH='/usr/lib/node_modules'
if [ "$JS_KERNEL" = true ]; then
sudo python -m pip install -U jinja2 tornado jsonschema pyzmq
sudo npm cache clean -f
sudo npm install -g npm
sudo npm install -g n
sudo n stable
fi
cd /mnt
TF_BINARY_URL_PY3="https://storage.googleapis.com/tensorflow/linux/$CPU_GPU/tensorflow$GPUU-1.1.0-cp34-cp34m-linux_x86_64.whl"
TF_BINARY_URL="https://storage.googleapis.com/tensorflow/linux/$CPU_GPU/tensorflow$GPUU-1.1.0-cp27-none-linux_x86_64.whl"
if [ "$DS_PACKAGES" = true ]; then
# Python
if [ "$INSTALL_PY3_PKGS" = true ]; then
sudo python3 -m pip install -U scikit-learn pandas numpy numexpr statsmodels scipy
else
sudo python -m pip install -U scikit-learn pandas numpy numexpr statsmodels scipy
fi
# Javascript
if [ "$JS_KERNEL" = true ]; then
sudo npm install -g --unsafe-perm stats-analysis decision-tree machine_learning limdu synaptic node-svm lda brain.js scikit-node
fi
fi
if [ "$ML_PACKAGES" = true ]; then
if [ "$INSTALL_PY3_PKGS" = true ]; then
sudo python3 -m pip install -U theano
sudo python3 -m pip install -U keras
sudo python3 -m pip install -U $TF_BINARY_URL_PY3
else
sudo python -m pip install -U theano
sudo python -m pip install -U keras
sudo python -m pip install -U $TF_BINARY_URL
fi
fi
if [ ! "$PYTHON_PACKAGES" = "" ]; then
if [ "$INSTALL_PY3_PKGS" = true ]; then
sudo python3 -m pip install -U $PYTHON_PACKAGES || true
else
sudo python -m pip install -U $PYTHON_PACKAGES || true
fi
fi
# Build Intel BigDL from source. Maven is fetched from S3 and linked into
# /usr/bin; the resulting dist under $BIGDL_HOME is consumed later by
# setup_jupyter_process_with_bigdl. A tensorboard instance is started in the
# background to serve BigDL training summaries.
if [ "$BIGDL" = true ]; then
aws s3 cp s3://tomzeng/maven/apache-maven-3.3.3-bin.tar.gz .
tar xvfz apache-maven-3.3.3-bin.tar.gz
sudo mv apache-maven-3.3.3 /opt/maven
sudo ln -s /opt/maven/bin/mvn /usr/bin/mvn
git clone https://github.com/intel-analytics/BigDL.git
cd BigDL/
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m"
export BIGDL_HOME=/mnt/BigDL
export BIGDL_VER="0.2.0-SNAPSHOT"
bash make-dist.sh -P spark_2.1
# -p: do not fail if the directory already exists (e.g. on a re-run)
mkdir -p /tmp/bigdl_summaries
/usr/local/bin/tensorboard --debug INFO --logdir /tmp/bigdl_summaries/ > /tmp/tensorboard_bigdl.log 2>&1 &
fi
# Julia 0.5.0 runtime: unpack the official tarball and copy its contents
# over the system directories.
if [ "$JULIA_KERNEL" = true ]; then
# Julia install
cd /mnt
if [ ! "$USE_CACHED_DEPS" = true ]; then
wget https://julialang.s3.amazonaws.com/bin/linux/x64/0.5/julia-0.5.0-linux-x86_64.tar.gz
tar xvfz julia-0.5.0-linux-x86_64.tar.gz
fi
# NOTE(review): hard-coded extracted dir name for this exact tarball; a
# version bump above must be matched here — verify on upgrade.
cd julia-3c9d75391c
sudo cp -pr bin/* /usr/bin/
sudo cp -pr lib/* /usr/lib/
#sudo cp -pr libexec/* /usr/libexec/
sudo cp -pr share/* /usr/share/
sudo cp -pr include/* /usr/include/
fi
# Dask: install the full distribution and start a scheduler on the master or
# a worker on every other node, both logging to /var/log.
if [ "$INSTALL_DASK" = true ]; then
# 'dask[complete]' is quoted so the shell cannot treat the [...] extras
# suffix as a glob pattern and expand it against files in the current dir.
if [ "$INSTALL_PY3_PKGS" = true ]; then
sudo python3 -m pip install -U 'dask[complete]' distributed
else
sudo python -m pip install -U 'dask[complete]' distributed
fi
export PATH=$PATH:/usr/local/bin
if [ "$IS_MASTER" = true ]; then
dask-scheduler > /var/log/dask-scheduler.log 2>&1 &
else
# Workers parse the master hostname out of the EMR job-flow state file and
# connect on the default scheduler port 8786.
MASTER_KV=$(grep masterHost /emr/instance-controller/lib/info/job-flow-state.txt)
MASTER_HOST=$(ruby -e "puts '$MASTER_KV'.gsub('\"','').split.last")
dask-worker $MASTER_HOST:8786 > /var/log/dask-worker.log 2>&1 &
fi
fi
#echo ". /mnt/ipython-env/bin/activate" >> ~/.bashrc
# only run below on master instance
# Build ~/.jupyter/jupyter_notebook_config.py idempotently: each option is
# first deleted with sed, then re-appended, so re-runs do not duplicate lines.
if [ "$IS_MASTER" = true ]; then
sudo mkdir -p /var/log/jupyter
mkdir -p ~/.jupyter
# Fixed: a stray 'ls' argument here used to create a junk file named 'ls'.
touch ~/.jupyter/jupyter_notebook_config.py
sed -i '/c.NotebookApp.open_browser/d' ~/.jupyter/jupyter_notebook_config.py
echo "c.NotebookApp.open_browser = False" >> ~/.jupyter/jupyter_notebook_config.py
# Unless restricted to localhost, listen on all interfaces.
if [ ! "$JUPYTER_LOCALHOST_ONLY" = true ]; then
sed -i '/c.NotebookApp.ip/d' ~/.jupyter/jupyter_notebook_config.py
echo "c.NotebookApp.ip='*'" >> ~/.jupyter/jupyter_notebook_config.py
fi
sed -i '/c.NotebookApp.port/d' ~/.jupyter/jupyter_notebook_config.py
echo "c.NotebookApp.port = $JUPYTER_PORT" >> ~/.jupyter/jupyter_notebook_config.py
# With a password: store its notebook-auth hash. Without one: clear the
# token so the notebook is reachable without authentication.
if [ ! "$JUPYTER_PASSWORD" = "" ]; then
sed -i '/c.NotebookApp.password/d' ~/.jupyter/jupyter_notebook_config.py
HASHED_PASSWORD=$(python3 -c "from notebook.auth import passwd; print(passwd('$JUPYTER_PASSWORD'))")
echo "c.NotebookApp.password = u'$HASHED_PASSWORD'" >> ~/.jupyter/jupyter_notebook_config.py
else
sed -i '/c.NotebookApp.token/d' ~/.jupyter/jupyter_notebook_config.py
echo "c.NotebookApp.token = u''" >> ~/.jupyter/jupyter_notebook_config.py
fi
echo "c.Authenticator.admin_users = {'$JUPYTER_HUB_DEFAULT_USER'}" >> ~/.jupyter/jupyter_notebook_config.py
echo "c.LocalAuthenticator.create_system_users = True" >> ~/.jupyter/jupyter_notebook_config.py
# Self-signed cert for both Jupyter and JupyterHub; the per-app option
# strings are consumed by the launch commands further below.
if [ "$SSL" = true ]; then
#NOTE - replace server.cert and server.key with your own cert and key files
CERT=/usr/local/etc/server.cert
KEY=/usr/local/etc/server.key
sudo openssl req -x509 -nodes -days 3650 -newkey rsa:1024 -keyout $KEY -out $CERT -subj "/C=US/ST=Washington/L=Seattle/O=JupyterCert/CN=JupyterCert"
# the following works for Jupyter but will fail JupyterHub, use options for both instead
#echo "c.NotebookApp.certfile = u'/usr/local/etc/server.cert'" >> ~/.jupyter/jupyter_notebook_config.py
#echo "c.NotebookApp.keyfile = u'/usr/local/etc/server.key'" >> ~/.jupyter/jupyter_notebook_config.py
SSL_OPTS_JUPYTER="--keyfile=/usr/local/etc/server.key --certfile=/usr/local/etc/server.cert"
SSL_OPTS_JUPYTERHUB="--ssl-key=/usr/local/etc/server.key --ssl-cert=/usr/local/etc/server.cert"
fi
# Javascript/CoffeeScript kernels
# Install the npm kernel packages, then register both kernels globally
# with Jupyter.
if [ "$JS_KERNEL" = true ]; then
sudo npm install -g --unsafe-perm ijavascript d3 lodash plotly jp-coffeescript
sudo ijs --ijs-install=global
sudo jp-coffee --jp-install=global
fi
# Julia kernel (IJulia) plus common data/plotting packages.
if [ "$JULIA_KERNEL" = true ]; then
julia -e 'Pkg.add("IJulia")'
julia -e 'Pkg.add("RDatasets");Pkg.add("Gadfly");Pkg.add("DataFrames");Pkg.add("PyPlot")'
# Julia's Spark support does not support Spark on Yarn yet
# install mvn
#cd /mnt
#aws s3 cp s3://tomzeng/maven/apache-maven-3.3.9-bin.tar.gz .
#tar xvfz apache-maven-3.3.9-bin.tar.gz
#sudo mv apache-maven-3.3.9 /opt/maven
#sudo ln -s /opt/maven/bin/mvn /usr/bin/mvn
# install Spark for Julia
#julia -e 'Pkg.clone("https://github.com/dfdx/Spark.jl"); Pkg.build("Spark"); Pkg.checkout("JavaCall")'
fi
# iTorch depends on Torch which is installed with --ml-packages
# Build Torch from the distro repo, add lua deps, then build the iTorch
# kernel and register it with Jupyter.
if [ "$TORCH_KERNEL" = true ]; then
set +e # workaround for the lengthy torch install-deps, esp when other background process are also running yum
cd /mnt
if [ ! "$USE_CACHED_DEPS" = true ]; then
git clone https://github.com/torch/distro.git torch-distro
fi
cd torch-distro
git pull
./install-deps
./install.sh -b
export PATH=$PATH:/mnt/torch-distro/install/bin
# Pick up the env vars torch's installer appended to ~/.profile.
source ~/.profile
luarocks install lzmq
luarocks install gnuplot
cd /mnt
if [ ! "$USE_CACHED_DEPS" = true ]; then
git clone https://github.com/facebook/iTorch.git
fi
cd iTorch
luarocks make
sudo cp -pr ~/.ipython/kernels/itorch /usr/local/share/jupyter/kernels/
set -e
fi
# Notebook storage location. For s3:// locations there are three backends:
# s3nb, s3contents, or (default) an s3fs-fuse mount. Otherwise a plain
# local directory is configured.
if [ ! "$NOTEBOOK_DIR" = "" ]; then
NOTEBOOK_DIR="${NOTEBOOK_DIR%/}/" # remove trailing / if exists then add /
if [[ "$NOTEBOOK_DIR" == s3://* ]]; then
NOTEBOOK_DIR_S3=true
# s3nb is not fully working yet (upload and folder creation are broken),
# and it no longer works at all with newer Jupyter releases.
if [ "$NOTEBOOK_DIR_S3_S3NB" = true ]; then
cd /mnt
if [ ! "$USE_CACHED_DEPS" = true ]; then
git clone https://github.com/tomz/s3nb.git
fi
cd s3nb
sudo python -m pip install -U entrypoints
sudo python setup.py install
# JupyterHub runs under python3, so install there too.
if [ "$JUPYTER_HUB" = true ]; then
sudo python3 -m pip install -U entrypoints
sudo python3 setup.py install
fi
echo "c.NotebookApp.contents_manager_class = 's3nb.S3ContentsManager'" >> ~/.jupyter/jupyter_notebook_config.py
echo "c.S3ContentsManager.checkpoints_kwargs = {'root_dir': '~/.checkpoints'}" >> ~/.jupyter/jupyter_notebook_config.py
# if just bucket with no subfolder, a trailing / is required, otherwise s3nb will break
echo "c.S3ContentsManager.s3_base_uri = '$NOTEBOOK_DIR'" >> ~/.jupyter/jupyter_notebook_config.py
#echo "c.S3ContentsManager.s3_base_uri = '${NOTEBOOK_DIR_S3%/}/%U'" >> ~/.jupyter/jupyter_notebook_config.py
#echo "c.Spawner.default_url = '${NOTEBOOK_DIR_S3%/}/%U'" >> ~/.jupyter/jupyter_notebook_config.py
#echo "c.Spawner.notebook_dir = '/%U'" >> ~/.jupyter/jupyter_notebook_config.py
elif [ "$NOTEBOOK_DIR_S3_S3CONTENTS" = true ]; then
# Split s3://bucket/folder into BUCKET and FOLDER via ruby one-liners.
BUCKET=$(ruby -e "puts '$NOTEBOOK_DIR'.split('//')[1].split('/')[0]")
FOLDER=$(ruby -e "puts '$NOTEBOOK_DIR'.split('//')[1].split('/')[1..-1].join('/')")
#sudo python -m pip install -U s3contents
cd /mnt
#aws s3 cp s3://aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/s3contents.zip .
#unzip s3contents.zip
git clone https://github.com/tomz/s3contents.git
cd s3contents
sudo python setup.py install
echo "c.NotebookApp.contents_manager_class = 's3contents.S3ContentsManager'" >> ~/.jupyter/jupyter_notebook_config.py
echo "c.S3ContentsManager.bucket_name = '$BUCKET'" >> ~/.jupyter/jupyter_notebook_config.py
echo "c.S3ContentsManager.prefix = '$FOLDER'" >> ~/.jupyter/jupyter_notebook_config.py
# this following is no longer needed, default was fixed in the latest on github
#echo "c.S3ContentsManager.endpoint_url = 'https://s3.amazonaws.com'" >> ~/.jupyter/jupyter_notebook_config.py
else
# Default: build s3fs-fuse from source and mount the bucket at /mnt/$BUCKET,
# then point the notebook dir at the mounted folder.
BUCKET=$(ruby -e "puts '$NOTEBOOK_DIR'.split('//')[1].split('/')[0]")
FOLDER=$(ruby -e "puts '$NOTEBOOK_DIR'.split('//')[1].split('/')[1..-1].join('/')")
if [ "$USE_CACHED_DEPS" != true ]; then
sudo yum install -y automake fuse fuse-devel libxml2-devel
fi
cd /mnt
git clone https://github.com/s3fs-fuse/s3fs-fuse.git
cd s3fs-fuse/
ls -alrt
./autogen.sh
./configure
make
sudo make install
sudo su -c 'echo user_allow_other >> /etc/fuse.conf'
mkdir -p /mnt/s3fs-cache
mkdir -p /mnt/$BUCKET
#/usr/local/bin/s3fs -o allow_other -o iam_role=auto -o umask=0 $BUCKET /mnt/$BUCKET
# -o nodnscache -o nosscache -o parallel_count=20 -o multipart_size=50
/usr/local/bin/s3fs -o allow_other -o iam_role=auto -o umask=0 -o url=https://s3.amazonaws.com -o no_check_certificate -o enable_noobj_cache -o use_cache=/mnt/s3fs-cache $BUCKET /mnt/$BUCKET
#/usr/local/bin/s3fs -o allow_other -o iam_role=auto -o umask=0 -o use_cache=/mnt/s3fs-cache $BUCKET /mnt/$BUCKET
echo "c.NotebookApp.notebook_dir = '/mnt/$BUCKET/$FOLDER'" >> ~/.jupyter/jupyter_notebook_config.py
echo "c.ContentsManager.checkpoints_kwargs = {'root_dir': '.checkpoints'}" >> ~/.jupyter/jupyter_notebook_config.py
fi
else
# Non-s3 path: use it directly as a local notebook directory.
echo "c.NotebookApp.notebook_dir = '$NOTEBOOK_DIR'" >> ~/.jupyter/jupyter_notebook_config.py
echo "c.ContentsManager.checkpoints_kwargs = {'root_dir': '.checkpoints'}" >> ~/.jupyter/jupyter_notebook_config.py
fi
fi
# Create the JupyterHub default OS user (if configured).
if [ ! "$JUPYTER_HUB_DEFAULT_USER" = "" ]; then
sudo adduser $JUPYTER_HUB_DEFAULT_USER
fi
# Copy sample notebooks from the public blog bucket into the notebook dir
# (and into the default user's home for the JupyterHub case); '|| true'
# keeps missing dirs/objects from aborting the bootstrap.
if [ "$COPY_SAMPLES" = true ]; then
cd ~
if [ "$NOTEBOOK_DIR_S3" = true ]; then
aws s3 sync s3://aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/notebooks/ ${NOTEBOOK_DIR}samples/ || true
else
if [ ! "$NOTEBOOK_DIR" = "" ]; then
mkdir -p ${NOTEBOOK_DIR}samples || true
sudo mkdir /home/$JUPYTER_HUB_DEFAULT_USER/${NOTEBOOK_DIR}samples || true
fi
aws s3 sync s3://aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/notebooks/ ${NOTEBOOK_DIR}samples || true
sudo cp -pr ${NOTEBOOK_DIR}samples /home/$JUPYTER_HUB_DEFAULT_USER/
sudo chown -R $JUPYTER_HUB_DEFAULT_USER:$JUPYTER_HUB_DEFAULT_USER /home/$JUPYTER_HUB_DEFAULT_USER/${NOTEBOOK_DIR}samples
fi
# NOTE(review): 'text_classfication' matches the actual S3 object key; do
# not "fix" the spelling without renaming the object.
if [ "$BIGDL" = true ]; then
aws s3 cp s3://aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/notebooks/text_classfication.ipynb ${NOTEBOOK_DIR}.
sudo cp ${NOTEBOOK_DIR}text_classfication.ipynb /home/$JUPYTER_HUB_DEFAULT_USER/${NOTEBOOK_DIR}
sudo chown -R $JUPYTER_HUB_DEFAULT_USER:$JUPYTER_HUB_DEFAULT_USER /home/$JUPYTER_HUB_DEFAULT_USER/${NOTEBOOK_DIR}text_classfication.ipynb
fi
fi
# Block until Spark has been configured on this node, signalled by the
# appearance of its defaults file; poll every 10 seconds.
wait_for_spark() {
until [ -f /etc/spark/conf/spark-defaults.conf ]; do
sleep 10
done
}
# Register an upstart job that runs Jupyter through pyspark with the BigDL
# jars and python api on the classpath. The PUPPET_SCRIPT heredoc is
# unquoted, so $-variables below are expanded by THIS shell before puppet
# sees the manifest.
setup_jupyter_process_with_bigdl() {
wait_for_spark
export PYTHON_API_PATH=${BIGDL_HOME}/dist/lib/bigdl-$BIGDL_VER-python-api.zip
export BIGDL_JAR_PATH=${BIGDL_HOME}/dist/lib/bigdl-$BIGDL_VER-jar-with-dependencies.jar
cat ${BIGDL_HOME}/dist/conf/spark-bigdl.conf | sudo tee -a /etc/spark/conf/spark-defaults.conf
sudo puppet apply << PUPPET_SCRIPT
include 'upstart'
upstart::job { 'jupyter':
description => 'Jupyter',
respawn => true,
respawn_limit => '0 10',
start_on => 'runlevel [2345]',
stop_on => 'runlevel [016]',
console => 'output',
chdir => '/home/hadoop',
script => '
sudo su - hadoop > /var/log/jupyter/jupyter.log 2>&1 <<BASH_SCRIPT
export NODE_PATH="$NODE_PATH"
export PYSPARK_DRIVER_PYTHON="jupyter"
export PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser $SSL_OPTS_JUPYTER --log-level=INFO"
export NOTEBOOK_DIR="$NOTEBOOK_DIR"
export BIGDL_HOME=/mnt/BigDL
export SPARK_HOME=/usr/lib/spark
export YARN_CONF_DIR=/etc/hadoop/conf
export PYTHONPATH=${PYTHON_API_PATH}:$PYTHONPATH
source ${BIGDL_HOME}/dist/bin/bigdl.sh
#pyspark --py-files ${PYTHON_API_PATH} --jars ${BIGDL_JAR_PATH} --conf spark.driver.extraClassPath=${BIGDL_JAR_PATH} --conf spark.executor.extraClassPath=bigdl-${BIGDL_VER}-jar-with-dependencies.jar
pyspark --py-files ${PYTHON_API_PATH} --jars ${BIGDL_JAR_PATH}
BASH_SCRIPT
',
}
PUPPET_SCRIPT
}
# Post-Spark installation work (run in the background unless RUN_AS_STEP):
# configure Spark defaults, wire R/SparkR into the Spark env, register the
# Toree kernel, and finally register the Jupyter upstart job.
background_install_proc() {
wait_for_spark
# Ensure the hive catalog is configured exactly once.
if ! grep "spark.sql.catalogImplementation" /etc/spark/conf/spark-defaults.conf; then
sudo bash -c "echo 'spark.sql.catalogImplementation hive' >> /etc/spark/conf/spark-defaults.conf"
fi
if [ ! -f /tmp/Renvextra ]; then # check if the rstudio BA maybe already done this
# Quoted heredoc: written literally; R expands ${PWD}/${PATH} itself.
cat << 'EOF' > /tmp/Renvextra
JAVA_HOME="/etc/alternatives/jre"
HADOOP_HOME_WARN_SUPPRESS="true"
HADOOP_HOME="/usr/lib/hadoop"
HADOOP_PREFIX="/usr/lib/hadoop"
HADOOP_MAPRED_HOME="/usr/lib/hadoop-mapreduce"
HADOOP_YARN_HOME="/usr/lib/hadoop-yarn"
HADOOP_COMMON_HOME="/usr/lib/hadoop"
HADOOP_HDFS_HOME="/usr/lib/hadoop-hdfs"
HADOOP_CONF_DIR="/usr/lib/hadoop/etc/hadoop"
YARN_CONF_DIR="/usr/lib/hadoop/etc/hadoop"
YARN_HOME="/usr/lib/hadoop-yarn"
HIVE_HOME="/usr/lib/hive"
HIVE_CONF_DIR="/usr/lib/hive/conf"
HBASE_HOME="/usr/lib/hbase"
HBASE_CONF_DIR="/usr/lib/hbase/conf"
SPARK_HOME="/usr/lib/spark"
SPARK_CONF_DIR="/usr/lib/spark/conf"
PATH=${PWD}:${PATH}
EOF
#if [ "$PYSPARK_PYTHON" = "python3" ]; then
if [ "$INSTALL_PY3_PKGS" = true ]; then
cat << 'EOF' >> /tmp/Renvextra
PYSPARK_PYTHON="python3"
EOF
fi
cat /tmp/Renvextra | sudo tee -a /usr/lib64/R/etc/Renviron
sudo mkdir -p /mnt/spark
sudo chmod a+rwx /mnt/spark
if [ -d /mnt1 ]; then
sudo mkdir -p /mnt1/spark
sudo chmod a+rwx /mnt1/spark
fi
set +e # workaround for if SparkR is already installed by other BA
# install SparkR and SparklyR for R - toree itself does not need this
sudo R --no-save << R_SCRIPT
library(devtools)
install('/usr/lib/spark/R/lib/SparkR')
R_SCRIPT
set -e
fi # end if -f /tmp/Renvextra
export SPARK_HOME="/usr/lib/spark"
SPARK_PACKAGES=""
PYSPARK_PYTHON="python"
if [ "$INSTALL_PY3_PKGS" = true ]; then
PYSPARK_PYTHON="python3"
fi
# With user-supplied spark opts, extract any --packages value so it can be
# duplicated into spark-defaults.conf (see workaround note below).
if [ ! "$USER_SPARK_OPTS" = "" ]; then
SPARK_OPTS=$USER_SPARK_OPTS
SPARK_PACKAGES=$(ruby -e "opts='$SPARK_OPTS'.split;pkgs=nil;opts.each_with_index{|o,i| pkgs=opts[i+1] if o.start_with?('--packages')};puts pkgs || '$SPARK_PACKAGES'")
export SPARK_OPTS
export SPARK_PACKAGES
sudo jupyter toree install --interpreters=$INTERPRETERS --spark_home=$SPARK_HOME --python_exec=$PYSPARK_PYTHON --spark_opts="$SPARK_OPTS"
# NOTE - toree does not pick SPARK_OPTS, so use the following workaround until it's fixed
if [ ! "$SPARK_PACKAGES" = "" ]; then
if ! grep "spark.jars.packages" /etc/spark/conf/spark-defaults.conf; then
sudo bash -c "echo 'spark.jars.packages $SPARK_PACKAGES' >> /etc/spark/conf/spark-defaults.conf"
fi
fi
else
sudo jupyter toree install --interpreters=$INTERPRETERS --spark_home=$SPARK_HOME --python_exec=$PYSPARK_PYTHON
fi
if [ "$INSTALL_PY3_PKGS" = true ]; then
sudo bash -c 'echo "" >> /etc/spark/conf/spark-env.sh'
sudo bash -c 'echo "export PYSPARK_PYTHON=/usr/bin/python3" >> /etc/spark/conf/spark-env.sh'
#if [ -f /usr/local/share/jupyter/kernels/apache_toree_pyspark/kernel.json ]; then
# sudo bash -c 'sed -i "s/\"PYTHON_EXEC\": \"python\"/\"PYTHON_EXEC\": \"\/usr\/bin\/python3\"/g" /usr/local/share/jupyter/kernels/apache_toree_pyspark/kernel.json'
#fi
fi
# the following dirs could cause conflict, so remove them
rm -rf ~/.m2/
rm -rf ~/.ivy2/
if [ "$NO_JUPYTER" = false ]; then
echo "Starting Jupyter notebook via pyspark"
cd ~
#PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser" pyspark > /var/log/jupyter/jupyter.log &
# Unquoted heredoc: $-variables are expanded by this shell before puppet
# applies the manifest.
if [ "$BIGDL" = false ]; then
sudo puppet apply << PUPPET_SCRIPT
include 'upstart'
upstart::job { 'jupyter':
description => 'Jupyter',
respawn => true,
respawn_limit => '0 10',
start_on => 'runlevel [2345]',
stop_on => 'runlevel [016]',
console => 'output',
chdir => '/home/hadoop',
script => '
sudo su - hadoop > /var/log/jupyter/jupyter.log 2>&1 <<BASH_SCRIPT
export NODE_PATH="$NODE_PATH"
export PYSPARK_DRIVER_PYTHON="jupyter"
export PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser $SSL_OPTS_JUPYTER --log-level=INFO"
export NOTEBOOK_DIR="$NOTEBOOK_DIR"
pyspark
BASH_SCRIPT
',
}
PUPPET_SCRIPT
else
setup_jupyter_process_with_bigdl
fi
fi
}
# Create and open up an HDFS home directory for the JupyterHub default user
# once Spark/HDFS config is in place. Expansions are quoted so an unusual
# user name cannot word-split the hdfs arguments.
create_hdfs_user() {
wait_for_spark
sudo -u hdfs hdfs dfs -mkdir "/user/$JUPYTER_HUB_DEFAULT_USER"
sudo -u hdfs hdfs dfs -chown "$JUPYTER_HUB_DEFAULT_USER:$JUPYTER_HUB_DEFAULT_USER" "/user/$JUPYTER_HUB_DEFAULT_USER"
sudo -u hdfs hdfs dfs -chmod -R 777 "/user/$JUPYTER_HUB_DEFAULT_USER"
}
# apache toree install
# Build Apache Toree from source against the cluster's Spark version, then
# hand off to background_install_proc (inline when run as an EMR step,
# backgrounded when run as a bootstrap action).
if [ "$TOREE_KERNEL" = true ]; then
echo "Running background process to install Apache Toree"
# spark 1.6
#sudo pip install --pre toree
#sudo jupyter toree install
# spark 2.0
cd /mnt
if [ "$USE_CACHED_DEPS" != true ]; then
curl https://bintray.com/sbt/rpm/rpm | sudo tee /etc/yum.repos.d/bintray-sbt-rpm.repo
sudo yum install docker sbt -y
fi
if [ ! "$USE_CACHED_DEPS" = true ]; then
git clone https://github.com/apache/incubator-toree.git
fi
cd incubator-toree/
git pull
export APACHE_SPARK_VERSION=$APACHE_SPARK_VERSION
make -j8 dist
make clean release APACHE_SPARK_VERSION=$APACHE_SPARK_VERSION || true # getting the docker not running error, swallow it with || true
if [ "$RUN_AS_STEP" = true ]; then
background_install_proc
else
background_install_proc &
fi
else
# No Toree: register a plain 'jupyter notebook' upstart job (or the BigDL
# variant). The unquoted PUPPET_SCRIPT heredoc is expanded by this shell
# before puppet applies the manifest.
if [ "$NO_JUPYTER" = false ]; then
echo "Starting Jupyter notebook"
if [ "$BIGDL" = false ]; then
sudo puppet apply << PUPPET_SCRIPT
include 'upstart'
upstart::job { 'jupyter':
description => 'Jupyter',
respawn => true,
respawn_limit => '0 10',
start_on => 'runlevel [2345]',
stop_on => 'runlevel [016]',
console => 'output',
chdir => '/home/hadoop',
env => { 'NOTEBOOK_DIR' => '$NOTEBOOK_DIR', 'NODE_PATH' => '$NODE_PATH' },
exec => 'sudo su - hadoop -c "jupyter notebook --no-browser $SSL_OPTS_JUPYTER" > /var/log/jupyter/jupyter.log 2>&1',
}
PUPPET_SCRIPT
else
setup_jupyter_process_with_bigdl &
fi
fi
fi
# JupyterHub: install the proxy and hub under python3, provision the default
# user (HDFS home + OS password), then register a jupyterhub upstart job.
if [ "$JUPYTER_HUB" = true ]; then
sudo npm install -g --unsafe-perm configurable-http-proxy
sudo python3 -m pip install jupyterhub #notebook ipykernel
#sudo python3 -m ipykernel install
if [ ! "$JUPYTER_HUB_DEFAULT_USER" = "" ]; then
create_hdfs_user &
fi
# change the password of the hadoop user to JUPYTER_PASSWORD
if [ ! "$JUPYTER_PASSWORD" = "" ]; then
sudo sh -c "echo '$JUPYTER_PASSWORD' | passwd $JUPYTER_HUB_DEFAULT_USER --stdin"
fi
sudo ln -sf /usr/local/bin/jupyterhub /usr/bin/
sudo ln -sf /usr/local/bin/jupyterhub-singleuser /usr/bin/
mkdir -p /mnt/jupyterhub
cd /mnt/jupyterhub
echo "Starting Jupyterhub"
#sudo jupyterhub $SSL_OPTS_JUPYTERHUB --port=$JUPYTER_HUB_PORT --ip=$JUPYTER_HUB_IP --log-file=/var/log/jupyter/jupyterhub.log --config ~/.jupyter/jupyter_notebook_config.py &
# Unquoted heredoc: port/ip/ssl variables are expanded by this shell before
# puppet applies the manifest.
sudo puppet apply << PUPPET_SCRIPT
include 'upstart'
upstart::job { 'jupyterhub':
description => 'JupyterHub',
respawn => true,
respawn_limit => '0 10',
start_on => 'runlevel [2345]',
stop_on => 'runlevel [016]',
console => 'output',
chdir => '/mnt/jupyterhub',
env => { 'NOTEBOOK_DIR' => '$NOTEBOOK_DIR', 'NODE_PATH' => '$NODE_PATH' },
exec => 'sudo /usr/bin/jupyterhub --pid-file=/var/run/jupyter.pid $SSL_OPTS_JUPYTERHUB --port=$JUPYTER_HUB_PORT --ip=$JUPYTER_HUB_IP --log-file=/var/log/jupyter/jupyterhub.log --config /home/hadoop/.jupyter/jupyter_notebook_config.py'
}
PUPPET_SCRIPT
fi
# Ship /var/log/jupyter/* to S3 via the EMR logpusher. The heredoc delimiter
# is quoted so the $instance-id / $0 tokens are written literally for
# logpusher to expand, not by this shell.
cat << 'EOF' > /tmp/jupyter_logpusher.config
{
"/var/log/jupyter/" : {
"includes" : [ "(.*)" ],
"s3Path" : "node/$instance-id/applications/jupyter/$0",
"retentionPeriod" : "5d",
"logType" : [ "USER_LOG", "SYSTEM_LOG" ]
}
}
EOF
cat /tmp/jupyter_logpusher.config | sudo tee -a /etc/logpusher/jupyter.config
fi
echo "Bootstrap action finished"

View File

@ -0,0 +1,23 @@
#!/bin/bash
# Bootstrap wrapper: fetch the public EMR Jupyter installer and our payload
# copy from S3, then run the payload with this cluster's kernel, package,
# port, and notebook-storage options.
wget --no-check-certificate https://s3.amazonaws.com/aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/install-jupyter-emr5.sh
aws s3 cp s3://ty-emr/XRR/bootstrap/install-jupyter-emr5-payload.sh .
# NOTE(review): $util_path is not defined in this script, so this chmod
# targets '/install-jupyter-emr5-payload.sh' — presumably a leftover; the
# chmod on the local copy below is the one that matters. Verify.
chmod +x $util_path/install-jupyter-emr5-payload.sh
chmod +x install-jupyter-emr5-payload.sh
./install-jupyter-emr5-payload.sh \
--r \
--julia \
--toree \
--torch \
--ruby \
--ds-packages \
--ml-packages \
--python-packages ggplot nilearn \
--port 8002 \
--password jupyter \
--jupyterhub \
--jupyterhub-port 8001 \
--cached-install \
--notebook-dir s3://ty-emr/XRR/jupyter/notebooks/ \
--copy-samples \
--s3fs

View File

@ -0,0 +1,7 @@
#!/bin/sh
# Install the 'sift' search tool (v0.9.0, linux amd64) into /usr/local/bin,
# then remove the tarball and extracted directory.
cd ~
wget https://sift-tool.org/downloads/sift/sift_0.9.0_linux_amd64.tar.gz
# NOTE(review): the sift* globs assume $HOME contains no other 'sift*'
# entries — the final rm -rf would remove them too. Verify.
tar xvzf sift*
sudo mv sift*/sift /usr/local/bin/
rm -rf sift*

View File

@ -0,0 +1,14 @@
#!/bin/bash
# Wait (in the background) for the presto-server upstart job to report
# 'running', then install the PostgreSQL catalog properties and restart
# Presto so it picks them up.
configure_postgres() {
# Silence 'status' stderr while polling (the job may not be registered
# yet); previously the redirect was mistakenly attached to grep -q,
# which never writes anything.
while ! status presto-server 2>/dev/null | grep -q running
do
sleep 1
done
aws s3 cp s3://ty-emr/XRR/presto/pg1.properties ~
aws s3 cp s3://ty-emr/XRR/presto/pg2.properties ~
sudo mv ~/*.properties /etc/presto/conf/catalog
sudo restart presto-server
}
configure_postgres &

View File

@ -0,0 +1,7 @@
# Replace the EMR instance-controller jars with patched builds from S3,
# once per node (marker file short-circuits re-runs), then stop the
# affected services.
if [ -e /mnt/replaced-instance-controller ]; then
exit 0
fi
lib_dir=/usr/share/aws/emr/instance-controller/lib
sudo find "$lib_dir" -name 'instance-controller*.jar' -exec rm {} \;
for jar in instance-controller.jar instance-controller-interface.jar; do
sudo aws s3 cp "s3://ty-emr/XRR/bootstrap/instance-controller/$jar" "$lib_dir/"
done
touch /mnt/replaced-instance-controller
sudo service instance-controller stop
sudo service logpusher stop

View File

@ -0,0 +1,6 @@
# Variant of the replacement above: swap only the main instance-controller
# jar, keeping the stock *interface* jar. Runs once per node via the marker
# file, then stops the services — presumably so they restart with the new
# jar (TODO confirm what restarts them).
[ -e /mnt/replaced-instance-controller ] && exit 0
sudo find /usr/share/aws/emr/instance-controller/lib -name 'instance-controller*.jar' -and -not -name '*interface*' -exec rm {} \;
sudo aws s3 cp s3://ty-emr/XRR/bootstrap/instance-controller/instance-controller.jar /usr/share/aws/emr/instance-controller/lib/
touch /mnt/replaced-instance-controller
sudo service instance-controller stop
sudo service logpusher stop