rm bootstrap

This commit is contained in: parent 48397395a1, commit a211b5f66e

Binary file not shown.
@@ -0,0 +1,154 @@
#!/bin/bash

set -x

install_jupyter=false  # note: this flag shares its name with the install_jupyter() function below

# print an error without aborting the script (was called below but never defined)
error_msg() {
  echo 1>&2 "Error: $1"
}

build_vim() {
  cd /tmp
  git clone http://luajit.org/git/luajit-2.0.git
  cd luajit-2.0
  make
  sudo make install

  cd /tmp
  git clone https://github.com/vim/vim.git
  cd vim
  ./configure \
    --with-features=huge \
    --enable-cscope \
    --enable-pythoninterp \
    --enable-luainterp \
    --enable-multibyte \
    --enable-fontset \
    --disable-gui \
    --without-x \
    --disable-netbeans \
    --enable-largefile
  make
  sudo make install

  if [ -e /usr/bin/vi ]; then
    sudo rm /usr/bin/vi
  fi
  sudo ln -s /usr/local/bin/vim /usr/bin/vi
  rm -rf /tmp/vim
}

provision_packages() {
  sudo yum groupinstall -y "Development Tools"
  sudo yum install -y \
    tmux \
    wget \
    htop \
    mlocate \
    git \
    rake \
    zsh \
    jq \
    at \
    bind-utils \
    strace \
    lua \
    lua-devel \
    ncurses \
    ncurses-devel \
    gmp \
    gmp-devel \
    ctags \
    tcl-devel \
    perl \
    perl-devel \
    perl-ExtUtils-ParseXS \
    perl-ExtUtils-CBuilder \
    perl-ExtUtils-Embed
  wget https://bootstrap.pypa.io/get-pip.py
  sudo python2.7 ./get-pip.py
  sudo env "PATH=$PATH" pip install awscli
  cd ~
  wget https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh
  chmod +x install.sh
  ./install.sh
  sudo chmod 644 /usr/bin/chsh
  sudo chmod +x /usr/bin/chsh
  sudo /usr/bin/chsh -s /bin/zsh $USER
  sudo updatedb
  cd $util_path
  wget --no-check-certificate $s3_utils/suntracker.sh
  chmod +x $util_path/suntracker.sh
  (crontab -l ; echo "0 3 * * * $util_path/suntracker.sh") | crontab -
  $util_path/suntracker.sh
  touch ~/.zsh.prompts
  mkdir -p ~/.zsh.after/
  echo "prompt agnoster" > ~/.zsh.after/prompt.zsh
}

install_ssm() {
  cd /tmp
  sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm
}

s3ify_zeppelin() {
  # note: $cluster_id is assumed to be set in the environment; this script never derives it
  cd $util_path
  wget --no-check-certificate $s3_utils/configure_zeppelin_s3.sh
  chmod +x $util_path/configure_zeppelin_s3.sh
  aws emr add-steps --cluster-id $cluster_id --steps Type=CUSTOM_JAR,Name="Configure Zeppelin for S3",Jar="command-runner.jar",Args=[$util_path/configure_zeppelin_s3.sh]
}

install_jupyter() {
  cd $util_path
  wget --no-check-certificate https://s3.amazonaws.com/aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/install-jupyter-emr5.sh
  chmod +x $util_path/install-jupyter-emr5.sh
  $util_path/install-jupyter-emr5.sh \
    --r \
    --julia \
    --toree \
    --torch \
    --ruby \
    --ds-packages \
    --ml-packages \
    --python-packages "ggplot nilearn" \
    --port 8002 \
    --password jupyter \
    --jupyterhub \
    --jupyterhub-port 8001 \
    --cached-install \
    --notebook-dir s3://ty-emr/XRR/jupyter/notebooks/ \
    --copy-samples \
    --s3fs
}

# get input parameters
while [ $# -gt 0 ]; do
  case "$1" in
    --jupyter)
      install_jupyter=true
      ;;
    -*)
      error_msg "unrecognized option: $1"
      ;;
    *)
      break
      ;;
  esac
  shift
done

s3_utils='https://s3.amazonaws.com/ty-emr/XRR/utils'
build_vim=false   # note: unused here; this variant always builds vim below
util_path="$HOME/.utils"  # was "~/.utils": a quoted tilde does not expand

mkdir -p $util_path

provision_packages &
build_vim &
install_ssm &

is_master=false  # set but unused; the grep below decides directly
if grep isMaster /mnt/var/lib/info/instance.json | grep true;
then
  s3ify_zeppelin &
  if [ "$install_jupyter" == true ]; then
    install_jupyter &
  fi
fi
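
For context, a minimal sketch of how a script like this gets attached to a cluster. The S3 key, release label, and instance sizing below are illustrative placeholders, not values taken from this repo:

# Hypothetical launch wiring this file up as an EMR bootstrap action.
aws emr create-cluster \
  --name "dev-cluster" \
  --release-label emr-5.6.0 \
  --applications Name=Spark Name=Zeppelin \
  --instance-type m4.large \
  --instance-count 3 \
  --use-default-roles \
  --bootstrap-actions Path=s3://ty-emr/XRR/bootstrap/bootstrap.sh,Args=["--jupyter"]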

@@ -0,0 +1,274 @@
#!/bin/bash

logfile=bootstrap.txt
exec > $logfile 2>&1

set -x

install_jupyter=false

# print an error without aborting the script (was called below but never defined)
error_msg() {
  echo 1>&2 "Error: $1"
}

# get input parameters
while [ $# -gt 0 ]; do
  case "$1" in
    --jupyter)
      install_jupyter=true
      ;;
    -*)
      error_msg "unrecognized option: $1"
      ;;
    *)
      break
      ;;
  esac
  shift
done

platform=$(uname)
s3_utils='https://s3.amazonaws.com/ty-emr/XRR/utils'
build_vim=false

if [ "$USER" == "root" ]; then
  util_path="/root/.utils"
  home="/root"
else
  if [ "$platform" == "Darwin" ]; then
    users_dir="Users"
  fi
  if [ "$platform" == "Linux" ]; then
    users_dir="home"
  fi
  util_path="/$users_dir/$USER/.utils"
fi
vim_check=$(vim --version)
if [[ $vim_check != *"+lua"* ]]; then
  build_vim=true
fi
mkdir -p $util_path

release=$(cat /etc/*release* | tr '[:upper:]' '[:lower:]')
if [[ $release != *"smartos"* ]]; then
  if [[ $release == *"rhel fedora"* ]]; then
    echo "Looks like we're running on something that is kinda like RHEL..."
    sudo yum groupinstall -y "Development Tools"
    sudo yum install -y \
      tmux \
      wget \
      htop \
      mlocate \
      git \
      rake \
      zsh \
      jq \
      at \
      bind-utils \
      strace \
      lua \
      lua-devel \
      ncurses \
      ncurses-devel \
      gmp \
      gmp-devel \
      ctags \
      tcl-devel \
      perl \
      perl-devel \
      perl-ExtUtils-ParseXS \
      perl-ExtUtils-CBuilder \
      perl-ExtUtils-Embed

    if [[ $build_vim == true ]]; then
      cd /tmp
      git clone http://luajit.org/git/luajit-2.0.git
      cd luajit-2.0
      make
      sudo make install

      cd /tmp
      git clone https://github.com/vim/vim.git
      cd vim
      ./configure \
        --with-features=huge \
        --enable-cscope \
        --enable-pythoninterp \
        --enable-luainterp \
        --enable-multibyte \
        --enable-fontset \
        --disable-gui \
        --without-x \
        --disable-netbeans \
        --enable-largefile
      make
      sudo make install

      if [ -e /usr/bin/vi ]; then
        sudo rm /usr/bin/vi
      fi
      sudo ln -s /usr/local/bin/vim /usr/bin/vi
      rm -rf /tmp/vim
    fi
  fi
  if [[ $release == *"debian"* ]]; then
    echo "Looks like we're running on a Debian based system!"
    sudo apt-get update
    sudo apt-get install -y \
      tmux \
      htop \
      wget \
      mlocate \
      git \
      rake \
      zsh \
      jq \
      at \
      dnsutils \
      strace \
      libncurses5-dev \
      libncursesw5-dev \
      python-dev \
      ruby-dev \
      lua5.1 \
      lua5.1-dev \
      luajit \
      libluajit-5.1 \
      libperl-dev \
      build-essential

    if [[ $build_vim == true ]]; then
      sudo ln -sf /usr/include/lua5.1 /usr/include/lua5.1/include
      sudo ln -sf /usr/lib/x86_64-linux-gnu/liblua5.1.so /usr/local/lib/liblua.so
      cd /tmp
      git clone https://github.com/vim/vim.git
      cd vim
      ./configure \
        --with-features=huge \
        --enable-cscope \
        --enable-pythoninterp=yes \
        --enable-rubyinterp=yes \
        --with-python-config-dir=/usr/lib/python2.7/config-x86_64-linux-gnu \
        --enable-multibyte \
        --enable-fontset \
        --disable-gui \
        --disable-netbeans \
        --enable-luainterp=yes \
        --with-luajit \
        --with-lua-prefix=/usr/include/lua5.1 \
        --enable-largefile

      make
      sudo make install

      if [ -e /usr/bin/vi ]; then
        sudo rm /usr/bin/vi
      fi
      sudo ln -s /usr/local/bin/vim /usr/bin/vi
      rm -rf /tmp/vim
    fi
  fi
  if [[ $release == *"arch linux"* ]]; then  # $release is lowercased above, so "Arch Linux" could never match
    echo "Looks like we're running on Arch!"
    yaourt -S --noconfirm \
      gnu-netcat \
      cron \
      tmux \
      htop \
      wget \
      mlocate \
      git \
      rake \
      zsh \
      jq \
      at \
      vim \
      bind-tools \
      strace \
      ncurses \
      ctags
  fi

  wget https://bootstrap.pypa.io/get-pip.py
  sudo python2.7 ./get-pip.py
  sudo env "PATH=$PATH" pip install awscli
  su -c "$(curl -fksSL https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh)" $USER
  sudo chmod 644 /usr/bin/chsh
  sudo chmod +x /usr/bin/chsh
  sudo /usr/bin/chsh -s /bin/zsh $USER
  sudo updatedb
  cd $util_path
  wget --no-check-certificate $s3_utils/suntracker.sh
  chmod +x $util_path/suntracker.sh
  (crontab -l ; echo "0 3 * * * $util_path/suntracker.sh") | crontab -
  $util_path/suntracker.sh
else
  BOOTSTRAP_TAR="bootstrap-2017Q1-x86_64.tar.gz"
  curl -Ok https://pkgsrc.joyent.com/packages/SmartOS/bootstrap/${BOOTSTRAP_TAR}
  tar -zxpf ${BOOTSTRAP_TAR} -C /
  rm -f boots*
  PATH=/opt/local/sbin:/opt/local/bin:$PATH
  MANPATH=/opt/local/man:$MANPATH
  pkgin -y in jq tmux git ruby22-rake zsh at || true
  mkdir /usbkey/root
  mv /root/.[!.]* /usbkey/root
  cd /
  rm -rf /root
  ln -s /usbkey/root /root
  su -c "$(curl -fksSL https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh)" $USER
  echo 'if [ -n "$BASH_EXECUTION_STRING" ]; then' >> ~/.bashrc
  echo '  export SHELL=/opt/local/bin/zsh' >> ~/.bashrc
  echo '  exec "$SHELL" -c "$BASH_EXECUTION_STRING"' >> ~/.bashrc
  echo 'fi' >> ~/.bashrc
  echo 'SHELL=/opt/local/bin/zsh; exec "$SHELL"' >> ~/.bashrc
fi

# AWS Instance customization
if [ -e /usr/bin/cloud-init ]; then

  # Install SSM Agent
  cd /tmp
  sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm

  # Am I running on EMR?
  instance=$(aws ec2 describe-instances --instance-ids $(curl -s 169.254.169.254/latest/meta-data/instance-id))
  tags=$(echo $instance | jq -r '.Reservations[0].Instances[0].Tags[]')
  cluster_id=$(echo $tags | jq -r '. | select(.Key=="aws:elasticmapreduce:job-flow-id") | .Value')
  if [ -n "$cluster_id" ]; then
    echo "$cluster_id" > ~/.cluster_id
    role=$(echo $tags | jq -r '. | select(.Key=="aws:elasticmapreduce:instance-group-role") | .Value')

    if [ "$role" == "MASTER" ]; then
      # ToDo: Incorporate Hue?

      cd $util_path
      wget --no-check-certificate $s3_utils/configure_zeppelin_s3.sh
      chmod +x $util_path/configure_zeppelin_s3.sh
      aws emr add-steps --cluster-id $cluster_id --steps Type=CUSTOM_JAR,Name="Configure Zeppelin for S3",Jar="command-runner.jar",Args=[$util_path/configure_zeppelin_s3.sh]
    fi

    # install jupyter
    if [ "$install_jupyter" == true ]; then
      cd $util_path
      wget --no-check-certificate https://s3.amazonaws.com/aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/install-jupyter-emr5.sh
      chmod +x $util_path/install-jupyter-emr5.sh
      $util_path/install-jupyter-emr5.sh \
        --r \
        --julia \
        --toree \
        --torch \
        --ruby \
        --ds-packages \
        --ml-packages \
        --python-packages "ggplot nilearn" \
        --port 8002 \
        --password jupyter \
        --jupyterhub \
        --jupyterhub-port 8001 \
        --cached-install \
        --notebook-dir s3://ty-emr/XRR/jupyter/notebooks/ \
        --copy-samples \
        --s3fs
    fi
  fi
fi

touch ~/.zsh.prompts
mkdir -p ~/.zsh.after/
echo "prompt agnoster" > ~/.zsh.after/prompt.zsh

@@ -0,0 +1,22 @@
#!/bin/bash

set -x
logfile=test.txt

exec > $logfile 2>&1
sudo apt-get update
sudo apt-get install -y \
  tmux \
  htop \
  wget \
  git \
  rake \
  zsh

rm -rf .yadr/

HOME=/root
# note: install.sh is fetched here, but the su line below re-downloads and runs it
wget https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh
chmod +x ./install.sh
su -c "$(curl -fksSL https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh)" $USER

Binary file not shown.

@@ -0,0 +1,70 @@
#!/bin/bash

set -e

# first validate the arguments
REPLACE_FILE=false
for i in "$@" ; do
  case $i in
    --*-opts*)
      if ! echo $i | grep -E -- '--[a-zA-Z-]+-opts=.+' > /dev/null 2>&1 ; then
        echo "Couldn't parse option $i; expected --cmd-opts=-XX:+UseG1GC where cmd is hbase-master or some such and -XX:+UseG1GC is the option to pass to the JVM" 1>&2
        exit 1
      fi
      ;;
    --help)
      set +x
      echo "Usage: "
      echo "--<daemon>-opts"
      echo "  Set additional Java options for the specified daemon."
      echo " "
      echo "--replace"
      echo "  Replace the existing hbase-user-env.sh file if it exists."
      echo " "
      echo "<daemon> is one of:"
      echo "  hbase-master, hbase-regionserver, zookeeper"
      echo " "
      echo " "
      echo "Example Usage:"
      echo "  --hbase-master-opts=-Xmx2048 --zookeeper-opts=-XX:GCTimeRatio=19"
      exit 1
      ;;
    --replace)
      REPLACE_FILE=true
      ;;
    *)
      echo "Unknown option $i" 1>&2
      exit 1
      ;;
  esac
done

set -x
mkdir -p /home/hadoop/conf
HBASE_ENV_FILE=/home/hadoop/conf/hbase-user-env.sh

if [ -d "/home/hadoop/hbase/conf" ] ; then
  HBASE_ENV_FILE=/home/hadoop/hbase/conf/hbase-user-env.sh
fi

if [ $REPLACE_FILE == "true" ] ; then
  rm -rf $HBASE_ENV_FILE
fi

if [ -e $HBASE_ENV_FILE ] ; then
  [[ ! -n $(grep "#\\!/bin/bash" $HBASE_ENV_FILE ) ]] && echo "#!/bin/bash" >> $HBASE_ENV_FILE
else
  echo "#!/bin/bash" >> $HBASE_ENV_FILE
fi

for i in "$@" ; do
  case $i in
    --*-opts*)
      # POSIX/GNU ERE has no lazy quantifier; greedy .* is safe here because "-opts=" occurs once
      OPTS_CMD=$(echo $i | sed -r 's|--(.*)-opts=.*|\1|' | tr 'a-z-' 'A-Z_')_OPTS
      OPTS_VALUE=$(echo $i | sed -r 's|--.*-opts=(.*)|\1|')
      cat >> $HBASE_ENV_FILE <<EOF
$OPTS_CMD="$OPTS_VALUE"
EOF
      ;;
  esac
done
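
A usage sketch based on the --help text above; the script file name is a placeholder, and the resulting env file path assumes /home/hadoop/hbase/conf does not exist:

./configure-hbase-user-env.sh --replace --hbase-master-opts=-Xmx2048
# /home/hadoop/conf/hbase-user-env.sh should then read:
#   #!/bin/bash
#   HBASE_MASTER_OPTS="-Xmx2048"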

@@ -0,0 +1,15 @@
#!/bin/bash

# Add a principal to the KDC for the master node, using the master node's returned host name
sudo kadmin.local -q "ktadd -k /etc/krb5.keytab host/$(hostname -f)"
# Assign plain-language variables for clarity
name=tyler
password=derpderp
sudo kadmin.local -q "addprinc -pw $password +needchange $name"
hdfs dfs -mkdir /user/$name
hdfs dfs -chown $name:$name /user/$name

# Enable GSSAPI authentication for SSH and restart the SSH service
sudo sed -i 's/^.*GSSAPIAuthentication.*$/GSSAPIAuthentication yes/' /etc/ssh/sshd_config
sudo sed -i 's/^.*GSSAPICleanupCredentials.*$/GSSAPICleanupCredentials yes/' /etc/ssh/sshd_config
sudo /etc/init.d/sshd restart
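
A quick client-side check, assuming a workstation with a krb5.conf pointing at the same KDC (the hostname is a placeholder):

kinit tyler                                 # get a ticket; +needchange forces a password reset on first use
ssh -o GSSAPIAuthentication=yes emr-master  # should authenticate via the ticket, no password prompt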

@@ -0,0 +1,3 @@
#!/bin/bash

sudo adduser tyler

@@ -0,0 +1 @@
sudo aws s3 cp s3://ty-emr/XRR/bootstrap/MyAWSCredentialsProviderWithUri.jar /usr/share/aws/emr/emrfs/auxlib/

@@ -0,0 +1,210 @@
#!/bin/bash

get_vpc_cidrs()
{
  cidrs=$(curl -s $metadata/network/interfaces/macs/$mac_address/vpc-ipv4-cidr-blocks)
  echo "$cidrs"
}

run_dnsmasq()
{
  all_domains="$(grep ^search $resolv_conf | cut -d' ' -f2- )"
  # bracketed pattern keeps grep from matching its own command line
  pid=$(ps -ef | grep '[d]nsmasq' | grep synth-domain | awk '{print $2}')
  if [ -n "$pid" ]; then
    sudo kill $pid
  fi
  for d in $all_domains; do
    for c in $(get_vpc_cidrs); do
      syn_domains="$syn_domains --synth-domain=$d,$c,ip- "
    done
  done
  runmasq="sudo dnsmasq --listen-address=127.0.0.1 $syn_domains "
  eval "$runmasq"
  echo "started dnsmasq : $runmasq"
}

rewrite_setup_dns()
{
  tmpfile=$(mktemp /tmp/setupdnsXXXXXX)
  cat > "$tmpfile" << 'EOF'
#!/bin/bash
#
# Set up DNS for EMR master/slave instance in VPC.
# This script also sets up DNS in us-east-1 for non-VPC to handle ec2 instances,
# whose host names begin with domU, with invalid dns domain names (TT0055043598).
#
set -e
set -x

shopt -s expand_aliases  # aliases are inert in non-interactive shells without this
alias curl="curl --connect-timeout 2 -q -f --retry-delay 2 --retry 5"

resolv_conf="/etc/resolv.conf"
dhclient_conf="/etc/dhcp/dhclient.conf"
localhost="127.0.0.1"
metadata="http://169.254.169.254/latest/meta-data"

restart_network="false"
in_vpc="false"

mac_address="$(curl $metadata/mac/ | tr '[:upper:]' '[:lower:]')"
region="$(curl http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r .region)"

# wait for the network to come up before proceeding
if [ -e /usr/bin/nm-online ]; then
  /usr/bin/nm-online
fi

get_default_domain()
{
  if [ "$region" = "us-east-1" ]; then
    echo 'ec2.internal'
  else
    echo "$region.compute.internal"
  fi
}

get_first_nameserver_from_resolv_conf()
{
  awk '$1 ~ /^nameserver/ { print $2 }' "$resolv_conf"
}

check_vpc()
{
  # was: if "$(curl ...)" | grep -q vpc, which executed curl's output as a command
  if curl $metadata/network/interfaces/macs/$mac_address/ | grep -q vpc; then
    in_vpc="true"
  fi
}

get_vpc_cidrs()
{
  cidrs=$(curl $metadata/network/interfaces/macs/$mac_address/vpc-ipv4-cidr-blocks)
  echo "$cidrs"
}

append_line_to_dhclient_conf()
{
  echo "$1" | tee -a "$dhclient_conf"
}

prepend_domain()
{
  #sample line : prepend domain-name "ec2.internal ";
  if grep -Eq "^prepend domain-name \"$1[[:space:]]+\";$" "$dhclient_conf"; then
    return
  else
    append_line_to_dhclient_conf "prepend domain-name \"$1 \";"
    restart_network="true"
  fi
}

prepend_domain_server()
{
  #sample line : prepend domain-name-servers 127.0.0.1;
  if grep -Eq "^prepend domain-name-servers $1;$" "$dhclient_conf"; then
    return
  fi
  append_line_to_dhclient_conf "prepend domain-name-servers $1;"
  restart_network="true"
}

run_dnsmasq()
{
  all_domains="$(grep ^search $resolv_conf | cut -d' ' -f2- )"
  pid=$(ps -ef | grep '[d]nsmasq' | grep synth-domain | awk '{print $2}')
  if [ -n "$pid" ]; then
    kill $pid
  fi
  for d in $all_domains; do
    for c in $(get_vpc_cidrs); do
      syn_domains="$syn_domains --synth-domain=$d,$c,ip- "
    done
  done
  runmasq="dnsmasq --listen-address=127.0.0.1 $syn_domains "
  eval "$runmasq"
  echo "started dnsmasq : $runmasq"
}

get_host_name()
{
  echo "$(hostname -f)"
}

show_dns_status()
{
  type="$1"
  echo "------------ $type $resolv_conf ------------"
  cat "$resolv_conf"
  echo "------------ $type $dhclient_conf ------------"
  cat "$dhclient_conf"
  hostname="$(get_host_name)"
  status="$?"
  echo "'hostname -f' returns : $hostname"  # the echo was missing
  return $status
}

restart_network_if_needed()
{
  if "$restart_network"; then
    echo "Updating DNS settings."
    service network restart
    restart_network="false"
  fi
}

main()
{
  show_dns_status "BeforeSetup"

  old_domain="$(grep search $resolv_conf | cut -d' ' -f2-)"
  default_domain="$(get_default_domain)"

  check_vpc

  if [ "$in_vpc" = "false" ]; then
    # NON-VPC
    if [ "$region" = "us-east-1" ]; then
      if [[ "$old_domain" == "${default_domain}"* ]]; then
        echo "$default_domain is already used in us-east-1."
      else
        echo "Making sure $default_domain is used in us-east-1."
        prepend_domain $default_domain
      fi
    else
      echo "Not in VPC, do nothing and exit."
    fi
  else
    # VPC
    first_nameserver="$(get_first_nameserver_from_resolv_conf)"
    resolving_host_name="$(get_host_name)"
    if [ "$1" = "rundnsmasq" -o -z "$resolving_host_name" ]; then
      echo "Run dnsmasq"
      run_dnsmasq
      if [ "$first_nameserver" != "$localhost" ]; then
        prepend_domain_server "$localhost"
      fi
    else
      echo "Resolving hostname(${resolving_host_name}) successfully, do nothing and exit."
    fi
  fi

  restart_network_if_needed
  # was: return show_dns_status "AfterSetup" (return cannot wrap a command)
  show_dns_status "AfterSetup"
  return $?
}

main "$@"
exit "$?"
EOF

  sudo mv $tmpfile /usr/bin/setup-dns
  sudo chmod 755 /usr/bin/setup-dns  # mktemp files are 0600; make the installed script executable
}

if [ ! -f /tmp/dns_flag ]; then
  resolv_conf="/etc/resolv.conf"
  metadata="http://169.254.169.254/latest/meta-data"
  mac_address=$(curl -s $metadata/mac)
  run_dnsmasq
  rewrite_setup_dns
  touch /tmp/dns_flag
  pid="$(/bin/ps axwwo pid,cmd | awk '$12 ~ /aws157.instancecontroller.Main/ { print $1 }')"
  sudo kill "$pid"
fi
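
For reference, a sketch of what the --synth-domain flags assembled above do (the domain and CIDR here are placeholders): dnsmasq synthesizes ip-A-B-C-D.<domain> hostnames for every address in the given range, which is what keeps EMR's ip-* names resolvable inside a VPC:

dnsmasq --listen-address=127.0.0.1 --synth-domain=ec2.internal,10.0.0.0/16,ip-
# now ip-10-0-1-23.ec2.internal resolves to 10.0.1.23 (and reverse) via 127.0.0.1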

@@ -0,0 +1,4 @@
#!/bin/sh

sudo aws s3 cp s3://ty-emr-pdx/job_input/seagate/lib/atlas-hive-udfs.jar /usr/lib/hive/lib/
sudo aws s3 cp s3://ty-emr-pdx/job_input/seagate/lib/updates.jar /usr/lib/hive/lib/

@@ -0,0 +1,4 @@
#!/bin/bash
set -e
echo "Hallo!"
echo "Ich heisse dummkopf Stepf eins!"

@@ -0,0 +1,4 @@
#!/bin/bash
set -e
echo "Hallo!"
echo "Ich heisse dummkopf Stepf zwei!"

@@ -0,0 +1,174 @@
#!/bin/bash

set -x

install_jupyter=false

# print an error without aborting the script (was called below but never defined)
error_msg() {
  echo 1>&2 "Error: $1"
}

# get input parameters
while [ $# -gt 0 ]; do
  case "$1" in
    --jupyter)
      install_jupyter=true
      ;;
    -*)
      error_msg "unrecognized option: $1"
      ;;
    *)
      break
      ;;
  esac
  shift
done

platform=$(uname)  # was missing; $platform is tested below
s3_utils='https://s3.amazonaws.com/ty-emr/XRR/utils'
build_vim=false

if [ "$USER" == "root" ]; then
  util_path="/root/.utils"
  home="/root"
else
  if [ "$platform" == "Darwin" ]; then
    users_dir="Users"
  fi
  if [ "$platform" == "Linux" ]; then
    users_dir="home"
  fi
  util_path="/$users_dir/$USER/.utils"
fi
vim_check=$(vim --version)
if [[ $vim_check != *"+lua"* ]]; then
  build_vim=true
fi
mkdir -p $util_path

release=$(cat /etc/*release* | tr '[:upper:]' '[:lower:]')
if [[ $release == *"rhel fedora"* ]]; then
  echo "Looks like we're running on something that is kinda like RHEL..."
  sudo yum groupinstall -y "Development Tools"
  sudo yum install -y \
    tmux \
    wget \
    htop \
    mlocate \
    git \
    rake \
    zsh \
    jq \
    at \
    bind-utils \
    strace \
    lua \
    lua-devel \
    ncurses \
    ncurses-devel \
    gmp \
    gmp-devel \
    ctags \
    tcl-devel \
    perl \
    perl-devel \
    perl-ExtUtils-ParseXS \
    perl-ExtUtils-CBuilder \
    perl-ExtUtils-Embed

  if [[ $build_vim == true ]]; then
    cd /tmp
    git clone http://luajit.org/git/luajit-2.0.git
    cd luajit-2.0
    make
    sudo make install

    cd /tmp
    git clone https://github.com/vim/vim.git
    cd vim
    ./configure \
      --with-features=huge \
      --enable-cscope \
      --enable-pythoninterp \
      --enable-luainterp \
      --enable-multibyte \
      --enable-fontset \
      --disable-gui \
      --without-x \
      --disable-netbeans \
      --enable-largefile
    make
    sudo make install

    if [ -e /usr/bin/vi ]; then
      sudo rm /usr/bin/vi
    fi
    sudo ln -s /usr/local/bin/vim /usr/bin/vi
    rm -rf /tmp/vim
  fi
fi


wget https://bootstrap.pypa.io/get-pip.py
sudo python2.7 ./get-pip.py
sudo env "PATH=$PATH" pip install awscli
wget "https://raw.githubusercontent.com/o0beaner/dotfiles/master/install.sh"
chmod +x install.sh
./install.sh
sudo chmod 644 /usr/bin/chsh
sudo chmod +x /usr/bin/chsh
sudo /usr/bin/chsh -s /bin/zsh $USER
sudo updatedb
cd $util_path
wget --no-check-certificate $s3_utils/suntracker.sh
chmod +x $util_path/suntracker.sh
(crontab -l ; echo "0 3 * * * $util_path/suntracker.sh") | crontab -
$util_path/suntracker.sh
# AWS Instance customization
if [ -e /usr/bin/cloud-init ]; then

  # Install SSM Agent
  cd /tmp
  sudo yum install -y https://s3.amazonaws.com/ec2-downloads-windows/SSMAgent/latest/linux_amd64/amazon-ssm-agent.rpm

  # Am I running on EMR?
  instance=$(aws ec2 describe-instances --instance-ids $(curl -s 169.254.169.254/latest/meta-data/instance-id))
  tags=$(echo $instance | jq -r '.Reservations[0].Instances[0].Tags[]')
  cluster_id=$(echo $tags | jq -r '. | select(.Key=="aws:elasticmapreduce:job-flow-id") | .Value')
  if [ -n "$cluster_id" ]; then
    echo "$cluster_id" > ~/.cluster_id
    role=$(echo $tags | jq -r '. | select(.Key=="aws:elasticmapreduce:instance-group-role") | .Value')

    if [ "$role" == "MASTER" ]; then
      # ToDo: Incorporate Hue?

      cd $util_path
      wget --no-check-certificate $s3_utils/configure_zeppelin_s3.sh
      chmod +x $util_path/configure_zeppelin_s3.sh
      aws emr add-steps --cluster-id $cluster_id --steps Type=CUSTOM_JAR,Name="Configure Zeppelin for S3",Jar="command-runner.jar",Args=[$util_path/configure_zeppelin_s3.sh]
    fi

    # install jupyter
    if [ "$install_jupyter" == true ]; then
      cd $util_path
      wget --no-check-certificate https://s3.amazonaws.com/aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/install-jupyter-emr5.sh
      chmod +x $util_path/install-jupyter-emr5.sh
      $util_path/install-jupyter-emr5.sh \
        --r \
        --julia \
        --toree \
        --torch \
        --ruby \
        --ds-packages \
        --ml-packages \
        --python-packages "ggplot nilearn" \
        --port 8002 \
        --password jupyter \
        --jupyterhub \
        --jupyterhub-port 8001 \
        --cached-install \
        --notebook-dir s3://ty-emr/XRR/jupyter/notebooks/ \
        --copy-samples \
        --s3fs
    fi
  fi
fi

touch ~/.zsh.prompts
mkdir -p ~/.zsh.after/
echo "prompt agnoster" > ~/.zsh.after/prompt.zsh

@@ -0,0 +1,21 @@
[{
  "Classification": "hue-ini",
  "Properties": {},
  "Configurations": [
    {
      "Classification": "aws",
      "Properties": {},
      "Configurations": [
        {
          "Classification": "aws_accounts",
          "Properties": {},
          "Configurations": [
            {
              "Classification": "default",
              "Properties": {
                "region": "us-east-1"
              }
            }
          ]
        }
      ]
    }
  ]
}]
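
These classification files are consumed at cluster-creation time; a hedged sketch of passing one (the local file name and cluster parameters are placeholders):

# Hypothetical: apply the hue-ini classification above when launching a cluster.
aws emr create-cluster \
  --release-label emr-5.6.0 \
  --applications Name=Hue \
  --instance-type m4.large --instance-count 3 \
  --use-default-roles \
  --configurations file://hue-aws-region.json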

@@ -0,0 +1,46 @@
[
  {
    "Classification": "hue-ini",
    "Properties": {},
    "Configurations": [
      {
        "Classification": "desktop",
        "Properties": {},
        "Configurations": [
          {
            "Classification": "ldap",
            "Properties": {
              "create_users_on_login": "false"
            },
            "Configurations": [
              {
                "Classification": "ldap_servers",
                "Properties": {},
                "Configurations": [
                  {
                    "Classification": "blipsandchitz.local",
                    "Properties": {
                      "base_dn": "DC=blipsandchitz,DC=local",
                      "ldap_url": "ldap://10.0.8.254:389",
                      "search_bind_authentication": "true",
                      "bind_dn": "CN=hue,OU=ServiceAccounts,OU=UserAccounts,DC=blipsandchitz,DC=local",
                      "bind_password": "Badpassword1"
                    },
                    "Configurations": []
                  }
                ]
              }
            ]
          },
          {
            "Classification": "auth",
            "Properties": {
              "backend": "desktop.auth.backend.LdapBackend,desktop.auth.backend.AllowFirstUserDjangoBackend"
            },
            "Configurations": []
          }
        ]
      }
    ]
  }
]

@@ -0,0 +1,63 @@
[
  {
    "Classification": "hue-ini",
    "Properties": {},
    "Configurations": [
      {
        "Classification": "desktop",
        "Properties": {},
        "Configurations": [
          {
            "Classification": "ldap",
            "Properties": {
              "create_users_on_login": "false"
            },
            "Configurations": [
              {
                "Classification": "ldap_servers",
                "Properties": {},
                "Configurations": [
                  {
                    "Classification": "blipsandchitz.local",
                    "Properties": {
                      "base_dn": "DC=blipsandchitz,DC=local",
                      "ldap_url": "ldap://10.0.8.254:389",
                      "search_bind_authentication": "true",
                      "bind_dn": "CN=hue,OU=ServiceAccounts,OU=UserAccounts,DC=blipsandchitz,DC=local",
                      "bind_password": "Badpassword1"
                    },
                    "Configurations": []
                  }
                ]
              },
              {
                "Classification": "users",
                "Properties": {
                  "user_filter": "objectclass=person",
                  "user_name_attr": "uid"
                },
                "Configurations": []
              },
              {
                "Classification": "groups",
                "Properties": {
                  "group_filter": "objectclass=groupOfUniqueNames",
                  "group_name_attr": "cn",
                  "group_member_attr": "uniqueMember"
                },
                "Configurations": []
              }
            ]
          },
          {
            "Classification": "auth",
            "Properties": {
              "backend": "desktop.auth.backend.LdapBackend,desktop.auth.backend.AllowFirstUserDjangoBackend"
            },
            "Configurations": []
          }
        ]
      }
    ]
  }
]

@@ -0,0 +1,63 @@
[
  {
    "Classification": "hue-ini",
    "Properties": {},
    "Configurations": [
      {
        "Classification": "desktop",
        "Properties": {},
        "Configurations": [
          {
            "Classification": "ldap",
            "Properties": {
              "create_users_on_login": "false"
            },
            "Configurations": [
              {
                "Classification": "ldap_servers",
                "Properties": {},
                "Configurations": [
                  {
                    "Classification": "blipsandchitz.local",
                    "Properties": {
                      "base_dn": "DC=blipsandchitz,DC=local",
                      "ldap_url": "ldap://10.0.8.254:389",
                      "search_bind_authentication": "true",
                      "bind_dn": "CN=hue,OU=ServiceAccounts,OU=UserAccounts,DC=blipsandchitz,DC=local",
                      "bind_password": "Badpassword1"
                    },
                    "Configurations": []
                  }
                ]
              },
              {
                "Classification": "users",
                "Properties": {
                  "user_filter": "objectclass=person",
                  "user_name_attr": "uid"
                },
                "Configurations": []
              },
              {
                "Classification": "groups",
                "Properties": {
                  "group_filter": "objectclass=groupOfUniqueNames",
                  "group_name_attr": "cn",
                  "group_member_attr": "uniqueMember"
                },
                "Configurations": []
              }
            ]
          },
          {
            "Classification": "auth",
            "Properties": {
              "backend": "desktop.auth.backend.AllowFirstUserDjangoBackend"
            },
            "Configurations": []
          }
        ]
      }
    ]
  }
]

@@ -0,0 +1,832 @@
#!/bin/bash
set -x -e

# AWS EMR bootstrap script
# for installing Jupyter notebook on AWS EMR 5+
#
# 2016-11-04 - Tom Zeng tomzeng@amazon.com, initial version
# 2016-11-20 - Tom Zeng, add JupyterHub
# 2016-12-01 - Tom Zeng, add s3 support and cached install
# 2016-12-03 - Tom Zeng, use puppet to install/run services
# 2016-12-06 - Tom Zeng, switch to s3fs for S3 support since s3nb is not fully working
# 2016-12-29 - Tom Zeng, add Dask and Dask.distributed
# 2017-04-18 - Tom Zeng, add BigDL support
# 2017-05-16 - Tom Zeng, add cached install for EMR 5.5, updated yum rpm cache and miniCRAN
# 2017-05-20 - Tom Zeng, add s3contents to replace s3nb which no longer works due to Jupyter update
# 2017-05-23 - Tom Zeng, fix the s3contents dummy last_modified field
# 2017-05-25 - Tom Zeng, turn off tensorflow, pip wheel install no longer working, will fix later
# 2017-06-09 - Tom Zeng, fix install issue for EMR 5.6 caused by kernel source package already installed

#
# Usage:
# --r - install the IRKernel for R (Sparklyr is installed with this option, but as of 2017-04-05 Sparklyr does not support Spark 2.x yet)
# --toree - install the Apache Toree kernel that supports Scala, PySpark, SQL, SparkR for Apache Spark
# --interpreters - specify Apache Toree interpreters, default is all: "Scala,SQL,PySpark,SparkR"
# --julia - install the IJulia kernel for Julia
# --bigdl - install Intel's BigDL Deep Learning framework
# --ruby - install the iRuby kernel for Ruby
# --torch - install the iTorch kernel for Torch
# --javascript - install the JavaScript and CoffeeScript kernels (only works for JupyterHub for now)
# --dask - install Dask and Dask.distributed, with the scheduler on the master instance and the workers on the slave instances
# --ds-packages - install the Python Data Science related packages (scikit-learn pandas numpy numexpr statsmodels seaborn)
# --ml-packages - install the Python Machine Learning related packages (theano keras tensorflow)
# --python-packages - install specific python packages e.g. "ggplot nilearn"
# --port - set the port for Jupyter notebook, default is 8888
# --user - create a default user for JupyterHub
# --password - set the password for Jupyter notebook and JupyterHub
# --localhost-only - restrict jupyter to listen on localhost only; the default is to listen on all ip addresses for the instance
# --jupyterhub - install JupyterHub
# --jupyterhub-port - set the port for JupyterHub, default is 8000
# --no-jupyter - if JupyterHub is installed, use this to disable Jupyter
# --notebook-dir - specify the notebook folder; this could be a local directory or an S3 bucket
# --cached-install - use some cached dependency artifacts on s3 to speed up installation
# --ssl - enable ssl; make sure to use your own cert and key files to get rid of the warning
# --copy-samples - copy sample notebooks to the samples sub folder under the notebook folder
# --spark-opts - user supplied Spark options to pass to SPARK_OPTS
# --s3fs - use s3fs instead of s3contents (the default) for storing notebooks on s3; s3fs could cause slowness if the s3 bucket has lots of files
# --python3 - install python 3 packages and use python3

# check for master node
IS_MASTER=false
if grep isMaster /mnt/var/lib/info/instance.json | grep true;
then
  IS_MASTER=true
fi

# error message
error_msg ()
{
  echo 1>&2 "Error: $1"
}

# some defaults
RUBY_KERNEL=false
R_KERNEL=false
JULIA_KERNEL=false
TOREE_KERNEL=false
TORCH_KERNEL=false
DS_PACKAGES=false
ML_PACKAGES=false
PYTHON_PACKAGES=""
RUN_AS_STEP=false
NOTEBOOK_DIR=""
NOTEBOOK_DIR_S3=false
JUPYTER_PORT=8888
JUPYTER_PASSWORD=""
JUPYTER_LOCALHOST_ONLY=false
PYTHON3=false
GPU=false
CPU_GPU="cpu"
GPUU=""
JUPYTER_HUB=true
JUPYTER_HUB_PORT=8000
JUPYTER_HUB_IP="*"
JUPYTER_HUB_DEFAULT_USER="jupyter"
INTERPRETERS="Scala,SQL,PySpark,SparkR"
R_REPOS_LOCAL="file:////mnt/miniCRAN"
R_REPOS_REMOTE="http://cran.rstudio.com"
R_REPOS=$R_REPOS_LOCAL
USE_CACHED_DEPS=true
SSL=false
SSL_OPTS="--no-ssl"
COPY_SAMPLES=false  # was COPY_SAMPES; the typo left the flag's real variable unset
USER_SPARK_OPTS=""
NOTEBOOK_DIR_S3_S3NB=false
NOTEBOOK_DIR_S3_S3CONTENTS=true
JS_KERNEL=false
NO_JUPYTER=false
INSTALL_DASK=false
INSTALL_PY3_PKGS=false
APACHE_SPARK_VERSION="2.2.0"
BIGDL=false
MXNET=false
DL4J=false

# get input parameters
while [ $# -gt 0 ]; do
  case "$1" in
    --r)
      R_KERNEL=true
      ;;
    --julia)
      JULIA_KERNEL=true
      ;;
    --toree)
      TOREE_KERNEL=true
      ;;
    --torch)
      TORCH_KERNEL=true
      ;;
    --javascript)
      JS_KERNEL=true
      ;;
    --ds-packages)
      DS_PACKAGES=true
      ;;
    --ml-packages)
      ML_PACKAGES=true
      ;;
    --python-packages)
      shift
      PYTHON_PACKAGES=$1
      ;;
    --bigdl)
      BIGDL=true
      ;;
    --mxnet)
      MXNET=true
      ;;
    --dl4j)
      DL4J=true
      ;;
    --ruby)
      RUBY_KERNEL=true
      ;;
    --gpu)
      GPU=true
      CPU_GPU="gpu"
      GPUU="_gpu"
      ;;
    --run-as-step)
      RUN_AS_STEP=true
      ;;
    --port)
      shift
      JUPYTER_PORT=$1
      ;;
    --user)
      shift
      JUPYTER_HUB_DEFAULT_USER=$1
      ;;
    --password)
      shift
      JUPYTER_PASSWORD=$1
      ;;
    --localhost-only)
      JUPYTER_LOCALHOST_ONLY=true
      JUPYTER_HUB_IP=""
      ;;
    --jupyterhub)
      JUPYTER_HUB=true
      #PYTHON3=true
      ;;
    --jupyterhub-port)
      shift
      JUPYTER_HUB_PORT=$1
      ;;
    --notebook-dir)
      shift
      NOTEBOOK_DIR=$1
      ;;
    --copy-samples)
      COPY_SAMPLES=true
      ;;
    --toree-interpreters)
      shift
      INTERPRETERS=$1
      ;;
    --cached-install)
      USE_CACHED_DEPS=true
      R_REPOS=$R_REPOS_LOCAL
      ;;
    --no-cached-install)
      USE_CACHED_DEPS=false
      R_REPOS=$R_REPOS_REMOTE
      ;;
    --no-jupyter)
      NO_JUPYTER=true
      ;;
    --ssl)
      SSL=true
      ;;
    --dask)
      INSTALL_DASK=true
      ;;
    --python3)
      INSTALL_PY3_PKGS=true
      ;;
    --spark-opts)
      shift
      USER_SPARK_OPTS=$1
      ;;
    --spark-version)
      shift
      APACHE_SPARK_VERSION=$1
      ;;
    --s3fs)
      #NOTEBOOK_DIR_S3_S3NB=false
      NOTEBOOK_DIR_S3_S3CONTENTS=false
      ;;
    #--s3nb) # this stopped working after the Jupyter update in early 2017
    #  NOTEBOOK_DIR_S3_S3NB=true
    #  ;;
    -*)
      # do not exit out, just note the failure
      error_msg "unrecognized option: $1"
      ;;
    *)
      break
      ;;
  esac
  shift
done
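
An example invocation exercising the options parsed above; this mirrors the call made by the bootstrap scripts earlier in this commit (note the quotes around the package list, since the parser reads a single argument):

./install-jupyter-emr5.sh \
  --r --julia --toree --torch --ruby \
  --ds-packages --ml-packages \
  --python-packages "ggplot nilearn" \
  --port 8002 --password jupyter \
  --jupyterhub --jupyterhub-port 8001 \
  --cached-install \
  --notebook-dir s3://ty-emr/XRR/jupyter/notebooks/ \
  --copy-samples --s3fs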

RELEASE=$(cat /etc/system-release)
REL_NUM=$(ruby -e "puts '$RELEASE'.split.last")

sudo mkdir -p /mnt/var/aws/emr
sudo cp -pr /var/aws/emr/packages /mnt/var/aws/emr/ && sudo rm -rf /var/aws/emr/packages && sudo mkdir /var/aws/emr/packages && sudo mount -o bind /mnt/var/aws/emr/packages /var/aws/emr/packages &

# move /usr/local and /usr/share to /mnt/usr-moved/ to avoid running out of space on /
if [ ! -d /mnt/usr-moved ]; then
  echo "move local start" >> /tmp/install_time.log
  date >> /tmp/install_time.log
  sudo mkdir /mnt/usr-moved
  sudo mv /usr/local /mnt/usr-moved/ && sudo ln -s /mnt/usr-moved/local /usr/
  echo "move local end, move share start" >> /tmp/install_time.log
  date >> /tmp/install_time.log
  sudo mv /usr/share /mnt/usr-moved/ && sudo ln -s /mnt/usr-moved/share /usr/
  echo "move share end" >> /tmp/install_time.log
  date >> /tmp/install_time.log
fi

export MAKE='make -j 8'

export NODE_PATH='/usr/lib/node_modules'
if [ "$JS_KERNEL" = true ]; then
  sudo python -m pip install -U jinja2 tornado jsonschema pyzmq
  sudo npm cache clean -f
  sudo npm install -g npm
  sudo npm install -g n
  sudo n stable
fi

cd /mnt

TF_BINARY_URL_PY3="https://storage.googleapis.com/tensorflow/linux/$CPU_GPU/tensorflow$GPUU-1.1.0-cp34-cp34m-linux_x86_64.whl"
TF_BINARY_URL="https://storage.googleapis.com/tensorflow/linux/$CPU_GPU/tensorflow$GPUU-1.1.0-cp27-none-linux_x86_64.whl"

if [ "$DS_PACKAGES" = true ]; then
  # Python
  if [ "$INSTALL_PY3_PKGS" = true ]; then
    sudo python3 -m pip install -U scikit-learn pandas numpy numexpr statsmodels scipy
  else
    sudo python -m pip install -U scikit-learn pandas numpy numexpr statsmodels scipy
  fi
  # Javascript
  if [ "$JS_KERNEL" = true ]; then
    sudo npm install -g --unsafe-perm stats-analysis decision-tree machine_learning limdu synaptic node-svm lda brain.js scikit-node
  fi
fi

if [ "$ML_PACKAGES" = true ]; then
  if [ "$INSTALL_PY3_PKGS" = true ]; then
    sudo python3 -m pip install -U theano
    sudo python3 -m pip install -U keras
    sudo python3 -m pip install -U $TF_BINARY_URL_PY3
  else
    sudo python -m pip install -U theano
    sudo python -m pip install -U keras
    sudo python -m pip install -U $TF_BINARY_URL
  fi
fi

if [ ! "$PYTHON_PACKAGES" = "" ]; then
  if [ "$INSTALL_PY3_PKGS" = true ]; then
    sudo python3 -m pip install -U $PYTHON_PACKAGES || true
  else
    sudo python -m pip install -U $PYTHON_PACKAGES || true
  fi
fi

if [ "$BIGDL" = true ]; then
  aws s3 cp s3://tomzeng/maven/apache-maven-3.3.3-bin.tar.gz .
  tar xvfz apache-maven-3.3.3-bin.tar.gz
  sudo mv apache-maven-3.3.3 /opt/maven
  sudo ln -s /opt/maven/bin/mvn /usr/bin/mvn

  git clone https://github.com/intel-analytics/BigDL.git
  cd BigDL/
  export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m"
  export BIGDL_HOME=/mnt/BigDL
  export BIGDL_VER="0.2.0-SNAPSHOT"
  bash make-dist.sh -P spark_2.1
  mkdir -p /tmp/bigdl_summaries
  /usr/local/bin/tensorboard --debug INFO --logdir /tmp/bigdl_summaries/ > /tmp/tensorboard_bigdl.log 2>&1 &
fi

if [ "$JULIA_KERNEL" = true ]; then
  # Julia install
  cd /mnt
  if [ ! "$USE_CACHED_DEPS" = true ]; then
    wget https://julialang.s3.amazonaws.com/bin/linux/x64/0.5/julia-0.5.0-linux-x86_64.tar.gz
    tar xvfz julia-0.5.0-linux-x86_64.tar.gz
  fi
  cd julia-3c9d75391c  # the 0.5.0 tarball unpacks into a commit-named directory
  sudo cp -pr bin/* /usr/bin/
  sudo cp -pr lib/* /usr/lib/
  #sudo cp -pr libexec/* /usr/libexec/
  sudo cp -pr share/* /usr/share/
  sudo cp -pr include/* /usr/include/
fi

if [ "$INSTALL_DASK" = true ]; then
  if [ "$INSTALL_PY3_PKGS" = true ]; then
    sudo python3 -m pip install -U "dask[complete]" distributed
  else
    sudo python -m pip install -U "dask[complete]" distributed
  fi
  export PATH=$PATH:/usr/local/bin
  if [ "$IS_MASTER" = true ]; then
    dask-scheduler > /var/log/dask-scheduler.log 2>&1 &
  else
    MASTER_KV=$(grep masterHost /emr/instance-controller/lib/info/job-flow-state.txt)
    MASTER_HOST=$(ruby -e "puts '$MASTER_KV'.gsub('\"','').split.last")
    dask-worker $MASTER_HOST:8786 > /var/log/dask-worker.log 2>&1 &
  fi
fi

#echo ". /mnt/ipython-env/bin/activate" >> ~/.bashrc

# only run below on the master instance
if [ "$IS_MASTER" = true ]; then

  sudo mkdir -p /var/log/jupyter
  mkdir -p ~/.jupyter
  touch ~/.jupyter/jupyter_notebook_config.py  # a stray "ls" argument here also created a file named ls

  sed -i '/c.NotebookApp.open_browser/d' ~/.jupyter/jupyter_notebook_config.py
  echo "c.NotebookApp.open_browser = False" >> ~/.jupyter/jupyter_notebook_config.py

  if [ ! "$JUPYTER_LOCALHOST_ONLY" = true ]; then
    sed -i '/c.NotebookApp.ip/d' ~/.jupyter/jupyter_notebook_config.py
    echo "c.NotebookApp.ip='*'" >> ~/.jupyter/jupyter_notebook_config.py
  fi

  sed -i '/c.NotebookApp.port/d' ~/.jupyter/jupyter_notebook_config.py
  echo "c.NotebookApp.port = $JUPYTER_PORT" >> ~/.jupyter/jupyter_notebook_config.py

  if [ ! "$JUPYTER_PASSWORD" = "" ]; then
    sed -i '/c.NotebookApp.password/d' ~/.jupyter/jupyter_notebook_config.py
    HASHED_PASSWORD=$(python3 -c "from notebook.auth import passwd; print(passwd('$JUPYTER_PASSWORD'))")
    echo "c.NotebookApp.password = u'$HASHED_PASSWORD'" >> ~/.jupyter/jupyter_notebook_config.py
  else
    sed -i '/c.NotebookApp.token/d' ~/.jupyter/jupyter_notebook_config.py
    echo "c.NotebookApp.token = u''" >> ~/.jupyter/jupyter_notebook_config.py
  fi

  echo "c.Authenticator.admin_users = {'$JUPYTER_HUB_DEFAULT_USER'}" >> ~/.jupyter/jupyter_notebook_config.py
  echo "c.LocalAuthenticator.create_system_users = True" >> ~/.jupyter/jupyter_notebook_config.py

  if [ "$SSL" = true ]; then
    #NOTE - replace server.cert and server.key with your own cert and key files
    CERT=/usr/local/etc/server.cert
    KEY=/usr/local/etc/server.key
    sudo openssl req -x509 -nodes -days 3650 -newkey rsa:1024 -keyout $KEY -out $CERT -subj "/C=US/ST=Washington/L=Seattle/O=JupyterCert/CN=JupyterCert"

    # the following works for Jupyter but will fail JupyterHub, use options for both instead
    #echo "c.NotebookApp.certfile = u'/usr/local/etc/server.cert'" >> ~/.jupyter/jupyter_notebook_config.py
    #echo "c.NotebookApp.keyfile = u'/usr/local/etc/server.key'" >> ~/.jupyter/jupyter_notebook_config.py

    SSL_OPTS_JUPYTER="--keyfile=/usr/local/etc/server.key --certfile=/usr/local/etc/server.cert"
    SSL_OPTS_JUPYTERHUB="--ssl-key=/usr/local/etc/server.key --ssl-cert=/usr/local/etc/server.cert"
  fi

  # Javascript/CoffeeScript kernels
  if [ "$JS_KERNEL" = true ]; then
    sudo npm install -g --unsafe-perm ijavascript d3 lodash plotly jp-coffeescript
    sudo ijs --ijs-install=global
    sudo jp-coffee --jp-install=global
  fi

  if [ "$JULIA_KERNEL" = true ]; then
    julia -e 'Pkg.add("IJulia")'
    julia -e 'Pkg.add("RDatasets");Pkg.add("Gadfly");Pkg.add("DataFrames");Pkg.add("PyPlot")'
    # Julia's Spark support does not support Spark on Yarn yet
    # install mvn
    #cd /mnt
    #aws s3 cp s3://tomzeng/maven/apache-maven-3.3.9-bin.tar.gz .
    #tar xvfz apache-maven-3.3.9-bin.tar.gz
    #sudo mv apache-maven-3.3.9 /opt/maven
    #sudo ln -s /opt/maven/bin/mvn /usr/bin/mvn
    # install Spark for Julia
    #julia -e 'Pkg.clone("https://github.com/dfdx/Spark.jl"); Pkg.build("Spark"); Pkg.checkout("JavaCall")'
  fi

  # iTorch depends on Torch which is installed with --ml-packages
  if [ "$TORCH_KERNEL" = true ]; then
    set +e # workaround for the lengthy torch install-deps, esp. when other background processes are also running yum
    cd /mnt
    if [ ! "$USE_CACHED_DEPS" = true ]; then
      git clone https://github.com/torch/distro.git torch-distro
    fi
    cd torch-distro
    git pull
    ./install-deps
    ./install.sh -b
    export PATH=$PATH:/mnt/torch-distro/install/bin
    source ~/.profile
    luarocks install lzmq
    luarocks install gnuplot
    cd /mnt
    if [ ! "$USE_CACHED_DEPS" = true ]; then
      git clone https://github.com/facebook/iTorch.git
    fi
    cd iTorch
    luarocks make
    sudo cp -pr ~/.ipython/kernels/itorch /usr/local/share/jupyter/kernels/
    set -e
  fi
|
||||
|
||||
|
||||
|
||||
|
||||
if [ ! "$NOTEBOOK_DIR" = "" ]; then
|
||||
NOTEBOOK_DIR="${NOTEBOOK_DIR%/}/" # remove trailing / if exists then add /
|
||||
if [[ "$NOTEBOOK_DIR" == s3://* ]]; then
|
||||
NOTEBOOK_DIR_S3=true
|
||||
# the s3nb does not fully working yet(upload and createe folder not working)
|
||||
# s3nb does not work anymore due to Jupyter update
|
||||
if [ "$NOTEBOOK_DIR_S3_S3NB" = true ]; then
|
||||
cd /mnt
|
||||
if [ ! "$USE_CACHED_DEPS" = true ]; then
|
||||
git clone https://github.com/tomz/s3nb.git
|
||||
fi
|
||||
cd s3nb
|
||||
sudo python -m pip install -U entrypoints
|
||||
sudo python setup.py install
|
||||
if [ "$JUPYTER_HUB" = true ]; then
|
||||
sudo python3 -m pip install -U entrypoints
|
||||
sudo python3 setup.py install
|
||||
fi
|
||||
|
||||
echo "c.NotebookApp.contents_manager_class = 's3nb.S3ContentsManager'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
echo "c.S3ContentsManager.checkpoints_kwargs = {'root_dir': '~/.checkpoints'}" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
# if just bucket with no subfolder, a trailing / is required, otherwise s3nb will break
|
||||
echo "c.S3ContentsManager.s3_base_uri = '$NOTEBOOK_DIR'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
#echo "c.S3ContentsManager.s3_base_uri = '${NOTEBOOK_DIR_S3%/}/%U'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
#echo "c.Spawner.default_url = '${NOTEBOOK_DIR_S3%/}/%U'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
#echo "c.Spawner.notebook_dir = '/%U'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
elif [ "$NOTEBOOK_DIR_S3_S3CONTENTS" = true ]; then
|
||||
BUCKET=$(ruby -e "puts '$NOTEBOOK_DIR'.split('//')[1].split('/')[0]")
|
||||
FOLDER=$(ruby -e "puts '$NOTEBOOK_DIR'.split('//')[1].split('/')[1..-1].join('/')")
|
||||
#sudo python -m pip install -U s3contents
|
||||
cd /mnt
|
||||
#aws s3 cp s3://aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/s3contents.zip .
|
||||
#unzip s3contents.zip
|
||||
git clone https://github.com/tomz/s3contents.git
|
||||
cd s3contents
|
||||
sudo python setup.py install
|
||||
echo "c.NotebookApp.contents_manager_class = 's3contents.S3ContentsManager'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
echo "c.S3ContentsManager.bucket_name = '$BUCKET'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
echo "c.S3ContentsManager.prefix = '$FOLDER'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
# this following is no longer needed, default was fixed in the latest on github
|
||||
#echo "c.S3ContentsManager.endpoint_url = 'https://s3.amazonaws.com'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
else
|
||||
BUCKET=$(ruby -e "puts '$NOTEBOOK_DIR'.split('//')[1].split('/')[0]")
|
||||
FOLDER=$(ruby -e "puts '$NOTEBOOK_DIR'.split('//')[1].split('/')[1..-1].join('/')")
|
||||
if [ "$USE_CACHED_DEPS" != true ]; then
|
||||
sudo yum install -y automake fuse fuse-devel libxml2-devel
|
||||
fi
|
||||
cd /mnt
|
||||
git clone https://github.com/s3fs-fuse/s3fs-fuse.git
|
||||
cd s3fs-fuse/
|
||||
ls -alrt
|
||||
./autogen.sh
|
||||
./configure
|
||||
make
|
||||
sudo make install
|
||||
sudo su -c 'echo user_allow_other >> /etc/fuse.conf'
|
||||
mkdir -p /mnt/s3fs-cache
|
||||
mkdir -p /mnt/$BUCKET
|
||||
#/usr/local/bin/s3fs -o allow_other -o iam_role=auto -o umask=0 $BUCKET /mnt/$BUCKET
|
||||
# -o nodnscache -o nosscache -o parallel_count=20 -o multipart_size=50
|
||||
/usr/local/bin/s3fs -o allow_other -o iam_role=auto -o umask=0 -o url=https://s3.amazonaws.com -o no_check_certificate -o enable_noobj_cache -o use_cache=/mnt/s3fs-cache $BUCKET /mnt/$BUCKET
|
||||
#/usr/local/bin/s3fs -o allow_other -o iam_role=auto -o umask=0 -o use_cache=/mnt/s3fs-cache $BUCKET /mnt/$BUCKET
|
||||
echo "c.NotebookApp.notebook_dir = '/mnt/$BUCKET/$FOLDER'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
echo "c.ContentsManager.checkpoints_kwargs = {'root_dir': '.checkpoints'}" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
fi
|
||||
else
|
||||
echo "c.NotebookApp.notebook_dir = '$NOTEBOOK_DIR'" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
echo "c.ContentsManager.checkpoints_kwargs = {'root_dir': '.checkpoints'}" >> ~/.jupyter/jupyter_notebook_config.py
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! "$JUPYTER_HUB_DEFAULT_USER" = "" ]; then
|
||||
sudo adduser $JUPYTER_HUB_DEFAULT_USER
|
||||
fi
|
||||
|
||||
if [ "$COPY_SAMPLES" = true ]; then
|
||||
cd ~
|
||||
if [ "$NOTEBOOK_DIR_S3" = true ]; then
|
||||
aws s3 sync s3://aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/notebooks/ ${NOTEBOOK_DIR}samples/ || true
|
||||
else
|
||||
if [ ! "$NOTEBOOK_DIR" = "" ]; then
|
||||
mkdir -p ${NOTEBOOK_DIR}samples || true
|
||||
sudo mkdir /home/$JUPYTER_HUB_DEFAULT_USER/${NOTEBOOK_DIR}samples || true
|
||||
fi
|
||||
aws s3 sync s3://aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/notebooks/ ${NOTEBOOK_DIR}samples || true
|
||||
sudo cp -pr ${NOTEBOOK_DIR}samples /home/$JUPYTER_HUB_DEFAULT_USER/
|
||||
sudo chown -R $JUPYTER_HUB_DEFAULT_USER:$JUPYTER_HUB_DEFAULT_USER /home/$JUPYTER_HUB_DEFAULT_USER/${NOTEBOOK_DIR}samples
|
||||
fi
|
||||
if [ "$BIGDL" = true ]; then
|
||||
aws s3 cp s3://aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/notebooks/text_classfication.ipynb ${NOTEBOOK_DIR}.
|
||||
sudo cp ${NOTEBOOK_DIR}text_classfication.ipynb /home/$JUPYTER_HUB_DEFAULT_USER/${NOTEBOOK_DIR}
|
||||
sudo chown -R $JUPYTER_HUB_DEFAULT_USER:$JUPYTER_HUB_DEFAULT_USER /home/$JUPYTER_HUB_DEFAULT_USER/${NOTEBOOK_DIR}text_classfication.ipynb
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
wait_for_spark() {
|
||||
# wait SparkR file to show up
|
||||
while [ ! -f /etc/spark/conf/spark-defaults.conf ]
|
||||
do
|
||||
sleep 10
|
||||
done
|
||||
}
|
||||
|
||||
setup_jupyter_process_with_bigdl() {
|
||||
wait_for_spark
|
||||
export PYTHON_API_PATH=${BIGDL_HOME}/dist/lib/bigdl-$BIGDL_VER-python-api.zip
|
||||
export BIGDL_JAR_PATH=${BIGDL_HOME}/dist/lib/bigdl-$BIGDL_VER-jar-with-dependencies.jar
|
||||
cat ${BIGDL_HOME}/dist/conf/spark-bigdl.conf | sudo tee -a /etc/spark/conf/spark-defaults.conf
|
||||
sudo puppet apply << PUPPET_SCRIPT
|
||||
include 'upstart'
|
||||
upstart::job { 'jupyter':
|
||||
description => 'Jupyter',
|
||||
respawn => true,
|
||||
respawn_limit => '0 10',
|
||||
start_on => 'runlevel [2345]',
|
||||
stop_on => 'runlevel [016]',
|
||||
console => 'output',
|
||||
chdir => '/home/hadoop',
|
||||
script => '
|
||||
sudo su - hadoop > /var/log/jupyter/jupyter.log 2>&1 <<BASH_SCRIPT
|
||||
export NODE_PATH="$NODE_PATH"
|
||||
export PYSPARK_DRIVER_PYTHON="jupyter"
|
||||
export PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser $SSL_OPTS_JUPYTER --log-level=INFO"
|
||||
export NOTEBOOK_DIR="$NOTEBOOK_DIR"
|
||||
|
||||
export BIGDL_HOME=/mnt/BigDL
|
||||
export SPARK_HOME=/usr/lib/spark
|
||||
export YARN_CONF_DIR=/etc/hadoop/conf
|
||||
export PYTHONPATH=${PYTHON_API_PATH}:$PYTHONPATH
|
||||
source ${BIGDL_HOME}/dist/bin/bigdl.sh
|
||||
#pyspark --py-files ${PYTHON_API_PATH} --jars ${BIGDL_JAR_PATH} --conf spark.driver.extraClassPath=${BIGDL_JAR_PATH} --conf spark.executor.extraClassPath=bigdl-${BIGDL_VER}-jar-with-dependencies.jar
|
||||
pyspark --py-files ${PYTHON_API_PATH} --jars ${BIGDL_JAR_PATH}
|
||||
BASH_SCRIPT
|
||||
',
|
||||
}
|
||||
PUPPET_SCRIPT
|
||||
}
|
||||
|
||||
background_install_proc() {
|
||||
wait_for_spark
|
||||
|
||||
if ! grep "spark.sql.catalogImplementation" /etc/spark/conf/spark-defaults.conf; then
|
||||
sudo bash -c "echo 'spark.sql.catalogImplementation hive' >> /etc/spark/conf/spark-defaults.conf"
|
||||
fi

    if [ ! -f /tmp/Renvextra ]; then # skip if the RStudio bootstrap action has already done this
        cat << 'EOF' > /tmp/Renvextra
JAVA_HOME="/etc/alternatives/jre"
HADOOP_HOME_WARN_SUPPRESS="true"
HADOOP_HOME="/usr/lib/hadoop"
HADOOP_PREFIX="/usr/lib/hadoop"
HADOOP_MAPRED_HOME="/usr/lib/hadoop-mapreduce"
HADOOP_YARN_HOME="/usr/lib/hadoop-yarn"
HADOOP_COMMON_HOME="/usr/lib/hadoop"
HADOOP_HDFS_HOME="/usr/lib/hadoop-hdfs"
HADOOP_CONF_DIR="/usr/lib/hadoop/etc/hadoop"
YARN_CONF_DIR="/usr/lib/hadoop/etc/hadoop"
YARN_HOME="/usr/lib/hadoop-yarn"
HIVE_HOME="/usr/lib/hive"
HIVE_CONF_DIR="/usr/lib/hive/conf"
HBASE_HOME="/usr/lib/hbase"
HBASE_CONF_DIR="/usr/lib/hbase/conf"
SPARK_HOME="/usr/lib/spark"
SPARK_CONF_DIR="/usr/lib/spark/conf"
PATH=${PWD}:${PATH}
EOF

        #if [ "$PYSPARK_PYTHON" = "python3" ]; then
        if [ "$INSTALL_PY3_PKGS" = true ]; then
            cat << 'EOF' >> /tmp/Renvextra
PYSPARK_PYTHON="python3"
EOF
        fi

        cat /tmp/Renvextra | sudo tee -a /usr/lib64/R/etc/Renviron
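
        # Appending to R's site-wide Renviron makes the Hadoop/Spark homes
        # above visible to every R session, which SparkR needs in order to
        # find the cluster installation.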

        sudo mkdir -p /mnt/spark
        sudo chmod a+rwx /mnt/spark
        if [ -d /mnt1 ]; then
            sudo mkdir -p /mnt1/spark
            sudo chmod a+rwx /mnt1/spark
        fi

        set +e # workaround in case SparkR is already installed by another BA
        # install SparkR for R - Toree itself does not need this
        sudo R --no-save << R_SCRIPT
library(devtools)
install('/usr/lib/spark/R/lib/SparkR')
R_SCRIPT
        set -e

    fi # end if ! -f /tmp/Renvextra

    export SPARK_HOME="/usr/lib/spark"
    SPARK_PACKAGES=""

    PYSPARK_PYTHON="python"
    if [ "$INSTALL_PY3_PKGS" = true ]; then
        PYSPARK_PYTHON="python3"
    fi

    if [ ! "$USER_SPARK_OPTS" = "" ]; then
        SPARK_OPTS=$USER_SPARK_OPTS
        SPARK_PACKAGES=$(ruby -e "opts='$SPARK_OPTS'.split;pkgs=nil;opts.each_with_index{|o,i| pkgs=opts[i+1] if o.start_with?('--packages')};puts pkgs || '$SPARK_PACKAGES'")
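        # The one-liner above grabs the token that follows --packages, e.g.
        #   USER_SPARK_OPTS="--master yarn --packages com.databricks:spark-avro_2.11:3.2.0"
        # would yield SPARK_PACKAGES="com.databricks:spark-avro_2.11:3.2.0"
        # (hypothetical input, shown only for illustration).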
        export SPARK_OPTS
        export SPARK_PACKAGES

        sudo jupyter toree install --interpreters=$INTERPRETERS --spark_home=$SPARK_HOME --python_exec=$PYSPARK_PYTHON --spark_opts="$SPARK_OPTS"
        # NOTE - Toree does not pick up SPARK_OPTS, so use the following workaround until that is fixed
        if [ ! "$SPARK_PACKAGES" = "" ]; then
            if ! grep "spark.jars.packages" /etc/spark/conf/spark-defaults.conf; then
                sudo bash -c "echo 'spark.jars.packages $SPARK_PACKAGES' >> /etc/spark/conf/spark-defaults.conf"
            fi
        fi
    else
        sudo jupyter toree install --interpreters=$INTERPRETERS --spark_home=$SPARK_HOME --python_exec=$PYSPARK_PYTHON
    fi

    if [ "$INSTALL_PY3_PKGS" = true ]; then
        sudo bash -c 'echo "" >> /etc/spark/conf/spark-env.sh'
        sudo bash -c 'echo "export PYSPARK_PYTHON=/usr/bin/python3" >> /etc/spark/conf/spark-env.sh'

        #if [ -f /usr/local/share/jupyter/kernels/apache_toree_pyspark/kernel.json ]; then
        #    sudo bash -c 'sed -i "s/\"PYTHON_EXEC\": \"python\"/\"PYTHON_EXEC\": \"\/usr\/bin\/python3\"/g" /usr/local/share/jupyter/kernels/apache_toree_pyspark/kernel.json'
        #fi
    fi

    # the following dirs could cause conflicts, so remove them
    rm -rf ~/.m2/
    rm -rf ~/.ivy2/
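
    # ~/.m2 and ~/.ivy2 are the Maven/Ivy caches left over from the Toree
    # build; presumably stale artifacts there can shadow the jars Spark
    # resolves at runtime.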

    if [ "$NO_JUPYTER" = false ]; then
        echo "Starting Jupyter notebook via pyspark"
        cd ~
        #PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser" pyspark > /var/log/jupyter/jupyter.log &
        if [ "$BIGDL" = false ]; then
            sudo puppet apply << PUPPET_SCRIPT
include 'upstart'
upstart::job { 'jupyter':
    description   => 'Jupyter',
    respawn       => true,
    respawn_limit => '0 10',
    start_on      => 'runlevel [2345]',
    stop_on       => 'runlevel [016]',
    console       => 'output',
    chdir         => '/home/hadoop',
    script        => '
sudo su - hadoop > /var/log/jupyter/jupyter.log 2>&1 <<BASH_SCRIPT
export NODE_PATH="$NODE_PATH"
export PYSPARK_DRIVER_PYTHON="jupyter"
export PYSPARK_DRIVER_PYTHON_OPTS="notebook --no-browser $SSL_OPTS_JUPYTER --log-level=INFO"
export NOTEBOOK_DIR="$NOTEBOOK_DIR"
pyspark
BASH_SCRIPT
',
}
PUPPET_SCRIPT
        else
            setup_jupyter_process_with_bigdl
        fi
    fi
}

create_hdfs_user() {
    wait_for_spark
    sudo -u hdfs hdfs dfs -mkdir /user/$JUPYTER_HUB_DEFAULT_USER
    sudo -u hdfs hdfs dfs -chown $JUPYTER_HUB_DEFAULT_USER:$JUPYTER_HUB_DEFAULT_USER /user/$JUPYTER_HUB_DEFAULT_USER
    sudo -u hdfs hdfs dfs -chmod -R 777 /user/$JUPYTER_HUB_DEFAULT_USER
}
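
# The wide-open 777 lets notebook kernels running as other users write to
# this HDFS home directory; a tighter ACL would presumably work as well.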

# apache toree install
if [ "$TOREE_KERNEL" = true ]; then
    echo "Running background process to install Apache Toree"
    # spark 1.6
    #sudo pip install --pre toree
    #sudo jupyter toree install

    # spark 2.0
    cd /mnt
    if [ "$USE_CACHED_DEPS" != true ]; then
        curl https://bintray.com/sbt/rpm/rpm | sudo tee /etc/yum.repos.d/bintray-sbt-rpm.repo
        sudo yum install docker sbt -y
        git clone https://github.com/apache/incubator-toree.git
    fi
    cd incubator-toree/
    git pull
    export APACHE_SPARK_VERSION=$APACHE_SPARK_VERSION
    make -j8 dist
    make clean release APACHE_SPARK_VERSION=$APACHE_SPARK_VERSION || true # may fail with a "docker not running" error, so swallow it with || true
    if [ "$RUN_AS_STEP" = true ]; then
        background_install_proc
    else
        background_install_proc &
    fi
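
    # When run as an EMR step the cluster is already up, so the install can
    # block in the foreground; as a bootstrap action it is backgrounded so
    # node provisioning is not held up while Toree builds.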
else
    if [ "$NO_JUPYTER" = false ]; then
        echo "Starting Jupyter notebook"
        if [ "$BIGDL" = false ]; then
            sudo puppet apply << PUPPET_SCRIPT
include 'upstart'
upstart::job { 'jupyter':
    description   => 'Jupyter',
    respawn       => true,
    respawn_limit => '0 10',
    start_on      => 'runlevel [2345]',
    stop_on       => 'runlevel [016]',
    console       => 'output',
    chdir         => '/home/hadoop',
    env           => { 'NOTEBOOK_DIR' => '$NOTEBOOK_DIR', 'NODE_PATH' => '$NODE_PATH' },
    exec          => 'sudo su - hadoop -c "jupyter notebook --no-browser $SSL_OPTS_JUPYTER" > /var/log/jupyter/jupyter.log 2>&1',
}
PUPPET_SCRIPT
        else
            setup_jupyter_process_with_bigdl &
        fi
    fi
fi

if [ "$JUPYTER_HUB" = true ]; then
    sudo npm install -g --unsafe-perm configurable-http-proxy
    sudo python3 -m pip install jupyterhub #notebook ipykernel
    #sudo python3 -m ipykernel install

    if [ ! "$JUPYTER_HUB_DEFAULT_USER" = "" ]; then
        create_hdfs_user &
    fi
    # change the password of $JUPYTER_HUB_DEFAULT_USER to JUPYTER_PASSWORD
    if [ ! "$JUPYTER_PASSWORD" = "" ]; then
        sudo sh -c "echo '$JUPYTER_PASSWORD' | passwd $JUPYTER_HUB_DEFAULT_USER --stdin"
    fi

    sudo ln -sf /usr/local/bin/jupyterhub /usr/bin/
    sudo ln -sf /usr/local/bin/jupyterhub-singleuser /usr/bin/
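
    # pip3 puts jupyterhub under /usr/local/bin, which the Upstart job below
    # does not have on its PATH, hence the symlinks into /usr/bin.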
    mkdir -p /mnt/jupyterhub
    cd /mnt/jupyterhub
    echo "Starting Jupyterhub"
    #sudo jupyterhub $SSL_OPTS_JUPYTERHUB --port=$JUPYTER_HUB_PORT --ip=$JUPYTER_HUB_IP --log-file=/var/log/jupyter/jupyterhub.log --config ~/.jupyter/jupyter_notebook_config.py &
    sudo puppet apply << PUPPET_SCRIPT
include 'upstart'
upstart::job { 'jupyterhub':
    description   => 'JupyterHub',
    respawn       => true,
    respawn_limit => '0 10',
    start_on      => 'runlevel [2345]',
    stop_on       => 'runlevel [016]',
    console       => 'output',
    chdir         => '/mnt/jupyterhub',
    env           => { 'NOTEBOOK_DIR' => '$NOTEBOOK_DIR', 'NODE_PATH' => '$NODE_PATH' },
    exec          => 'sudo /usr/bin/jupyterhub --pid-file=/var/run/jupyter.pid $SSL_OPTS_JUPYTERHUB --port=$JUPYTER_HUB_PORT --ip=$JUPYTER_HUB_IP --log-file=/var/log/jupyter/jupyterhub.log --config /home/hadoop/.jupyter/jupyter_notebook_config.py'
}
PUPPET_SCRIPT

fi

cat << 'EOF' > /tmp/jupyter_logpusher.config
{
  "/var/log/jupyter/" : {
    "includes" : [ "(.*)" ],
    "s3Path" : "node/$instance-id/applications/jupyter/$0",
    "retentionPeriod" : "5d",
    "logType" : [ "USER_LOG", "SYSTEM_LOG" ]
  }
}
EOF
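
# In the logpusher config, $0 presumably expands to the file name captured
# by the includes regex and $instance-id to the node's EC2 instance id, so
# each node's Jupyter logs land under their own S3 prefix.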
cat /tmp/jupyter_logpusher.config | sudo tee -a /etc/logpusher/jupyter.config

fi
echo "Bootstrap action finished"

@ -0,0 +1,23 @@
#!/bin/bash

wget --no-check-certificate https://s3.amazonaws.com/aws-bigdata-blog/artifacts/aws-blog-emr-jupyter/install-jupyter-emr5.sh
aws s3 cp s3://ty-emr/XRR/bootstrap/install-jupyter-emr5-payload.sh .
chmod +x install-jupyter-emr5-payload.sh
./install-jupyter-emr5-payload.sh \
    --r \
    --julia \
    --toree \
    --torch \
    --ruby \
    --ds-packages \
    --ml-packages \
    --python-packages ggplot nilearn \
    --port 8002 \
    --password jupyter \
    --jupyterhub \
    --jupyterhub-port 8001 \
    --cached-install \
    --notebook-dir s3://ty-emr/XRR/jupyter/notebooks/ \
    --copy-samples \
    --s3fs

@ -0,0 +1,7 @@
#!/bin/sh

cd ~
wget https://sift-tool.org/downloads/sift/sift_0.9.0_linux_amd64.tar.gz
tar xvzf sift*
sudo mv sift*/sift /usr/local/bin/
rm -rf sift*
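
# sift is a fast grep alternative; once installed it can be used as, e.g.:
#   sift "pattern" ~/some/dir
# (illustrative invocation)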
Binary file not shown.
Binary file not shown.

@ -0,0 +1,14 @@
#!/bin/bash

configure_postgres() {
    while ! status presto-server 2>/dev/null | grep -q running
    do
        sleep 1
    done
    aws s3 cp s3://ty-emr/XRR/presto/pg1.properties ~
    aws s3 cp s3://ty-emr/XRR/presto/pg2.properties ~
    sudo mv ~/*.properties /etc/presto/conf/catalog
    sudo restart presto-server
}
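
# pg1.properties and pg2.properties are presumably Presto catalog definitions
# for two PostgreSQL connectors; Presto registers every .properties file in
# /etc/presto/conf/catalog as a catalog when it restarts.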

configure_postgres &

@ -0,0 +1,7 @@
[ -e /mnt/replaced-instance-controller ] && exit 0
sudo find /usr/share/aws/emr/instance-controller/lib -name 'instance-controller*.jar' -exec rm {} \;
sudo aws s3 cp s3://ty-emr/XRR/bootstrap/instance-controller/instance-controller.jar /usr/share/aws/emr/instance-controller/lib/
sudo aws s3 cp s3://ty-emr/XRR/bootstrap/instance-controller/instance-controller-interface.jar /usr/share/aws/emr/instance-controller/lib/
touch /mnt/replaced-instance-controller
sudo service instance-controller stop
sudo service logpusher stop
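
# The marker file makes the replacement idempotent across re-runs; stopping
# instance-controller and logpusher lets them come back up (presumably via
# their supervisor) against the replaced jars.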

@ -0,0 +1,6 @@
[ -e /mnt/replaced-instance-controller ] && exit 0
sudo find /usr/share/aws/emr/instance-controller/lib -name 'instance-controller*.jar' -and -not -name '*interface*' -exec rm {} \;
sudo aws s3 cp s3://ty-emr/XRR/bootstrap/instance-controller/instance-controller.jar /usr/share/aws/emr/instance-controller/lib/
touch /mnt/replaced-instance-controller
sudo service instance-controller stop
sudo service logpusher stop