[SPARK-717] Enable statsd sink to monitor Spark metrics (#402)
* [SPARK-717] Enable statsd sink to monitor Spark metrics (#290)
- add spark-statsd jar for the StatsDSink
- update spark-statsd jar to use Spark 2.2.1 and Scala 2.11 (#393)

Note: UCR must be enabled to output metrics
samvantran authored Sep 25, 2018
1 parent 856019b commit 5998d09
Showing 5 changed files with 31 additions and 1 deletion.
9 changes: 9 additions & 0 deletions conf/metrics.properties.template
@@ -0,0 +1,9 @@

# Enable StatsdSink for all instances by class name
*.sink.statsd.class=org.apache.spark.metrics.sink.StatsDSink
*.sink.statsd.prefix=spark
*.sink.statsd.host=<STATSD_UDP_HOST>
*.sink.statsd.port=<STATSD_UDP_PORT>

# Enable JVM metrics source for all instances (master, worker, driver and executor) by class name
*.source.jvm.class=org.apache.spark.metrics.source.JvmSource
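
For illustration, once the launch scripts substitute the placeholders (see the spark-env.sh and runit changes below), the rendered metrics.properties would read as follows; the host and port values here are hypothetical:

    *.sink.statsd.class=org.apache.spark.metrics.sink.StatsDSink
    *.sink.statsd.prefix=spark
    *.sink.statsd.host=10.0.2.15
    *.sink.statsd.port=8125
    *.source.jvm.class=org.apache.spark.metrics.source.JvmSource
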
10 changes: 9 additions & 1 deletion conf/spark-env.sh
@@ -12,7 +12,7 @@ mkdir -p "${HADOOP_CONF_DIR}"
[ -f "${MESOS_SANDBOX}/hdfs-site.xml" ] && cp "${MESOS_SANDBOX}/hdfs-site.xml" "${HADOOP_CONF_DIR}"
[ -f "${MESOS_SANDBOX}/core-site.xml" ] && cp "${MESOS_SANDBOX}/core-site.xml" "${HADOOP_CONF_DIR}"


SPARK_WORKING_DIR=${PWD}

cd $MESOS_SANDBOX

@@ -87,6 +87,14 @@ else
echo "spark-env: StatsD metrics require Mesos UCR. For dispatcher metrics, enable the 'UCR_containerizer' option. For driver metrics, include '--conf spark.mesos.containerizer=mesos' in your run"
fi

if [ -n "${STATSD_UDP_HOST}" ] && [ -n "${STATSD_UDP_PORT}" ]; then
sed -e "s/<STATSD_UDP_HOST>/${STATSD_UDP_HOST}/g" \
-e "s/<STATSD_UDP_PORT>/${STATSD_UDP_PORT}/g" \
${SPARK_WORKING_DIR}/conf/metrics.properties.template >${SPARK_WORKING_DIR}/conf/metrics.properties
else
echo "spark-env: No STATSD_UDP environment variables were defined" >&2
fi

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
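
To make the UCR requirement concrete, here is a hedged sketch of a driver submission with the DC/OS Spark CLI; the StatsD host, port, and application jar URL are placeholders, and in a real cluster the STATSD_UDP_* variables are expected to be injected into the UCR container by the DC/OS metrics agent rather than set by hand as they are here:

    dcos spark run --submit-args="\
      --conf spark.mesos.containerizer=mesos \
      --conf spark.mesos.driverEnv.STATSD_UDP_HOST=10.0.2.15 \
      --conf spark.mesos.driverEnv.STATSD_UDP_PORT=8125 \
      --class org.apache.spark.examples.SparkPi \
      https://example.com/jars/spark-examples.jar 100"

With spark.mesos.containerizer=mesos the driver runs under UCR, and the block above can then render conf/metrics.properties from its environment.
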
4 changes: 4 additions & 0 deletions docker/Dockerfile
@@ -77,6 +77,10 @@ RUN ln -s /var/lib/runit/service/nginx /etc/service/nginx
ADD dist /opt/spark/dist
ADD krb5.conf.mustache /etc/

# Add external jar for pushing metrics via StatsD (https://github.com/vidhyaarvind/spark-statsd)
# current version: Spark 2.2.1, Scala 2.11
ADD jars/spark-statsd-2.2.1-2.11.jar /opt/spark/dist/jars/

# required to run as nobody
RUN chmod -R ugo+rw /etc/nginx
RUN chmod -R ugo+rw /etc/krb5.conf
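
As a quick sanity check (a hypothetical command, run inside the built image), one can confirm that the StatsDSink class referenced by the metrics template actually ships in the bundled jar:

    jar tf /opt/spark/dist/jars/spark-statsd-2.2.1-2.11.jar | grep StatsDSink
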
Binary file added docker/jars/spark-statsd-2.2.1-2.11.jar
9 changes: 9 additions & 0 deletions docker/runit/service/spark/run
@@ -31,6 +31,14 @@ function set_log_level() {
${SPARK_DIST_PATH}/conf/log4j.properties.template >${SPARK_DIST_PATH}/conf/log4j.properties
}

function set_statsd_address() {
if [ "$STATSD_UDP_HOST" != "" ] && [ "$STATSD_UDP_PORT" != "" ]; then
sed -e "s/<STATSD_UDP_HOST>/${STATSD_UDP_HOST}/g" \
-e "s/<STATSD_UDP_PORT>/${STATSD_UDP_PORT}/g" \
${SPARK_DIST_PATH}/conf/metrics.properties.template >${SPARK_DIST_PATH}/conf/metrics.properties
fi
}

function add_if_non_empty() {
if [ -n "$2" ]; then
echo "$1=$2" >> ${SPARK_DIST_PATH}/conf/mesos-cluster-dispatcher.properties
@@ -61,6 +69,7 @@ function configure_properties() {

export APPLICATION_WEB_PROXY_BASE="${DISPATCHER_UI_WEB_PROXY_BASE}"
set_log_level
set_statsd_address
export_daemon_opts
configure_properties
ZK="master.mesos:2181"
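
A minimal smoke test, assuming netcat is available in the container and the STATSD_UDP_* variables are set, sends one StatsD-formatted counter to the configured endpoint to verify UDP reachability; the metric name is arbitrary:

    echo "spark.smoke.counter:1|c" | nc -u -w1 "${STATSD_UDP_HOST}" "${STATSD_UDP_PORT}"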
