
Commit 45d51e3

Bump to spark 2.4.5 + minor improvements
1 parent 3deefc7 commit 45d51e3

File tree

6 files changed, +52 -9 lines changed


Makefile

+1 -1

@@ -90,7 +90,7 @@ tx-en: ## rebuild en locale strings and push to master (req: GH_TOKEN)
 
 
 test/%: ## run tests against a stack
-	@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test
+	@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test $(notdir $@)/test
 
 test/base-notebook: ## test supported options in the base notebook
 	@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test base-notebook/test
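Note: with this change, make test/<stack> runs the common suite in test plus the image-specific suite in <stack>/test, so for example make test/pyspark-notebook also picks up the new pyspark-notebook/test/test_spark.py below. The dedicated test/base-notebook target already passed its image-specific path explicitly.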

base-notebook/test/test_container_options.py

+14

@@ -78,6 +78,20 @@ def test_chown_extra(container):
     assert '/opt/conda/LICENSE.txt:1010:101' in c.logs(stdout=True).decode('utf-8')
 
 
+def test_chown_home(container):
+    """Container should change the NB_USER home directory owner and
+    group to the current value of NB_UID and NB_GID."""
+    c = container.run(
+        tty=True,
+        user='root',
+        environment=['CHOWN_HOME=yes',
+                     'CHOWN_HOME_OPTS=-R',
+                     ],
+        command=['start.sh', 'bash', '-c', 'chown root:root /home/jovyan && ls -alsh /home']
+    )
+    assert "Changing ownership of /home/jovyan to 1000:100 with options '-R'" in c.logs(stdout=True).decode('utf-8')
+
+
 def test_sudo(container):
     """Container should grant passwordless sudo to the default user."""
     c = container.run(
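Note: the test runs as root and first re-owns /home/jovyan to root:root, so the assertion can only pass if start.sh, driven by CHOWN_HOME=yes and CHOWN_HOME_OPTS=-R, actually restores ownership to NB_UID:NB_GID (1000:100).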

conftest.py

+5 -1

@@ -1,6 +1,7 @@
 # Copyright (c) Jupyter Development Team.
 # Distributed under the terms of the Modified BSD License.
 import os
+import logging
 
 import docker
 import pytest
@@ -10,6 +11,8 @@
 from requests.adapters import HTTPAdapter
 
 
+LOGGER = logging.getLogger(__name__)
+
 @pytest.fixture(scope='session')
 def http_client():
     """Requests session with retries and backoff."""
@@ -72,9 +75,10 @@ def run(self, **kwargs):
         all_kwargs = {}
         all_kwargs.update(self.kwargs)
         all_kwargs.update(kwargs)
+        LOGGER.info(f"Running {self.image_name} with args {all_kwargs} ...")
         self.container = self.docker_client.containers.run(self.image_name, **all_kwargs)
         return self.container
-
+
     def remove(self):
         """Kills and removes the tracked docker container."""
         if self.container:
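For context, a minimal sketch of how a stack test consumes this fixture; the container fixture and the run()/logs() API come from this conftest.py, while the module name and echoed string are hypothetical:

    # hypothetical stack test, for illustration only
    import logging

    LOGGER = logging.getLogger(__name__)

    def test_container_starts(container):
        """Image should boot via start.sh and produce output we can assert on."""
        # TrackedContainer.run() now logs the image name and kwargs before starting
        c = container.run(
            tty=True,
            command=['start.sh', 'bash', '-c', 'echo "it works"'],
        )
        logs = c.logs(stdout=True).decode('utf-8')
        LOGGER.debug(logs)
        assert 'it works' in logs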

pyspark-notebook/Dockerfile

+8 -7

@@ -8,16 +8,16 @@ LABEL maintainer="Jupyter Project <[email protected]>"
 USER root
 
 # Spark dependencies
-ENV APACHE_SPARK_VERSION 2.4.4
-ENV HADOOP_VERSION 2.7
+ENV APACHE_SPARK_VERSION=2.4.5 \
+    HADOOP_VERSION=2.7
 
 RUN apt-get -y update && \
     apt-get install --no-install-recommends -y openjdk-8-jre-headless ca-certificates-java && \
     rm -rf /var/lib/apt/lists/*
 
 RUN cd /tmp && \
     wget -q http://mirrors.ukfast.co.uk/sites/ftp.apache.org/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
-    echo "2E3A5C853B9F28C7D4525C0ADCB0D971B73AD47D5CCE138C85335B9F53A6519540D3923CB0B5CEE41E386E49AE8A409A51AB7194BA11A254E037A848D0C4A9E5 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
+    echo "2426a20c548bdfc07df288cd1d18d1da6b3189d0b78dee76fa034c52a4e02895f0ad460720c526f163ba63a17efae4764c46a1cd8f9b04c60f9937a554db85d2 *spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" | sha512sum -c - && \
     tar xzf spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /usr/local --owner root --group root --no-same-owner && \
     rm spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz
 RUN cd /usr/local && ln -s spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark
@@ -36,10 +36,11 @@ RUN apt-get -y update && \
     rm -rf /var/lib/apt/lists/*
 
 # Spark and Mesos config
-ENV SPARK_HOME /usr/local/spark
-ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip
-ENV MESOS_NATIVE_LIBRARY /usr/local/lib/libmesos.so
-ENV SPARK_OPTS --driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info
+ENV SPARK_HOME=/usr/local/spark \
+    PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip \
+    MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so \
+    SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info" \
+    PATH=$PATH:/usr/local/spark/bin
 
 USER $NB_UID
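Note: consolidating the ENV lines into a single instruction tidies the image history, and the new PATH entry is what lets the spark-shell test below call spark-shell without an absolute path. One caveat worth flagging: per Dockerfile ENV semantics, $SPARK_HOME in the PYTHONPATH assignment is expanded from the environment as it existed before this ENV instruction, not from the SPARK_HOME= set on the line above.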

pyspark-notebook/test/test_spark.py

+19

@@ -0,0 +1,19 @@
+# Copyright (c) Jupyter Development Team.
+# Distributed under the terms of the Modified BSD License.
+import time
+import logging
+
+import pytest
+
+LOGGER = logging.getLogger(__name__)
+
+def test_spark_shell(container):
+    """Checking if Spark (spark-shell) is running properly"""
+    c = container.run(
+        tty=True,
+        command=['start.sh', 'bash', '-c', 'spark-shell <<< "1+1"']
+    )
+    c.wait(timeout=30)
+    logs = c.logs(stdout=True).decode('utf-8')
+    LOGGER.debug(logs)
+    assert 'res0: Int = 2' in logs
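Note: the Scala REPL binds its first evaluated expression to res0, so feeding "1+1" to spark-shell through a here-string prints res0: Int = 2 once Spark comes up. The bare spark-shell invocation relies on the PATH entry added in the Dockerfile above, and c.wait(timeout=30) bounds how long a broken image can stall the suite.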

pytest.ini

+5

@@ -0,0 +1,5 @@
+[pytest]
+log_cli = 1
+log_cli_level = INFO
+log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)
+log_cli_date_format = %Y-%m-%d %H:%M:%S
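Note: log_cli enables pytest live logging, so records at INFO and above are streamed to the terminal while tests run. Combined with the LOGGER.info call added to conftest.py above, each test now reports which image and container kwargs it is running before the container starts.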
