Skip to content

Commit 9427b7e

Browse files
Merge branch 'master' into feat/helm-fixes
2 parents cb28298 + d72f681 commit 9427b7e

6 files changed

+1408
-1367
lines changed

export.Dockerfile

+54-34
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,54 @@
1-
FROM quay.io/cdis/python:python3.9-buster-2.0.0
1+
ARG AZLINUX_BASE_VERSION=master
2+
3+
# Base stage with python-build-base
4+
FROM quay.io/cdis/python-build-base:${AZLINUX_BASE_VERSION} AS base
25

36
ENV appname=pelican
47

5-
ENV DEBIAN_FRONTEND=noninteractive
8+
# create gen3 user
9+
# Create a group 'gen3' with GID 1000 and a user 'gen3' with UID 1000
10+
RUN groupadd -g 1000 gen3 && \
11+
useradd -m -s /bin/bash -u 1000 -g gen3 gen3
12+
13+
# Install pipx
14+
RUN python3 -m pip install pipx && \
15+
python3 -m pipx ensurepath
16+
17+
USER gen3
18+
# Install Poetry via pipx
19+
RUN pipx install poetry
20+
ENV PATH="/home/gen3/.local/bin:${PATH}"
21+
USER root
22+
23+
WORKDIR /${appname}
24+
25+
# Builder stage
26+
FROM base AS builder
27+
28+
RUN dnf update && dnf install -y \
29+
python3-devel \
30+
gcc \
31+
postgresql-devel
32+
33+
COPY . /${appname}
34+
35+
# cache so that poetry install will run if these files change
36+
COPY poetry.lock pyproject.toml /${appname}/
37+
38+
RUN poetry install -vv --no-interaction --without dev
39+
40+
# Final stage
41+
FROM base
642

7-
#RUN mkdir -p /usr/share/man/man1
8-
#RUN mkdir -p /usr/share/man/man7
43+
COPY --from=builder /venv /venv
44+
COPY --from=builder /${appname} /${appname}
945

10-
RUN apt-get update && apt-get install -y --no-install-recommends \
11-
build-essential \
12-
libgnutls30 \
13-
openjdk-11-jre-headless \
14-
# dependency for psycopg2
15-
libpq-dev \
16-
postgresql-client \
46+
RUN dnf update && dnf install -y \
1747
wget \
18-
unzip \
19-
g++ \
20-
&& rm -rf /var/lib/apt/lists/*
48+
tar \
49+
java-11-amazon-corretto \
50+
gnutls \
51+
&& rm -rf /var/cache/yum
2152

2253
ENV HADOOP_VERSION="3.2.1"
2354
ENV HADOOP_HOME="/hadoop" \
@@ -27,7 +58,8 @@ RUN wget ${HADOOP_INSTALLATION_URL} \
2758
&& mkdir -p $HADOOP_HOME \
2859
&& tar -xvf hadoop-${HADOOP_VERSION}.tar.gz -C ${HADOOP_HOME} --strip-components 1 \
2960
&& rm hadoop-${HADOOP_VERSION}.tar.gz \
30-
&& rm -rf $HADOOP_HOME/share/doc
61+
&& rm -rf $HADOOP_HOME/share/doc \
62+
&& chown -R gen3:gen3 $HADOOP_HOME
3163

3264
ENV SQOOP_VERSION="1.4.7"
3365
ENV SQOOP_HOME="/sqoop" \
@@ -39,12 +71,13 @@ RUN wget -q ${SQOOP_INSTALLATION_URL} \
3971
&& mkdir -p $SQOOP_HOME \
4072
&& tar -xvf sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz -C ${SQOOP_HOME} --strip-components 1 \
4173
&& rm sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz \
42-
&& rm -rf $SQOOP_HOME/docs
74+
&& rm -rf $SQOOP_HOME/docs \
75+
&& chown -R gen3:gen3 $SQOOP_HOME
4376

4477
ENV POSTGRES_JAR_VERSION="42.2.9"
4578
ENV POSTGRES_JAR_URL="https://jdbc.postgresql.org/download/postgresql-${POSTGRES_JAR_VERSION}.jar" \
4679
POSTGRES_JAR_PATH=$SQOOP_HOME/lib/postgresql-${POSTGRES_JAR_VERSION}.jar \
47-
JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
80+
JAVA_HOME="/usr/lib/jvm/java-11-amazon-corretto"
4881

4982
RUN wget ${POSTGRES_JAR_URL} -O ${POSTGRES_JAR_PATH}
5083

@@ -63,25 +96,12 @@ ENV HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop" \
6396

6497
RUN mkdir -p $ACCUMULO_HOME $HIVE_HOME $HBASE_HOME $HCAT_HOME $ZOOKEEPER_HOME
6598

66-
ENV PATH=${SQOOP_HOME}/bin:${HADOOP_HOME}/sbin:$HADOOP_HOME/bin:${JAVA_HOME}/bin:${PATH}
67-
68-
WORKDIR /pelican
69-
70-
RUN pip install --upgrade pip
99+
RUN chown -R gen3:gen3 $ACCUMULO_HOME $HIVE_HOME $HBASE_HOME $HCAT_HOME $ZOOKEEPER_HOME $JAVA_HOME $POSTGRES_JAR_PATH
71100

72-
# install poetry
73-
RUN pip install --upgrade "poetry<1.2"
74-
75-
COPY . /$appname
76-
WORKDIR /$appname
77-
78-
# cache so that poetry install will run if these files change
79-
COPY poetry.lock pyproject.toml /$appname/
101+
ENV PATH=${SQOOP_HOME}/bin:${HADOOP_HOME}/sbin:$HADOOP_HOME/bin:${JAVA_HOME}/bin:${PATH}
80102

81-
# install package and dependencies via poetry
82-
RUN poetry config virtualenvs.create false \
83-
&& poetry install -vv --no-dev --no-interaction \
84-
&& poetry show -v
103+
# Switch to non-root user 'gen3' for the serving process
104+
USER gen3
85105

86106
ENV PYTHONUNBUFFERED=1
87107

import.Dockerfile

+54-35
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,54 @@
1-
FROM quay.io/cdis/python:python3.9-buster-2.0.0
1+
ARG AZLINUX_BASE_VERSION=master
2+
3+
# Base stage with python-build-base
4+
FROM quay.io/cdis/python-build-base:${AZLINUX_BASE_VERSION} AS base
25

36
ENV appname=pelican
47

5-
ENV DEBIAN_FRONTEND=noninteractive
8+
# create gen3 user
9+
# Create a group 'gen3' with GID 1000 and a user 'gen3' with UID 1000
10+
RUN groupadd -g 1000 gen3 && \
11+
useradd -m -s /bin/bash -u 1000 -g gen3 gen3
12+
13+
# Install pipx
14+
RUN python3 -m pip install pipx && \
15+
python3 -m pipx ensurepath
16+
17+
USER gen3
18+
# Install Poetry via pipx
19+
RUN pipx install poetry
20+
ENV PATH="/home/gen3/.local/bin:${PATH}"
21+
USER root
22+
23+
WORKDIR /${appname}
24+
25+
# Builder stage
26+
FROM base AS builder
27+
28+
RUN dnf update && dnf install -y \
29+
python3-devel \
30+
gcc \
31+
postgresql-devel
32+
33+
COPY . /${appname}
34+
35+
# cache so that poetry install will run if these files change
36+
COPY poetry.lock pyproject.toml /${appname}/
637

7-
#RUN mkdir -p /usr/share/man/man1
8-
#RUN mkdir -p /usr/share/man/man7
38+
RUN poetry install -vv --no-interaction --without dev
939

10-
RUN apt-get update && apt-get install -y --no-install-recommends \
11-
build-essential \
12-
libgnutls30 \
13-
openjdk-11-jre-headless \
14-
# dependency for pyscopg2
15-
libpq-dev \
16-
postgresql-client \
40+
# Final stage
41+
FROM base
42+
43+
COPY --from=builder /venv /venv
44+
COPY --from=builder /${appname} /${appname}
45+
46+
RUN dnf update && dnf install -y \
1747
wget \
18-
unzip \
19-
g++ \
20-
&& rm -rf /var/lib/apt/lists/*
48+
tar \
49+
java-11-amazon-corretto \
50+
gnutls \
51+
&& rm -rf /var/cache/yum
2152

2253
ENV HADOOP_VERSION="3.2.1"
2354
ENV HADOOP_HOME="/hadoop" \
@@ -27,7 +58,8 @@ RUN wget ${HADOOP_INSTALLATION_URL} \
2758
&& mkdir -p $HADOOP_HOME \
2859
&& tar -xvf hadoop-${HADOOP_VERSION}.tar.gz -C ${HADOOP_HOME} --strip-components 1 \
2960
&& rm hadoop-${HADOOP_VERSION}.tar.gz \
30-
&& rm -rf $HADOOP_HOME/share/doc
61+
&& rm -rf $HADOOP_HOME/share/doc \
62+
&& chown -R gen3:gen3 $HADOOP_HOME
3163

3264
ENV SQOOP_VERSION="1.4.7"
3365
ENV SQOOP_HOME="/sqoop" \
@@ -39,12 +71,13 @@ RUN wget -q ${SQOOP_INSTALLATION_URL} \
3971
&& mkdir -p $SQOOP_HOME \
4072
&& tar -xvf sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz -C ${SQOOP_HOME} --strip-components 1 \
4173
&& rm sqoop-${SQOOP_VERSION}.bin__hadoop-2.6.0.tar.gz \
42-
&& rm -rf $SQOOP_HOME/docs
74+
&& rm -rf $SQOOP_HOME/docs \
75+
&& chown -R gen3:gen3 $SQOOP_HOME
4376

4477
ENV POSTGRES_JAR_VERSION="42.2.9"
4578
ENV POSTGRES_JAR_URL="https://jdbc.postgresql.org/download/postgresql-${POSTGRES_JAR_VERSION}.jar" \
4679
POSTGRES_JAR_PATH=$SQOOP_HOME/lib/postgresql-${POSTGRES_JAR_VERSION}.jar \
47-
JAVA_HOME="/usr/lib/jvm/java-11-openjdk-amd64"
80+
JAVA_HOME="/usr/lib/jvm/java-11-amazon-corretto"
4881

4982
RUN wget ${POSTGRES_JAR_URL} -O ${POSTGRES_JAR_PATH}
5083

@@ -63,26 +96,12 @@ ENV HADOOP_CONF_DIR="$HADOOP_HOME/etc/hadoop" \
6396

6497
RUN mkdir -p $ACCUMULO_HOME $HIVE_HOME $HBASE_HOME $HCAT_HOME $ZOOKEEPER_HOME
6598

66-
ENV PATH=${SQOOP_HOME}/bin:${HADOOP_HOME}/sbin:$HADOOP_HOME/bin:${JAVA_HOME}/bin:${PATH}
67-
68-
WORKDIR /pelican
99+
RUN chown -R gen3:gen3 $ACCUMULO_HOME $HIVE_HOME $HBASE_HOME $HCAT_HOME $ZOOKEEPER_HOME $JAVA_HOME $POSTGRES_JAR_PATH
69100

70-
RUN pip install --upgrade pip
71-
72-
# install poetry
73-
RUN pip install --upgrade "poetry<1.2"
74-
75-
COPY . /$appname
76-
WORKDIR /$appname
77-
78-
# copy ONLY poetry artifact, install the dependencies but not fence
79-
# this will make sure that the dependencies are cached
80-
COPY poetry.lock pyproject.toml /$appname/
101+
ENV PATH=${SQOOP_HOME}/bin:${HADOOP_HOME}/sbin:$HADOOP_HOME/bin:${JAVA_HOME}/bin:${PATH}
81102

82-
# install package and dependencies via poetry
83-
RUN poetry config virtualenvs.create false \
84-
&& poetry install -vv --no-dev --no-interaction \
85-
&& poetry show -v
103+
# Switch to non-root user 'gen3' for the serving process
104+
USER gen3
86105

87106
ENV PYTHONUNBUFFERED=1
88107

job_export.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,11 @@
157157

158158
if access_format == "guid":
159159
# calculate md5 sum
160-
md5_sum = hashlib.md5()
160+
md5 = (
161+
hashlib.md5()
162+
if sys.version_info < (3, 9)
163+
else hashlib.md5(usedforsecurity=False)
164+
) # nosec
161165
chunk_size = 8192
162166
with open(fname, "rb") as f:
163167
while True:

pelican/dictionary.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
def init_dictionary(url):
88
d = DataDictionary(url=url)
99
dictionary.init(d)
10-
# the gdcdatamodel expects the dictionary to be initialized on load, so this can't be
10+
# the gen3datamodel expects the dictionary to be initialized on load, so this can't be
1111
# imported on module level
12-
from gdcdatamodel import models as md
12+
from gen3datamodel import models as md
1313

1414
return d, md
1515

0 commit comments

Comments
 (0)