-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile
More file actions
121 lines (100 loc) · 3.16 KB
/
Dockerfile
File metadata and controls
121 lines (100 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Base image: NVIDIA's CUDA 11.6.2 "base" flavour on top of Ubuntu 20.04
FROM nvidia/cuda:11.6.2-base-ubuntu20.04
# Build-time only: keeps apt/debconf (e.g. the tzdata configuration) from
# prompting during the RUN steps below. Declared as ARG rather than ENV so the
# setting is visible to every RUN in this stage but is not baked into the
# environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive
# Runtime environment variables: container time zone and the TORCH_HOME directory
ENV TZ=Europe/Madrid \
    TORCH_HOME=/torch/
# ENV MALLET_HOME /app/Mallet
# Install Vim, Git, Java (OpenJDK 8) and Ant.
# Recommended packages are intentionally still installed here (no
# --no-install-recommends), exactly as before.
# The apt package lists are removed in the same layer so they do not end up in
# the image; every later apt step runs its own `apt-get update`.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y \
        ant \
        git \
        openjdk-8-jdk \
        vim \
    && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Toolchain, development headers and wget needed to download and compile
# CPython from source in the next step (one package per line, sorted).
# The apt cache and package lists are dropped in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        libbz2-dev \
        libenchant-dev \
        libffi-dev \
        libgdbm-dev \
        libncurses5-dev \
        libnss3-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        tzdata \
        wget \
        zlib1g-dev \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# Build CPython 3.11.1 from the upstream source tarball.
# `make altinstall` installs the versioned binaries (python3.11 / pip3.11);
# the two symlinks then expose them as plain `python` and `pip`.
# The tarball and the source tree are deleted in the same layer.
RUN PY_VERSION=3.11.1 && \
    wget "https://www.python.org/ftp/python/${PY_VERSION}/Python-${PY_VERSION}.tgz" && \
    tar xzf "Python-${PY_VERSION}.tgz" && \
    ( cd "Python-${PY_VERSION}" && \
      ./configure --enable-optimizations && \
      make altinstall ) && \
    ln -s /usr/local/bin/python3.11 /usr/local/bin/python && \
    ln -s /usr/local/bin/pip3.11 /usr/local/bin/pip && \
    rm -rf "Python-${PY_VERSION}.tgz" "Python-${PY_VERSION}"
# Install other necessary dependencies (hunspell-es package).
# -y is required: without it apt-get asks for confirmation whenever extra
# packages have to be installed, and with no terminal attached the build aborts.
# Package lists are removed in the same layer, as in the other apt steps.
RUN apt-get update && apt-get install -y --no-install-recommends \
    hunspell-es \
    && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Set the working directory; relative paths in the instructions below resolve against /app
WORKDIR /app
# Upgrade pip.
# All pip steps use --no-cache-dir so pip's download cache is not stored in the image layers.
RUN pip install --no-cache-dir --upgrade pip
# Install additional dependencies for HDBSCAN
RUN apt-get update && apt-get install -y --no-install-recommends \
    libopenblas-dev \
    && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
# Install HDBSCAN
RUN pip install --no-cache-dir hdbscan
# Install pytorch
# RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
RUN pip install --no-cache-dir torch
# Upgrade the packaging tools before installing the project requirements
RUN pip install --no-cache-dir wheel setuptools pip --upgrade
# Copy the requirements file only at this point: the layers above do not depend
# on it, so editing requirements.txt no longer invalidates the cached
# HDBSCAN / torch layers. The order of the pip installs themselves is unchanged.
COPY requirements.txt .
# Install requirements
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir "dask[complete]"
#RUN python -m pip install "dask[dataframe]" --upgrade
# Download the two spaCy pipelines (es_dep_news_trf, then es_core_news_lg)
RUN python -m spacy download es_dep_news_trf && \
    python -m spacy download es_core_news_lg
# Clone the Mallet repository and build it with Ant in a single layer.
# --depth 1 fetches only the latest commit: the build needs the working tree,
# not the repository history.
# The clone target is spelled out as /app/Mallet so it matches the directory
# the build step changes into, independent of the current WORKDIR.
RUN git clone --depth 1 https://github.com/mimno/Mallet.git /app/Mallet && \
    cd /app/Mallet && \
    ant
# Download and cache the sentence transformer model at build time.
# MODEL_NAME can be overridden with `docker build --build-arg MODEL_NAME=...`.
# The name is handed to Python as a command-line argument (sys.argv[1]) instead
# of being spliced into the Python source text, so a value containing quotes or
# backslashes cannot break or alter the snippet.
ARG MODEL_NAME=sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
RUN python -c "import sys; from sentence_transformers import SentenceTransformer; SentenceTransformer(sys.argv[1])" "${MODEL_NAME}"
# Fetch the fastText lid.176.ftz model into /app/models
# (directory creation and download done in one step)
RUN mkdir /app/models && \
    wget -O /app/models/lid.176.ftz https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz
# Selective copies kept for reference (currently disabled):
#COPY config/ config/
#COPY src/ src/
#COPY *.py ./
# Copy the whole build context into the working directory (/app)
COPY . /app/
# Print the interpreter's module search path, one entry per line, into the build log
RUN python -c "import sys; print(*sys.path, sep='\n')"
# Default command: start an interactive shell
#CMD ["python", "preprocess.py"]
CMD ["/bin/bash"]
# docker build -t next_proc .
# docker run \
# --gpus all \
# --name some_name \
# --rm \
# -it \
# -v /path/to/your/local/data:/app/data \
# next_proc
# docker run \
# --gpus all \
# --name preprocess \
# --rm \
# -v ./data/:/app/data/ \
# next_proc