Commit 40f5d26 (unverified) by davidberenstein1957
Parents: 57b7e7b 4e323c8

Merge pull request #26 from mcdaqc/main
.dockerignore ADDED
@@ -0,0 +1,57 @@
+# Version control
+.git
+.gitignore
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+.env*
+!.env.example
+.venv
+env/
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Testing
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Project specific
+nltk_data/
+.pdm-python
+.pdm.toml
+__pypackages__/
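One detail worth noting is the negation pattern: `.env*` keeps every env file out of the build context, while `!.env.example` re-includes just the example. If you want to confirm what actually enters the context, one sketch (using a hypothetical throwaway Dockerfile at `/tmp/ctx.Dockerfile`) is to build an image that simply lists what it received:

```bash
# Throwaway Dockerfile that prints the top level of the received build context
cat > /tmp/ctx.Dockerfile <<'EOF'
FROM busybox
COPY . /ctx
RUN find /ctx -maxdepth 1 | sort
EOF
# The context root's .dockerignore still applies when building with -f
docker build -f /tmp/ctx.Dockerfile --no-cache --progress=plain .
```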
.env.local.template ADDED
@@ -0,0 +1,54 @@
+# =============================================================================
+# LOCAL/API CONFIGURATION
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# REQUIRED CONFIGURATION
+# -----------------------------------------------------------------------------
+# Hugging Face token (required for all setups)
+HF_TOKEN=hf_...
+
+# Generation Settings
+MAX_NUM_TOKENS=2048
+MAX_NUM_ROWS=1000
+DEFAULT_BATCH_SIZE=5
+
+# Required for chat data generation with Llama or Qwen models
+# Options: "llama3", "qwen2", or custom template string
+MAGPIE_PRE_QUERY_TEMPLATE=llama3
+
+# -----------------------------------------------------------------------------
+# A. CLOUD API SERVICES
+# -----------------------------------------------------------------------------
+
+# 1. HUGGING FACE INFERENCE API (Default, Recommended)
+MODEL=meta-llama/Llama-3.1-8B-Instruct
+# MODEL=Qwen/Qwen2.5-1.5B-Instruct
+
+# 2. OPENAI API
+# OPENAI_BASE_URL=https://api.openai.com/v1/
+# MODEL=gpt-4
+# API_KEY=sk-...
+
+# 3. HUGGING FACE SPACE FOR ARGILLA (optional)
+# ARGILLA_API_URL=https://your-space.hf.space/
+# ARGILLA_API_KEY=your_key
+
+# -----------------------------------------------------------------------------
+# B. LOCAL SERVICES (Requires Installation)
+# -----------------------------------------------------------------------------
+
+# 1. LOCAL OLLAMA
+# OLLAMA_BASE_URL=http://127.0.0.1:11434/
+# MODEL=llama3.2:1b
+# TOKENIZER_ID=meta-llama/Llama-3.2-1B-Instruct
+
+# 2. LOCAL VLLM
+# VLLM_BASE_URL=http://127.0.0.1:8000/
+# MODEL=Qwen/Qwen2.5-1.5B-Instruct
+# TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct
+
+# 3. LOCAL TGI
+# HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
+# MODEL=meta-llama/Llama-3.1-8B-Instruct
+# TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct
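Every configuration path above depends on a valid token, so it is worth sanity-checking it before launching. A minimal sketch, assuming the variables are loaded into your shell and that your `huggingface_hub` version reads `HF_TOKEN` from the environment (recent releases do):

```bash
# Load the template values into the current shell (simple sketch; adjust for your shell)
export $(grep -v '^#' .env | xargs)

# Should print your account details rather than an authentication error
huggingface-cli whoami
```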
.gitignore CHANGED
@@ -167,4 +167,7 @@ cython_debug/
 nltk_data/
 
 # examples
-models/
+models/
+
+# Elasticsearch data
+elasticsearch_data/
README.md CHANGED
@@ -108,6 +108,12 @@ To save the generated datasets to a local directory instead of pushing them to t
 
 - `SAVE_LOCAL_DIR`: The local directory to save the generated datasets to.
 
+You can use our environment template as a starting point:
+
+```bash
+cp .env.local.template .env
+```
+
 ### Argilla integration
 
 Argilla is an open source tool for data curation. It allows you to annotate and review datasets, and push curated datasets to the Hugging Face Hub. You can easily get started with Argilla by following the [quickstart guide](https://docs.argilla.io/latest/getting_started/quickstart/).
@@ -138,3 +144,20 @@ Run the app:
 ```bash
 python app.py
 ```
+
+## 🐳 Docker Setup
+
+Quick setup with all services (App + Ollama + Argilla):
+
+```bash
+# Copy environment template
+cp docker/.env.docker.template .env  # Add your HF_TOKEN in .env
+
+# Build all services (this may take a few minutes)
+docker compose -f docker-compose.yml -f docker/ollama/compose.yml -f docker/argilla/compose.yml build
+
+# Start all services
+docker compose -f docker-compose.yml -f docker/ollama/compose.yml -f docker/argilla/compose.yml up -d
+```
+
+> For more detailed Docker configurations and setups, check [docker/README.md](docker/README.md)
app.py CHANGED
@@ -1,3 +1,4 @@
 from synthetic_dataset_generator import launch
 
-launch()
+if __name__ == "__main__":
+    launch()
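The guard means importing `app.py` no longer launches the server as a side effect, while direct execution still works; the container starts the app through the package module instead. A sketch of the two entry points, assuming the dependencies are installed:

```bash
# Direct execution still launches the UI
python app.py

# Module-style startup, as the Dockerfile's CMD uses
python -m synthetic_dataset_generator
```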
docker-compose.yml ADDED
@@ -0,0 +1,17 @@
+services:
+  app:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile
+    image: synthetic-data-generator:app
+    ports:
+      - "7860:7860"
+    env_file:
+      - .env
+    networks:
+      - app-network
+
+networks:
+  app-network:
+    name: synthetic-data-network
+    driver: bridge
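For the app-only configuration this single file is enough; a quick smoke test after bringing it up might look like this:

```bash
# Build and start only the core app
docker compose up -d --build

# The Gradio UI should respond on the published port once startup completes
curl -I http://localhost:7860

# Tail the logs if it does not
docker compose logs -f app
```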
docker/.env.docker.template ADDED
@@ -0,0 +1,43 @@
+# =============================================================================
+# DOCKER CONFIGURATION ONLY - FULL SETUP (APP + OLLAMA + ARGILLA)
+# =============================================================================
+
+# Note: Before building:
+# 1. Copy this template to the root directory: cp docker/.env.docker.template .env
+# 2. Comment/uncomment the sections you want to use (OLLAMA and/or ARGILLA)
+# 3. Then build and run with the appropriate docker compose command
+
+# Hugging Face token with read/write permissions
+HF_TOKEN=your_token_here
+
+# -----------------------------------------------------------------------------
+# GENERATION SETTINGS
+# -----------------------------------------------------------------------------
+MAX_NUM_TOKENS=2048
+MAX_NUM_ROWS=1000
+DEFAULT_BATCH_SIZE=5
+
+# -----------------------------------------------------------------------------
+# OLLAMA DOCKER CONFIGURATION
+# -----------------------------------------------------------------------------
+OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_HARDWARE=latest  # latest (for CPU/NVIDIA), rocm (for AMD)
+
+# LLAMA 3.2
+MODEL=llama3.2:1b
+TOKENIZER_ID=meta-llama/Llama-3.2-1B-Instruct
+MAGPIE_PRE_QUERY_TEMPLATE=llama3
+
+# DEEPSEEK R1
+#MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+#TOKENIZER_ID=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+#MAGPIE_PRE_QUERY_TEMPLATE= "<|begin▁of▁sentence|>User: "
+
+# -----------------------------------------------------------------------------
+# ARGILLA DOCKER CONFIGURATION (persistent data)
+# -----------------------------------------------------------------------------
+ARGILLA_API_URL=http://argilla:6900
+ARGILLA_USERNAME=admin
+ARGILLA_PASSWORD=admin1234
+ARGILLA_API_KEY=admin.1234
+ARGILLA_REINDEX_DATASET=1
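Because these values feed several merged compose files, it can help to render the final configuration before building; `docker compose config` prints the merged YAML with the `.env` values substituted:

```bash
# Verify that OLLAMA_HARDWARE, MODEL, and the Argilla credentials
# interpolate the way you expect across the merged files
docker compose -f docker-compose.yml \
               -f docker/ollama/compose.yml \
               -f docker/argilla/compose.yml config
```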
docker/Dockerfile ADDED
@@ -0,0 +1,45 @@
+# Use Python slim image as base
+FROM python:3.10-slim
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1
+
+# Create and set working directory
+WORKDIR /app
+
+# Create non-root user first
+RUN useradd -m -u 1000 appuser
+
+# Install system dependencies including build tools
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    build-essential \
+    cmake \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    libsm6 \
+    libxext6 \
+    libxrender-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install pdm
+RUN pip install --no-cache-dir pdm
+
+# Copy project files and set permissions
+COPY . .
+RUN chown -R appuser:appuser /app && \
+    chmod -R 755 /app
+
+# Switch to non-root user
+USER appuser
+
+# Install dependencies in a virtual environment
+RUN pdm install --prod --frozen-lockfile
+
+# Expose Gradio port
+EXPOSE 7860
+
+# Start command using pdm run to use the virtual environment
+CMD ["pdm", "run", "python", "-m", "synthetic_dataset_generator"]
docker/README.md ADDED
@@ -0,0 +1,76 @@
+# Docker Configuration Guide
+
+The application can be run with different configurations using Docker Compose:
+
+- `docker-compose.yml`: Core application
+- `docker/ollama/compose.yml`: Ollama service for local LLM inference
+- `docker/argilla/compose.yml`: Argilla service for data curation
+
+## Ollama Integration
+
+The `MODEL` variable in your `.env` file determines which model Ollama will download and use. For example:
+```env
+MODEL=llama3.2:1b
+```
+
+## Setup Options
+
+### Full Setup (App + Ollama + Argilla)
+```bash
+# Keep all sections uncommented in .env
+docker compose -f docker-compose.yml -f docker/ollama/compose.yml -f docker/argilla/compose.yml build
+docker compose -f docker-compose.yml -f docker/ollama/compose.yml -f docker/argilla/compose.yml up -d
+```
+
+### App + Ollama
+```bash
+# Comment out the ARGILLA section in .env
+docker compose -f docker-compose.yml -f docker/ollama/compose.yml build
+docker compose -f docker-compose.yml -f docker/ollama/compose.yml up -d
+```
+
+### App + Argilla
+```bash
+# Comment out the OLLAMA section in .env
+docker compose -f docker-compose.yml -f docker/argilla/compose.yml build
+docker compose -f docker-compose.yml -f docker/argilla/compose.yml up -d
+```
+
+### App Only
+```bash
+# Comment out both the OLLAMA and ARGILLA sections in .env
+docker compose -f docker-compose.yml build
+docker compose -f docker-compose.yml up -d
+```
+
+## Managing Services
+
+Services are built separately but linked together. If you already have some services built and want to add another:
+
+1. You don't need to rebuild existing services
+2. Just build the new service
+3. Stop everything with `down` and start again with `up`
+
+For example, if you have App + Ollama and want to add Argilla:
+```bash
+docker compose -f docker/argilla/compose.yml build  # only build Argilla
+docker compose -f docker-compose.yml -f docker/ollama/compose.yml -f docker/argilla/compose.yml down
+docker compose -f docker-compose.yml -f docker/ollama/compose.yml -f docker/argilla/compose.yml up -d
```

+
+Similarly, if you have built all services but want to run only some of them:
+> **Important**: When running specific services, remember to comment out unused services in `.env` first
+
+```bash
+# No need to build again, just start the services you need
+docker compose -f docker-compose.yml -f docker/ollama/compose.yml up -d  # start only App + Ollama
+```
+
+## Service URLs
+
+Once running, access the services at:
+- App: http://localhost:7860
+- Argilla: http://localhost:6900 (if enabled)
+- Ollama: http://localhost:11434 (if enabled)
+
+> Note: Services become available a few seconds after startup while they initialize. Ollama models and Argilla datasets are persisted and remain available after restarts.
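Whichever combination you run, the usual compose introspection commands work across the merged files; for example, sketched for the full setup:

```bash
# List services and their health state
docker compose -f docker-compose.yml -f docker/ollama/compose.yml \
  -f docker/argilla/compose.yml ps

# Follow one service's logs, e.g. while Ollama pulls its model
docker compose -f docker-compose.yml -f docker/ollama/compose.yml logs -f ollama
```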
docker/argilla/compose.yml ADDED
@@ -0,0 +1,118 @@
+services:
+  app:
+    extends:
+      file: docker-compose.yml
+      service: app
+    depends_on:
+      argilla:
+        condition: service_healthy
+        required: false
+    environment:
+      - ARGILLA_API_URL=http://argilla:6900
+
+  elasticsearch:
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.17.0
+    environment:
+      - ES_JAVA_OPTS=-Xms512m -Xmx512m
+      - node.name=elasticsearch
+      - cluster.name=es-argilla-local
+      - discovery.type=single-node
+      - cluster.routing.allocation.disk.threshold_enabled=false
+      - xpack.security.enabled=false
+    volumes:
+      - es_data:/usr/share/elasticsearch/data
+    networks:
+      - app-network
+    ports:
+      - "9200:9200"
+      - "9300:9300"
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+      nofile:
+        soft: 65536
+        hard: 65536
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9200"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  postgres:
+    image: postgres:14
+    environment:
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: postgres
+      POSTGRES_DB: argilla
+    networks:
+      - app-network
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+
+  redis:
+    image: redis
+    networks:
+      - app-network
+
+  argilla:
+    image: argilla/argilla-server:latest
+    ports:
+      - "6900:6900"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:6900/api/ready"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    env_file:
+      - .env
+    environment:
+      - ARGILLA_HOME_PATH=/var/lib/argilla
+      - ARGILLA_ELASTICSEARCH=http://elasticsearch:9200
+      - ARGILLA_DATABASE_URL=postgresql+asyncpg://postgres:postgres@postgres:5432/argilla
+      - ARGILLA_REDIS_URL=redis://redis:6379/0
+      - USERNAME=${ARGILLA_USERNAME}
+      - PASSWORD=${ARGILLA_PASSWORD}
+      - API_KEY=${ARGILLA_API_KEY}
+      - WORKSPACE=default
+    volumes:
+      - argilla_data:/argilla
+    networks:
+      - app-network
+    depends_on:
+      elasticsearch:
+        condition: service_healthy
+      postgres:
+        condition: service_started
+      redis:
+        condition: service_started
+
+  worker:
+    image: argilla/argilla-server:latest
+    env_file:
+      - .env
+    environment:
+      - ARGILLA_HOME_PATH=/var/lib/argilla
+      - ARGILLA_ELASTICSEARCH=http://elasticsearch:9200
+      - ARGILLA_DATABASE_URL=postgresql+asyncpg://postgres:postgres@postgres:5432/argilla
+      - ARGILLA_REDIS_URL=redis://redis:6379/0
+      - BACKGROUND_NUM_WORKERS=2
+      - USERNAME=${ARGILLA_USERNAME}
+      - PASSWORD=${ARGILLA_PASSWORD}
+      - API_KEY=${ARGILLA_API_KEY}
+      - WORKSPACE=default
+    networks:
+      - app-network
+    depends_on:
+      - postgres
+      - elasticsearch
+      - redis
+    command: sh -c 'python -m argilla_server worker --num-workers $${BACKGROUND_NUM_WORKERS}'
+
+volumes:
+  es_data:
+    name: synthetic-data-es
+  argilla_data:
+    name: synthetic-data-argilla
+  postgres_data:
+    name: synthetic-data-postgres
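Since the Elasticsearch and Argilla ports are published to the host, the same endpoints the healthchecks poll can be hit manually while the stack warms up:

```bash
# Elasticsearch single-node should answer with cluster info
curl http://localhost:9200

# Argilla's readiness endpoint (the one its healthcheck uses)
curl http://localhost:6900/api/ready
```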
docker/ollama/compose.yml ADDED
@@ -0,0 +1,48 @@
+services:
+  app:
+    extends:
+      file: docker-compose.yml
+      service: app
+    depends_on:
+      ollama:
+        condition: service_healthy
+        required: true
+    environment:
+      - OLLAMA_BASE_URL=http://ollama:11434
+
+  ollama:
+    image: ollama/ollama:${OLLAMA_HARDWARE:-latest}
+    ports:
+      - "11434:11434"
+    env_file:
+      - .env
+    environment:
+      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-}
+    volumes:
+      - ollama_data:/root/.ollama
+      - ./docker/ollama/entrypoint.sh:/entrypoint.sh
+    networks:
+      - app-network
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    tty: true
+    entrypoint: ["/usr/bin/bash", "/entrypoint.sh"]
+    healthcheck:
+      test:
+        - "CMD-SHELL"
+        - |
+          test -f /tmp/ollama_ready && \
+          bash -c '</dev/tcp/localhost/11434'
+      interval: 10s
+      timeout: 10s
+      retries: 100
+      start_period: 10s
+
+volumes:
+  ollama_data:
+    name: synthetic-data-ollama
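The healthcheck keys off a marker file the entrypoint creates once the model is pulled; both can be inspected from the host:

```bash
# List the models Ollama has in its persistent volume
docker compose -f docker-compose.yml -f docker/ollama/compose.yml exec ollama ollama list

# The marker the healthcheck waits for; prints "ready" once the pull finished
docker compose -f docker-compose.yml -f docker/ollama/compose.yml \
  exec ollama test -f /tmp/ollama_ready && echo ready
```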
docker/ollama/entrypoint.sh ADDED
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Start Ollama in the background
+/bin/ollama serve &
+# Record the process ID
+pid=$!
+
+# Pause for Ollama to start
+sleep 5
+
+# Extract the model name from the MODEL variable (removing quotes if present)
+MODEL_NAME=$(echo $MODEL | tr -d '"')
+
+# Verify that MODEL_NAME has a value
+if [ -z "$MODEL_NAME" ]; then
+    echo "❌ No model specified in MODEL environment variable"
+else
+    # Check if the model already exists
+    if ollama list | grep -q "$MODEL_NAME"; then
+        echo "🟢 Model ($MODEL_NAME) already installed"
+        touch /tmp/ollama_ready
+    else
+        echo "🔴 Retrieving model ($MODEL_NAME)..."
+        # Try to pull the model, only creating the ready marker once we are sure it succeeded
+        if ollama pull "$MODEL_NAME" 2>/dev/null && ollama list | grep -q "$MODEL_NAME"; then
+            echo "🟢 Model download complete!"
+            touch /tmp/ollama_ready
+        else
+            echo "❌ Error downloading model ($MODEL_NAME)"
+        fi
+    fi
+fi
+
+# Wait for the Ollama process to finish
+wait $pid
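Because the script pulls whatever `MODEL` names, switching models is just an `.env` edit plus a recreate; a sketch with GNU sed (remember to update `TOKENIZER_ID` to the matching Hugging Face repo as well):

```bash
# Point MODEL at a different Ollama tag, then recreate the service;
# the entrypoint pulls the new model into the persistent volume
sed -i 's/^MODEL=.*/MODEL=llama3.2:3b/' .env
docker compose -f docker-compose.yml -f docker/ollama/compose.yml \
  up -d --force-recreate ollama
```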
src/synthetic_dataset_generator/__init__.py CHANGED
@@ -1,5 +1,4 @@
 import inspect
-
 from gradio import TabbedInterface
 
 from synthetic_dataset_generator import (  # noqa
@@ -7,15 +6,13 @@ from synthetic_dataset_generator import (  # noqa
     _inference_endpoints,
 )
 
-
 def launch(*args, **kwargs):
     """Launch the synthetic dataset generator.
     Based on the `TabbedInterface` from Gradio.
     Parameters: https://www.gradio.app/docs/gradio/tabbedinterface
     """
     from synthetic_dataset_generator.app import demo
-
-    return demo.launch(*args, **kwargs)
+    return demo.launch(*args, server_name="0.0.0.0", **kwargs)
 
 
 launch.__doc__ = TabbedInterface.launch.__doc__
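Hard-coding `server_name="0.0.0.0"` makes Gradio bind all interfaces, which is what lets the published container port reach the server; with Gradio's default `127.0.0.1` bind the port mapping would exist but connections from the host would fail. A quick check from the host:

```bash
# Succeeds with the 0.0.0.0 bind; would fail if Gradio only bound
# 127.0.0.1 inside the container, despite the 7860:7860 mapping
curl -sS -o /dev/null -w '%{http_code}\n' http://localhost:7860
```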
src/synthetic_dataset_generator/app.py CHANGED
@@ -17,6 +17,9 @@ button[role="tab"][aria-selected="true"]:hover {border-color: var(--button-prima
 .table-wrap .tbody td {vertical-align: top}
 #system_prompt_examples {color: var(--body-text-color) !important; background-color: var(--block-background-fill) !important;}
 .container {padding-inline: 0 !important}
+.gradio-container { width: 100% !important; }
+.gradio-row { display: flex !important; flex-direction: row !important; }
+.gradio-column { flex: 1 !important; min-width: 0 !important; }
 #sign_in_button {flex-grow: 0; width: auto !important; display: flex; align-items: center; justify-content: center; margin: 0 auto;}
 .datasets {height: 70px;}
 """
src/synthetic_dataset_generator/apps/base.py CHANGED
@@ -131,6 +131,9 @@ def show_success_message(org_name: str, repo_name: str) -> gr.Markdown:
         max_height=None,
     )
     argilla_api_url = client.api_url
+    # Transform Docker internal URL to localhost if needed
+    if "argilla:" in argilla_api_url:
+        argilla_api_url = argilla_api_url.replace("argilla:", "127.0.0.1:")
     return gr.Markdown(
         value=f"""
         <div style="padding: 1em; background-color: var(--block-background-fill); border-color: var(--border-color-primary); border-width: 1px; border-radius: 5px;">