daqc committed on
Commit
9bc9bf6
·
1 Parent(s): 81a8ee0

Add .env.template

Browse files
Files changed (1) hide show
  1. .env.template +103 -0
.env.template ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # =============================================================================
2
+ # REQUIRED CONFIGURATION
3
+ # =============================================================================
4
+ # HF中国镜像站 token with read/write permissions for repositories and inference API
5
+ # Get it from: https://huggingface.co/settings/tokens
6
+ HF_TOKEN=hf_...
7
+
8
+ # -----------------------------------------------------------------------------
9
+ # GENERATION SETTINGS
10
+ # -----------------------------------------------------------------------------
11
+ MAX_NUM_TOKENS=2048
12
+ MAX_NUM_ROWS=1000
13
+ DEFAULT_BATCH_SIZE=5
14
+
15
+ # Required for chat data generation with Llama or Qwen models
16
+ # Options: "llama3", "qwen2", or custom template string
17
+ #MAGPIE_PRE_QUERY_TEMPLATE=qwen2
18
+
19
+
20
+
21
+ # =============================================================================
22
+ # MODEL & SERVICES CONFIGURATION
23
+ # =============================================================================
24
+
25
+ # -----------------------------------------------------------------------------
26
+ # A. STANDALONE SETUP (No additional installation required)
27
+ # -----------------------------------------------------------------------------
28
+
29
+ # 1. HF中国镜像站 SERVERLESS (Recommended default)
30
+ # Just requires HF_TOKEN
31
+ # MODEL=meta-llama/Llama-3.1-8B-Instruct
32
+ # MODEL=Qwen/Qwen2.5-1.5B-Instruct
33
+
34
+ # 2. ARGILLA ON HF中国镜像站 SPACES (Recommended for data annotation)
35
+ # ARGILLA_API_URL=https://daqc-my-argilla.hf.space/
36
+ #ARGILLA_API_KEY=
37
+
38
+ # 3. OPENAI API
39
+ # Requires OpenAI API key
40
+ # OPENAI_BASE_URL=https://api.openai.com/v1/
41
+ # MODEL=gpt-4
42
+ # API_KEY=
43
+
44
+ # -----------------------------------------------------------------------------
45
+ # B. LOCAL SETUP (Requires local installation)
46
+ # -----------------------------------------------------------------------------
47
+
48
+ # 1. LOCAL OLLAMA
49
+ # Requires: Ollama installed (https://ollama.ai)
50
+ #OLLAMA_BASE_URL=http://127.0.0.1:11434/
51
+ #MODEL=qwen2.5:32b-instruct-q5_K_S
52
+ #TOKENIZER_ID=Qwen/Qwen2.5-32B-Instruct
53
+
54
+ # MODEL=deepseek-r1:1.5b
55
+ # TOKENIZER_ID=deepseek-r1:1.5b
56
+
57
+
58
+
59
+ # 2. LOCAL VLLM
60
+ # Requires: VLLM installed
61
+ # VLLM_BASE_URL=http://127.0.0.1:8000/
62
+ # MODEL=Qwen/Qwen2.5-1.5B-Instruct
63
+ # TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct
64
+
65
+ # 3. LOCAL TGI/ENDPOINTS
66
+ # Requires: Text Generation Inference installed
67
+ # HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
68
+ # TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct
69
+
70
+
71
+ # -----------------------------------------------------------------------------
72
+ # C. DOCKER SETUP (Ready to use with docker-compose, recommended for full setup)
73
+ # -----------------------------------------------------------------------------
74
+
75
+ # 1. DOCKER OLLAMA
76
+ OLLAMA_BASE_URL=http://ollama:11434
77
+ # Options for OLLAMA_HARDWARE: latest (for CPU/NVIDIA), rocm (for AMD)
78
+ OLLAMA_HARDWARE=latest
79
+
80
+
81
+ # DEEPSEEK R1
82
+ #MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
83
+ #TOKENIZER_ID=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
84
+ #MAGPIE_PRE_QUERY_TEMPLATE="<|begin▁of▁sentence|>User: " # use the custom template for the model (no space after "=", or the value will include it)
85
+
86
+ #LLAMA3.2
87
+ MODEL=llama3.2:1b # model for instruction generation
88
+ TOKENIZER_ID=meta-llama/Llama-3.2-1B-Instruct # tokenizer for instruction generation
89
+ MAGPIE_PRE_QUERY_TEMPLATE=llama3 # magpie template required for instruction generation
90
+
91
+
92
+ # 2. DOCKER ARGILLA (persistent data)
93
+ ARGILLA_API_URL=http://argilla:6900
94
+ ARGILLA_USERNAME=admin
95
+ ARGILLA_PASSWORD=admin1234
96
+ ARGILLA_API_KEY=admin.1234
97
+ ARGILLA_REINDEX_DATASET=1
98
+
99
+ # Usage:
100
+ # docker-compose --profile with-ollama --profile with-argilla build
101
+ # docker-compose --profile with-ollama up -d   (then open a new terminal)
102
+ # docker-compose exec ollama ollama run llama3.2:1b
103
+ # docker-compose --profile with-ollama --profile with-argilla up -d