abdull4h committed
Commit d0982e0 · verified · 1 Parent(s): 7c21387

Update app.py

Files changed (1)
  1. app.py +33 -103
app.py CHANGED
@@ -1,111 +1,41 @@
  import gradio as gr
- import torch
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

- # Add specific import for Spaces GPU
- try:
-     from huggingface_hub.hf_api import HfFolder
-     from huggingface_hub import SpaceStage
-     print("Hugging Face Hub imports successful")
- except ImportError:
-     print("Unable to import from huggingface_hub. Continuing without Spaces-specific handling.")

- # Try the specific spaces import for GPU
- try:
-     from huggingface_hub import spaces
-     # If this works, we'll use it later
-     HAS_SPACES_GPU = True
-     print("Successfully imported spaces from huggingface_hub")
- except ImportError:
-     HAS_SPACES_GPU = False
-     print("spaces module not available from huggingface_hub")

- # Define a function that handles the spaces.GPU decorator if available
- def main_wrapper():
-     # For Spaces that support the @spaces.GPU decorator
-     if HAS_SPACES_GPU:
-         print("Using @spaces.GPU decorator")
-         @spaces.GPU
-         def main_with_gpu():
-             run_app()
-         main_with_gpu()
-     else:
-         # Fallback for environments that don't support the decorator
-         print("Running without @spaces.GPU decorator")
-         run_app()

- def run_app():
-     # Check GPU availability
-     if torch.cuda.is_available():
-         print(f"GPU is available: {torch.cuda.get_device_name(0)}")
-         device = "cuda"
-     else:
-         print("WARNING: GPU not available, using CPU instead. This will be very slow.")
-         device = "cpu"
-
-     # Use a smaller model as a starting point
-     model_name = "ALLaM-AI/ALLaM-7B-Instruct-preview"  # Start with a smaller model to test
-
-     try:
-         # Load tokenizer with appropriate options
-         tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-         # Load model with appropriate device mapping
-         model_kwargs = {
-             "device_map": "auto" if device == "cuda" else None,
-         }
-
-         if device == "cuda":
-             model_kwargs["torch_dtype"] = torch.float16
-
-         model = AutoModelForCausalLM.from_pretrained(model_name, **model_kwargs)
-
-         # Create pipeline
-         text_generator = pipeline(
-             "text-generation",
-             model=model,
-             tokenizer=tokenizer,
-             device=0 if device == "cuda" else -1
-         )

-         # Define the generation function
-         def generate_story(prompt, max_length, temperature):
-             try:
-                 response = text_generator(
-                     prompt,
-                     max_length=max_length,
-                     temperature=temperature,
-                     do_sample=True
-                 )
-                 return response[0]["generated_text"]
-             except Exception as e:
-                 return f"Error generating text: {str(e)}"
-
-         # Create the Gradio interface
-         demo = gr.Interface(
-             fn=generate_story,
-             inputs=[
-                 gr.Textbox(lines=3, placeholder="Enter a story prompt here...", label="Story Prompt"),
-                 gr.Slider(minimum=50, maximum=500, step=50, value=200, label="Max Length"),
-                 gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature")
-             ],
-             outputs=gr.Textbox(label="Generated Story"),
-             title="Story Generator",
-             description="Generate creative stories using a language model. Enter your prompt and let the AI craft a story for you!"
-         )
-
-         # Launch the demo
-         demo.launch()
-
-     except Exception as e:
-         print(f"Error initializing model: {str(e)}")
-         # Create a simple error interface
-         gr.Interface(
-             fn=lambda x: f"Error: {str(e)}. Please check the logs.",
-             inputs=gr.Textbox(),
-             outputs=gr.Textbox(),
-             title="Error Initializing Model"
-         ).launch()

- if __name__ == "__main__":
-     main_wrapper()
 
  import gradio as gr
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

+ # Define the model name
+ model_name = "ALLaM-AI/ALLaM-7B-Instruct-preview"

+ # Load tokenizer and model (disable fast tokenizer)
+ tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

+ # Create a text-generation pipeline
+ generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

+ def generate_story(prompt, max_length, temperature):
+     """
+     Generates a story based on the input prompt.
+     """
+     response = generator(
+         prompt,
+         max_length=max_length,
+         temperature=temperature,
+         do_sample=True
+     )
+     # Return the generated text
+     return response[0]["generated_text"]

+ # Define the Gradio interface
+ demo = gr.Interface(
+     fn=generate_story,
+     inputs=[
+         gr.Textbox(lines=3, placeholder="Enter a story prompt here...", label="Story Prompt"),
+         gr.Slider(minimum=50, maximum=1000, step=50, value=300, label="Max Length"),
+         gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature")
+     ],
+     outputs=gr.Textbox(label="Generated Story"),
+     title="Story Teller",
+     description="Generate creative stories using the ALLaM-7B model. Enter your prompt and let the model craft a story for you!"
+ )

+ # Launch the app
+ demo.launch()
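
For a quick check outside the Space, the same pipeline the updated app.py builds can be exercised once from a plain Python script instead of the Gradio UI. The sketch below only mirrors the loading code from this commit; the quick_test.py name and the sample prompt are illustrative assumptions, and running it still needs the accelerate package (for device_map="auto") plus enough memory for the 7B model.

# quick_test.py - illustrative sketch, not part of this commit.
# Rebuilds the same tokenizer/model/pipeline as app.py and runs one generation,
# without launching the Gradio interface.
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

model_name = "ALLaM-AI/ALLaM-7B-Instruct-preview"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Sample prompt and settings mirroring the UI defaults (max length 300, temperature 0.7).
result = generator(
    "Once upon a time in a quiet desert town,",
    max_length=300,
    temperature=0.7,
    do_sample=True,
)
print(result[0]["generated_text"])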