Update app.py

app.py CHANGED
@@ -409,6 +409,9 @@ class Music2emo:
         midi.instruments.append(instrument)
         midi.write(save_path.replace('.lab', '.midi'))
 
+
+
+
         try:
             midi_file = converter.parse(save_path.replace('.lab', '.midi'))
             key_signature = str(midi_file.analyze('key'))
@@ -543,6 +546,14 @@ class Music2emo:
             "predicted_moods_all": predicted_moods_with_scores_all
         }
 
+        # predicted_moods = [mood_list[i] for i, p in enumerate(probs.squeeze().tolist()) if p > threshold]
+        # valence, arousal = regression_output.squeeze().tolist()
+        # model_output_dic = {
+        #     "valence": valence,
+        #     "arousal": arousal,
+        #     "predicted_moods": predicted_moods
+        # }
+
         return model_output_dic
 
 # Music2Emo Model Initialization
@@ -595,7 +606,7 @@ def plot_valence_arousal(valence, arousal):
 
     return fig
 
-
+
 # Prediction Formatting
 def format_prediction(model_output_dic):
     """Format the model output in a structured format"""
@@ -611,13 +622,13 @@ def format_prediction(model_output_dic):
     # Format mood output with scores
     if predicted_moods_with_scores:
         moods_text = ", ".join(
-            [f"
+            [f"{m['mood']} ({m['score']:.2f})" for m in predicted_moods_with_scores]
         )
     else:
         moods_text = "No significant moods detected."
 
     # Create formatted output
-    output_text = f"""
+    output_text = f"""🎭 Predicted Mood Tags: {moods_text}
 
 💖 Valence: {valence:.2f} (Scale: 1-9)
 ⚡ Arousal: {arousal:.2f} (Scale: 1-9)"""
@@ -625,28 +636,15 @@ def format_prediction(model_output_dic):
     return output_text, va_chart, mood_chart
 
 # Gradio UI Elements
-title
-# description_text = "Upload an audio file to analyze its emotional characteristics using Music2Emo. The model will predict: • Mood tags describing the emotional content • Valence score (1-9 scale, representing emotional positivity) • Arousal score (1-9 scale, representing emotional intensity) "
+title="Music2Emo: Toward Unified Music Recognition"
 description_text = """
-<p>
-<br/><br/>
-
-<p/>
+<p> Upload an audio file to analyze its emotional characteristics using Music2Emo. The model will predict: 1) Mood tags describing the emotional content, 2) Valence score (1-9 scale, representing emotional positivity), and 3) Arousal score (1-9 scale, representing emotional intensity)
+<br/><br/> This is the demo for Music2Emo for music emotion recognition: <a href="https://arxiv.org/abs/2502.03979">Read our paper.</a>
+</p>
 """
 
-
-
 # Custom CSS Styling
 css = """
-#output-text {
-    font-family: 'Inter', sans-serif;
-    white-space: pre-wrap;
-    font-size: 14px;
-    padding: 0spx;
-    border-radius: 8px;
-    border-left: 5px solid #4CAF50;
-    margin: 0px 0;
-    }
 .gradio-container {
     font-family: 'Inter', -apple-system, system-ui, sans-serif;
 }
@@ -656,10 +654,22 @@ css = """
     border-radius: 8px;
     padding: 10px;
 }
+/* Add padding to the top of the two plot boxes */
+.gr-box {
+    padding-top: 25px !important;
+}
 """
+
 with gr.Blocks(css=css) as demo:
     gr.HTML(f"<h1 style='text-align: center;'>{title}</h1>")
     gr.Markdown(description_text)
+
+    # Notes Section
+    gr.Markdown("""
+    ### 📝 Notes:
+    - **Supported audio formats:** MP3, WAV
+    - **Recommended:** High-quality audio files
+    """)
 
     with gr.Row():
         # Left Panel (Input)
@@ -680,34 +690,25 @@ with gr.Blocks(css=css) as demo:
 
         # Right Panel (Output)
         with gr.Column(scale=1):
-            output_text = gr.
+            output_text = gr.Textbox(
+                label="Analysis Results",
+                lines=4,
+                interactive=False  # Prevent user input
+            )
 
-            #
+            # Ensure both plots have padding on top
             with gr.Row(equal_height=True):
-                mood_chart = gr.Plot(label=" ", scale=2)
-                va_chart = gr.Plot(label=" ", scale=1)
-
-                predict_btn.click(
-                    fn=lambda audio, thresh: format_prediction(music2emo.predict(audio, thresh)),
-                    inputs=[input_audio, threshold],
-                    outputs=[output_text, va_chart, mood_chart]
-                )
+                mood_chart = gr.Plot(label="Mood Probabilities", scale=2, elem_classes=["gr-box"])
+                va_chart = gr.Plot(label="Valence-Arousal Space", scale=1, elem_classes=["gr-box"])
 
-    # Button Click Function
     predict_btn.click(
         fn=lambda audio, thresh: format_prediction(music2emo.predict(audio, thresh)),
         inputs=[input_audio, threshold],
         outputs=[output_text, va_chart, mood_chart]
     )
 
-    # Notes Section
-    gr.Markdown("""
-    ### 📝 Notes:
-    - **Supported audio formats:** MP3, WAV
-    - **Recommended:** High-quality audio files
-    - **Processing time:** A few seconds, depending on file size
-    """)
-
 # Launch the App
 demo.queue().launch()
+
+
 
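For reference, a minimal, self-contained sketch of what the reworked mood formatting in format_prediction produces. The mood names, scores, and valence/arousal numbers below are hypothetical example values, not model output:

# Sketch only: hypothetical inputs illustrating the mood-formatting path in format_prediction.
predicted_moods_with_scores = [
    {"mood": "happy", "score": 0.91},      # hypothetical value
    {"mood": "energetic", "score": 0.67},  # hypothetical value
]

moods_text = ", ".join(
    [f"{m['mood']} ({m['score']:.2f})" for m in predicted_moods_with_scores]
)
# moods_text == "happy (0.91), energetic (0.67)"

valence, arousal = 7.2, 5.4  # hypothetical 1-9 scale scores
output_text = f"""🎭 Predicted Mood Tags: {moods_text}

💖 Valence: {valence:.2f} (Scale: 1-9)
⚡ Arousal: {arousal:.2f} (Scale: 1-9)"""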