Audio Analyzer

Audio file analysis: BPM, beat, and section detection.

Features

  • BPM detection
  • Beat detection (timestamps)
  • Section detection (intro, verse, chorus, etc.)
  • Loudness analysis
  • Onset detection

Usage

Basic analysis

from kit.utils import AudioAnalyzer

analyzer = AudioAnalyzer()

result = await analyzer.analyze("music.mp3")

print(f"BPM: {result.bpm}")
print(f"Duration: {result.duration_seconds}s")
print(f"Beats: {len(result.beats)}")

Beat detection

result = await analyzer.analyze("track.mp3")

# Timestamps of all beats
for beat in result.beats:
    print(f"Beat at {beat.time}s (strength: {beat.strength})")

# Strong beats only
strong_beats = [b for b in result.beats if b.strength > 0.8]
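
As a sanity check, the reported BPM should roughly match the spacing of the detected beats. A minimal sketch, assuming result.beats is sorted by time:

intervals = [b2.time - b1.time for b1, b2 in zip(result.beats, result.beats[1:])]
if intervals:
    median_interval = sorted(intervals)[len(intervals) // 2]
    print(f"Reported BPM: {result.bpm:.1f}, from beat spacing: {60 / median_interval:.1f}")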

Section detection

result = await analyzer.analyze("song.mp3")

for section in result.sections:
    print(f"{section.label}: {section.start}s - {section.end}s")

# Example output:
# intro: 0s - 15s
# verse: 15s - 45s
# chorus: 45s - 75s
# verse: 75s - 105s
# chorus: 105s - 135s
# outro: 135s - 150s
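
Labels repeat (two verses and two choruses above), so downstream code usually groups sections by label. A small sketch using the Section.duration property from the API reference:

choruses = [s for s in result.sections if s.label == "chorus"]
chorus_time = sum(s.duration for s in choruses)
print(f"{len(choruses)} choruses, {chorus_time:.1f}s in total")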

Loudness analysis

result = await analyzer.analyze("audio.mp3")

# RMS loudness over time
for i, rms in enumerate(result.loudness):
    time = i * result.hop_length / result.sample_rate
    print(f"{time:.2f}s: {rms:.3f}")

# Peak loudness moments
peaks = result.get_loudness_peaks(threshold=0.8)
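
The same curve can be scanned for quiet stretches, e.g. to place fades or transitions. A sketch; the 0.1 threshold is an arbitrary example value:

quiet_times = [
    i * result.hop_length / result.sample_rate
    for i, rms in enumerate(result.loudness)
    if rms < 0.1  # arbitrary example threshold
]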

Onset Detection

# Detect the start of sounds/notes (off by default, so enable it explicitly)
result = await analyzer.analyze("music.mp3", detect_onsets=True)

onsets = result.onsets
for onset in onsets:
    print(f"Onset at {onset}s")

Custom parameters

analyzer = AudioAnalyzer(
    hop_length=512,
    n_fft=2048,
    sr=22050  # Sample rate
)

result = await analyzer.analyze(
    "track.mp3",
    detect_beats=True,
    detect_sections=True,
    analyze_loudness=True,
    detect_onsets=True
)
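
hop_length and sr together set the time resolution of frame-based outputs such as loudness: each frame covers hop_length / sr seconds. With the defaults:

frame_step = 512 / 22050      # ≈ 0.0232 s per loudness frame
frames_per_sec = 22050 / 512  # ≈ 43 frames per second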

API Reference

AudioAnalyzer

class AudioAnalyzer:
    def __init__(
        self,
        hop_length: int = 512,
        n_fft: int = 2048,
        sr: int = 22050
    )

    async def analyze(
        self,
        audio_path: str,
        detect_beats: bool = True,
        detect_sections: bool = True,
        analyze_loudness: bool = True,
        detect_onsets: bool = False
    ) -> AudioAnalysis

    async def get_bpm(self, audio_path: str) -> float
    async def get_beats(self, audio_path: str) -> List[Beat]
    async def get_sections(self, audio_path: str) -> List[Section]
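
The per-task helpers return a single result when the full analysis object is not needed; a minimal usage sketch:

bpm = await analyzer.get_bpm("track.mp3")
beats = await analyzer.get_beats("track.mp3")
sections = await analyzer.get_sections("track.mp3")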

AudioAnalysis

@dataclass
class AudioAnalysis:
    bpm: float
    duration_seconds: float
    sample_rate: int
    hop_length: int
    beats: List[Beat]
    sections: List[Section]
    loudness: List[float]
    onsets: List[float]

    def get_loudness_peaks(self, threshold: float = 0.8) -> List[float]
    def get_beats_in_range(self, start: float, end: float) -> List[Beat]

@dataclass
class Beat:
    time: float      # Time in seconds
    strength: float  # Beat strength (0-1)

@dataclass
class Section:
    label: str       # intro, verse, chorus, bridge, outro
    start: float     # Start, in seconds
    end: float       # End, in seconds

    @property
    def duration(self) -> float:
        return self.end - self.start
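
A short sketch tying the helpers together: beats inside the first chorus via get_beats_in_range, its length via the duration property:

chorus = next((s for s in result.sections if s.label == "chorus"), None)
if chorus is not None:
    chorus_beats = result.get_beats_in_range(chorus.start, chorus.end)
    print(f"Chorus: {chorus.duration:.1f}s, {len(chorus_beats)} beats")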

Production examples

Music Video Generator: syncing video to music

class MusicVideoSyncer:
    def __init__(self):
        self.analyzer = AudioAnalyzer()

    async def create_beat_markers(self, audio_path: str) -> List[dict]:
        """Создание маркеров для смены кадров на битах."""
        analysis = await self.analyzer.analyze(audio_path)

        markers = []
        for beat in analysis.beats:
            if beat.strength > 0.7:  # Strong beats only
                markers.append({
                    "time": beat.time,
                    "type": "strong_beat",
                    "action": "cut"  # Смена кадра
                })
            elif beat.strength > 0.4:
                markers.append({
                    "time": beat.time,
                    "type": "weak_beat",
                    "action": "zoom"  # Зум эффект
                })

        return markers

    async def match_sections_to_visuals(
        self,
        audio_path: str,
        visuals: dict
    ) -> List[dict]:
        """Сопоставление секций музыки с визуальными темами."""
        analysis = await self.analyzer.analyze(audio_path)

        section_visuals = {
            "intro": visuals.get("calm", []),
            "verse": visuals.get("narrative", []),
            "chorus": visuals.get("energetic", []),
            "bridge": visuals.get("transition", []),
            "outro": visuals.get("calm", [])
        }

        timeline = []
        for section in analysis.sections:
            available_visuals = section_visuals.get(section.label, [])
            timeline.append({
                "start": section.start,
                "end": section.end,
                "section": section.label,
                "visuals": available_visuals
            })

        return timeline
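
A usage sketch (hypothetical file and visual names, run inside an async context):

syncer = MusicVideoSyncer()
markers = await syncer.create_beat_markers("track.mp3")
timeline = await syncer.match_sections_to_visuals(
    "track.mp3",
    {"calm": ["sunset.mp4"], "narrative": ["story.mp4"], "energetic": ["dance.mp4"]},
)
cuts = [m for m in markers if m["action"] == "cut"]
print(f"{len(cuts)} hard cuts across {len(timeline)} sections")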

Autoshorts: finding subtitle split points

class SubtitleSyncer:
    def __init__(self):
        self.analyzer = AudioAnalyzer()

    async def find_pause_points(
        self,
        audio_path: str,
        min_pause_duration: float = 0.3
    ) -> List[float]:
        """Поиск пауз в аудио для разбиения субтитров."""
        analysis = await self.analyzer.analyze(audio_path)

        pauses = []
        prev_onset = 0

        for onset in analysis.onsets:
            gap = onset - prev_onset
            if gap > min_pause_duration:
                pauses.append(prev_onset + gap / 2)  # Middle of the pause
            prev_onset = onset

        return pauses

    async def align_words_to_beats(
        self,
        audio_path: str,
        word_timings: List[dict]
    ) -> List[dict]:
        """Выравнивание слов по ближайшим битам."""
        analysis = await self.analyzer.analyze(audio_path)
        beat_times = [b.time for b in analysis.beats]

        aligned = []
        for word in word_timings:
            # Find the closest beat
            closest_beat = min(beat_times, key=lambda b: abs(b - word["start"]))

            aligned.append({
                **word,
                "beat_aligned": closest_beat,
                "on_beat": abs(closest_beat - word["start"]) < 0.05
            })

        return aligned
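
A usage sketch; the word timings are hypothetical and only need the "start" key the method reads:

syncer = SubtitleSyncer()
pauses = await syncer.find_pause_points("voice.mp3")
words = [
    {"text": "hello", "start": 1.02},
    {"text": "world", "start": 1.48},
]
aligned = await syncer.align_words_to_beats("voice.mp3", words)
on_beat = [w["text"] for w in aligned if w["on_beat"]]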

BPM-based effects

class BPMEffects:
    def __init__(self):
        self.analyzer = AudioAnalyzer()

    async def calculate_effect_timing(
        self,
        audio_path: str,
        effect_type: str
    ) -> List[float]:
        """Расчёт тайминга эффектов на основе BPM."""
        bpm = await self.analyzer.get_bpm(audio_path)
        beat_duration = 60 / bpm

        if effect_type == "pulse":
            # Effect on every beat
            return [i * beat_duration for i in range(100)]
        elif effect_type == "slow_pulse":
            # Effect every 4 beats
            return [i * beat_duration * 4 for i in range(25)]
        elif effect_type == "double":
            # Effect every half-beat
            return [i * beat_duration / 2 for i in range(200)]

        return []
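
The fixed counts above (100, 25, 200) can overshoot a short track or cut a long one off early. A hedged variant of the pulse branch, written as an extra method for the class, that derives the count from the track's real duration:

    async def pulse_times(self, audio_path: str) -> List[float]:
        """Effect on every beat, capped at the track's actual length."""
        analysis = await self.analyzer.analyze(audio_path)
        step = 60 / analysis.bpm
        return [i * step for i in range(int(analysis.duration_seconds / step))]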