Audio Analyzer¶
Анализ аудио файлов: определение BPM, битов, секций.
Возможности¶
- Определение BPM
- Детекция битов (timestamps)
- Определение секций (intro, verse, chorus, etc.)
- Анализ громкости
- Onset detection
Использование¶
Базовый анализ¶
from kit.utils import AudioAnalyzer
analyzer = AudioAnalyzer()
result = await analyzer.analyze("music.mp3")
print(f"BPM: {result.bpm}")
print(f"Duration: {result.duration_seconds}s")
print(f"Beats: {len(result.beats)}")
Детекция битов¶
result = await analyzer.analyze("track.mp3")
# Timestamps всех битов
for beat in result.beats:
print(f"Beat at {beat.time}s (strength: {beat.strength})")
# Только сильные биты
strong_beats = [b for b in result.beats if b.strength > 0.8]
Определение секций¶
result = await analyzer.analyze("song.mp3")
for section in result.sections:
print(f"{section.label}: {section.start}s - {section.end}s")
# Пример вывода:
# intro: 0s - 15s
# verse: 15s - 45s
# chorus: 45s - 75s
# verse: 75s - 105s
# chorus: 105s - 135s
# outro: 135s - 150s
Анализ громкости¶
result = await analyzer.analyze("audio.mp3")
# RMS громкость по времени
for i, rms in enumerate(result.loudness):
time = i * result.hop_length / result.sample_rate
print(f"{time:.2f}s: {rms:.3f}")
# Пиковые моменты
peaks = result.get_loudness_peaks(threshold=0.8)
Onset Detection¶
# Определение начала звуков/нот
result = await analyzer.analyze("music.mp3")
onsets = result.onsets
for onset in onsets:
print(f"Onset at {onset}s")
Кастомные параметры¶
analyzer = AudioAnalyzer(
hop_length=512,
n_fft=2048,
sr=22050 # Sample rate
)
result = await analyzer.analyze(
"track.mp3",
detect_beats=True,
detect_sections=True,
analyze_loudness=True,
detect_onsets=True
)
API Reference¶
AudioAnalyzer¶
class AudioAnalyzer:
    """Analyzes audio files: BPM, beats, sections, loudness, onsets.

    API reference stub — bodies are elided (`...`); see the usage
    examples above for behavior.
    """

    def __init__(
        self,
        hop_length: int = 512,
        n_fft: int = 2048,
        sr: int = 22050,  # sample rate
    ): ...

    async def analyze(
        self,
        audio_path: str,
        detect_beats: bool = True,
        detect_sections: bool = True,
        analyze_loudness: bool = True,
        detect_onsets: bool = False,  # off by default — enable explicitly
    ) -> "AudioAnalysis": ...

    # Convenience wrappers for a single metric.
    async def get_bpm(self, audio_path: str) -> float: ...
    async def get_beats(self, audio_path: str) -> "List[Beat]": ...
    async def get_sections(self, audio_path: str) -> "List[Section]": ...
AudioAnalysis¶
@dataclass
class AudioAnalysis:
    """Result of a full audio analysis pass.

    API reference stub — method bodies are elided (`...`).
    """

    bpm: float               # estimated tempo, beats per minute
    duration_seconds: float  # total track length in seconds
    sample_rate: int         # sample rate used for analysis
    beats: List["Beat"]      # detected beats, in time order
    sections: List["Section"]  # labeled structural sections
    loudness: List[float]    # per-frame RMS loudness values
    onsets: List[float]      # onset timestamps in seconds

    # Times whose loudness exceeds the threshold.
    def get_loudness_peaks(self, threshold: float = 0.8) -> List[float]: ...

    # Beats whose time falls within [start, end].
    def get_beats_in_range(self, start: float, end: float) -> "List[Beat]": ...
@dataclass
class Beat:
    """A single detected beat."""

    time: float      # beat position in seconds
    strength: float  # relative beat strength in the range 0-1
@dataclass
class Section:
    """A labeled structural region of the track."""

    label: str    # intro, verse, chorus, bridge, outro
    start: float  # start time in seconds
    end: float    # end time in seconds

    @property
    def duration(self) -> float:
        """Length of the section in seconds."""
        return self.end - self.start
Примеры из production¶
Music Video Generator — синхронизация с музыкой¶
class MusicVideoSyncer:
    """Builds beat- and section-synchronized editing data for music videos."""

    def __init__(self):
        self.analyzer = AudioAnalyzer()

    async def create_beat_markers(self, audio_path: str) -> List[dict]:
        """Create frame-change markers aligned to detected beats.

        Beats with strength > 0.7 become hard cuts; beats with strength
        in (0.4, 0.7] become zoom effects; weaker beats are ignored.

        Returns a list of dicts with "time", "type" and "action" keys.
        """
        analysis = await self.analyzer.analyze(audio_path)
        markers = []
        for beat in analysis.beats:
            if beat.strength > 0.7:  # strong beats only
                markers.append({
                    "time": beat.time,
                    "type": "strong_beat",
                    "action": "cut"  # frame change
                })
            elif beat.strength > 0.4:
                markers.append({
                    "time": beat.time,
                    "type": "weak_beat",
                    "action": "zoom"  # zoom effect
                })
        return markers

    async def match_sections_to_visuals(
        self,
        audio_path: str,
        visuals: dict
    ) -> List[dict]:
        """Map detected song sections onto visual theme pools.

        ``visuals`` maps theme names ("calm", "narrative", "energetic",
        "transition") to lists of assets; sections with an unknown label
        get an empty visuals list.
        """
        analysis = await self.analyzer.analyze(audio_path)
        # Which visual theme each section label draws from.
        section_visuals = {
            "intro": visuals.get("calm", []),
            "verse": visuals.get("narrative", []),
            "chorus": visuals.get("energetic", []),
            "bridge": visuals.get("transition", []),
            "outro": visuals.get("calm", [])
        }
        timeline = []
        for section in analysis.sections:
            available_visuals = section_visuals.get(section.label, [])
            timeline.append({
                "start": section.start,
                "end": section.end,
                "section": section.label,
                "visuals": available_visuals
            })
        return timeline
Autoshorts — определение точек для субтитров¶
class SubtitleSyncer:
    """Aligns subtitle timing with audio onsets and beats."""

    def __init__(self):
        self.analyzer = AudioAnalyzer()

    async def find_pause_points(
        self,
        audio_path: str,
        min_pause_duration: float = 0.3
    ) -> List[float]:
        """Find pauses in the audio suitable for subtitle breaks.

        A pause is a gap between consecutive onsets longer than
        ``min_pause_duration`` seconds; the midpoint of each gap is
        returned, in ascending order.

        NOTE(review): analyze() defaults to detect_onsets=False per the
        API reference — confirm onsets are actually populated here.
        """
        analysis = await self.analyzer.analyze(audio_path)
        pauses = []
        prev_onset = 0.0
        for onset in analysis.onsets:
            gap = onset - prev_onset
            if gap > min_pause_duration:
                pauses.append(prev_onset + gap / 2)  # middle of the pause
            prev_onset = onset
        return pauses

    async def align_words_to_beats(
        self,
        audio_path: str,
        word_timings: List[dict]
    ) -> List[dict]:
        """Snap word timings to the nearest detected beat.

        Each word dict is copied and extended with:
        - "beat_aligned": time of the closest beat (None if no beats)
        - "on_beat": True when the word starts within 50 ms of a beat
        """
        analysis = await self.analyzer.analyze(audio_path)
        beat_times = [b.time for b in analysis.beats]
        aligned = []
        for word in word_timings:
            if beat_times:
                # Find the nearest beat.
                closest_beat = min(beat_times, key=lambda b: abs(b - word["start"]))
                on_beat = abs(closest_beat - word["start"]) < 0.05
            else:
                # No beats detected: keep the word but mark it unaligned
                # instead of raising ValueError from min() on an empty list.
                closest_beat = None
                on_beat = False
            aligned.append({
                **word,
                "beat_aligned": closest_beat,
                "on_beat": on_beat
            })
        return aligned
BPM-based effects¶
class BPMEffects:
    """Computes effect trigger timings derived from a track's tempo."""

    def __init__(self):
        self.analyzer = AudioAnalyzer()

    async def calculate_effect_timing(
        self,
        audio_path: str,
        effect_type: str,
        num_beats: int = 100
    ) -> List[float]:
        """Return effect trigger times (seconds) based on the track BPM.

        All effect types span ``num_beats`` beats (default 100,
        preserving the original 100/25/200 trigger counts):
        - "pulse": one trigger per beat
        - "slow_pulse": one trigger every 4 beats
        - "double": two triggers per beat

        Unknown effect types yield an empty list.
        """
        bpm = await self.analyzer.get_bpm(audio_path)
        beat_duration = 60 / bpm  # seconds per beat
        if effect_type == "pulse":
            # One effect on every beat.
            return [i * beat_duration for i in range(num_beats)]
        if effect_type == "slow_pulse":
            # One effect every 4 beats.
            return [i * beat_duration * 4 for i in range(num_beats // 4)]
        if effect_type == "double":
            # One effect every half beat.
            return [i * beat_duration / 2 for i in range(num_beats * 2)]
        return []