From a113be3b852aecc036bc21a53eeedb47b3e3e3bb Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Mon, 16 Mar 2026 05:48:34 +0000 Subject: [PATCH 1/4] init --- .../havpe-relay/firmware/voice-chronicle.yaml | 78 +++++++++++-------- 1 file changed, 44 insertions(+), 34 deletions(-) diff --git a/extras/havpe-relay/firmware/voice-chronicle.yaml b/extras/havpe-relay/firmware/voice-chronicle.yaml index 387af022..b963eba0 100644 --- a/extras/havpe-relay/firmware/voice-chronicle.yaml +++ b/extras/havpe-relay/firmware/voice-chronicle.yaml @@ -361,20 +361,20 @@ i2s_audio: number: GPIO13 # ─────────── Microphone (TCP streaming to relay) ─────────── -# The XMOS voice kit outputs stereo 32-bit I2S (secondary mode). Channel 0 -# (right) carries the fully-processed speech signal (AEC→IC→NS→AGC), while -# channel 1 (left) is a pre-AGC tap useful for wake-word engines. +# The XMOS voice kit outputs stereo 32-bit I2S (secondary mode). +# ESPHome interleaves stereo as [L, R, L, R, ...] where channel 0 = left (even) +# and channel 1 = right (odd). +# Source: esphome/components/microphone/microphone_source.h +# https://github.com/esphome/esphome/blob/dev/esphome/components/microphone/microphone_source.h # -# We match the official HA Voice PE microphone config (stereo, 32-bit) and -# extract+downconvert in the on_data lambda before sending over TCP. +# The official HA Voice PE firmware uses channels: 0 (left) for voice assistant +# and channels: 1 (right) with gain_factor: 4 for wake word. 
+# Source: home-assistant-voice.yaml +# https://github.com/esphome/home-assistant-voice-pe/blob/dev/home-assistant-voice.yaml # -# References: -# Official Voice PE config: -# https://github.com/esphome/home-assistant-voice-pe/blob/dev/home-assistant-voice.yaml -# XMOS voice kit audio pipeline: -# https://deepwiki.com/esphome/home-assistant-voice-pe/3.2-audio-processing-pipeline -# I2S microphone component docs: -# https://esphome.io/components/microphone/i2s_audio/ +# With voice_kit channel_0_stage=AGC, channel 0 (left) carries the +# fully-processed speech signal (AEC→IC→NS→AGC), while channel 1 (right) +# is a pre-AGC tap (NS stage) useful for wake-word engines. microphone: - platform: i2s_audio id: mic_in @@ -409,46 +409,56 @@ microphone: // Send audio-start once per connection (mono 16-bit after conversion) if (!session_started) { - if (!chronicle::send_audio_start(16000, 2, 1)) { + if (!chronicle::send_audio_start(16000, 2, 2)) { // DEBUG: stereo id(tcp_ok) = false; return; } session_started = true; } - // Extract channel 0 (AGC-processed speech) and convert 32→16 bit. - // Input: interleaved stereo 32-bit samples [L0, R0, L1, R1, ...] - // XMOS channel 0 (AGC) maps to right (odd indices), - // channel 1 (NS/pre-AGC) maps to left (even indices). - // Output: mono 16-bit samples + // DEBUG: Send STEREO to relay so we can compare both channels. + // Channel 0 (left/even) = AGC, Channel 1 (right/odd) = NS. + // Relay will receive 2-channel 16-bit audio — record and compare. 
const int32_t* samples = reinterpret_cast(x.data()); - size_t total_samples = x.size() / sizeof(int32_t); // total L+R samples - size_t num_frames = total_samples / 2; // stereo frames + size_t total_samples = x.size() / sizeof(int32_t); + size_t num_frames = total_samples / 2; - // Temporary buffer for mono 16-bit output (2 bytes per frame) - static std::vector mono_buf; - if (mono_buf.size() < num_frames) { - mono_buf.resize(num_frames); + static std::vector stereo_buf; + if (stereo_buf.size() < num_frames * 2) { + stereo_buf.resize(num_frames * 2); } for (size_t i = 0; i < num_frames; i++) { - int32_t right = samples[i * 2 + 1]; // channel 0 (right) = AGC output - int32_t s16 = right >> 14; // 32→16 bit with 4x gain (>> 14 vs >> 16) - if (s16 > 32767) s16 = 32767; - if (s16 < -32768) s16 = -32768; - mono_buf[i] = (int16_t)s16; + stereo_buf[i * 2] = (int16_t)(samples[i * 2] >> 16); // left (ch0 AGC) + stereo_buf[i * 2 + 1] = (int16_t)(samples[i * 2 + 1] >> 16); // right (ch1 NS) } - size_t mono_bytes = num_frames * sizeof(int16_t); + // Log peak amplitude of each channel every ~2 seconds + static uint32_t last_log = 0; + static int16_t peak_l = 0, peak_r = 0; + for (size_t i = 0; i < num_frames; i++) { + int16_t al = abs(stereo_buf[i * 2]); + int16_t ar = abs(stereo_buf[i * 2 + 1]); + if (al > peak_l) peak_l = al; + if (ar > peak_r) peak_r = ar; + } + if (millis() - last_log > 2000) { + ESP_LOGI("audio", "Peak L(ch0/AGC)=%d R(ch1/NS)=%d", peak_l, peak_r); + peak_l = 0; + peak_r = 0; + last_log = millis(); + } + + size_t stereo_bytes = num_frames * 2 * sizeof(int16_t); - // Send converted mono 16-bit PCM - if (!chronicle::send_audio(reinterpret_cast(mono_buf.data()), - mono_bytes, 16000, 2, 1)) { + // Send as stereo (channels=2) so relay saves both + if (!chronicle::send_audio(reinterpret_cast(stereo_buf.data()), + stereo_bytes, 16000, 2, 2)) { announced = false; session_started = false; id(tcp_ok) = false; } else if (!announced) { - ESP_LOGI("tcp", "Streaming 
audio to relay (stereo 32bit → mono 16bit, %d frames)", num_frames); + ESP_LOGI("tcp", "DEBUG: Streaming STEREO 32→16bit, %d frames per channel", num_frames); announced = true; } From bd6b85fc18ffcef6f618d5e1ea6a9e5d6b6f7df5 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Mon, 16 Mar 2026 06:13:42 +0000 Subject: [PATCH 2/4] fix --- extras/havpe-relay/capture_default_audio.py | 174 ++ .../havpe-relay/firmware/voice-chronicle.yaml | 56 +- .../havpe-relay/firmware/voice-default.yaml | 1918 +++++++++++++++++ extras/havpe-relay/flash_default.sh | 25 + 4 files changed, 2135 insertions(+), 38 deletions(-) create mode 100644 extras/havpe-relay/capture_default_audio.py create mode 100644 extras/havpe-relay/firmware/voice-default.yaml create mode 100755 extras/havpe-relay/flash_default.sh diff --git a/extras/havpe-relay/capture_default_audio.py b/extras/havpe-relay/capture_default_audio.py new file mode 100644 index 00000000..8d57223a --- /dev/null +++ b/extras/havpe-relay/capture_default_audio.py @@ -0,0 +1,174 @@ +""" +Capture audio from a default HA Voice PE device running stock firmware. + +Acts as a minimal "Home Assistant" voice assistant server using aioesphomeapi. +Connects to the device, subscribes to voice assistant events, and saves +the raw audio stream to a WAV file for comparison. + +Usage: + uv run python capture_default_audio.py [--output audio_capture.wav] + +The device must be running the default HA Voice PE firmware with voice_assistant +component. Trigger audio by saying the wake word or pressing the button. +Press Ctrl+C to stop and save. 
+""" + +import argparse +import asyncio +import struct +import sys +import wave +from datetime import datetime + +from aioesphomeapi import ( + APIClient, + VoiceAssistantAudioSettingsModel, + VoiceAssistantEventType, +) + + +class AudioCapture: + def __init__(self, output_path: str): + self.output_path = output_path + self.audio_chunks: list[bytes] = [] + self.capturing = False + self.capture_count = 0 + + async def handle_start( + self, + conversation_id: str, + flags: int, + audio_settings: VoiceAssistantAudioSettingsModel, + wake_word_phrase: str | None, + ) -> int | None: + """Called when device wants to start voice assistant pipeline.""" + print(f"\n--- Voice assistant START ---") + print(f" conversation_id: {conversation_id}") + print(f" flags: {flags}") + print( + f" audio_settings: noise_suppression={audio_settings.noise_suppression_level}, " + f"auto_gain={audio_settings.auto_gain}, " + f"volume_multiplier={audio_settings.volume_multiplier}" + ) + if wake_word_phrase: + print(f" wake_word: {wake_word_phrase}") + + self.capturing = True + self.capture_count += 1 + print(f" Capturing audio (session #{self.capture_count})...") + + # Return port 0 = use API audio (not UDP) + return 0 + + async def handle_stop(self, abort: bool) -> None: + """Called when device stops voice assistant pipeline.""" + if self.capturing: + print(f"\n--- Voice assistant STOP (abort={abort}) ---") + print( + f" Captured {len(self.audio_chunks)} chunks, " + f"{sum(len(c) for c in self.audio_chunks)} bytes total" + ) + self.capturing = False + + async def handle_audio(self, data: bytes) -> None: + """Called for each audio chunk from the device.""" + self.audio_chunks.append(data) + if len(self.audio_chunks) % 50 == 0: + total_bytes = sum(len(c) for c in self.audio_chunks) + # Assume 16-bit mono 16kHz + duration = total_bytes / (16000 * 2) + print(f" ... 
{len(self.audio_chunks)} chunks, {duration:.1f}s", end="\r") + + def save_wav( + self, sample_rate: int = 16000, sample_width: int = 2, channels: int = 1 + ): + """Save captured audio to WAV file.""" + if not self.audio_chunks: + print("No audio captured!") + return + + raw_audio = b"".join(self.audio_chunks) + total_samples = len(raw_audio) // sample_width + duration = total_samples / (sample_rate * channels) + + # Analyze levels + samples = struct.unpack(f"<{total_samples}h", raw_audio) + peak = max(abs(s) for s in samples) if samples else 0 + rms = (sum(s * s for s in samples) / len(samples)) ** 0.5 if samples else 0 + + with wave.open(self.output_path, "wb") as wf: + wf.setnchannels(channels) + wf.setsampwidth(sample_width) + wf.setframerate(sample_rate) + wf.writeframes(raw_audio) + + print(f"\nSaved {self.output_path}:") + print(f" Duration: {duration:.1f}s") + print(f" Format: {sample_rate}Hz, {sample_width * 8}-bit, {channels}ch") + print(f" Peak: {peak}") + print(f" RMS: {rms:.0f}") + print(f" Samples: {total_samples}") + + +async def main(): + parser = argparse.ArgumentParser( + description="Capture audio from default HA Voice PE firmware" + ) + parser.add_argument("device_ip", help="IP address of the ESPHome device") + parser.add_argument( + "--port", type=int, default=6053, help="ESPHome native API port (default: 6053)" + ) + parser.add_argument("--password", default="", help="API password if set") + parser.add_argument("--output", "-o", default=None, help="Output WAV file path") + parser.add_argument( + "--noise-suppression", type=int, default=0, help="Noise suppression level (0-4)" + ) + parser.add_argument( + "--auto-gain", type=int, default=0, help="Auto gain in dBFS (0-31)" + ) + parser.add_argument( + "--volume-multiplier", type=float, default=1.0, help="Volume multiplier" + ) + args = parser.parse_args() + + if args.output is None: + args.output = f"capture_default_{datetime.now().strftime('%H%M%S')}.wav" + + capture = AudioCapture(args.output) + 
+ print(f"Connecting to {args.device_ip}:{args.port}...") + client = APIClient(args.device_ip, args.port, args.password) + + try: + await client.connect(login=True) + info = await client.device_info() + print(f"Connected to: {info.name} (ESPHome {info.esphome_version})") + + print(f"\nSubscribing to voice assistant...") + print(f" Trigger the wake word or press the button to start capture.") + print(f" Press Ctrl+C to stop and save.\n") + + unsub = client.subscribe_voice_assistant( + handle_start=capture.handle_start, + handle_stop=capture.handle_stop, + handle_audio=capture.handle_audio, + ) + + # Keep running until Ctrl+C + try: + while True: + await asyncio.sleep(1) + except KeyboardInterrupt: + print("\n\nStopping...") + unsub() + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + raise + finally: + capture.save_wav() + await client.disconnect() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/extras/havpe-relay/firmware/voice-chronicle.yaml b/extras/havpe-relay/firmware/voice-chronicle.yaml index b963eba0..c1140331 100644 --- a/extras/havpe-relay/firmware/voice-chronicle.yaml +++ b/extras/havpe-relay/firmware/voice-chronicle.yaml @@ -322,8 +322,7 @@ voice_kit: id: voice_kit_component i2c_id: internal_i2c reset_pin: GPIO4 - channel_0_stage: AGC # Speech-ready output (AEC→IC→NS→AGC) - channel_1_stage: NS # Pre-AGC tap (useful for wake word or analysis) + # No channel_0_stage/channel_1_stage — use XMOS defaults (matches official HA firmware) firmware: url: https://github.com/esphome/voice-kit-xmos-firmware/releases/download/v1.3.1/ffva_v1.3.1_upgrade.bin version: "1.3.1" @@ -371,10 +370,6 @@ i2s_audio: # and channels: 1 (right) with gain_factor: 4 for wake word. 
# Source: home-assistant-voice.yaml # https://github.com/esphome/home-assistant-voice-pe/blob/dev/home-assistant-voice.yaml -# -# With voice_kit channel_0_stage=AGC, channel 0 (left) carries the -# fully-processed speech signal (AEC→IC→NS→AGC), while channel 1 (right) -# is a pre-AGC tap (NS stage) useful for wake-word engines. microphone: - platform: i2s_audio id: mic_in @@ -409,56 +404,41 @@ microphone: // Send audio-start once per connection (mono 16-bit after conversion) if (!session_started) { - if (!chronicle::send_audio_start(16000, 2, 2)) { // DEBUG: stereo + if (!chronicle::send_audio_start(16000, 2, 1)) { id(tcp_ok) = false; return; } session_started = true; } - // DEBUG: Send STEREO to relay so we can compare both channels. - // Channel 0 (left/even) = AGC, Channel 1 (right/odd) = NS. - // Relay will receive 2-channel 16-bit audio — record and compare. + // Extract channel 0 (AGC-processed speech) and convert 32→16 bit. + // ESPHome stereo interleave: channel 0 = left (even), channel 1 = right (odd). + // XMOS voice_kit: channel_0_stage=AGC → left, channel_1_stage=NS → right. 
const int32_t* samples = reinterpret_cast(x.data()); - size_t total_samples = x.size() / sizeof(int32_t); - size_t num_frames = total_samples / 2; + size_t total_samples = x.size() / sizeof(int32_t); // total L+R samples + size_t num_frames = total_samples / 2; // stereo frames - static std::vector stereo_buf; - if (stereo_buf.size() < num_frames * 2) { - stereo_buf.resize(num_frames * 2); + // Temporary buffer for mono 16-bit output (2 bytes per frame) + static std::vector mono_buf; + if (mono_buf.size() < num_frames) { + mono_buf.resize(num_frames); } for (size_t i = 0; i < num_frames; i++) { - stereo_buf[i * 2] = (int16_t)(samples[i * 2] >> 16); // left (ch0 AGC) - stereo_buf[i * 2 + 1] = (int16_t)(samples[i * 2 + 1] >> 16); // right (ch1 NS) - } - - // Log peak amplitude of each channel every ~2 seconds - static uint32_t last_log = 0; - static int16_t peak_l = 0, peak_r = 0; - for (size_t i = 0; i < num_frames; i++) { - int16_t al = abs(stereo_buf[i * 2]); - int16_t ar = abs(stereo_buf[i * 2 + 1]); - if (al > peak_l) peak_l = al; - if (ar > peak_r) peak_r = ar; - } - if (millis() - last_log > 2000) { - ESP_LOGI("audio", "Peak L(ch0/AGC)=%d R(ch1/NS)=%d", peak_l, peak_r); - peak_l = 0; - peak_r = 0; - last_log = millis(); + int32_t left = samples[i * 2]; // channel 0 (left) = AGC output + mono_buf[i] = (int16_t)(left >> 16); // 32→16 bit: take upper 16 bits } - size_t stereo_bytes = num_frames * 2 * sizeof(int16_t); + size_t mono_bytes = num_frames * sizeof(int16_t); - // Send as stereo (channels=2) so relay saves both - if (!chronicle::send_audio(reinterpret_cast(stereo_buf.data()), - stereo_bytes, 16000, 2, 2)) { + // Send converted mono 16-bit PCM + if (!chronicle::send_audio(reinterpret_cast(mono_buf.data()), + mono_bytes, 16000, 2, 1)) { announced = false; session_started = false; id(tcp_ok) = false; } else if (!announced) { - ESP_LOGI("tcp", "DEBUG: Streaming STEREO 32→16bit, %d frames per channel", num_frames); + ESP_LOGI("tcp", "Streaming audio to relay 
(stereo 32bit → mono 16bit, %d frames)", num_frames); announced = true; } diff --git a/extras/havpe-relay/firmware/voice-default.yaml b/extras/havpe-relay/firmware/voice-default.yaml new file mode 100644 index 00000000..a4b1bd82 --- /dev/null +++ b/extras/havpe-relay/firmware/voice-default.yaml @@ -0,0 +1,1918 @@ +substitutions: + # Phases of the Voice Assistant + # The voice assistant is ready to be triggered by a wake word + voice_assist_idle_phase_id: '1' + # The voice assistant is waiting for a voice command (after being triggered by the wake word) + voice_assist_waiting_for_command_phase_id: '2' + # The voice assistant is listening for a voice command + voice_assist_listening_for_command_phase_id: '3' + # The voice assistant is currently processing the command + voice_assist_thinking_phase_id: '4' + # The voice assistant is replying to the command + voice_assist_replying_phase_id: '5' + # The voice assistant is not ready + voice_assist_not_ready_phase_id: '10' + # The voice assistant encountered an error + voice_assist_error_phase_id: '11' + # Change this to true in case you have a hidden SSID at home. 
+ hidden_ssid: "false" + # Substitutions for audio files + jack_connected_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/jack_connected.flac + jack_disconnected_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/jack_disconnected.flac + mute_switch_on_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/mute_switch_on.flac + mute_switch_off_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/mute_switch_off.flac + timer_finished_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/timer_finished.flac + wake_word_triggered_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/wake_word_triggered.flac + center_button_press_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/center_button_press.flac + center_button_double_press_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/center_button_double_press.flac + center_button_triple_press_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/center_button_triple_press.flac + center_button_long_press_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/center_button_long_press.flac + factory_reset_initiated_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/factory_reset_initiated.mp3 + factory_reset_cancelled_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/factory_reset_cancelled.mp3 + factory_reset_confirmed_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/factory_reset_confirmed.mp3 + easter_egg_tick_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/easter_egg_tick.mp3 + easter_egg_tada_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/easter_egg_tada.mp3 + error_cloud_expired_sound_file: 
https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/error_cloud_expired.mp3 + +esphome: + name: home-assistant-voice + friendly_name: Home Assistant Voice + name_add_mac_suffix: true + min_version: 2026.2.0 + on_boot: + priority: 375 + then: + # Run the script to refresh the LED status + - script.execute: control_leds + - delay: 1s + - switch.turn_on: internal_speaker_amp + # If after 10 minutes, the device is still initializing (It did not yet connect to Home Assistant), turn off the init_in_progress variable and run the script to refresh the LED status + - delay: 10min + - if: + condition: + lambda: return id(init_in_progress); + then: + - lambda: id(init_in_progress) = false; + - script.execute: control_leds + +esp32: + board: esp32-s3-devkitc-1 + cpu_frequency: 240MHz + variant: esp32s3 + flash_size: 16MB + framework: + type: esp-idf + version: recommended + sdkconfig_options: + CONFIG_ESP32S3_DATA_CACHE_64KB: "y" + CONFIG_ESP32S3_DATA_CACHE_LINE_64B: "y" + CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB: "y" + + # Moves instructions and read only data from flash into PSRAM on boot. + # Both enabled allows instructions to execute while a flash operation is in progress without needing to be placed in IRAM. + # Considerably speeds up mWW at the cost of using more PSRAM. 
+ CONFIG_SPIRAM_RODATA: "y" + CONFIG_SPIRAM_FETCH_INSTRUCTIONS: "y" + + CONFIG_BT_ALLOCATION_FROM_SPIRAM_FIRST: "y" + CONFIG_BT_BLE_DYNAMIC_ENV_MEMORY: "y" + + CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC: "y" + CONFIG_MBEDTLS_SSL_PROTO_TLS1_3: "y" # TLS1.3 support isn't enabled by default in IDF 5.1.5 + +wifi: + id: wifi_id + fast_connect: ${hidden_ssid} + on_connect: + - lambda: id(improv_ble_in_progress) = false; + - script.execute: control_leds + on_disconnect: + - script.execute: control_leds + +network: + enable_ipv6: true + +logger: + level: DEBUG + logs: + sensor: WARN # avoids logging debug sensor updates + +api: + id: api_id + on_client_connected: + - script.execute: control_leds + on_client_disconnected: + - script.execute: control_leds + # encryption removed for capture_default_audio.py compatibility + +ota: + - platform: esphome + id: ota_esphome + +i2c: + - id: internal_i2c + sda: GPIO5 + scl: GPIO6 + frequency: 400kHz + +psram: + mode: octal + speed: 80MHz + ignore_not_found: false # The VPE has PSRAM, so this is safe. Allows configuring WiFi driver to use more resources (done automatically by the speaker media player) + +globals: + # Global index for our LEDs. So that switching between different animation does not lead to unwanted effects. + - id: global_led_animation_index + type: int + restore_value: no + initial_value: '0' + # Global initialization variable. Initialized to true and set to false once everything is connected. Only used to have a smooth "plugging" experience + - id: init_in_progress + type: bool + restore_value: no + initial_value: 'true' + # Global variable storing the state of ImprovBLE. Used to draw different LED animations + - id: improv_ble_in_progress + type: bool + restore_value: no + initial_value: 'false' + # Global variable tracking the phase of the voice assistant (defined above). 
Initialized to not_ready + - id: voice_assistant_phase + type: int + restore_value: no + initial_value: ${voice_assist_not_ready_phase_id} + # Global variable tracking if the dial was recently touched. + - id: dial_touched + type: bool + restore_value: no + initial_value: 'false' + # Global variable tracking if the LED color was recently changed. + - id: color_changed + type: bool + restore_value: no + initial_value: 'false' + # Global variable tracking if the group media player volume was recently changed. + - id: group_volume_changed + type: bool + restore_value: no + initial_value: 'false' + # Global variable tracking if the jack was recently plugged in. + - id: jack_plugged_recently + type: bool + restore_value: no + initial_value: 'false' + # Global variable tracking if the jack was recently unplugged. + - id: jack_unplugged_recently + type: bool + restore_value: no + initial_value: 'false' + # Global variable storing the first active timer + - id: first_active_timer + type: voice_assistant::Timer + restore_value: false + # Global variable storing if a timer is active + - id: is_timer_active + type: bool + restore_value: false + # Global variable storing if a factory reset was requested. If it is set to true, the device will factory reset once the center button is released + - id: factory_reset_requested + type: bool + restore_value: no + initial_value: 'false' + +switch: + # This is the master mute switch. It is exposed to Home Assistant. The user can only turn it on and off if the hardware switch is off. 
(The hardware switch overrides the software one) + - platform: template + id: master_mute_switch + restore_mode: RESTORE_DEFAULT_OFF + icon: "mdi:microphone-off" + name: Mute + entity_category: config + lambda: |- + // Muted either if the hardware mute switch is on or the microphone's software mute switch is enabled + if (id(hardware_mute_switch).state || id(i2s_mics).get_mute_state()) { + return true; + } else { + return false; + } + turn_on_action: + - if: + condition: + binary_sensor.is_off: hardware_mute_switch + then: + - microphone.mute: + turn_off_action: + - if: + condition: + binary_sensor.is_off: hardware_mute_switch + then: + - microphone.unmute: + on_turn_on: + - script.execute: control_leds + on_turn_off: + - script.execute: control_leds + # Wake Word Sound Switch. + - platform: template + id: wake_sound + name: Wake sound + icon: "mdi:bullhorn" + entity_category: config + optimistic: true + restore_mode: RESTORE_DEFAULT_ON + # Internal switch to track when a timer is ringing on the device. 
+ - platform: template + id: timer_ringing + optimistic: true + internal: true + restore_mode: ALWAYS_OFF + on_turn_off: + # Disable stop wake word + - micro_wake_word.disable_model: stop + - script.execute: disable_repeat + # Stop any current announcement (ie: stop the timer ring mid playback) + - if: + condition: + media_player.is_announcing: + id: external_media_player + then: + media_player.stop: + announcement: true + id: external_media_player + # Set back ducking ratio to zero + - mixer_speaker.apply_ducking: + id: media_mixing_input + decibel_reduction: 0 + duration: 1.0s + # Refresh the LED ring + - script.execute: control_leds + on_turn_on: + # Duck audio + - mixer_speaker.apply_ducking: + id: media_mixing_input + decibel_reduction: 20 + duration: 0.0s + # Enable stop wake word + - micro_wake_word.enable_model: stop + # Ring timer + - script.execute: ring_timer + # Refresh LED + - script.execute: control_leds + # If 15 minutes have passed and the timer is still ringing, stop it. + - delay: 15min + - switch.turn_off: timer_ringing + - platform: gpio + pin: GPIO47 + id: internal_speaker_amp + name: "Internal speaker amp" + entity_category: config + restore_mode: ALWAYS_OFF + internal: true + +binary_sensor: + # Center Button. Used for many things (See on_multi_click) + - platform: gpio + id: center_button + pin: + number: GPIO0 + inverted: true + on_press: + - script.execute: control_leds + on_release: + - script.execute: control_leds + # If a factory reset is requested, factory reset on release + - if: + condition: + lambda: return id(factory_reset_requested); + then: + - button.press: factory_reset_button + on_multi_click: + # Simple Click: + # - Abort "things" in order + # - Timer + # - Announcements + # - Voice Assistant Pipeline run + # - Music + # - Starts the voice assistant if it is not yet running and if the device is not muted. 
+ - timing: + - ON for at most 1s + - OFF for at least 0.25s + then: + - if: + condition: + lambda: return !id(init_in_progress) && !id(color_changed) && !id(group_volume_changed); + then: + - if: + condition: + switch.is_on: timer_ringing + then: + - switch.turn_off: timer_ringing + else: + - if: + condition: + voice_assistant.is_running: + then: + - voice_assistant.stop: + else: + - if: + condition: + media_player.is_announcing: + id: external_media_player + then: + media_player.stop: + announcement: true + id: external_media_player + else: + - if: + condition: + media_player.is_playing: + id: external_media_player + then: + - media_player.pause: + id: external_media_player + else: + - if: + condition: + and: + - switch.is_off: master_mute_switch + - not: voice_assistant.is_running + then: + - script.execute: + id: play_sound + priority: true + sound_file: "center_button_press_sound" + - delay: 300ms + - voice_assistant.start: + # Double Click + # . Exposed as an event entity. To be used in automations inside Home Assistant + - timing: + - ON for at most 1s + - OFF for at most 0.25s + - ON for at most 1s + - OFF for at least 0.25s + then: + - if: + condition: + lambda: return !id(init_in_progress) && !id(color_changed) && !id(group_volume_changed); + then: + - script.execute: + id: play_sound + priority: false + sound_file: "center_button_double_press_sound" + - event.trigger: + id: button_press_event + event_type: "double_press" + # Triple Click + # . Exposed as an event entity. 
To be used in automations inside Home Assistant + - timing: + - ON for at most 1s + - OFF for at most 0.25s + - ON for at most 1s + - OFF for at most 0.25s + - ON for at most 1s + - OFF for at least 0.25s + then: + - if: + condition: + lambda: return !id(init_in_progress) && !id(color_changed) && !id(group_volume_changed); + then: + - script.execute: + id: play_sound + priority: false + sound_file: "center_button_triple_press_sound" + - event.trigger: + id: button_press_event + event_type: "triple_press" + # Long Press + # . Exposed as an event entity. To be used in automations inside Home Assistant + - timing: + - ON for at least 1s + then: + - if: + condition: + lambda: return !id(init_in_progress) && !id(color_changed) && !id(group_volume_changed); + then: + - script.execute: + id: play_sound + priority: false + sound_file: "center_button_long_press_sound" + - light.turn_off: voice_assistant_leds + - event.trigger: + id: button_press_event + event_type: "long_press" + # Very important do not remove. Trust me :D + - timing: + # H .... + - ON for at most 0.2s + - OFF for 0s to 2s + - ON for at most 0.2s + - OFF for 0s to 2s + - ON for at most 0.2s + - OFF for 0s to 2s + - ON for at most 0.2s + - OFF for 0.5s to 2s + # A ._ + - ON for at most 0.2s + - OFF for 0s to 2s + - ON for 0.2s to 2s + then: + - if: + condition: + lambda: return !id(init_in_progress); + then: + - light.turn_on: + brightness: 100% + id: voice_assistant_leds + effect: "Tick" + - script.execute: + id: play_sound + priority: true + sound_file: "easter_egg_tick_sound" + - delay: 4s + - light.turn_off: voice_assistant_leds + - script.execute: + id: play_sound + priority: true + sound_file: "easter_egg_tada_sound" + - light.turn_on: + brightness: 100% + id: voice_assistant_leds + effect: "Rainbow" + - event.trigger: + id: button_press_event + event_type: "easter_egg_press" + # Factory Reset Warning + # . Audible and Visible warning. 
+ - timing: + - ON for at least 10s + then: + - if: + condition: + lambda: return !id(dial_touched); + then: + - light.turn_on: + brightness: 100% + id: voice_assistant_leds + effect: "Factory Reset Coming Up" + - script.execute: + id: play_sound + priority: true + sound_file: "factory_reset_initiated_sound" + - wait_until: + binary_sensor.is_off: center_button + - if: + condition: + lambda: return !id(factory_reset_requested); + then: + - light.turn_off: voice_assistant_leds + - script.execute: + id: play_sound + priority: true + sound_file: "factory_reset_cancelled_sound" + # Factory Reset Confirmed. + # . Audible warning to prompt user to release the button + # . Set factory_reset_requested to true + - timing: + - ON for at least 22s + then: + - if: + condition: + lambda: return !id(dial_touched); + then: + - script.execute: + id: play_sound + priority: true + sound_file: "factory_reset_confirmed_sound" + - light.turn_on: + brightness: 100% + red: 100% + green: 0% + blue: 0% + id: voice_assistant_leds + effect: "none" + - lambda: id(factory_reset_requested) = true; + + # Hardware mute switch (Side of the device) + - platform: gpio + id: hardware_mute_switch + internal: true + pin: GPIO3 + on_press: + # Play mute on sound only if software mute isn't enabled + - if: + condition: + - switch.is_off: master_mute_switch + then: + - script.execute: + id: play_sound + priority: false + sound_file: "mute_switch_on_sound" + on_release: + - script.execute: + id: play_sound + priority: false + sound_file: "mute_switch_off_sound" + - microphone.unmute: + # Audio Jack Plugged sensor + - platform: gpio + id: jack_plugged + # Debouncing it a bit because it can be activated back and forth as you plug the audio jack + filters: + - delayed_on: 200ms + - delayed_off: 200ms + pin: + number: GPIO17 + # When the jack is plugged in: + # - LED animation + # - Sound played + on_press: + - lambda: id(jack_plugged_recently) = true; + - script.execute: control_leds + - delay: 200ms + - 
script.execute: + id: play_sound + priority: false + sound_file: "jack_connected_sound" + - delay: 800ms + - lambda: id(jack_plugged_recently) = false; + - script.execute: control_leds + # When the jack is unplugged: + # - LED animation + # - Sound played + on_release: + - lambda: id(jack_unplugged_recently) = true; + - script.execute: control_leds + - delay: 200ms + - script.execute: + id: play_sound + priority: false + sound_file: "jack_disconnected_sound" + - delay: 800ms + - lambda: id(jack_unplugged_recently) = false; + - script.execute: control_leds + +light: + # Hardware LED ring. Not used because remapping needed + - platform: esp32_rmt_led_strip + id: leds_internal + pin: GPIO21 + chipset: WS2812 + max_refresh_rate: 15ms + num_leds: 12 + rgb_order: GRB + rmt_symbols: 192 + default_transition_length: 0ms + power_supply: led_power + + # Voice Assistant LED ring. Remapping of the internal LED. + # This light is not exposed. The device controls it + - platform: partition + id: voice_assistant_leds + internal: true + default_transition_length: 0ms + segments: + - id: leds_internal + from: 7 + to: 11 + - id: leds_internal + from: 0 + to: 6 + effects: + - addressable_lambda: + name: "Waiting for Command" + update_interval: 100ms + lambda: |- + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + for (uint8_t i = 0; i < 12; i++) { + if (i == id(global_led_animation_index) % 12) { + it[i] = color; + } else if (i == (id(global_led_animation_index) + 11) % 12) { + it[i] = color * 192; + } else if (i == (id(global_led_animation_index) + 10) % 12) { + it[i] = color * 128; + } else if (i == (id(global_led_animation_index) + 6) % 12) { + it[i] = color; + } else if (i == (id(global_led_animation_index) + 5) % 12) { + it[i] = color * 192; + } else if (i == (id(global_led_animation_index) + 4) % 12) { + it[i] = color * 128; + } else { + it[i] = Color::BLACK; + } + } + 
id(global_led_animation_index) = (id(global_led_animation_index) + 1) % 12; + - addressable_lambda: + name: "Listening For Command" + update_interval: 50ms + lambda: |- + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + for (uint8_t i = 0; i < 12; i++) { + if (i == id(global_led_animation_index) % 12) { + it[i] = color; + } else if (i == (id(global_led_animation_index) + 11) % 12) { + it[i] = color * 192; + } else if (i == (id(global_led_animation_index) + 10) % 12) { + it[i] = color * 128; + } else if (i == (id(global_led_animation_index) + 6) % 12) { + it[i] = color; + } else if (i == (id(global_led_animation_index) + 5) % 12) { + it[i] = color * 192; + } else if (i == (id(global_led_animation_index) + 4) % 12) { + it[i] = color * 128; + } else { + it[i] = Color::BLACK; + } + } + id(global_led_animation_index) = (id(global_led_animation_index) + 1) % 12; + - addressable_lambda: + name: "Thinking" + update_interval: 10ms + lambda: |- + static uint8_t brightness_step = 0; + static bool brightness_decreasing = true; + static uint8_t brightness_step_number = 10; + if (initial_run) { + brightness_step = 0; + brightness_decreasing = true; + } + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + for (uint8_t i = 0; i < 12; i++) { + if (i == id(global_led_animation_index) % 12) { + it[i] = color * uint8_t(255/brightness_step_number*(brightness_step_number-brightness_step)); + } else if (i == (id(global_led_animation_index) + 6) % 12) { + it[i] = color * uint8_t(255/brightness_step_number*(brightness_step_number-brightness_step)); + } else { + it[i] = Color::BLACK; + } + } + if (brightness_decreasing) { + brightness_step++; + } else { + brightness_step--; + } + if (brightness_step == 0 || brightness_step == brightness_step_number) { + brightness_decreasing = 
!brightness_decreasing; + } + - addressable_lambda: + name: "Replying" + update_interval: 50ms + lambda: |- + id(global_led_animation_index) = (12 + id(global_led_animation_index) - 1) % 12; + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + for (uint8_t i = 0; i < 12; i++) { + if (i == (id(global_led_animation_index)) % 12) { + it[i] = color; + } else if (i == ( id(global_led_animation_index) + 1) % 12) { + it[i] = color * 192; + } else if (i == ( id(global_led_animation_index) + 2) % 12) { + it[i] = color * 128; + } else if (i == ( id(global_led_animation_index) + 6) % 12) { + it[i] = color; + } else if (i == ( id(global_led_animation_index) + 7) % 12) { + it[i] = color * 192; + } else if (i == ( id(global_led_animation_index) + 8) % 12) { + it[i] = color * 128; + } else { + it[i] = Color::BLACK; + } + } + - addressable_lambda: + name: "Muted or Silent" + update_interval: 16ms + lambda: |- + static int8_t index = 0; + Color muted_color(255, 0, 0); + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + for (uint8_t i = 0; i < 12; i++) { + if ( light_color.get_state() ) { + it[i] = color; + } else { + it[i] = Color::BLACK; + } + } + if ( id(master_mute_switch).state ) { + it[2] = Color::BLACK; + it[3] = muted_color; + it[4] = Color::BLACK; + it[8] = Color::BLACK; + it[9] = muted_color; + it[10] = Color::BLACK; + } + if ( id(external_media_player).volume == 0.0f || id(external_media_player).is_muted() ) { + it[5] = Color::BLACK; + it[6] = muted_color; + it[7] = Color::BLACK; + } + - addressable_lambda: + name: "Voice kit startup failed" + # update_interval: 16ms + lambda: |- + static int8_t index = 0; + Color fail_color(255, 0, 0); + for (uint8_t i = 0; i < 12; i++) { + if (i % 3) { + it[i] = Color::BLACK; + } else { + it[i] = fail_color; + } + } + - 
addressable_lambda: + name: "Volume Display" + update_interval: 50ms + lambda: |- + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + Color silenced_color(255, 0, 0); + auto volume_ratio = 12.0f * id(external_media_player).volume; + for (uint8_t i = 0; i < 12; i++) { + if (i <= volume_ratio) { + it[(6+i)%12] = color * min( 255.0f * (volume_ratio - i) , 255.0f ) ; + } else { + it[(6+i)%12] = Color::BLACK; + } + } + if (id(external_media_player).volume == 0.0f) { + it[6] = silenced_color; + } + - addressable_lambda: + name: "Center Button Touched" + update_interval: 16ms + lambda: |- + if (initial_run) { + // set voice_assistant_leds light to colors based on led_ring + auto led_ring_cv = id(led_ring).current_values; + auto va_leds_call = id(voice_assistant_leds).make_call(); + va_leds_call.from_light_color_values(led_ring_cv); + va_leds_call.set_brightness( min ( max( id(led_ring).current_values.get_brightness() , 0.2f ) + 0.1f , 1.0f ) ); + va_leds_call.set_state(true); + va_leds_call.perform(); + } + auto light_color = id(voice_assistant_leds).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + for (uint8_t i = 0; i < 12; i++) { + it[i] = color; + } + - addressable_twinkle: + name: "Twinkle" + twinkle_probability: 50% + - addressable_lambda: + name: "Error" + update_interval: 10ms + lambda: |- + static uint8_t brightness_step = 0; + static bool brightness_decreasing = true; + static uint8_t brightness_step_number = 10; + if (initial_run) { + brightness_step = 0; + brightness_decreasing = true; + } + Color error_color(255, 0, 0); + for (uint8_t i = 0; i < 12; i++) { + it[i] = error_color * uint8_t(255/brightness_step_number*(brightness_step_number-brightness_step)); + } + if (brightness_decreasing) { + brightness_step++; + } else { + brightness_step--; + } + if (brightness_step == 0 || 
brightness_step == brightness_step_number) { + brightness_decreasing = !brightness_decreasing; + } + - addressable_lambda: + name: "Timer Ring" + update_interval: 10ms + lambda: |- + static uint8_t brightness_step = 0; + static bool brightness_decreasing = true; + static uint8_t brightness_step_number = 10; + if (initial_run) { + brightness_step = 0; + brightness_decreasing = true; + } + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + Color muted_color(255, 0, 0); + for (uint8_t i = 0; i < 12; i++) { + it[i] = color * uint8_t(255/brightness_step_number*(brightness_step_number-brightness_step)); + } + if ( id(master_mute_switch).state ) { + it[3] = muted_color; + it[9] = muted_color; + } + if (brightness_decreasing) { + brightness_step++; + } else { + brightness_step--; + } + if (brightness_step == 0 || brightness_step == brightness_step_number) { + brightness_decreasing = !brightness_decreasing; + } + - addressable_lambda: + name: "Timer Tick" + update_interval: 100ms + lambda: |- + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + Color muted_color(255, 0, 0); + // Clamp the divisor to at least 1 so a timer whose total_seconds is 0 cannot divide by zero. + auto timer_ratio = 12.0f * id(first_active_timer).seconds_left / max(id(first_active_timer).total_seconds , static_cast<uint32_t>(1)); + uint8_t last_led_on = static_cast<uint8_t>(ceil(timer_ratio)) - 1; + for (uint8_t i = 0; i < 12; i++) { + float brightness_dip = ( i == id(global_led_animation_index) % 12 && i != last_led_on ) ? 
0.9f : 1.0f ; + if (i <= timer_ratio) { + it[i] = color * min(255.0f * brightness_dip * (timer_ratio - i) , 255.0f * brightness_dip) ; + } else { + it[i] = Color::BLACK; + } + } + if (id(master_mute_switch).state) { + it[2] = Color::BLACK; + it[3] = muted_color; + it[4] = Color::BLACK; + it[8] = Color::BLACK; + it[9] = muted_color; + it[10] = Color::BLACK; + } + id(global_led_animation_index) = (12 + id(global_led_animation_index) - 1) % 12; + - addressable_rainbow: + name: "Rainbow" + width: 12 + - addressable_lambda: + name: "Tick" + update_interval: 333ms + lambda: |- + static uint8_t index = 0; + Color color(255, 0, 0); + if (initial_run) { + index = 0; + } + for (uint8_t i = 0; i < 12; i++) { + if (i <= index ) { + it[i] = Color::BLACK; + } else { + it[i] = color; + } + } + index = (index + 1) % 12; + - addressable_lambda: + name: "Factory Reset Coming Up" + update_interval: 1s + lambda: |- + static uint8_t index = 0; + Color color(255, 0, 0); + if (initial_run) { + index = 0; + } + for (uint8_t i = 0; i < 12; i++) { + if (i <= index ) { + it[i] = color; + } else { + it[i] = Color::BLACK; + } + } + index = (index + 1) % 12; + - addressable_lambda: + name: "Jack Plugged" + update_interval: 40ms + lambda: |- + static uint8_t index = 0; + if (initial_run) { + index = 0; + } + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + if (index <= 6) { + for (uint8_t i = 0; i < 12; i++) { + if (i == index) { + it[i] = color; + } else if (i == (12 - index) % 12) { + it[i] = color; + } else { + it[i] = Color::BLACK; + } + } + } + index = (index + 1); + - addressable_lambda: + name: "Jack Unplugged" + update_interval: 40ms + lambda: |- + static uint8_t index = 0; + if (initial_run) { + index = 0; + } + auto light_color = id(led_ring).current_values; + Color color(light_color.get_red() * 255, light_color.get_green() * 255, + light_color.get_blue() * 255); + if (index <= 
6) { + for (uint8_t i = 0; i < 12; i++) { + if (i == 6 - index) { + it[i] = color; + } else if (i == (6 + index) % 12) { + it[i] = color; + } else { + it[i] = Color::BLACK; + } + } + } + index = (index + 1); + + # User facing LED ring. Remapping of the internal LEDs. + # Exposed to be used by the user. + - platform: partition + id: led_ring + name: LED Ring + entity_category: config + icon: "mdi:circle-outline" + default_transition_length: 0ms + restore_mode: RESTORE_DEFAULT_OFF + initial_state: + color_mode: rgb + brightness: 66% + red: 9.4% + green: 73.3% + blue: 94.9% + segments: + - id: leds_internal + from: 7 + to: 11 + - id: leds_internal + from: 0 + to: 6 + +power_supply: + - id: led_power + pin: GPIO45 + +sensor: + # The dial. Used to control volume and Hue of the LED ring. + - platform: rotary_encoder + id: dial + pin_a: GPIO16 + pin_b: GPIO18 + resolution: 2 + on_clockwise: + - lambda: id(dial_touched) = true; + - if: + condition: + binary_sensor.is_off: center_button + then: + - script.execute: + id: control_volume + increase_volume: true + else: + - if: + condition: + media_player.is_playing: + id: sendspin_group_media_player + then: + - script.execute: + id: control_group_volume + increase_volume: true + else: + - script.execute: + id: control_hue + increase_hue: true + on_anticlockwise: + - lambda: id(dial_touched) = true; + - if: + condition: + binary_sensor.is_off: center_button + then: + - script.execute: + id: control_volume + increase_volume: false + else: + - if: + condition: + media_player.is_playing: + id: sendspin_group_media_player + then: + - script.execute: + id: control_group_volume + increase_volume: false + else: + - script.execute: + id: control_hue + increase_hue: false + +event: + # Event entity exposed to the user to automate on complex center button presses. + # The simple press is not exposed as it is used to control the device itself. 
+ - platform: template + id: button_press_event + name: "Button press" + icon: mdi:button-pointer + device_class: button + event_types: + - double_press + - triple_press + - long_press + - easter_egg_press + +script: + # Master script controlling the LEDs, based on different conditions : initialization in progress, wifi and api connected and voice assistant phase. + # For the sake of simplicity and re-usability, the script calls child scripts defined below. + # This script will be called every time one of these conditions is changing. + - id: control_leds + then: + - lambda: | + if (id(voice_kit_component).is_failed()) { + id(control_leds_voice_kit_startup_failed).execute(); + return; + } + id(check_if_timers_active).execute(); + if (id(is_timer_active)){ + id(fetch_first_active_timer).execute(); + } + if (id(improv_ble_in_progress)) { + id(control_leds_improv_ble_state).execute(); + } else if (id(init_in_progress)) { + id(control_leds_init_state).execute(); + } else if (!id(wifi_id).is_connected() || !id(api_id).is_connected()){ + id(control_leds_no_ha_connection_state).execute(); + } else if (id(center_button).state) { + id(control_leds_center_button_touched).execute(); + } else if (id(jack_plugged_recently)) { + id(control_leds_jack_plugged_recently).execute(); + } else if (id(jack_unplugged_recently)) { + id(control_leds_jack_unplugged_recently).execute(); + } else if (id(dial_touched)) { + id(control_leds_dial_touched).execute(); + } else if (id(timer_ringing).state) { + id(control_leds_timer_ringing).execute(); + } else if (id(voice_assistant_phase) == ${voice_assist_waiting_for_command_phase_id}) { + id(control_leds_voice_assistant_waiting_for_command_phase).execute(); + } else if (id(voice_assistant_phase) == ${voice_assist_listening_for_command_phase_id}) { + id(control_leds_voice_assistant_listening_for_command_phase).execute(); + } else if (id(voice_assistant_phase) == ${voice_assist_thinking_phase_id}) { + 
id(control_leds_voice_assistant_thinking_phase).execute(); + } else if (id(voice_assistant_phase) == ${voice_assist_replying_phase_id}) { + id(control_leds_voice_assistant_replying_phase).execute(); + } else if (id(voice_assistant_phase) == ${voice_assist_error_phase_id}) { + id(control_leds_voice_assistant_error_phase).execute(); + } else if (id(voice_assistant_phase) == ${voice_assist_not_ready_phase_id}) { + id(control_leds_voice_assistant_not_ready_phase).execute(); + } else if (id(is_timer_active)) { + id(control_leds_timer_ticking).execute(); + } else if (id(master_mute_switch).state) { + id(control_leds_muted_or_silent).execute(); + } else if (id(external_media_player).volume == 0.0f || id(external_media_player).is_muted()) { + id(control_leds_muted_or_silent).execute(); + } else if (id(voice_assistant_phase) == ${voice_assist_idle_phase_id}) { + id(control_leds_voice_assistant_idle_phase).execute(); + } + + # Script executed if voice_kit startup failed + # Static red "X" + - id: control_leds_voice_kit_startup_failed + then: + - light.turn_on: + brightness: 40% + red: 0% + green: 0% + blue: 0% + id: voice_assistant_leds + effect: "Voice kit startup failed" + + # Script executed during Improv BLE + # Warm White Twinkle + - id: control_leds_improv_ble_state + then: + - light.turn_on: + brightness: 66% + red: 100% + green: 89% + blue: 71% + id: voice_assistant_leds + effect: "Twinkle" + + # Script executed during initialization + # Blue Twinkle if Wifi is connected, Else solid warm white + - id: control_leds_init_state + then: + - if: + condition: + wifi.connected: + then: + - light.turn_on: + brightness: 66% + red: 9.4% + green: 73.3% + blue: 94.9% + id: voice_assistant_leds + effect: "Twinkle" + else: + - light.turn_on: + brightness: 66% + red: 100% + green: 89% + blue: 71% + id: voice_assistant_leds + effect: "none" + + # Script executed when the device has no connection to Home Assistant + # Red Twinkle (This will be visible during HA updates for example) + 
- id: control_leds_no_ha_connection_state + then: + - light.turn_on: + brightness: 66% + red: 1 + green: 0 + blue: 0 + id: voice_assistant_leds + effect: "Twinkle" + + # Script executed when the voice assistant is idle (waiting for a wake word) + # Nothing (Either LED ring off or LED ring on if the user decided to turn the user facing LED ring on) + - id: control_leds_voice_assistant_idle_phase + then: + - light.turn_off: voice_assistant_leds + - if: + condition: + light.is_on: led_ring + then: + light.turn_on: led_ring + + # Script executed when the voice assistant is waiting for a command (After the wake word) + # Slow clockwise spin of the LED ring. + - id: control_leds_voice_assistant_waiting_for_command_phase + then: + - light.turn_on: + brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); + id: voice_assistant_leds + effect: "Waiting for Command" + + # Script executed when the voice assistant is listening to a command + # Fast clockwise spin of the LED ring. + - id: control_leds_voice_assistant_listening_for_command_phase + then: + - light.turn_on: + brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); + id: voice_assistant_leds + effect: "Listening For Command" + + # Script executed when the voice assistant is processing a command (thinking phase) + # The spin stops and the 2 LEDs that are currently on blink, indicating the command is being processed. + - id: control_leds_voice_assistant_thinking_phase + then: + - light.turn_on: + brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); + id: voice_assistant_leds + effect: "Thinking" + + # Script executed when the voice assistant is replying to a command + # Fast anticlockwise spin of the LED ring. 
+ - id: control_leds_voice_assistant_replying_phase + then: + - light.turn_on: + brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); + id: voice_assistant_leds + effect: "Replying" + + # Script executed when the voice assistant is in error + # Fast Red Pulse + - id: control_leds_voice_assistant_error_phase + then: + - light.turn_on: + brightness: !lambda return min ( max( id(led_ring).current_values.get_brightness() , 0.2f ) + 0.1f , 1.0f ); + red: 1 + green: 0 + blue: 0 + id: voice_assistant_leds + effect: "Error" + + # Script executed when the voice assistant is muted or silent + # The LED next to the 2 microphones turn red / one red LED next to the speaker grill + - id: control_leds_muted_or_silent + then: + - light.turn_on: + brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); + id: voice_assistant_leds + effect: "Muted or Silent" + + # Script executed when the voice assistant is not ready + - id: control_leds_voice_assistant_not_ready_phase + then: + - light.turn_on: + brightness: 66% + red: 1 + green: 0 + blue: 0 + id: voice_assistant_leds + effect: "Twinkle" + + # Script executed when the dial is touched + # A number of LEDs turn on indicating a visual representation of the volume of the media player entity. 
+ - id: control_leds_dial_touched + then: + - light.turn_on: + brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); + id: voice_assistant_leds + effect: "Volume Display" + + # Script executed when the jack has just been unplugged + # A ripple effect + - id: control_leds_jack_unplugged_recently + then: + - light.turn_on: + brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); + id: voice_assistant_leds + effect: "Jack Unplugged" + + # Script executed when the jack has just been plugged + # A ripple effect + - id: control_leds_jack_plugged_recently + then: + - light.turn_on: + brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); + id: voice_assistant_leds + effect: "Jack Plugged" + + # Script executed when the center button is touched + # The complete LED ring turns on + - id: control_leds_center_button_touched + then: + - light.turn_on: + brightness: !lambda return min ( max( id(led_ring).current_values.get_brightness() , 0.2f ) + 0.1f , 1.0f ); + id: voice_assistant_leds + effect: "Center Button Touched" + + # Script executed when the timer is ringing, to control the LEDs + # The LED ring blinks. + - id: control_leds_timer_ringing + then: + - light.turn_on: + brightness: !lambda return min ( max( id(led_ring).current_values.get_brightness() , 0.2f ) + 0.1f , 1.0f ); + id: voice_assistant_leds + effect: "Timer Ring" + + # Script executed when the timer is ticking, to control the LEDs + # The LEDs show the remaining time as a fraction of the full ring. + - id: control_leds_timer_ticking + then: + - light.turn_on: + brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); + id: voice_assistant_leds + effect: "Timer Tick" + + # Script executed when the volume is increased/decreased from the dial + - id: control_volume + mode: restart + parameters: + increase_volume: bool # True: Increase volume / False: Decrease volume. 
+ then: + - delay: 16ms + - if: + condition: + lambda: return increase_volume; + then: + - media_player.volume_up: + id: external_media_player + else: + - media_player.volume_down: + id: external_media_player + - script.execute: control_leds + - delay: 1s + - lambda: id(dial_touched) = false; + - sensor.rotary_encoder.set_value: + id: dial + value: 0 + - script.execute: control_leds + + # Script executed when the volume is increased/decreased from the dial for the group media player + - id: control_group_volume + mode: restart + parameters: + increase_volume: bool # True: Increase volume / False: Decrease volume. + then: + - delay: 16ms + - if: + condition: + lambda: return increase_volume; + then: + - lambda: id(group_volume_changed) = true; + - media_player.volume_up: + id: sendspin_group_media_player + else: + - lambda: id(group_volume_changed) = true; + - media_player.volume_down: + id: sendspin_group_media_player + - script.execute: control_leds + - delay: 1s + - lambda: id(dial_touched) = false; + - lambda: id(group_volume_changed) = false; + - sensor.rotary_encoder.set_value: + id: dial + value: 0 + - script.execute: control_leds + + # Script executed when the hue is increased/decreased from the dial + - id: control_hue + mode: restart + parameters: + increase_hue: bool # True: Increase hue / False: Decrease hue. 
+ then: + - delay: 16ms + - if: + condition: + lambda: return(abs(int(id(dial).state)) > 3 || id(color_changed)); + then: + - lambda: | + id(color_changed) = true; + auto light_color = id(voice_assistant_leds).current_values; + int hue = 0; + float saturation = 0; + float value = 0; + rgb_to_hsv( light_color.get_red(), + light_color.get_green(), + light_color.get_blue(), + hue, + saturation, + value); + if (increase_hue) { + hue = (hue + 10) % 360; + } else { + hue = (hue + 350) % 360; + } + if (saturation < 0.05) { + saturation = 1; + } + float red = 0; + float green = 0; + float blue = 0; + hsv_to_rgb( hue, + saturation, + value, + red, + green, + blue); + id(voice_assistant_leds).make_call().set_rgb(red, green, blue).perform(); + - wait_until: + binary_sensor.is_off: center_button + - lambda: | + id(dial_touched) = false; + // now we "save" the new LED color/state to led_ring, maintaining its brightness and state + auto led_ring_call = id(led_ring).make_call(); + auto va_leds_cv = id(voice_assistant_leds).current_values; + led_ring_call.from_light_color_values(va_leds_cv); + led_ring_call.set_brightness(id(led_ring).current_values.get_brightness()); + led_ring_call.set_state(id(led_ring).current_values.is_on()); + led_ring_call.perform(); + - sensor.rotary_encoder.set_value: + id: dial + value: 0 + - script.execute: control_leds + - delay: 500ms + - lambda: id(color_changed) = false; + + # Script executed when the timer is ringing, to playback sounds. + - id: ring_timer + then: + - script.execute: enable_repeat_one + - script.execute: + id: play_sound + priority: true + sound_file: "timer_finished_sound" + + # Script executed when the timer is ringing, to repeat the timer finished sound. 
+ - id: enable_repeat_one + then: + - media_player.repeat_one: + id: external_media_player + announcement: true + # Turn on the repeat mode and pause for 500 ms between playlist items/repeats + - lambda: |- + id(external_media_player)->set_playlist_delay_ms(1, 500); + + # Script executed when the timer is done ringing, to disable repeat mode. + - id: disable_repeat + then: + # Turn off the repeat mode and pause for 0 ms between playlist items/repeats + - media_player.repeat_off: + id: external_media_player + announcement: true + - lambda: |- + id(external_media_player)->set_playlist_delay_ms(1, 0); + + # Script executed when we want to play sounds on the device. + # With priority set, the current announcement is stopped first; without it, the sound is only played when no announcement is running. + - id: play_sound + parameters: + priority: bool + sound_file: string + then: + - if: + condition: + lambda: return priority; + then: + - media_player.stop: + id: external_media_player + announcement: true + - lambda: |- + if ( (id(external_media_player).state != media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING ) || priority) { + id(external_media_player) + ->make_call() + .set_media_url("file://" + sound_file) + .set_announcement(true) + .perform(); + } + + # Script used to fetch the first active timer (Stored in global first_active_timer) + # Selects the active timer with the fewest seconds left; the global is left unchanged when no timer is active. + - id: fetch_first_active_timer + then: + - lambda: | + const auto &timers = id(va).get_timers(); + // Track the candidate by iterator so an empty list is never dereferenced + // and an inactive timer can never be selected as the result. 
+ auto soonest = timers.end(); + for (auto it = timers.begin(); it != timers.end(); ++it) { + if (!it->is_active) { + continue; + } + if (soonest == timers.end() || it->seconds_left <= soonest->seconds_left) { + soonest = it; + } + } + if (soonest != timers.end()) { + id(first_active_timer) = *soonest; + } + + # Script used to check if a timer is active (Stored in global is_timer_active) + - id: check_if_timers_active + then: + - lambda: | + const auto &timers = id(va).get_timers(); + bool output = false; + for (const auto &timer : timers) { + if (timer.is_active) { + output = true; + } + } + id(is_timer_active) = output; + + # Script used to activate the stop word if the TTS step is long. + # Why is this wrapped in a script? 
+ # Because we want to stop the sequence if the TTS step is faster than that. + # This allows us to prevent having the deactivation of the stop word before its own activation. + - id: activate_stop_word_once + then: + - wait_until: + condition: + media_player.is_announcing: + id: external_media_player + timeout: 5s + - delay: 1s + # Enable stop wake word + - if: + condition: + switch.is_off: timer_ringing + then: + - micro_wake_word.enable_model: stop + - wait_until: + not: + media_player.is_announcing: + id: external_media_player + - if: + condition: + switch.is_off: timer_ringing + then: + - micro_wake_word.disable_model: stop + +i2s_audio: + - id: i2s_output + # i2s_output data pin is gpio10 + i2s_lrclk_pin: + number: GPIO7 + i2s_bclk_pin: + number: GPIO8 + + - id: i2s_input + # data line is GPIO15 + i2s_lrclk_pin: + number: GPIO14 + i2s_bclk_pin: + number: GPIO13 + +microphone: + - platform: i2s_audio + id: i2s_mics + i2s_din_pin: GPIO15 + adc_type: external + pdm: false + sample_rate: 16000 + bits_per_sample: 32bit + i2s_mode: secondary + i2s_audio_id: i2s_input + channel: stereo + +speaker: + # Hardware speaker output + - platform: i2s_audio + id: i2s_audio_speaker + sample_rate: 48000 + i2s_mode: secondary + i2s_dout_pin: GPIO10 + bits_per_sample: 32bit + i2s_audio_id: i2s_output + dac_type: external + channel: stereo + timeout: never + buffer_duration: 100ms + audio_dac: aic3204_dac + + # Virtual speakers to combine the announcement and media streams together into one output + - platform: mixer + id: mixing_speaker + output_speaker: i2s_audio_speaker + num_channels: 2 + task_stack_in_psram: true + source_speakers: + - id: announcement_mixing_input + timeout: never + - id: media_mixing_input + timeout: never + + # Virtual speakers to resample each pipeline's audio, if necessary, as the mixer speaker requires the same sample rate + - platform: resampler + id: announcement_resampling_speaker + output_speaker: announcement_mixing_input + sample_rate: 48000 + 
bits_per_sample: 16 + - platform: resampler + id: media_resampling_speaker + output_speaker: media_mixing_input + sample_rate: 48000 + bits_per_sample: 16 + +sendspin: + id: sendspin_hub + task_stack_in_psram: true + kalman_process_error: 0.01 + +http_request: + +media_source: + - platform: sendspin + id: sendspin_source + - platform: http_request + id: http_source + buffer_size: 500000 + - platform: file + id: file_source + files: + - id: center_button_press_sound + file: ${center_button_press_sound_file} + - id: center_button_double_press_sound + file: ${center_button_double_press_sound_file} + - id: center_button_triple_press_sound + file: ${center_button_triple_press_sound_file} + - id: center_button_long_press_sound + file: ${center_button_long_press_sound_file} + - id: factory_reset_initiated_sound + file: ${factory_reset_initiated_sound_file} + - id: factory_reset_cancelled_sound + file: ${factory_reset_cancelled_sound_file} + - id: factory_reset_confirmed_sound + file: ${factory_reset_confirmed_sound_file} + - id: jack_connected_sound + file: ${jack_connected_sound_file} + - id: jack_disconnected_sound + file: ${jack_disconnected_sound_file} + - id: mute_switch_on_sound + file: ${mute_switch_on_sound_file} + - id: mute_switch_off_sound + file: ${mute_switch_off_sound_file} + - id: timer_finished_sound + file: ${timer_finished_sound_file} + - id: wake_word_triggered_sound + file: ${wake_word_triggered_sound_file} + - id: easter_egg_tick_sound + file: ${easter_egg_tick_sound_file} + - id: easter_egg_tada_sound + file: ${easter_egg_tada_sound_file} + - id: error_cloud_expired + file: ${error_cloud_expired_sound_file} + +media_player: + - platform: sendspin + id: sendspin_group_media_player + - platform: speaker_source + id: external_media_player + name: Media Player + announcement_speaker: announcement_resampling_speaker + media_speaker: media_resampling_speaker + announcement_pipeline: + format: FLAC # FLAC is the least processor intensive codec + 
num_channels: 1 # Stereo audio is unnecessary for announcements + sample_rate: 48000 + media_pipeline: + format: FLAC # FLAC is the least processor intensive codec + num_channels: 2 + sample_rate: 48000 + volume_increment: 0.05 + volume_min: 0.4 + volume_max: 0.85 + sources: + - file_source + - http_source + - sendspin_source + on_mute: + - script.execute: control_leds + on_unmute: + - script.execute: control_leds + on_volume: + - script.execute: control_leds + on_announcement: + - mixer_speaker.apply_ducking: + id: media_mixing_input + decibel_reduction: 20 + duration: 0.0s + on_state: + if: + condition: + and: + - switch.is_off: timer_ringing + - not: + voice_assistant.is_running: + - not: + media_player.is_announcing: external_media_player + then: + - mixer_speaker.apply_ducking: + id: media_mixing_input + decibel_reduction: 0 + duration: 1.0s + +voice_kit: + id: voice_kit_component + i2c_id: internal_i2c + reset_pin: GPIO4 + firmware: + url: https://github.com/esphome/voice-kit-xmos-firmware/releases/download/v1.3.1/ffva_v1.3.1_upgrade.bin + version: "1.3.1" + md5: 964635c5bf125529dab14a2472a15401 + +# Sendspin related components are pinned to specific commits for reproducible builds +external_components: + - source: + type: git + url: https://github.com/esphome/home-assistant-voice-pe + ref: dev + components: + - voice_kit + refresh: 0s + - source: + # https://github.com/esphome/esphome/pull/12256 + type: git + url: https://github.com/esphome/esphome + ref: a2d98e1d5e020200db8f3caf27a74a939a661dc4 + components: [audio] + - source: + # https://github.com/esphome/esphome/pull/12258 + type: git + url: https://github.com/esphome/esphome + ref: b4b7c5b25ebe0f2ab988f700219fa3c57b2377b7 + components: [media_player] + - source: + # https://github.com/esphome/esphome/pull/12284 + type: git + url: https://github.com/esphome/esphome + ref: d48058e140c98f5c2d902661d851a6b712d62434 + components: [sendspin] + - source: + # https://github.com/esphome/esphome/pull/14013 + 
type: git + url: https://github.com/esphome/esphome + ref: 51dcce3d1f22865ebb458a5447bbc877ac946b5a + components: [mdns] + - source: + # https://github.com/esphome/esphome/pull/12429 + type: git + url: https://github.com/esphome/esphome + ref: b49b09b6ae56502aa3ce51be86f90d732d019b2c + refresh: 0s + components: [file, http_request, media_source, speaker_source] + +audio_dac: + - platform: aic3204 + id: aic3204_dac + i2c_id: internal_i2c + +micro_wake_word: + id: mww + microphone: + microphone: i2s_mics + channels: 1 + gain_factor: 4 + stop_after_detection: false + models: + - model: https://github.com/kahrendt/microWakeWord/releases/download/okay_nabu_20241226.3/okay_nabu.json + id: okay_nabu + - model: hey_jarvis + id: hey_jarvis + - model: hey_mycroft + id: hey_mycroft + - model: https://github.com/kahrendt/microWakeWord/releases/download/stop/stop.json + id: stop + internal: true + vad: + on_wake_word_detected: + # If the wake word is detected when the device is muted (Possible with the software mute switch): Do nothing + - if: + condition: + switch.is_off: master_mute_switch + then: + # If a timer is ringing: Stop it, do not start the voice assistant (We can stop timer from voice!) 
+ - if: + condition: + switch.is_on: timer_ringing + then: + - switch.turn_off: timer_ringing + # Stop voice assistant if running + else: + - if: + condition: + voice_assistant.is_running: + then: + voice_assistant.stop: + # Stop any other media player announcement + else: + - if: + condition: + media_player.is_announcing: + id: external_media_player + then: + - media_player.stop: + announcement: true + id: external_media_player + # Start the voice assistant and play the wake sound, if enabled + else: + - if: + condition: + switch.is_on: wake_sound + then: + - script.execute: + id: play_sound + priority: true + sound_file: "wake_word_triggered_sound" + - delay: 300ms + - voice_assistant.start: + wake_word: !lambda return wake_word; + +select: + - platform: template + name: "Wake word sensitivity" + optimistic: true + initial_option: Slightly sensitive + restore_value: true + entity_category: config + options: + - Slightly sensitive + - Moderately sensitive + - Very sensitive + on_value: + # Sets specific wake word probabilities computed for each particular model + # Note probability cutoffs are set as a quantized uint8 value, each comment has the corresponding floating point cutoff + # False Accepts per Hour values are tested against all units and channels from the Dinner Party Corpus. 
+ # These cutoffs apply only to the specific models included in the firmware: okay_nabu@20241226.3, hey_jarvis@v2, hey_mycroft@v2 + lambda: |- + if (x == "Slightly sensitive") { + id(okay_nabu).set_probability_cutoff(217); // 0.85 -> 0.000 FAPH on DipCo (Manifest's default) + id(hey_jarvis).set_probability_cutoff(247); // 0.97 -> 0.563 FAPH on DipCo (Manifest's default) + id(hey_mycroft).set_probability_cutoff(253); // 0.99 -> 0.567 FAPH on DipCo + } else if (x == "Moderately sensitive") { + id(okay_nabu).set_probability_cutoff(176); // 0.69 -> 0.376 FAPH on DipCo + id(hey_jarvis).set_probability_cutoff(235); // 0.92 -> 0.939 FAPH on DipCo + id(hey_mycroft).set_probability_cutoff(242); // 0.95 -> 1.502 FAPH on DipCo (Manifest's default) + } else if (x == "Very sensitive") { + id(okay_nabu).set_probability_cutoff(143); // 0.56 -> 0.751 FAPH on DipCo + id(hey_jarvis).set_probability_cutoff(212); // 0.83 -> 1.502 FAPH on DipCo + id(hey_mycroft).set_probability_cutoff(237); // 0.93 -> 1.878 FAPH on DipCo + } + +voice_assistant: + id: va + microphone: + microphone: i2s_mics + channels: 0 + media_player: external_media_player + micro_wake_word: mww + use_wake_word: false + noise_suppression_level: 0 + auto_gain: 0 dbfs + volume_multiplier: 1 + on_client_connected: + - lambda: id(init_in_progress) = false; + - micro_wake_word.start: + - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id}; + - script.execute: control_leds + on_client_disconnected: + - voice_assistant.stop: + - lambda: id(voice_assistant_phase) = ${voice_assist_not_ready_phase_id}; + - script.execute: control_leds + on_error: + # Only set the error phase if the error code is different than duplicate_wake_up_detected or stt-no-text-recognized + # These two are ignored for a better user experience + - if: + condition: + and: + - lambda: return !id(init_in_progress); + - lambda: return code != "duplicate_wake_up_detected"; + - lambda: return code != "stt-no-text-recognized"; + then: + - lambda: 
id(voice_assistant_phase) = ${voice_assist_error_phase_id}; + - script.execute: control_leds + # If the error code is cloud-auth-failed, serve a local audio file guiding the user. + - if: + condition: + - lambda: return code == "cloud-auth-failed"; + then: + - script.execute: + id: play_sound + priority: true + sound_file: "error_cloud_expired" + # When the voice assistant starts: Play a wake up sound, duck audio. + on_start: + - mixer_speaker.apply_ducking: + id: media_mixing_input + decibel_reduction: 20 # Number of dB quieter; higher implies more quiet, 0 implies full volume + duration: 0.0s # The duration of the transition (default is no transition) + on_listening: + - lambda: id(voice_assistant_phase) = ${voice_assist_waiting_for_command_phase_id}; + - script.execute: control_leds + on_stt_vad_start: + - lambda: id(voice_assistant_phase) = ${voice_assist_listening_for_command_phase_id}; + - script.execute: control_leds + on_stt_vad_end: + - lambda: id(voice_assistant_phase) = ${voice_assist_thinking_phase_id}; + - script.execute: control_leds + on_intent_progress: + - if: + condition: + # A nonempty x variable means a streaming TTS url was sent to the media player + lambda: 'return !x.empty();' + then: + - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id}; + - script.execute: control_leds + # Start a script that would potentially enable the stop word if the response is longer than a second + - script.execute: activate_stop_word_once + on_tts_start: + - if: + condition: + # The intent_progress trigger didn't start the TTS Reponse + lambda: 'return id(voice_assistant_phase) != ${voice_assist_replying_phase_id};' + then: + - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id}; + - script.execute: control_leds + # Start a script that would potentially enable the stop word if the response is longer than a second + - script.execute: activate_stop_word_once + # When the voice assistant ends ... 
+ on_end: + - wait_until: + not: + voice_assistant.is_running: + # Stop ducking audio. + - mixer_speaker.apply_ducking: + id: media_mixing_input + decibel_reduction: 0 + duration: 1.0s + # If the end happened because of an error, let the error phase on for a second + - if: + condition: + lambda: return id(voice_assistant_phase) == ${voice_assist_error_phase_id}; + then: + - delay: 1s + # Reset the voice assistant phase id and reset the LED animations. + - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id}; + - script.execute: control_leds + on_timer_finished: + - switch.turn_on: timer_ringing + on_timer_started: + - script.execute: control_leds + on_timer_cancelled: + - script.execute: control_leds + on_timer_updated: + - script.execute: control_leds + on_timer_tick: + - script.execute: control_leds + +button: + - platform: factory_reset + id: factory_reset_button + name: "Factory Reset" + entity_category: diagnostic + internal: true + - platform: restart + id: restart_button + name: "Restart" + entity_category: config + disabled_by_default: true + icon: "mdi:restart" + +debug: + update_interval: 5s diff --git a/extras/havpe-relay/flash_default.sh b/extras/havpe-relay/flash_default.sh new file mode 100755 index 00000000..0f351770 --- /dev/null +++ b/extras/havpe-relay/flash_default.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Flash default HA-style firmware for A/B audio comparison. +# +# Usage: +# ./flash_default.sh # compile and flash +# ./flash_default.sh logs # view device logs +# +# After flashing: +# uv run python capture_default_audio.py +# Hold the center button to stream audio, release to stop. + +set -e +cd "$(dirname "$0")/firmware" + +if [ ! -f secrets.yaml ]; then + echo "Error: firmware/secrets.yaml not found." + echo "Run ./init.sh and enable firmware setup, or:" + echo " cp secrets.template.yaml secrets.yaml" + echo " # then edit secrets.yaml with your WiFi and relay IP" + exit 1 +fi + +ACTION="${1:-run}" +cd .. 
+exec uv run --group firmware esphome "$ACTION" firmware/voice-default.yaml From 4f59cd80e40a8e9290481a14ca8510a3d3307160 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Mon, 16 Mar 2026 06:16:36 +0000 Subject: [PATCH 3/4] fix --- extras/havpe-relay/firmware/voice-default.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extras/havpe-relay/firmware/voice-default.yaml b/extras/havpe-relay/firmware/voice-default.yaml index a4b1bd82..4416c813 100644 --- a/extras/havpe-relay/firmware/voice-default.yaml +++ b/extras/havpe-relay/firmware/voice-default.yaml @@ -82,6 +82,8 @@ esp32: wifi: id: wifi_id + ssid: !secret wifi_ssid + password: !secret wifi_password fast_connect: ${hidden_ssid} on_connect: - lambda: id(improv_ble_in_progress) = false; From dbf74cb1d45ff082d7d1a52119b34c6feb635941 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Tue, 17 Mar 2026 05:07:24 +0000 Subject: [PATCH 4/4] remove --- extras/havpe-relay/capture_default_audio.py | 174 -- .../havpe-relay/firmware/voice-default.yaml | 1920 ----------------- extras/havpe-relay/flash_default.sh | 25 - 3 files changed, 2119 deletions(-) delete mode 100644 extras/havpe-relay/capture_default_audio.py delete mode 100644 extras/havpe-relay/firmware/voice-default.yaml delete mode 100755 extras/havpe-relay/flash_default.sh diff --git a/extras/havpe-relay/capture_default_audio.py b/extras/havpe-relay/capture_default_audio.py deleted file mode 100644 index 8d57223a..00000000 --- a/extras/havpe-relay/capture_default_audio.py +++ /dev/null @@ -1,174 +0,0 @@ -""" -Capture audio from a default HA Voice PE device running stock firmware. - -Acts as a minimal "Home Assistant" voice assistant server using aioesphomeapi. -Connects to the device, subscribes to voice assistant events, and saves -the raw audio stream to a WAV file for comparison. 
- -Usage: - uv run python capture_default_audio.py [--output audio_capture.wav] - -The device must be running the default HA Voice PE firmware with voice_assistant -component. Trigger audio by saying the wake word or pressing the button. -Press Ctrl+C to stop and save. -""" - -import argparse -import asyncio -import struct -import sys -import wave -from datetime import datetime - -from aioesphomeapi import ( - APIClient, - VoiceAssistantAudioSettingsModel, - VoiceAssistantEventType, -) - - -class AudioCapture: - def __init__(self, output_path: str): - self.output_path = output_path - self.audio_chunks: list[bytes] = [] - self.capturing = False - self.capture_count = 0 - - async def handle_start( - self, - conversation_id: str, - flags: int, - audio_settings: VoiceAssistantAudioSettingsModel, - wake_word_phrase: str | None, - ) -> int | None: - """Called when device wants to start voice assistant pipeline.""" - print(f"\n--- Voice assistant START ---") - print(f" conversation_id: {conversation_id}") - print(f" flags: {flags}") - print( - f" audio_settings: noise_suppression={audio_settings.noise_suppression_level}, " - f"auto_gain={audio_settings.auto_gain}, " - f"volume_multiplier={audio_settings.volume_multiplier}" - ) - if wake_word_phrase: - print(f" wake_word: {wake_word_phrase}") - - self.capturing = True - self.capture_count += 1 - print(f" Capturing audio (session #{self.capture_count})...") - - # Return port 0 = use API audio (not UDP) - return 0 - - async def handle_stop(self, abort: bool) -> None: - """Called when device stops voice assistant pipeline.""" - if self.capturing: - print(f"\n--- Voice assistant STOP (abort={abort}) ---") - print( - f" Captured {len(self.audio_chunks)} chunks, " - f"{sum(len(c) for c in self.audio_chunks)} bytes total" - ) - self.capturing = False - - async def handle_audio(self, data: bytes) -> None: - """Called for each audio chunk from the device.""" - self.audio_chunks.append(data) - if len(self.audio_chunks) % 50 == 0: - 
total_bytes = sum(len(c) for c in self.audio_chunks) - # Assume 16-bit mono 16kHz - duration = total_bytes / (16000 * 2) - print(f" ... {len(self.audio_chunks)} chunks, {duration:.1f}s", end="\r") - - def save_wav( - self, sample_rate: int = 16000, sample_width: int = 2, channels: int = 1 - ): - """Save captured audio to WAV file.""" - if not self.audio_chunks: - print("No audio captured!") - return - - raw_audio = b"".join(self.audio_chunks) - total_samples = len(raw_audio) // sample_width - duration = total_samples / (sample_rate * channels) - - # Analyze levels - samples = struct.unpack(f"<{total_samples}h", raw_audio) - peak = max(abs(s) for s in samples) if samples else 0 - rms = (sum(s * s for s in samples) / len(samples)) ** 0.5 if samples else 0 - - with wave.open(self.output_path, "wb") as wf: - wf.setnchannels(channels) - wf.setsampwidth(sample_width) - wf.setframerate(sample_rate) - wf.writeframes(raw_audio) - - print(f"\nSaved {self.output_path}:") - print(f" Duration: {duration:.1f}s") - print(f" Format: {sample_rate}Hz, {sample_width * 8}-bit, {channels}ch") - print(f" Peak: {peak}") - print(f" RMS: {rms:.0f}") - print(f" Samples: {total_samples}") - - -async def main(): - parser = argparse.ArgumentParser( - description="Capture audio from default HA Voice PE firmware" - ) - parser.add_argument("device_ip", help="IP address of the ESPHome device") - parser.add_argument( - "--port", type=int, default=6053, help="ESPHome native API port (default: 6053)" - ) - parser.add_argument("--password", default="", help="API password if set") - parser.add_argument("--output", "-o", default=None, help="Output WAV file path") - parser.add_argument( - "--noise-suppression", type=int, default=0, help="Noise suppression level (0-4)" - ) - parser.add_argument( - "--auto-gain", type=int, default=0, help="Auto gain in dBFS (0-31)" - ) - parser.add_argument( - "--volume-multiplier", type=float, default=1.0, help="Volume multiplier" - ) - args = parser.parse_args() - - if 
args.output is None: - args.output = f"capture_default_{datetime.now().strftime('%H%M%S')}.wav" - - capture = AudioCapture(args.output) - - print(f"Connecting to {args.device_ip}:{args.port}...") - client = APIClient(args.device_ip, args.port, args.password) - - try: - await client.connect(login=True) - info = await client.device_info() - print(f"Connected to: {info.name} (ESPHome {info.esphome_version})") - - print(f"\nSubscribing to voice assistant...") - print(f" Trigger the wake word or press the button to start capture.") - print(f" Press Ctrl+C to stop and save.\n") - - unsub = client.subscribe_voice_assistant( - handle_start=capture.handle_start, - handle_stop=capture.handle_stop, - handle_audio=capture.handle_audio, - ) - - # Keep running until Ctrl+C - try: - while True: - await asyncio.sleep(1) - except KeyboardInterrupt: - print("\n\nStopping...") - unsub() - - except Exception as e: - print(f"Error: {e}", file=sys.stderr) - raise - finally: - capture.save_wav() - await client.disconnect() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/extras/havpe-relay/firmware/voice-default.yaml b/extras/havpe-relay/firmware/voice-default.yaml deleted file mode 100644 index 4416c813..00000000 --- a/extras/havpe-relay/firmware/voice-default.yaml +++ /dev/null @@ -1,1920 +0,0 @@ -substitutions: - # Phases of the Voice Assistant - # The voice assistant is ready to be triggered by a wake word - voice_assist_idle_phase_id: '1' - # The voice assistant is waiting for a voice command (after being triggered by the wake word) - voice_assist_waiting_for_command_phase_id: '2' - # The voice assistant is listening for a voice command - voice_assist_listening_for_command_phase_id: '3' - # The voice assistant is currently processing the command - voice_assist_thinking_phase_id: '4' - # The voice assistant is replying to the command - voice_assist_replying_phase_id: '5' - # The voice assistant is not ready - voice_assist_not_ready_phase_id: '10' - # The voice 
assistant encountered an error - voice_assist_error_phase_id: '11' - # Change this to true in case you ahve a hidden SSID at home. - hidden_ssid: "false" - # Substitutions for audio files - jack_connected_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/jack_connected.flac - jack_disconnected_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/jack_disconnected.flac - mute_switch_on_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/mute_switch_on.flac - mute_switch_off_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/mute_switch_off.flac - timer_finished_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/timer_finished.flac - wake_word_triggered_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/wake_word_triggered.flac - center_button_press_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/center_button_press.flac - center_button_double_press_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/center_button_double_press.flac - center_button_triple_press_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/center_button_triple_press.flac - center_button_long_press_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/center_button_long_press.flac - factory_reset_initiated_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/factory_reset_initiated.mp3 - factory_reset_cancelled_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/factory_reset_cancelled.mp3 - factory_reset_confirmed_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/factory_reset_confirmed.mp3 - easter_egg_tick_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/easter_egg_tick.mp3 - easter_egg_tada_sound_file: 
https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/easter_egg_tada.mp3 - error_cloud_expired_sound_file: https://github.com/esphome/home-assistant-voice-pe/raw/dev/sounds/error_cloud_expired.mp3 - -esphome: - name: home-assistant-voice - friendly_name: Home Assistant Voice - name_add_mac_suffix: true - min_version: 2026.2.0 - on_boot: - priority: 375 - then: - # Run the script to refresh the LED status - - script.execute: control_leds - - delay: 1s - - switch.turn_on: internal_speaker_amp - # If after 10 minutes, the device is still initializing (It did not yet connect to Home Assistant), turn off the init_in_progress variable and run the script to refresh the LED status - - delay: 10min - - if: - condition: - lambda: return id(init_in_progress); - then: - - lambda: id(init_in_progress) = false; - - script.execute: control_leds - -esp32: - board: esp32-s3-devkitc-1 - cpu_frequency: 240MHz - variant: esp32s3 - flash_size: 16MB - framework: - type: esp-idf - version: recommended - sdkconfig_options: - CONFIG_ESP32S3_DATA_CACHE_64KB: "y" - CONFIG_ESP32S3_DATA_CACHE_LINE_64B: "y" - CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB: "y" - - # Moves instructions and read only data from flash into PSRAM on boot. - # Both enabled allows instructions to execute while a flash operation is in progress without needing to be placed in IRAM. - # Considerably speeds up mWW at the cost of using more PSRAM. 
- CONFIG_SPIRAM_RODATA: "y" - CONFIG_SPIRAM_FETCH_INSTRUCTIONS: "y" - - CONFIG_BT_ALLOCATION_FROM_SPIRAM_FIRST: "y" - CONFIG_BT_BLE_DYNAMIC_ENV_MEMORY: "y" - - CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC: "y" - CONFIG_MBEDTLS_SSL_PROTO_TLS1_3: "y" # TLS1.3 support isn't enabled by default in IDF 5.1.5 - -wifi: - id: wifi_id - ssid: !secret wifi_ssid - password: !secret wifi_password - fast_connect: ${hidden_ssid} - on_connect: - - lambda: id(improv_ble_in_progress) = false; - - script.execute: control_leds - on_disconnect: - - script.execute: control_leds - -network: - enable_ipv6: true - -logger: - level: DEBUG - logs: - sensor: WARN # avoids logging debug sensor updates - -api: - id: api_id - on_client_connected: - - script.execute: control_leds - on_client_disconnected: - - script.execute: control_leds - # encryption removed for capture_default_audio.py compatibility - -ota: - - platform: esphome - id: ota_esphome - -i2c: - - id: internal_i2c - sda: GPIO5 - scl: GPIO6 - frequency: 400kHz - -psram: - mode: octal - speed: 80MHz - ignore_not_found: false # The VPE has PSRAM, so this is safe. Allows configuring WiFi driver to use more resources (done automatically by the speaker media player) - -globals: - # Global index for our LEDs. So that switching between different animation does not lead to unwanted effects. - - id: global_led_animation_index - type: int - restore_value: no - initial_value: '0' - # Global initialization variable. Initialized to true and set to false once everything is connected. Only used to have a smooth "plugging" experience - - id: init_in_progress - type: bool - restore_value: no - initial_value: 'true' - # Global variable storing the state of ImprovBLE. Used to draw different LED animations - - id: improv_ble_in_progress - type: bool - restore_value: no - initial_value: 'false' - # Global variable tracking the phase of the voice assistant (defined above). 
Initialized to not_ready - - id: voice_assistant_phase - type: int - restore_value: no - initial_value: ${voice_assist_not_ready_phase_id} - # Global variable tracking if the dial was recently touched. - - id: dial_touched - type: bool - restore_value: no - initial_value: 'false' - # Global variable tracking if the LED color was recently changed. - - id: color_changed - type: bool - restore_value: no - initial_value: 'false' - # Global variable tracking if the group media player volume was recent changed. - - id: group_volume_changed - type: bool - restore_value: no - initial_value: 'false' - # Global variable tracking if the jack has been plugged touched. - - id: jack_plugged_recently - type: bool - restore_value: no - initial_value: 'false' - # Global variable tracking if the jack has been unplugged touched. - - id: jack_unplugged_recently - type: bool - restore_value: no - initial_value: 'false' - # Global variable storing the first active timer - - id: first_active_timer - type: voice_assistant::Timer - restore_value: false - # Global variable storing if a timer is active - - id: is_timer_active - type: bool - restore_value: false - # Global variable storing if a factory reset was requested. If it is set to true, the device will factory reset once the center button is released - - id: factory_reset_requested - type: bool - restore_value: no - initial_value: 'false' - -switch: - # This is the master mute switch. It is exposed to Home Assistant. The user can only turn it on and off if the hardware switch is off. 
(The hardware switch overrides the software one) - - platform: template - id: master_mute_switch - restore_mode: RESTORE_DEFAULT_OFF - icon: "mdi:microphone-off" - name: Mute - entity_category: config - lambda: |- - // Muted either if the hardware mute switch is on or the microphone's software mute switch is enabled - if (id(hardware_mute_switch).state || id(i2s_mics).get_mute_state()) { - return true; - } else { - return false; - } - turn_on_action: - - if: - condition: - binary_sensor.is_off: hardware_mute_switch - then: - - microphone.mute: - turn_off_action: - - if: - condition: - binary_sensor.is_off: hardware_mute_switch - then: - - microphone.unmute: - on_turn_on: - - script.execute: control_leds - on_turn_off: - - script.execute: control_leds - # Wake Word Sound Switch. - - platform: template - id: wake_sound - name: Wake sound - icon: "mdi:bullhorn" - entity_category: config - optimistic: true - restore_mode: RESTORE_DEFAULT_ON - # Internal switch to track when a timer is ringing on the device. 
- - platform: template - id: timer_ringing - optimistic: true - internal: true - restore_mode: ALWAYS_OFF - on_turn_off: - # Disable stop wake word - - micro_wake_word.disable_model: stop - - script.execute: disable_repeat - # Stop any current announcement (ie: stop the timer ring mid playback) - - if: - condition: - media_player.is_announcing: - id: external_media_player - then: - media_player.stop: - announcement: true - id: external_media_player - # Set back ducking ratio to zero - - mixer_speaker.apply_ducking: - id: media_mixing_input - decibel_reduction: 0 - duration: 1.0s - # Refresh the LED ring - - script.execute: control_leds - on_turn_on: - # Duck audio - - mixer_speaker.apply_ducking: - id: media_mixing_input - decibel_reduction: 20 - duration: 0.0s - # Enable stop wake word - - micro_wake_word.enable_model: stop - # Ring timer - - script.execute: ring_timer - # Refresh LED - - script.execute: control_leds - # If 15 minutes have passed and the timer is still ringing, stop it. - - delay: 15min - - switch.turn_off: timer_ringing - - platform: gpio - pin: GPIO47 - id: internal_speaker_amp - name: "Internal speaker amp" - entity_category: config - restore_mode: ALWAYS_OFF - internal: true - -binary_sensor: - # Center Button. Used for many things (See on_multi_click) - - platform: gpio - id: center_button - pin: - number: GPIO0 - inverted: true - on_press: - - script.execute: control_leds - on_release: - - script.execute: control_leds - # If a factory reset is requested, factory reset on release - - if: - condition: - lambda: return id(factory_reset_requested); - then: - - button.press: factory_reset_button - on_multi_click: - # Simple Click: - # - Abort "things" in order - # - Timer - # - Announcements - # - Voice Assistant Pipeline run - # - Music - # - Starts the voice assistant if it is not yet running and if the device is not muted. 
- - timing: - - ON for at most 1s - - OFF for at least 0.25s - then: - - if: - condition: - lambda: return !id(init_in_progress) && !id(color_changed) && !id(group_volume_changed); - then: - - if: - condition: - switch.is_on: timer_ringing - then: - - switch.turn_off: timer_ringing - else: - - if: - condition: - voice_assistant.is_running: - then: - - voice_assistant.stop: - else: - - if: - condition: - media_player.is_announcing: - id: external_media_player - then: - media_player.stop: - announcement: true - id: external_media_player - else: - - if: - condition: - media_player.is_playing: - id: external_media_player - then: - - media_player.pause: - id: external_media_player - else: - - if: - condition: - and: - - switch.is_off: master_mute_switch - - not: voice_assistant.is_running - then: - - script.execute: - id: play_sound - priority: true - sound_file: "center_button_press_sound" - - delay: 300ms - - voice_assistant.start: - # Double Click - # . Exposed as an event entity. To be used in automations inside Home Assistant - - timing: - - ON for at most 1s - - OFF for at most 0.25s - - ON for at most 1s - - OFF for at least 0.25s - then: - - if: - condition: - lambda: return !id(init_in_progress) && !id(color_changed) && !id(group_volume_changed); - then: - - script.execute: - id: play_sound - priority: false - sound_file: "center_button_double_press_sound" - - event.trigger: - id: button_press_event - event_type: "double_press" - # Triple Click - # . Exposed as an event entity. 
To be used in automations inside Home Assistant - - timing: - - ON for at most 1s - - OFF for at most 0.25s - - ON for at most 1s - - OFF for at most 0.25s - - ON for at most 1s - - OFF for at least 0.25s - then: - - if: - condition: - lambda: return !id(init_in_progress) && !id(color_changed) && !id(group_volume_changed); - then: - - script.execute: - id: play_sound - priority: false - sound_file: "center_button_triple_press_sound" - - event.trigger: - id: button_press_event - event_type: "triple_press" - # Long Press - # . Exposed as an event entity. To be used in automations inside Home Assistant - - timing: - - ON for at least 1s - then: - - if: - condition: - lambda: return !id(init_in_progress) && !id(color_changed) && !id(group_volume_changed); - then: - - script.execute: - id: play_sound - priority: false - sound_file: "center_button_long_press_sound" - - light.turn_off: voice_assistant_leds - - event.trigger: - id: button_press_event - event_type: "long_press" - # Very important do not remove. Trust me :D - - timing: - # H .... - - ON for at most 0.2s - - OFF for 0s to 2s - - ON for at most 0.2s - - OFF for 0s to 2s - - ON for at most 0.2s - - OFF for 0s to 2s - - ON for at most 0.2s - - OFF for 0.5s to 2s - # A ._ - - ON for at most 0.2s - - OFF for 0s to 2s - - ON for 0.2s to 2s - then: - - if: - condition: - lambda: return !id(init_in_progress); - then: - - light.turn_on: - brightness: 100% - id: voice_assistant_leds - effect: "Tick" - - script.execute: - id: play_sound - priority: true - sound_file: "easter_egg_tick_sound" - - delay: 4s - - light.turn_off: voice_assistant_leds - - script.execute: - id: play_sound - priority: true - sound_file: "easter_egg_tada_sound" - - light.turn_on: - brightness: 100% - id: voice_assistant_leds - effect: "Rainbow" - - event.trigger: - id: button_press_event - event_type: "easter_egg_press" - # Factory Reset Warning - # . Audible and Visible warning. 
- - timing: - - ON for at least 10s - then: - - if: - condition: - lambda: return !id(dial_touched); - then: - - light.turn_on: - brightness: 100% - id: voice_assistant_leds - effect: "Factory Reset Coming Up" - - script.execute: - id: play_sound - priority: true - sound_file: "factory_reset_initiated_sound" - - wait_until: - binary_sensor.is_off: center_button - - if: - condition: - lambda: return !id(factory_reset_requested); - then: - - light.turn_off: voice_assistant_leds - - script.execute: - id: play_sound - priority: true - sound_file: "factory_reset_cancelled_sound" - # Factory Reset Confirmed. - # . Audible warning to prompt user to release the button - # . Set factory_reset_requested to true - - timing: - - ON for at least 22s - then: - - if: - condition: - lambda: return !id(dial_touched); - then: - - script.execute: - id: play_sound - priority: true - sound_file: "factory_reset_confirmed_sound" - - light.turn_on: - brightness: 100% - red: 100% - green: 0% - blue: 0% - id: voice_assistant_leds - effect: "none" - - lambda: id(factory_reset_requested) = true; - - # Hardware mute switch (Side of the device) - - platform: gpio - id: hardware_mute_switch - internal: true - pin: GPIO3 - on_press: - # Play mute on sound only if software mute isn't enabled - - if: - condition: - - switch.is_off: master_mute_switch - then: - - script.execute: - id: play_sound - priority: false - sound_file: "mute_switch_on_sound" - on_release: - - script.execute: - id: play_sound - priority: false - sound_file: "mute_switch_off_sound" - - microphone.unmute: - # Audio Jack Plugged sensor - - platform: gpio - id: jack_plugged - # Debouncing it a bit because it can be activated back and forth as you plug the audio jack - filters: - - delayed_on: 200ms - - delayed_off: 200ms - pin: - number: GPIO17 - # When the jack is plugged in: - # - LED animation - # - Sound played - on_press: - - lambda: id(jack_plugged_recently) = true; - - script.execute: control_leds - - delay: 200ms - - 
script.execute: - id: play_sound - priority: false - sound_file: "jack_connected_sound" - - delay: 800ms - - lambda: id(jack_plugged_recently) = false; - - script.execute: control_leds - # When the jack is unplugged: - # - LED animation - # - Sound played - on_release: - - lambda: id(jack_unplugged_recently) = true; - - script.execute: control_leds - - delay: 200ms - - script.execute: - id: play_sound - priority: false - sound_file: "jack_disconnected_sound" - - delay: 800ms - - lambda: id(jack_unplugged_recently) = false; - - script.execute: control_leds - -light: - # Hardware LED ring. Not used because remapping needed - - platform: esp32_rmt_led_strip - id: leds_internal - pin: GPIO21 - chipset: WS2812 - max_refresh_rate: 15ms - num_leds: 12 - rgb_order: GRB - rmt_symbols: 192 - default_transition_length: 0ms - power_supply: led_power - - # Voice Assistant LED ring. Remapping of the internal LED. - # This light is not exposed. The device controls it - - platform: partition - id: voice_assistant_leds - internal: true - default_transition_length: 0ms - segments: - - id: leds_internal - from: 7 - to: 11 - - id: leds_internal - from: 0 - to: 6 - effects: - - addressable_lambda: - name: "Waiting for Command" - update_interval: 100ms - lambda: |- - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - for (uint8_t i = 0; i < 12; i++) { - if (i == id(global_led_animation_index) % 12) { - it[i] = color; - } else if (i == (id(global_led_animation_index) + 11) % 12) { - it[i] = color * 192; - } else if (i == (id(global_led_animation_index) + 10) % 12) { - it[i] = color * 128; - } else if (i == (id(global_led_animation_index) + 6) % 12) { - it[i] = color; - } else if (i == (id(global_led_animation_index) + 5) % 12) { - it[i] = color * 192; - } else if (i == (id(global_led_animation_index) + 4) % 12) { - it[i] = color * 128; - } else { - it[i] = Color::BLACK; - } - } - 
id(global_led_animation_index) = (id(global_led_animation_index) + 1) % 12; - - addressable_lambda: - name: "Listening For Command" - update_interval: 50ms - lambda: |- - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - for (uint8_t i = 0; i < 12; i++) { - if (i == id(global_led_animation_index) % 12) { - it[i] = color; - } else if (i == (id(global_led_animation_index) + 11) % 12) { - it[i] = color * 192; - } else if (i == (id(global_led_animation_index) + 10) % 12) { - it[i] = color * 128; - } else if (i == (id(global_led_animation_index) + 6) % 12) { - it[i] = color; - } else if (i == (id(global_led_animation_index) + 5) % 12) { - it[i] = color * 192; - } else if (i == (id(global_led_animation_index) + 4) % 12) { - it[i] = color * 128; - } else { - it[i] = Color::BLACK; - } - } - id(global_led_animation_index) = (id(global_led_animation_index) + 1) % 12; - - addressable_lambda: - name: "Thinking" - update_interval: 10ms - lambda: |- - static uint8_t brightness_step = 0; - static bool brightness_decreasing = true; - static uint8_t brightness_step_number = 10; - if (initial_run) { - brightness_step = 0; - brightness_decreasing = true; - } - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - for (uint8_t i = 0; i < 12; i++) { - if (i == id(global_led_animation_index) % 12) { - it[i] = color * uint8_t(255/brightness_step_number*(brightness_step_number-brightness_step)); - } else if (i == (id(global_led_animation_index) + 6) % 12) { - it[i] = color * uint8_t(255/brightness_step_number*(brightness_step_number-brightness_step)); - } else { - it[i] = Color::BLACK; - } - } - if (brightness_decreasing) { - brightness_step++; - } else { - brightness_step--; - } - if (brightness_step == 0 || brightness_step == brightness_step_number) { - brightness_decreasing = 
!brightness_decreasing; - } - - addressable_lambda: - name: "Replying" - update_interval: 50ms - lambda: |- - id(global_led_animation_index) = (12 + id(global_led_animation_index) - 1) % 12; - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - for (uint8_t i = 0; i < 12; i++) { - if (i == (id(global_led_animation_index)) % 12) { - it[i] = color; - } else if (i == ( id(global_led_animation_index) + 1) % 12) { - it[i] = color * 192; - } else if (i == ( id(global_led_animation_index) + 2) % 12) { - it[i] = color * 128; - } else if (i == ( id(global_led_animation_index) + 6) % 12) { - it[i] = color; - } else if (i == ( id(global_led_animation_index) + 7) % 12) { - it[i] = color * 192; - } else if (i == ( id(global_led_animation_index) + 8) % 12) { - it[i] = color * 128; - } else { - it[i] = Color::BLACK; - } - } - - addressable_lambda: - name: "Muted or Silent" - update_interval: 16ms - lambda: |- - static int8_t index = 0; - Color muted_color(255, 0, 0); - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - for (uint8_t i = 0; i < 12; i++) { - if ( light_color.get_state() ) { - it[i] = color; - } else { - it[i] = Color::BLACK; - } - } - if ( id(master_mute_switch).state ) { - it[2] = Color::BLACK; - it[3] = muted_color; - it[4] = Color::BLACK; - it[8] = Color::BLACK; - it[9] = muted_color; - it[10] = Color::BLACK; - } - if ( id(external_media_player).volume == 0.0f || id(external_media_player).is_muted() ) { - it[5] = Color::BLACK; - it[6] = muted_color; - it[7] = Color::BLACK; - } - - addressable_lambda: - name: "Voice kit startup failed" - # update_interval: 16ms - lambda: |- - static int8_t index = 0; - Color fail_color(255, 0, 0); - for (uint8_t i = 0; i < 12; i++) { - if (i % 3) { - it[i] = Color::BLACK; - } else { - it[i] = fail_color; - } - } - - 
addressable_lambda: - name: "Volume Display" - update_interval: 50ms - lambda: |- - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - Color silenced_color(255, 0, 0); - auto volume_ratio = 12.0f * id(external_media_player).volume; - for (uint8_t i = 0; i < 12; i++) { - if (i <= volume_ratio) { - it[(6+i)%12] = color * min( 255.0f * (volume_ratio - i) , 255.0f ) ; - } else { - it[(6+i)%12] = Color::BLACK; - } - } - if (id(external_media_player).volume == 0.0f) { - it[6] = silenced_color; - } - - addressable_lambda: - name: "Center Button Touched" - update_interval: 16ms - lambda: |- - if (initial_run) { - // set voice_assistant_leds light to colors based on led_ring - auto led_ring_cv = id(led_ring).current_values; - auto va_leds_call = id(voice_assistant_leds).make_call(); - va_leds_call.from_light_color_values(led_ring_cv); - va_leds_call.set_brightness( min ( max( id(led_ring).current_values.get_brightness() , 0.2f ) + 0.1f , 1.0f ) ); - va_leds_call.set_state(true); - va_leds_call.perform(); - } - auto light_color = id(voice_assistant_leds).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - for (uint8_t i = 0; i < 12; i++) { - it[i] = color; - } - - addressable_twinkle: - name: "Twinkle" - twinkle_probability: 50% - - addressable_lambda: - name: "Error" - update_interval: 10ms - lambda: |- - static uint8_t brightness_step = 0; - static bool brightness_decreasing = true; - static uint8_t brightness_step_number = 10; - if (initial_run) { - brightness_step = 0; - brightness_decreasing = true; - } - Color error_color(255, 0, 0); - for (uint8_t i = 0; i < 12; i++) { - it[i] = error_color * uint8_t(255/brightness_step_number*(brightness_step_number-brightness_step)); - } - if (brightness_decreasing) { - brightness_step++; - } else { - brightness_step--; - } - if (brightness_step == 0 || 
brightness_step == brightness_step_number) { - brightness_decreasing = !brightness_decreasing; - } - - addressable_lambda: - name: "Timer Ring" - update_interval: 10ms - lambda: |- - static uint8_t brightness_step = 0; - static bool brightness_decreasing = true; - static uint8_t brightness_step_number = 10; - if (initial_run) { - brightness_step = 0; - brightness_decreasing = true; - } - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - Color muted_color(255, 0, 0); - for (uint8_t i = 0; i < 12; i++) { - it[i] = color * uint8_t(255/brightness_step_number*(brightness_step_number-brightness_step)); - } - if ( id(master_mute_switch).state ) { - it[3] = muted_color; - it[9] = muted_color; - } - if (brightness_decreasing) { - brightness_step++; - } else { - brightness_step--; - } - if (brightness_step == 0 || brightness_step == brightness_step_number) { - brightness_decreasing = !brightness_decreasing; - } - - addressable_lambda: - name: "Timer Tick" - update_interval: 100ms - lambda: |- - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - Color muted_color(255, 0, 0); - auto timer_ratio = 12.0f * id(first_active_timer).seconds_left / max(id(first_active_timer).total_seconds , static_cast(1)); - uint8_t last_led_on = static_cast(ceil(timer_ratio)) - 1; - for (uint8_t i = 0; i < 12; i++) { - float brightness_dip = ( i == id(global_led_animation_index) % 12 && i != last_led_on ) ? 
0.9f : 1.0f ; - if (i <= timer_ratio) { - it[i] = color * min(255.0f * brightness_dip * (timer_ratio - i) , 255.0f * brightness_dip) ; - } else { - it[i] = Color::BLACK; - } - } - if (id(master_mute_switch).state) { - it[2] = Color::BLACK; - it[3] = muted_color; - it[4] = Color::BLACK; - it[8] = Color::BLACK; - it[9] = muted_color; - it[10] = Color::BLACK; - } - id(global_led_animation_index) = (12 + id(global_led_animation_index) - 1) % 12; - - addressable_rainbow: - name: "Rainbow" - width: 12 - - addressable_lambda: - name: "Tick" - update_interval: 333ms - lambda: |- - static uint8_t index = 0; - Color color(255, 0, 0); - if (initial_run) { - index = 0; - } - for (uint8_t i = 0; i < 12; i++) { - if (i <= index ) { - it[i] = Color::BLACK; - } else { - it[i] = color; - } - } - index = (index + 1) % 12; - - addressable_lambda: - name: "Factory Reset Coming Up" - update_interval: 1s - lambda: |- - static uint8_t index = 0; - Color color(255, 0, 0); - if (initial_run) { - index = 0; - } - for (uint8_t i = 0; i < 12; i++) { - if (i <= index ) { - it[i] = color; - } else { - it[i] = Color::BLACK; - } - } - index = (index + 1) % 12; - - addressable_lambda: - name: "Jack Plugged" - update_interval: 40ms - lambda: |- - static uint8_t index = 0; - if (initial_run) { - index = 0; - } - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - if (index <= 6) { - for (uint8_t i = 0; i < 12; i++) { - if (i == index) { - it[i] = color; - } else if (i == (12 - index) % 12) { - it[i] = color; - } else { - it[i] = Color::BLACK; - } - } - } - index = (index + 1); - - addressable_lambda: - name: "Jack Unplugged" - update_interval: 40ms - lambda: |- - static uint8_t index = 0; - if (initial_run) { - index = 0; - } - auto light_color = id(led_ring).current_values; - Color color(light_color.get_red() * 255, light_color.get_green() * 255, - light_color.get_blue() * 255); - if (index <= 
6) { - for (uint8_t i = 0; i < 12; i++) { - if (i == 6 - index) { - it[i] = color; - } else if (i == (6 + index) % 12) { - it[i] = color; - } else { - it[i] = Color::BLACK; - } - } - } - index = (index + 1); - - # User facing LED ring. Remapping of the internal LEDs. - # Exposed to be used by the user. - - platform: partition - id: led_ring - name: LED Ring - entity_category: config - icon: "mdi:circle-outline" - default_transition_length: 0ms - restore_mode: RESTORE_DEFAULT_OFF - initial_state: - color_mode: rgb - brightness: 66% - red: 9.4% - green: 73.3% - blue: 94.9% - segments: - - id: leds_internal - from: 7 - to: 11 - - id: leds_internal - from: 0 - to: 6 - -power_supply: - - id: led_power - pin: GPIO45 - -sensor: - # The dial. Used to control volume and Hue of the LED ring. - - platform: rotary_encoder - id: dial - pin_a: GPIO16 - pin_b: GPIO18 - resolution: 2 - on_clockwise: - - lambda: id(dial_touched) = true; - - if: - condition: - binary_sensor.is_off: center_button - then: - - script.execute: - id: control_volume - increase_volume: true - else: - - if: - condition: - media_player.is_playing: - id: sendspin_group_media_player - then: - - script.execute: - id: control_group_volume - increase_volume: true - else: - - script.execute: - id: control_hue - increase_hue: true - on_anticlockwise: - - lambda: id(dial_touched) = true; - - if: - condition: - binary_sensor.is_off: center_button - then: - - script.execute: - id: control_volume - increase_volume: false - else: - - if: - condition: - media_player.is_playing: - id: sendspin_group_media_player - then: - - script.execute: - id: control_group_volume - increase_volume: false - else: - - script.execute: - id: control_hue - increase_hue: false - -event: - # Event entity exposed to the user to automate on complex center button presses. - # The simple press is not exposed as it is used to control the device itself. 
- - platform: template - id: button_press_event - name: "Button press" - icon: mdi:button-pointer - device_class: button - event_types: - - double_press - - triple_press - - long_press - - easter_egg_press - -script: - # Master script controlling the LEDs, based on different conditions : initialization in progress, wifi and api connected and voice assistant phase. - # For the sake of simplicity and re-usability, the script calls child scripts defined below. - # This script will be called every time one of these conditions is changing. - - id: control_leds - then: - - lambda: | - if (id(voice_kit_component).is_failed()) { - id(control_leds_voice_kit_startup_failed).execute(); - return; - } - id(check_if_timers_active).execute(); - if (id(is_timer_active)){ - id(fetch_first_active_timer).execute(); - } - if (id(improv_ble_in_progress)) { - id(control_leds_improv_ble_state).execute(); - } else if (id(init_in_progress)) { - id(control_leds_init_state).execute(); - } else if (!id(wifi_id).is_connected() || !id(api_id).is_connected()){ - id(control_leds_no_ha_connection_state).execute(); - } else if (id(center_button).state) { - id(control_leds_center_button_touched).execute(); - } else if (id(jack_plugged_recently)) { - id(control_leds_jack_plugged_recently).execute(); - } else if (id(jack_unplugged_recently)) { - id(control_leds_jack_unplugged_recently).execute(); - } else if (id(dial_touched)) { - id(control_leds_dial_touched).execute(); - } else if (id(timer_ringing).state) { - id(control_leds_timer_ringing).execute(); - } else if (id(voice_assistant_phase) == ${voice_assist_waiting_for_command_phase_id}) { - id(control_leds_voice_assistant_waiting_for_command_phase).execute(); - } else if (id(voice_assistant_phase) == ${voice_assist_listening_for_command_phase_id}) { - id(control_leds_voice_assistant_listening_for_command_phase).execute(); - } else if (id(voice_assistant_phase) == ${voice_assist_thinking_phase_id}) { - 
id(control_leds_voice_assistant_thinking_phase).execute(); - } else if (id(voice_assistant_phase) == ${voice_assist_replying_phase_id}) { - id(control_leds_voice_assistant_replying_phase).execute(); - } else if (id(voice_assistant_phase) == ${voice_assist_error_phase_id}) { - id(control_leds_voice_assistant_error_phase).execute(); - } else if (id(voice_assistant_phase) == ${voice_assist_not_ready_phase_id}) { - id(control_leds_voice_assistant_not_ready_phase).execute(); - } else if (id(is_timer_active)) { - id(control_leds_timer_ticking).execute(); - } else if (id(master_mute_switch).state) { - id(control_leds_muted_or_silent).execute(); - } else if (id(external_media_player).volume == 0.0f || id(external_media_player).is_muted()) { - id(control_leds_muted_or_silent).execute(); - } else if (id(voice_assistant_phase) == ${voice_assist_idle_phase_id}) { - id(control_leds_voice_assistant_idle_phase).execute(); - } - - # Script executed if voice_kit startup failed - # Static red "X" - - id: control_leds_voice_kit_startup_failed - then: - - light.turn_on: - brightness: 40% - red: 0% - green: 0% - blue: 0% - id: voice_assistant_leds - effect: "Voice kit startup failed" - - # Script executed during Improv BLE - # Warm White Twinkle - - id: control_leds_improv_ble_state - then: - - light.turn_on: - brightness: 66% - red: 100% - green: 89% - blue: 71% - id: voice_assistant_leds - effect: "Twinkle" - - # Script executed during initialization - # Blue Twinkle if Wifi is connected, Else solid warm white - - id: control_leds_init_state - then: - - if: - condition: - wifi.connected: - then: - - light.turn_on: - brightness: 66% - red: 9.4% - green: 73.3% - blue: 94.9% - id: voice_assistant_leds - effect: "Twinkle" - else: - - light.turn_on: - brightness: 66% - red: 100% - green: 89% - blue: 71% - id: voice_assistant_leds - effect: "none" - - # Script executed when the device has no connection to Home Assistant - # Red Twinkle (This will be visible during HA updates for example) - 
- - id: control_leds_no_ha_connection_state - then: - - light.turn_on: - brightness: 66% - red: 1 - green: 0 - blue: 0 - id: voice_assistant_leds - effect: "Twinkle" - - # Script executed when the voice assistant is idle (waiting for a wake word) - # No animation: voice_assistant_leds is turned off, and led_ring is turned on again only if the user already has it on - - id: control_leds_voice_assistant_idle_phase - then: - - light.turn_off: voice_assistant_leds - - if: - condition: - light.is_on: led_ring - then: - light.turn_on: led_ring - - # Script executed when the voice assistant is waiting for a command (After the wake word) - # Slow clockwise spin of the LED ring. - - id: control_leds_voice_assistant_waiting_for_command_phase - then: - - light.turn_on: - # Follow the led_ring brightness, but never go below 20% (same floor in the scripts below) - brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); - id: voice_assistant_leds - effect: "Waiting for Command" - - # Script executed when the voice assistant is listening to a command - # Fast clockwise spin of the LED ring. - - id: control_leds_voice_assistant_listening_for_command_phase - then: - - light.turn_on: - brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); - id: voice_assistant_leds - effect: "Listening For Command" - - # Script executed when the voice assistant is thinking about a command - # The spin stops and the 2 LEDs that are currently on start blinking, indicating the command is being processed. - - id: control_leds_voice_assistant_thinking_phase - then: - - light.turn_on: - brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); - id: voice_assistant_leds - effect: "Thinking" - - # Script executed when the voice assistant is replying to a command - # Fast anticlockwise spin of the LED ring.
- - id: control_leds_voice_assistant_replying_phase - then: - - light.turn_on: - brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); - id: voice_assistant_leds - effect: "Replying" - - # Script executed when the voice assistant is in error - # Fast Red Pulse - - id: control_leds_voice_assistant_error_phase - then: - - light.turn_on: - # led_ring brightness floored at 20%, raised by 10% and capped at 100% - brightness: !lambda return min ( max( id(led_ring).current_values.get_brightness() , 0.2f ) + 0.1f , 1.0f ); - red: 1 - green: 0 - blue: 0 - id: voice_assistant_leds - effect: "Error" - - # Script executed when the voice assistant is muted or silent - # The LEDs next to the 2 microphones turn red / one LED next to the speaker grill turns red - - id: control_leds_muted_or_silent - then: - - light.turn_on: - brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); - id: voice_assistant_leds - effect: "Muted or Silent" - - # Script executed when the voice assistant is not ready - # Red Twinkle - - id: control_leds_voice_assistant_not_ready_phase - then: - - light.turn_on: - brightness: 66% - red: 1 - green: 0 - blue: 0 - id: voice_assistant_leds - effect: "Twinkle" - - # Script executed when the dial is touched - # A number of LEDs turn on indicating a visual representation of the volume of the media player entity.
- - id: control_leds_dial_touched - then: - - light.turn_on: - brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); - id: voice_assistant_leds - effect: "Volume Display" - - # Script executed when the jack has just been unplugged - # A ripple effect - - id: control_leds_jack_unplugged_recently - then: - - light.turn_on: - brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); - id: voice_assistant_leds - effect: "Jack Unplugged" - - # Script executed when the jack has just been plugged - # A ripple effect - - id: control_leds_jack_plugged_recently - then: - - light.turn_on: - brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); - id: voice_assistant_leds - effect: "Jack Plugged" - - # Script executed when the center button is touched - # The complete LED ring turns on - - id: control_leds_center_button_touched - then: - - light.turn_on: - brightness: !lambda return min ( max( id(led_ring).current_values.get_brightness() , 0.2f ) + 0.1f , 1.0f ); - id: voice_assistant_leds - effect: "Center Button Touched" - - # Script executed when the timer is ringing, to control the LEDs - # The LED ring blinks. - - id: control_leds_timer_ringing - then: - - light.turn_on: - brightness: !lambda return min ( max( id(led_ring).current_values.get_brightness() , 0.2f ) + 0.1f , 1.0f ); - id: voice_assistant_leds - effect: "Timer Ring" - - # Script executed when the timer is ticking, to control the LEDs - # The LEDs show the remaining time as a fraction of the full ring. - - id: control_leds_timer_ticking - then: - - light.turn_on: - brightness: !lambda return max( id(led_ring).current_values.get_brightness() , 0.2f ); - id: voice_assistant_leds - effect: "Timer Tick" - - # Script executed when the volume is increased/decreased from the dial - - id: control_volume - mode: restart - parameters: - increase_volume: bool # True: Increase volume / False: Decrease volume.
- then: - - delay: 16ms - - if: - condition: - lambda: return increase_volume; - then: - - media_player.volume_up: - id: external_media_player - else: - - media_player.volume_down: - id: external_media_player - - script.execute: control_leds - # After 1s: clear dial_touched, reset the dial value to 0 and refresh the LEDs - - delay: 1s - - lambda: id(dial_touched) = false; - - sensor.rotary_encoder.set_value: - id: dial - value: 0 - - script.execute: control_leds - - # Script executed when the volume is increased/decreased from the dial for the group media player - - id: control_group_volume - mode: restart - parameters: - increase_volume: bool # True: Increase volume / False: Decrease volume. - then: - - delay: 16ms - - if: - condition: - lambda: return increase_volume; - then: - - lambda: id(group_volume_changed) = true; - - media_player.volume_up: - id: sendspin_group_media_player - else: - - lambda: id(group_volume_changed) = true; - - media_player.volume_down: - id: sendspin_group_media_player - - script.execute: control_leds - # After 1s: clear dial_touched and group_volume_changed, reset the dial value to 0 and refresh the LEDs - - delay: 1s - - lambda: id(dial_touched) = false; - - lambda: id(group_volume_changed) = false; - - sensor.rotary_encoder.set_value: - id: dial - value: 0 - - script.execute: control_leds - - # Script executed when the hue is increased/decreased from the dial - - id: control_hue - mode: restart - parameters: - increase_hue: bool # True: Increase hue / False: Decrease hue.
- then: - - delay: 16ms - # Only change the hue once the dial has moved more than 3 steps, or while a colour change is already in progress - - if: - condition: - lambda: return(abs(int(id(dial).state)) > 3 || id(color_changed)); - then: - - lambda: | - id(color_changed) = true; - auto light_color = id(voice_assistant_leds).current_values; - int hue = 0; - float saturation = 0; - float value = 0; - rgb_to_hsv( light_color.get_red(), - light_color.get_green(), - light_color.get_blue(), - hue, - saturation, - value); - if (increase_hue) { - hue = (hue + 10) % 360; - } else { - // +350 modulo 360 is a 10 degree step backwards that never goes negative - hue = (hue + 350) % 360; - } - // A nearly unsaturated colour is forced to full saturation - if (saturation < 0.05) { - saturation = 1; - } - float red = 0; - float green = 0; - float blue = 0; - hsv_to_rgb( hue, - saturation, - value, - red, - green, - blue); - id(voice_assistant_leds).make_call().set_rgb(red, green, blue).perform(); - - wait_until: - binary_sensor.is_off: center_button - - lambda: | - id(dial_touched) = false; - // now we "save" the new LED color/state to led_ring, maintaining its brightness and state - auto led_ring_call = id(led_ring).make_call(); - auto va_leds_cv = id(voice_assistant_leds).current_values; - led_ring_call.from_light_color_values(va_leds_cv); - led_ring_call.set_brightness(id(led_ring).current_values.get_brightness()); - led_ring_call.set_state(id(led_ring).current_values.is_on()); - led_ring_call.perform(); - - sensor.rotary_encoder.set_value: - id: dial - value: 0 - - script.execute: control_leds - - delay: 500ms - - lambda: id(color_changed) = false; - - # Script executed when the timer is ringing, to play back sounds. - - id: ring_timer - then: - - script.execute: enable_repeat_one - - script.execute: - id: play_sound - priority: true - sound_file: "timer_finished_sound" - - # Script executed when the timer is ringing, to repeat the timer finished sound.
- - id: enable_repeat_one - then: - - media_player.repeat_one: - id: external_media_player - announcement: true - # Turn on the repeat mode and pause for 500 ms between playlist items/repeats - - lambda: |- - id(external_media_player)->set_playlist_delay_ms(1, 500); - - # Script executed when the timer is done ringing, to disable repeat mode. - - id: disable_repeat - then: - # Turn off the repeat mode and pause for 0 ms between playlist items/repeats - - media_player.repeat_off: - id: external_media_player - announcement: true - - lambda: |- - id(external_media_player)->set_playlist_delay_ms(1, 0); - - # Script executed when we want to play sounds on the device. - # With priority set, the current announcement is stopped first; without it, the sound is skipped while the player is announcing. - - id: play_sound - parameters: - priority: bool - sound_file: string - then: - - if: - condition: - lambda: return priority; - then: - - media_player.stop: - id: external_media_player - announcement: true - - lambda: |- - if ( (id(external_media_player).state != media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING ) || priority) { - id(external_media_player) - ->make_call() - .set_media_url("file://" + sound_file) - .set_announcement(true) - .perform(); - } - - # Script used to fetch the first active timer (Stored in global first_active_timer) - - id: fetch_first_active_timer - then: - - lambda: | - const auto &timers = id(va).get_timers(); - // NOTE(review): dereferences timers.begin() without an emptiness check - presumably only run while a timer exists (control_leds checks is_timer_active first); confirm other callers - // NOTE(review): the seed (first timer) is kept even when it is inactive, unless an active timer has seconds_left <= its own - confirm this is intended - auto output_timer = *timers.begin(); - for (const auto &timer : timers) { - if (timer.is_active && timer.seconds_left <= output_timer.seconds_left) { - output_timer = timer; - } - } - id(first_active_timer) = output_timer; - - # Script used to check if a timer is active (Stored in global is_timer_active) - - id: check_if_timers_active - then: - - lambda: | - const auto &timers = id(va).get_timers(); - bool output = false; - for (const auto &timer : timers) { - if (timer.is_active) { - output = true; - } - } - id(is_timer_active) = output; - - # Script used to activate the stop word if the TTS step is long. - # Why is this wrapped in a script?
- # Becasue we want to stop the sequence if the TTS step is faster than that. - # This allows us to prevent having the deactivation of the stop word before its own activation. - - id: activate_stop_word_once - then: - - wait_until: - condition: - media_player.is_announcing: - id: external_media_player - timeout: 5s - - delay: 1s - # Enable stop wake word - - if: - condition: - switch.is_off: timer_ringing - then: - - micro_wake_word.enable_model: stop - - wait_until: - not: - media_player.is_announcing: - id: external_media_player - - if: - condition: - switch.is_off: timer_ringing - then: - - micro_wake_word.disable_model: stop - -i2s_audio: - - id: i2s_output - # i2s_output data pin is gpio10 - i2s_lrclk_pin: - number: GPIO7 - i2s_bclk_pin: - number: GPIO8 - - - id: i2s_input - # data line is GPIO15 - i2s_lrclk_pin: - number: GPIO14 - i2s_bclk_pin: - number: GPIO13 - -microphone: - - platform: i2s_audio - id: i2s_mics - i2s_din_pin: GPIO15 - adc_type: external - pdm: false - sample_rate: 16000 - bits_per_sample: 32bit - i2s_mode: secondary - i2s_audio_id: i2s_input - channel: stereo - -speaker: - # Hardware speaker output - - platform: i2s_audio - id: i2s_audio_speaker - sample_rate: 48000 - i2s_mode: secondary - i2s_dout_pin: GPIO10 - bits_per_sample: 32bit - i2s_audio_id: i2s_output - dac_type: external - channel: stereo - timeout: never - buffer_duration: 100ms - audio_dac: aic3204_dac - - # Virtual speakers to combine the announcement and media streams together into one output - - platform: mixer - id: mixing_speaker - output_speaker: i2s_audio_speaker - num_channels: 2 - task_stack_in_psram: true - source_speakers: - - id: announcement_mixing_input - timeout: never - - id: media_mixing_input - timeout: never - - # Virtual speakers to resample each pipelines' audio, if necessary, as the mixer speaker requires the same sample rate - - platform: resampler - id: announcement_resampling_speaker - output_speaker: announcement_mixing_input - sample_rate: 48000 - 
bits_per_sample: 16 - - platform: resampler - id: media_resampling_speaker - output_speaker: media_mixing_input - sample_rate: 48000 - bits_per_sample: 16 - -sendspin: - id: sendspin_hub - task_stack_in_psram: true - kalman_process_error: 0.01 - -http_request: - -media_source: - - platform: sendspin - id: sendspin_source - - platform: http_request - id: http_source - buffer_size: 500000 - - platform: file - id: file_source - files: - - id: center_button_press_sound - file: ${center_button_press_sound_file} - - id: center_button_double_press_sound - file: ${center_button_double_press_sound_file} - - id: center_button_triple_press_sound - file: ${center_button_triple_press_sound_file} - - id: center_button_long_press_sound - file: ${center_button_long_press_sound_file} - - id: factory_reset_initiated_sound - file: ${factory_reset_initiated_sound_file} - - id: factory_reset_cancelled_sound - file: ${factory_reset_cancelled_sound_file} - - id: factory_reset_confirmed_sound - file: ${factory_reset_confirmed_sound_file} - - id: jack_connected_sound - file: ${jack_connected_sound_file} - - id: jack_disconnected_sound - file: ${jack_disconnected_sound_file} - - id: mute_switch_on_sound - file: ${mute_switch_on_sound_file} - - id: mute_switch_off_sound - file: ${mute_switch_off_sound_file} - - id: timer_finished_sound - file: ${timer_finished_sound_file} - - id: wake_word_triggered_sound - file: ${wake_word_triggered_sound_file} - - id: easter_egg_tick_sound - file: ${easter_egg_tick_sound_file} - - id: easter_egg_tada_sound - file: ${easter_egg_tada_sound_file} - - id: error_cloud_expired - file: ${error_cloud_expired_sound_file} - -media_player: - - platform: sendspin - id: sendspin_group_media_player - - platform: speaker_source - id: external_media_player - name: Media Player - announcement_speaker: announcement_resampling_speaker - media_speaker: media_resampling_speaker - announcement_pipeline: - format: FLAC # FLAC is the least processor intensive codec - 
num_channels: 1 # Stereo audio is unnecessary for announcements - sample_rate: 48000 - media_pipeline: - format: FLAC # FLAC is the least processor intensive codec - num_channels: 2 - sample_rate: 48000 - volume_increment: 0.05 - volume_min: 0.4 - volume_max: 0.85 - sources: - - file_source - - http_source - - sendspin_source - on_mute: - - script.execute: control_leds - on_unmute: - - script.execute: control_leds - on_volume: - - script.execute: control_leds - on_announcement: - - mixer_speaker.apply_ducking: - id: media_mixing_input - decibel_reduction: 20 - duration: 0.0s - on_state: - if: - condition: - and: - - switch.is_off: timer_ringing - - not: - voice_assistant.is_running: - - not: - media_player.is_announcing: external_media_player - then: - - mixer_speaker.apply_ducking: - id: media_mixing_input - decibel_reduction: 0 - duration: 1.0s - -voice_kit: - id: voice_kit_component - i2c_id: internal_i2c - reset_pin: GPIO4 - firmware: - url: https://github.com/esphome/voice-kit-xmos-firmware/releases/download/v1.3.1/ffva_v1.3.1_upgrade.bin - version: "1.3.1" - md5: 964635c5bf125529dab14a2472a15401 - -# Sendspin related components are pinned to specific commits for reproducible builds -external_components: - - source: - type: git - url: https://github.com/esphome/home-assistant-voice-pe - ref: dev - components: - - voice_kit - refresh: 0s - - source: - # https://github.com/esphome/esphome/pull/12256 - type: git - url: https://github.com/esphome/esphome - ref: a2d98e1d5e020200db8f3caf27a74a939a661dc4 - components: [audio] - - source: - # https://github.com/esphome/esphome/pull/12258 - type: git - url: https://github.com/esphome/esphome - ref: b4b7c5b25ebe0f2ab988f700219fa3c57b2377b7 - components: [media_player] - - source: - # https://github.com/esphome/esphome/pull/12284 - type: git - url: https://github.com/esphome/esphome - ref: d48058e140c98f5c2d902661d851a6b712d62434 - components: [sendspin] - - source: - # https://github.com/esphome/esphome/pull/14013 - 
type: git - url: https://github.com/esphome/esphome - ref: 51dcce3d1f22865ebb458a5447bbc877ac946b5a - components: [mdns] - - source: - # https://github.com/esphome/esphome/pull/12429 - type: git - url: https://github.com/esphome/esphome - ref: b49b09b6ae56502aa3ce51be86f90d732d019b2c - refresh: 0s - components: [file, http_request, media_source, speaker_source] - -audio_dac: - - platform: aic3204 - id: aic3204_dac - i2c_id: internal_i2c - -micro_wake_word: - id: mww - microphone: - microphone: i2s_mics - channels: 1 - gain_factor: 4 - stop_after_detection: false - models: - - model: https://github.com/kahrendt/microWakeWord/releases/download/okay_nabu_20241226.3/okay_nabu.json - id: okay_nabu - - model: hey_jarvis - id: hey_jarvis - - model: hey_mycroft - id: hey_mycroft - - model: https://github.com/kahrendt/microWakeWord/releases/download/stop/stop.json - id: stop - internal: true - vad: - on_wake_word_detected: - # If the wake word is detected when the device is muted (Possible with the software mute switch): Do nothing - - if: - condition: - switch.is_off: master_mute_switch - then: - # If a timer is ringing: Stop it, do not start the voice assistant (We can stop timer from voice!) 
- - if: - condition: - switch.is_on: timer_ringing - then: - - switch.turn_off: timer_ringing - # Stop voice assistant if running - else: - - if: - condition: - voice_assistant.is_running: - then: - voice_assistant.stop: - # Stop any other media player announcement - else: - - if: - condition: - media_player.is_announcing: - id: external_media_player - then: - - media_player.stop: - announcement: true - id: external_media_player - # Start the voice assistant and play the wake sound, if enabled - else: - - if: - condition: - switch.is_on: wake_sound - then: - - script.execute: - id: play_sound - priority: true - sound_file: "wake_word_triggered_sound" - - delay: 300ms - - voice_assistant.start: - wake_word: !lambda return wake_word; - -select: - - platform: template - name: "Wake word sensitivity" - optimistic: true - initial_option: Slightly sensitive - restore_value: true - entity_category: config - options: - - Slightly sensitive - - Moderately sensitive - - Very sensitive - on_value: - # Sets specific wake word probabilities computed for each particular model - # Note probability cutoffs are set as a quantized uint8 value, each comment has the corresponding floating point cutoff - # False Accepts per Hour values are tested against all units and channels from the Dinner Party Corpus. 
- # These cutoffs apply only to the specific models included in the firmware: okay_nabu@20241226.3, hey_jarvis@v2, hey_mycroft@v2 - lambda: |- - if (x == "Slightly sensitive") { - id(okay_nabu).set_probability_cutoff(217); // 0.85 -> 0.000 FAPH on DipCo (Manifest's default) - id(hey_jarvis).set_probability_cutoff(247); // 0.97 -> 0.563 FAPH on DipCo (Manifest's default) - id(hey_mycroft).set_probability_cutoff(253); // 0.99 -> 0.567 FAPH on DipCo - } else if (x == "Moderately sensitive") { - id(okay_nabu).set_probability_cutoff(176); // 0.69 -> 0.376 FAPH on DipCo - id(hey_jarvis).set_probability_cutoff(235); // 0.92 -> 0.939 FAPH on DipCo - id(hey_mycroft).set_probability_cutoff(242); // 0.95 -> 1.502 FAPH on DipCo (Manifest's default) - } else if (x == "Very sensitive") { - id(okay_nabu).set_probability_cutoff(143); // 0.56 -> 0.751 FAPH on DipCo - id(hey_jarvis).set_probability_cutoff(212); // 0.83 -> 1.502 FAPH on DipCo - id(hey_mycroft).set_probability_cutoff(237); // 0.93 -> 1.878 FAPH on DipCo - } - -voice_assistant: - id: va - microphone: - microphone: i2s_mics - channels: 0 - media_player: external_media_player - micro_wake_word: mww - use_wake_word: false - noise_suppression_level: 0 - auto_gain: 0 dbfs - volume_multiplier: 1 - on_client_connected: - - lambda: id(init_in_progress) = false; - - micro_wake_word.start: - - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id}; - - script.execute: control_leds - on_client_disconnected: - - voice_assistant.stop: - - lambda: id(voice_assistant_phase) = ${voice_assist_not_ready_phase_id}; - - script.execute: control_leds - on_error: - # Only set the error phase if the error code is different than duplicate_wake_up_detected or stt-no-text-recognized - # These two are ignored for a better user experience - - if: - condition: - and: - - lambda: return !id(init_in_progress); - - lambda: return code != "duplicate_wake_up_detected"; - - lambda: return code != "stt-no-text-recognized"; - then: - - lambda: 
id(voice_assistant_phase) = ${voice_assist_error_phase_id}; - - script.execute: control_leds - # If the error code is cloud-auth-failed, serve a local audio file guiding the user. - - if: - condition: - - lambda: return code == "cloud-auth-failed"; - then: - - script.execute: - id: play_sound - priority: true - sound_file: "error_cloud_expired" - # When the voice assistant starts: Play a wake up sound, duck audio. - on_start: - - mixer_speaker.apply_ducking: - id: media_mixing_input - decibel_reduction: 20 # Number of dB quieter; higher implies more quiet, 0 implies full volume - duration: 0.0s # The duration of the transition (default is no transition) - on_listening: - - lambda: id(voice_assistant_phase) = ${voice_assist_waiting_for_command_phase_id}; - - script.execute: control_leds - on_stt_vad_start: - - lambda: id(voice_assistant_phase) = ${voice_assist_listening_for_command_phase_id}; - - script.execute: control_leds - on_stt_vad_end: - - lambda: id(voice_assistant_phase) = ${voice_assist_thinking_phase_id}; - - script.execute: control_leds - on_intent_progress: - - if: - condition: - # A nonempty x variable means a streaming TTS url was sent to the media player - lambda: 'return !x.empty();' - then: - - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id}; - - script.execute: control_leds - # Start a script that would potentially enable the stop word if the response is longer than a second - - script.execute: activate_stop_word_once - on_tts_start: - - if: - condition: - # The intent_progress trigger didn't start the TTS Reponse - lambda: 'return id(voice_assistant_phase) != ${voice_assist_replying_phase_id};' - then: - - lambda: id(voice_assistant_phase) = ${voice_assist_replying_phase_id}; - - script.execute: control_leds - # Start a script that would potentially enable the stop word if the response is longer than a second - - script.execute: activate_stop_word_once - # When the voice assistant ends ... 
- on_end: - - wait_until: - not: - voice_assistant.is_running: - # Stop ducking audio. - - mixer_speaker.apply_ducking: - id: media_mixing_input - decibel_reduction: 0 - duration: 1.0s - # If the end happened because of an error, let the error phase on for a second - - if: - condition: - lambda: return id(voice_assistant_phase) == ${voice_assist_error_phase_id}; - then: - - delay: 1s - # Reset the voice assistant phase id and reset the LED animations. - - lambda: id(voice_assistant_phase) = ${voice_assist_idle_phase_id}; - - script.execute: control_leds - on_timer_finished: - - switch.turn_on: timer_ringing - on_timer_started: - - script.execute: control_leds - on_timer_cancelled: - - script.execute: control_leds - on_timer_updated: - - script.execute: control_leds - on_timer_tick: - - script.execute: control_leds - -button: - - platform: factory_reset - id: factory_reset_button - name: "Factory Reset" - entity_category: diagnostic - internal: true - - platform: restart - id: restart_button - name: "Restart" - entity_category: config - disabled_by_default: true - icon: "mdi:restart" - -debug: - update_interval: 5s diff --git a/extras/havpe-relay/flash_default.sh b/extras/havpe-relay/flash_default.sh deleted file mode 100755 index 0f351770..00000000 --- a/extras/havpe-relay/flash_default.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# Flash default HA-style firmware for A/B audio comparison. -# -# Usage: -# ./flash_default.sh # compile and flash -# ./flash_default.sh logs # view device logs -# -# After flashing: -# uv run python capture_default_audio.py -# Hold the center button to stream audio, release to stop. - -set -e -cd "$(dirname "$0")/firmware" - -if [ ! -f secrets.yaml ]; then - echo "Error: firmware/secrets.yaml not found." - echo "Run ./init.sh and enable firmware setup, or:" - echo " cp secrets.template.yaml secrets.yaml" - echo " # then edit secrets.yaml with your WiFi and relay IP" - exit 1 -fi - -ACTION="${1:-run}" -cd .. 
-exec uv run --group firmware esphome "$ACTION" firmware/voice-default.yaml