[HELP] Audio From M5Stack Atom Echo Appears Sped Up / Choppy / Low Quality
-
Hi everyone,
I'm working on a project using the M5Stack Atom Echo to stream audio over Wi-Fi to a Python socket server and save it as a
.wav
file. However, I’ve been consistently running into the following audio quality issues:- Audio sounds sped up or accelerated
- Audio degrades with repeated runs, especially compared to the quality after a fresh flash of the device
- Low fidelity or choppy output
- Sometimes segments of the audio are missing intermittently and unpredictably
What I’ve Tried So Far
To rule out common causes, I’ve implemented the following:
- Added a magic header (0xAA55) for packet boundary detection
- Added a CRC32 checksum to verify packet integrity
- Tracked packet sequence numbers to detect out-of-order or lost packets
- Tuned record size to 120 samples per packet to avoid fragmentation
- Increased DMA buffer length and count to prevent underruns
- Verified no packet loss over Wi-Fi by checking sequence and CRC
- Used sample rate of 17000 Hz and 16-bit mono audio
Despite these efforts, the audio still exhibits the above problems.
My Setup
Arduino (M5Stack Atom Echo)
- Records mic input at 17000 Hz, 16-bit mono
- Sends audio in chunks of 120 samples per packet (with sequence, magic, and CRC)
- Uses a large circular buffer to avoid data loss during recording
Python Socket Server
- Receives packets with proper buffering
- Validates magic, sequence, and CRC
- Appends valid audio frames and writes to a
.wav
file
Sample Code
Arduino Code (M5Stack Atom Echo)
#include <M5Unified.h> #include <WiFi.h> #include <CRC32.h> const char* ssid = "xxx"; const char* password = "xxx"; const char* server_ip = "xx.xx.xx.xx"; const uint16_t server_port = 5000; WiFiClient client; CRC32 crc; // Reduce packet size to avoid WiFi fragmentation (MTU < 1400 bytes) static constexpr size_t record_length = 120; // Reduced from 240 static constexpr size_t record_number = 256; // Larger buffer static constexpr size_t record_samplerate = 17000; static constexpr size_t record_size = record_number * record_length; static size_t rec_record_idx = 0; static uint32_t packet_seq = 0; int16_t* rec_data; bool is_connected = false; // Packet with header (magic + seq + crc) struct AudioPacket { uint16_t magic; // 0xAA55 for boundary detection uint32_t seq; uint32_t crc; int16_t data[record_length]; } __attribute__((packed)); void setup() { M5.begin(); Serial.begin(115200); M5.Speaker.end(); // Configure mic for 16-bit mono 17000Hz auto mic_cfg = M5.Mic.config(); mic_cfg.sample_rate = 17000; mic_cfg.stereo = true; // Force mono mic_cfg.dma_buf_len = record_number; // Larger DMA buffer to avoid underflow mic_cfg.dma_buf_count = 8; M5.Mic.config(mic_cfg); // Apply config M5.Mic.begin(); // Initialize mic with default config // Allocate buffer rec_data = (int16_t*)heap_caps_malloc(record_size * sizeof(int16_t), MALLOC_CAP_8BIT); memset(rec_data, 0, record_size * sizeof(int16_t)); // Wi-Fi connection with retry WiFi.begin(ssid, password); while (WiFi.status() != WL_CONNECTED) { delay(500); Serial.print("."); } Serial.println("\nWi-Fi connected"); } void loop() { // Reconnect if disconnected if (!client.connected()) { is_connected = client.connect(server_ip, server_port); if (is_connected) { Serial.println("Server connected"); packet_seq = 0; // Reset sequence on new connection } else { delay(1000); return; } } if (client.connected() && M5.Mic.isEnabled()) { AudioPacket packet; auto data = &rec_data[rec_record_idx * record_length]; if (M5.Mic.record(data, record_length, record_samplerate)) { // Fill packet with magic number and reset CRC packet.magic = 0xAA55; packet.seq = packet_seq++; crc.reset(); // Critical: reset CRC for each packet memcpy(packet.data, data, record_length * sizeof(int16_t)); packet.crc = crc.calculate((uint8_t*)&packet.data, record_length * sizeof(int16_t)); client.write((uint8_t*)&packet, sizeof(AudioPacket)); delay(10); if (++rec_record_idx >= record_number) { memset(rec_data, 0, record_size * sizeof(int16_t)); rec_record_idx = 0; } } } }
Python Socket Server
import socket import wave import struct import zlib import numpy as np import sounddevice as sd HOST = '0.0.0.0' PORT = 5000 OUTPUT_FILE = 'recorded_audio.wav' SAMPLE_RATE = 17000 CHANNELS = 1 SAMPLE_WIDTH = 2 RECORD_LENGTH = 120 # Must match Arduino PACKET_SIZE = 2 + 4 + 4 + (RECORD_LENGTH * 2) # magic(2) + seq(4) + crc(4) + data frames = [] last_seq = -1 buffer = b'' try: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: sock.bind((HOST, PORT)) sock.listen(1) conn, addr = sock.accept() with conn: print(f"Connected by {addr}") while True: # Handle fragmented data by buffering buffer += conn.recv(1024) while len(buffer) >= PACKET_SIZE: # Extract complete packet packet = buffer[:PACKET_SIZE] buffer = buffer[PACKET_SIZE:] # Verify magic number magic, seq, crc_received = struct.unpack('<HII', packet[:10]) if magic != 0xAA55: print("Invalid packet magic number") continue # Verify sequence if seq != last_seq + 1: print(f"Sequence error: expected {last_seq + 1}, got {seq}") last_seq = seq # Verify CRC audio_data = packet[10:] crc_calculated = zlib.crc32(audio_data) & 0xFFFFFFFF if crc_received != crc_calculated: print(f"CRC error: received {crc_received}, calculated {crc_calculated}") continue frames.append(audio_data) except: # Save WAV with wave.open(OUTPUT_FILE, 'wb') as wf: wf.setnchannels(CHANNELS) wf.setsampwidth(SAMPLE_WIDTH) wf.setframerate(SAMPLE_RATE) wf.writeframes(b''.join(frames)) print(f"Audio saved to {OUTPUT_FILE}")
Recorded Audio
Any advice or pointers would be greatly appreciated.Thanks in advance.