[HELP] Audio From M5Stack Atom Echo Appears Sped Up / Choppy / Low Quality

echodine46

Hi everyone,

I'm working on a project using the M5Stack Atom Echo to stream audio over Wi-Fi to a Python socket server and save it as a .wav file. However, I’ve been consistently running into the following audio quality issues:

Audio sounds sped up or accelerated
Audio degrades with repeated runs, especially compared to the quality after a fresh flash of the device
Low fidelity or choppy output
Sometimes segments of the audio are missing intermittently and unpredictably

What I’ve Tried So Far

To rule out common causes, I’ve implemented the following:

Added a magic header (0xAA55) for packet boundary detection
Added a CRC32 checksum to verify packet integrity
Tracked packet sequence numbers to detect out-of-order or lost packets
Tuned record size to 120 samples per packet to avoid fragmentation
Increased DMA buffer length and count to prevent underruns
Verified no packet loss over Wi-Fi by checking sequence and CRC
Used sample rate of 17000 Hz and 16-bit mono audio

Despite these efforts, the audio still exhibits the above problems.

My Setup

Arduino (M5Stack Atom Echo)

Records mic input at 17000 Hz, 16-bit mono
Sends audio in chunks of 120 samples per packet (with sequence, magic, and CRC)
Uses a large circular buffer to avoid data loss during recording

Python Socket Server

Receives packets with proper buffering
Validates magic, sequence, and CRC
Appends valid audio frames and writes to a .wav file

Sample Code

Arduino Code (M5Stack Atom Echo)

#include <M5Unified.h>
#include <WiFi.h>
#include <CRC32.h>

const char* ssid = "xxx";
const char* password = "xxx";
const char* server_ip = "xx.xx.xx.xx";
const uint16_t server_port = 5000;

WiFiClient client;
CRC32 crc;

// Reduce packet size to avoid WiFi fragmentation (MTU < 1400 bytes)
static constexpr size_t record_length = 120;  // Reduced from 240
static constexpr size_t record_number = 256;  // Larger buffer
static constexpr size_t record_samplerate = 17000;
static constexpr size_t record_size = record_number * record_length;

static size_t rec_record_idx = 0;
static uint32_t packet_seq = 0;
int16_t* rec_data;
bool is_connected = false;

// Packet with header (magic + seq + crc)
struct AudioPacket {
  uint16_t magic;  // 0xAA55 for boundary detection
  uint32_t seq;
  uint32_t crc;
  int16_t data[record_length];
} __attribute__((packed));

void setup() {
  M5.begin();
  Serial.begin(115200);
  M5.Speaker.end();

  // Configure mic for 16-bit mono 17000Hz
  auto mic_cfg = M5.Mic.config();
  mic_cfg.sample_rate = 17000;
  mic_cfg.stereo = true;  // Force mono
  mic_cfg.dma_buf_len = record_number;  // Larger DMA buffer to avoid underflow
  mic_cfg.dma_buf_count = 8;
  M5.Mic.config(mic_cfg);  // Apply config
  M5.Mic.begin();  // Initialize mic with default config

  // Allocate buffer
  rec_data = (int16_t*)heap_caps_malloc(record_size * sizeof(int16_t), MALLOC_CAP_8BIT);
  memset(rec_data, 0, record_size * sizeof(int16_t));

  // Wi-Fi connection with retry
  WiFi.begin(ssid, password);
  while (WiFi.status() != WL_CONNECTED) {
    delay(500);
    Serial.print(".");
  }
  Serial.println("\nWi-Fi connected");
}

void loop() {
  // Reconnect if disconnected
  if (!client.connected()) {
    is_connected = client.connect(server_ip, server_port);
    if (is_connected) {
      Serial.println("Server connected");
      packet_seq = 0;  // Reset sequence on new connection
    } else {
      delay(1000);
      return;
    }
  }

  if (client.connected() && M5.Mic.isEnabled()) {
    AudioPacket packet;
    auto data = &rec_data[rec_record_idx * record_length];
    
    if (M5.Mic.record(data, record_length, record_samplerate)) {
      // Fill packet with magic number and reset CRC
      packet.magic = 0xAA55;
      packet.seq = packet_seq++;
      crc.reset();  // Critical: reset CRC for each packet
      
      memcpy(packet.data, data, record_length * sizeof(int16_t));
      packet.crc = crc.calculate((uint8_t*)&packet.data, record_length * sizeof(int16_t));
      
      client.write((uint8_t*)&packet, sizeof(AudioPacket));
      delay(10);
    
      if (++rec_record_idx >= record_number) {
        memset(rec_data, 0, record_size * sizeof(int16_t));
        rec_record_idx = 0;
      }
    }
  }
}

Python Socket Server

import socket
import wave
import struct
import zlib
import numpy as np
import sounddevice as sd

HOST = '0.0.0.0'
PORT = 5000
OUTPUT_FILE = 'recorded_audio.wav'

SAMPLE_RATE = 17000
CHANNELS = 1
SAMPLE_WIDTH = 2
RECORD_LENGTH = 120  # Must match Arduino
PACKET_SIZE = 2 + 4 + 4 + (RECORD_LENGTH * 2)  # magic(2) + seq(4) + crc(4) + data

frames = []
last_seq = -1
buffer = b''

try:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind((HOST, PORT))
        sock.listen(1)
        conn, addr = sock.accept()
        with conn:
            print(f"Connected by {addr}")
            while True:
                # Handle fragmented data by buffering
                buffer += conn.recv(1024)
                while len(buffer) >= PACKET_SIZE:
                    # Extract complete packet
                    packet = buffer[:PACKET_SIZE]
                    buffer = buffer[PACKET_SIZE:]
                    
                    # Verify magic number
                    magic, seq, crc_received = struct.unpack('<HII', packet[:10])
                    if magic != 0xAA55:
                        print("Invalid packet magic number")
                        continue
                    
                    # Verify sequence
                    if seq != last_seq + 1:
                        print(f"Sequence error: expected {last_seq + 1}, got {seq}")
                    last_seq = seq
                    
                    # Verify CRC
                    audio_data = packet[10:]
                    crc_calculated = zlib.crc32(audio_data) & 0xFFFFFFFF
                    if crc_received != crc_calculated:
                        print(f"CRC error: received {crc_received}, calculated {crc_calculated}")
                        continue
                    
                    frames.append(audio_data)
except:
    # Save WAV
    with wave.open(OUTPUT_FILE, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(SAMPLE_WIDTH)
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(b''.join(frames))
    print(f"Audio saved to {OUTPUT_FILE}")

Recorded Audio
Any advice or pointers would be greatly appreciated.

Thanks in advance.