Hi everyone,
I'm working on a project using the M5Stack Atom Echo to stream audio over Wi-Fi to a Python socket server and save it as a .wav file. However, I’ve been consistently running into the following audio quality issues:
- Audio sounds sped up or accelerated
- Audio degrades with repeated runs, especially compared to the quality after a fresh flash of the device
- Low fidelity or choppy output
- Sometimes segments of the audio are missing intermittently and unpredictably
What I’ve Tried So Far
To rule out common causes, I’ve implemented the following:
- Added a magic header (0xAA55) for packet boundary detection
- Added a CRC32 checksum to verify packet integrity
- Tracked packet sequence numbers to detect out-of-order or lost packets
- Tuned record size to 120 samples per packet to avoid fragmentation
- Increased DMA buffer length and count to prevent underruns
- Verified no packet loss over Wi-Fi by checking sequence and CRC
- Used sample rate of 17000 Hz and 16-bit mono audio
Despite these efforts, the audio still exhibits the above problems.
My Setup
Arduino (M5Stack Atom Echo)
- Records mic input at 17000 Hz, 16-bit mono
- Sends audio in chunks of 120 samples per packet (with sequence, magic, and CRC)
- Uses a large circular buffer to avoid data loss during recording
Python Socket Server
- Receives packets with proper buffering
- Validates magic, sequence, and CRC
- Appends valid audio frames and writes to a .wavfile
Sample Code
Arduino Code (M5Stack Atom Echo)
#include <M5Unified.h>
#include <WiFi.h>
#include <CRC32.h>
const char* ssid = "xxx";
const char* password = "xxx";
const char* server_ip = "xx.xx.xx.xx";
const uint16_t server_port = 5000;
WiFiClient client;
CRC32 crc;
// Reduce packet size to avoid WiFi fragmentation (MTU < 1400 bytes)
static constexpr size_t record_length = 120;  // Reduced from 240
static constexpr size_t record_number = 256;  // Larger buffer
static constexpr size_t record_samplerate = 17000;
static constexpr size_t record_size = record_number * record_length;
static size_t rec_record_idx = 0;
static uint32_t packet_seq = 0;
int16_t* rec_data;
bool is_connected = false;
// Packet with header (magic + seq + crc)
struct AudioPacket {
  uint16_t magic;  // 0xAA55 for boundary detection
  uint32_t seq;
  uint32_t crc;
  int16_t data[record_length];
} __attribute__((packed));
void setup() {
  M5.begin();
  Serial.begin(115200);
  M5.Speaker.end();
  // Configure mic for 16-bit mono 17000Hz
  auto mic_cfg = M5.Mic.config();
  mic_cfg.sample_rate = 17000;
  mic_cfg.stereo = true;  // Force mono
  mic_cfg.dma_buf_len = record_number;  // Larger DMA buffer to avoid underflow
  mic_cfg.dma_buf_count = 8;
  M5.Mic.config(mic_cfg);  // Apply config
  M5.Mic.begin();  // Initialize mic with default config
  // Allocate buffer
  rec_data = (int16_t*)heap_caps_malloc(record_size * sizeof(int16_t), MALLOC_CAP_8BIT);
  memset(rec_data, 0, record_size * sizeof(int16_t));
  // Wi-Fi connection with retry
  WiFi.begin(ssid, password);
  while (WiFi.status() != WL_CONNECTED) {
    delay(500);
    Serial.print(".");
  }
  Serial.println("\nWi-Fi connected");
}
void loop() {
  // Reconnect if disconnected
  if (!client.connected()) {
    is_connected = client.connect(server_ip, server_port);
    if (is_connected) {
      Serial.println("Server connected");
      packet_seq = 0;  // Reset sequence on new connection
    } else {
      delay(1000);
      return;
    }
  }
  if (client.connected() && M5.Mic.isEnabled()) {
    AudioPacket packet;
    auto data = &rec_data[rec_record_idx * record_length];
    
    if (M5.Mic.record(data, record_length, record_samplerate)) {
      // Fill packet with magic number and reset CRC
      packet.magic = 0xAA55;
      packet.seq = packet_seq++;
      crc.reset();  // Critical: reset CRC for each packet
      
      memcpy(packet.data, data, record_length * sizeof(int16_t));
      packet.crc = crc.calculate((uint8_t*)&packet.data, record_length * sizeof(int16_t));
      
      client.write((uint8_t*)&packet, sizeof(AudioPacket));
      delay(10);
    
      if (++rec_record_idx >= record_number) {
        memset(rec_data, 0, record_size * sizeof(int16_t));
        rec_record_idx = 0;
      }
    }
  }
}
Python Socket Server
import socket
import wave
import struct
import zlib
import numpy as np
import sounddevice as sd
HOST = '0.0.0.0'
PORT = 5000
OUTPUT_FILE = 'recorded_audio.wav'
SAMPLE_RATE = 17000
CHANNELS = 1
SAMPLE_WIDTH = 2
RECORD_LENGTH = 120  # Must match Arduino
PACKET_SIZE = 2 + 4 + 4 + (RECORD_LENGTH * 2)  # magic(2) + seq(4) + crc(4) + data
frames = []
last_seq = -1
buffer = b''
try:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind((HOST, PORT))
        sock.listen(1)
        conn, addr = sock.accept()
        with conn:
            print(f"Connected by {addr}")
            while True:
                # Handle fragmented data by buffering
                buffer += conn.recv(1024)
                while len(buffer) >= PACKET_SIZE:
                    # Extract complete packet
                    packet = buffer[:PACKET_SIZE]
                    buffer = buffer[PACKET_SIZE:]
                    
                    # Verify magic number
                    magic, seq, crc_received = struct.unpack('<HII', packet[:10])
                    if magic != 0xAA55:
                        print("Invalid packet magic number")
                        continue
                    
                    # Verify sequence
                    if seq != last_seq + 1:
                        print(f"Sequence error: expected {last_seq + 1}, got {seq}")
                    last_seq = seq
                    
                    # Verify CRC
                    audio_data = packet[10:]
                    crc_calculated = zlib.crc32(audio_data) & 0xFFFFFFFF
                    if crc_received != crc_calculated:
                        print(f"CRC error: received {crc_received}, calculated {crc_calculated}")
                        continue
                    
                    frames.append(audio_data)
except:
    # Save WAV
    with wave.open(OUTPUT_FILE, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(SAMPLE_WIDTH)
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(b''.join(frames))
    print(f"Audio saved to {OUTPUT_FILE}")
Recorded Audio
Any advice or pointers would be greatly appreciated.
Thanks in advance.