/*
 * RTP buffer descriptor.
 *
 * NOTE(review): this struct is not referenced in the visible part of the
 * file, so the roles of the fields below are guesses from their names only
 * and should be confirmed against the code that actually uses it.
 */
struct rtp_buf_t
{
    void *buf;              /* presumably the backing storage - TODO confirm */
    unsigned int len[48];   /* 48 unsigned slots; presumably per-entry lengths */
    unsigned int size;
    unsigned int types;
};
/*
 * State of the byte ring buffer handled by the frame_buf_* helpers.
 *
 * NOTE(review): both indices are 16-bit, so they cannot address offsets
 * above 65535; confirm the capacity passed to frame_buf_init stays in range.
 */
typedef struct {
    volatile unsigned short w_index;  /* offset in frame of the next byte to write */
    volatile unsigned short r_index;  /* offset in frame of the next byte to read */
    unsigned char *frame;             /* storage from frame_buf_init; NULL once cleared */
    size_t capacity;                  /* size of frame in bytes */
} frame_buf_t;
/* Capture-side and playback-side pipelines; NULL while audio is stopped. */
static audio_pipeline_handle_t _recorder = NULL;
static audio_pipeline_handle_t _player = NULL;

/*
 * Pipeline elements kept at file scope.
 * _raw_read2 is the last element of the recorder pipeline and _raw_write the
 * first element of the player pipeline. _raw_read1 and _i2s_stream_reader
 * are not used in the visible part of this file.
 */
static audio_element_handle_t _raw_read1 = NULL;
static audio_element_handle_t _raw_read2 = NULL;
static audio_element_handle_t _raw_write = NULL;
static audio_element_handle_t _i2s_stream_reader = NULL;

/* Outgoing RTP state: frames sent so far, payload type, next sequence number. */
static int _pktCount = 0;
static int _payloadType = 0;
static int _sequenceNumber = 0;

/* Non-zero makes MME_WriteAudioRTP skip handing payload to the player. */
static char _mutePlayout = 0;

/* Ring buffer filled by the periodic timer and drained by MME_ReadAudioRTP. */
static frame_buf_t _recordBuffer;

/* Timer that moves encoded audio into _recordBuffer. */
static esp_timer_handle_t _periodic_timer;

/* Set to 1 once the periodic timer has been created and started. */
static uint8_t _started = 0;
/*
 * Store a 16-bit value at ptr in big-endian (network) byte order.
 *
 * ptr must point to at least two writable bytes.
 */
static void rtp_pack_uint16(void* ptr, unsigned short val)
{
    unsigned char *out = (unsigned char *)ptr;

    out[1] = (unsigned char)(val & 0xFFu);          /* low byte last */
    out[0] = (unsigned char)((val >> 8) & 0xFFu);   /* high byte first */
}
/*
 * Store a 32-bit value at ptr in big-endian (network) byte order.
 *
 * ptr must point to at least four writable bytes.
 */
static void rtp_pack_uint32(void* ptr, unsigned val)
{
    unsigned char *out = (unsigned char *)ptr;
    int i = 0;

    /* Emit the most significant byte first, then walk down to the lowest. */
    for (int shift = 24; shift >= 0; shift -= 8) {
        out[i++] = (unsigned char)(val >> shift);
    }
}
/*
 * Prepare a ring buffer: reset both indices, record the capacity and
 * allocate the storage.
 *
 * buf      - ring buffer to initialise; NULL is rejected.
 * capacity - storage size in bytes; 0 is rejected, because malloc(0) is
 *            implementation-defined and a zero capacity is unusable as a
 *            ring size.
 *
 * Returns the allocated storage (also stored in buf->frame), or NULL on
 * failure, in which case buf->frame is NULL. The caller releases the storage
 * with frame_buf_clear. Any storage buf->frame pointed to before the call is
 * not freed here.
 */
static void* frame_buf_init(frame_buf_t *buf, size_t capacity)
{
    if (!buf) return NULL;

    buf->w_index = buf->r_index = 0;
    buf->capacity = capacity;
    buf->frame = NULL;
    if (capacity == 0) return NULL;

    buf->frame = malloc(capacity);
    return buf->frame;
}
/*
 * Empty the ring buffer and release its storage.
 *
 * Both indices go back to 0 and buf->frame becomes NULL; buf->capacity is
 * left as it was. buf itself must not be NULL.
 */
static void frame_buf_clear(frame_buf_t *buf)
{
    buf->r_index = 0;
    buf->w_index = 0;

    /* free(NULL) is a no-op, so no guard is needed. */
    free(buf->frame);
    buf->frame = NULL;
}
/*
 * Number of bytes currently stored in the ring buffer.
 *
 * Returns 0 for a NULL buffer, and also for a buffer with no storage or a
 * zero capacity (never initialised or already cleared). Checking the
 * capacity avoids a modulo by zero on such a buffer.
 */
static size_t frame_buf_len(frame_buf_t *buf) {
    if (!buf || !buf->frame || buf->capacity == 0) return 0;
    /* Adding capacity before subtracting keeps the value non-negative when
     * the write index has wrapped behind the read index. */
    return (buf->w_index + buf->capacity - buf->r_index) % buf->capacity;
}
/*
 * Append len bytes from dat to the ring buffer, all or nothing.
 *
 * One byte of the storage is always left unused so that
 * w_index == r_index can only mean "empty".
 *
 * Returns len on success, 0 when buf or dat is NULL or len is 0, and -1 when
 * the buffer has no storage, len >= capacity, or there is not enough free
 * space (nothing is copied in that case).
 */
static int frame_buf_write(frame_buf_t *buf, const void* dat, const size_t len)
{
    if (!buf || !dat || !len) return 0;
    if (!buf->frame || len >= buf->capacity) return -1;

    /* Read each volatile index once so every check below sees one state. */
    const size_t w = buf->w_index;
    const size_t r = buf->r_index;

    if (r > w) {
        /* Free space is the gap [w, r), minus the reserved byte. */
        if (w + len >= r) return -1;
        memcpy(buf->frame + w, dat, len);
        buf->w_index = (unsigned short)(w + len);
    }
    else {
        const size_t tail = buf->capacity - w;
        if (tail > len) {
            /* Fits before the end of the storage without reaching it. */
            memcpy(buf->frame + w, dat, len);
            buf->w_index = (unsigned short)(w + len);
        }
        else if (r > len - tail) {
            /*
             * The data reaches or crosses the end of the storage, so the
             * write index wraps to len - tail. This is only allowed while
             * that stays strictly below r; in particular an exact fill of
             * the tail with r == 0 is refused, because it would make a full
             * buffer look empty.
             */
            memcpy(buf->frame + w, dat, tail);
            memcpy(buf->frame, (const char *)dat + tail, len - tail);
            buf->w_index = (unsigned short)(len - tail);
        }
        else
            return -1;
    }
    return (int)len;
}
/*
 * Remove exactly len bytes from the ring buffer into dat, all or nothing.
 *
 * Returns dat on success. Returns NULL when buf or dat is NULL, len is 0,
 * the buffer has no storage, len >= capacity, or fewer than len bytes are
 * stored (the buffer is left untouched in that case).
 */
static void* frame_buf_read(frame_buf_t *buf, void* dat, const size_t len)
{
    if (buf == NULL || dat == NULL || len == 0)
        return NULL;
    if (buf->frame == NULL || buf->capacity <= len)
        return NULL;

    if (buf->r_index > buf->w_index) {
        /* Stored data wraps: [r_index, capacity) followed by [0, w_index). */
        const size_t tail = buf->capacity - buf->r_index;

        if (len <= tail) {
            memcpy(dat, buf->frame + buf->r_index, len);
            buf->r_index += len;
            if (buf->r_index >= buf->capacity)
                buf->r_index = 0;
            return dat;
        }
        if (buf->w_index < len - tail)
            return NULL;

        /* Take the tail first, then the remainder from the start. */
        memcpy(dat, buf->frame + buf->r_index, tail);
        memcpy((char *)dat + tail, buf->frame, len - tail);
        buf->r_index = len - tail;
        return dat;
    }

    /* Stored data is one contiguous run [r_index, w_index). */
    if (buf->r_index + len > buf->w_index)
        return NULL;
    memcpy(dat, buf->frame + buf->r_index, len);
    buf->r_index += len;
    return dat;
}
/*
 * Name of the audio codec, as the constant string "PCMA/8000".
 *
 * The returned pointer refers to static storage and must not be freed or
 * modified by the caller.
 */
const char* MME_AudioCodecName(void)
{
    static const char codec_name[] = "PCMA/8000";
    return codec_name;
}
/*
 * Install the I2S driver on port 0 (master, TX and RX) and route its pins.
 *
 * Returns ESP_OK on success. If installing the driver fails, that error is
 * returned unchanged. If looking up or applying the pin configuration fails,
 * the driver installed by this call is removed again and the error is
 * returned, so a failed call does not leave a driver without pins behind.
 */
static esp_err_t i2s_driver_init(void)
{
    i2s_config_t i2s_cfg = {
        .mode = I2S_MODE_MASTER | I2S_MODE_TX | I2S_MODE_RX,
        .sample_rate = I2S_SAMPLE_RATE,
        .bits_per_sample = I2S_BITS,
        .channel_format = I2S_CHANNELS,
        .communication_format = I2S_COMM_FORMAT_STAND_I2S,
        .tx_desc_auto_clear = true,
        .dma_buf_count = 8,
        .dma_buf_len = 64,
        .use_apll = false,
        .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
    };
    ESP_RETURN_ON_ERROR(i2s_driver_install(I2S_NUM_0, &i2s_cfg, 0, NULL), TAG, "install i2s failed");

    i2s_pin_config_t i2s_pin_cfg = {0};
    esp_err_t err = get_i2s_pins(I2S_NUM_0, &i2s_pin_cfg);
    if (err == ESP_OK) {
        err = i2s_set_pin(I2S_NUM_0, &i2s_pin_cfg);
    }
    if (err != ESP_OK) {
        ESP_LOGE(TAG, "configure i2s pins failed");
        /* Roll back the install done above. */
        i2s_driver_uninstall(I2S_NUM_0);
        return err;
    }
    return ESP_OK;
}
/*
 * Swap the two 16-bit values of every consecutive pair in raw_buff, in place.
 *
 * len / 4 pairs are processed, so len is presumably the buffer length in
 * bytes (each pair is two int16_t values) - confirm with the caller.
 * Always returns ESP_OK.
 */
static esp_err_t i2s_data_swap(int16_t *raw_buff, int len)
{
    const int pairs = len / 4;

    for (int pair = 0; pair < pairs; ++pair) {
        int16_t *first = &raw_buff[pair << 1];
        int16_t *second = first + 1;
        const int16_t saved = *first;

        *first = *second;
        *second = saved;
    }
    return ESP_OK;
}
/*
 * Read callback installed on the algorithm stream element: pulls up to len
 * bytes from I2S port 0 into buf, waiting at most wait_time ticks.
 *
 * el and ctx are unused. Returns the number of bytes i2s_read reported; a
 * failed read is logged and whatever count was reported (initially 0) is
 * still returned.
 */
static int i2s_stream_read_cb(audio_element_handle_t el, char *buf, int len, TickType_t wait_time, void *ctx)
{
    size_t got = 0;

    if (i2s_read(I2S_NUM_0, buf, len, &got, wait_time) != ESP_OK) {
        ESP_LOGE(TAG, "[echo] i2s read failed");
    }
    return got;
}
/*
 * Write callback installed on the G.711 decoder element: sends len bytes of
 * 16-bit samples from buf to I2S port 0, waiting at most wait_time ticks.
 *
 * On the KORVO2L V1 board every sample is duplicated into a temporary
 * two-channel buffer before writing.
 *
 * el and ctx are unused. Always returns len, so the data counts as consumed
 * even if the I2S write or the temporary allocation fails; failures are
 * logged and that chunk is dropped.
 */
static int i2s_stream_write_cb(audio_element_handle_t el, char *buf, int len, TickType_t wait_time, void *ctx)
{
    size_t bytes_written = 0, data_size = len;
    int16_t *buf_w = (int16_t *)buf;
#if CONFIG_ESP32_S3_KORVO2L_V1_BOARD
    data_size = 2 * len;
    int16_t *buf_2ch = audio_calloc(1, data_size);
    if (!buf_2ch) {
        /* Out of memory: drop this chunk rather than write through NULL. */
        ESP_LOGE(TAG, "i2s write: no memory for 2ch buffer");
        return len;
    }
    for (int i = 0; i < len / 2; i++) {
        buf_2ch[i << 1] = buf_w[i];
        buf_2ch[(i << 1) + 1] = buf_w[i];
    }
    buf_w = buf_2ch;
#endif
    int ret = i2s_write_expand(I2S_NUM_0, buf_w, data_size, 16, I2S_BITS, &bytes_written, wait_time);
    if (ret != ESP_OK) {
        ESP_LOGE(TAG, "i2s write failed");
    }
#if CONFIG_ESP32_S3_KORVO2L_V1_BOARD
    free(buf_2ch);
#endif
    return len;
}
/*
 * Timer callback: moves up to 256 bytes from the end of the recorder
 * pipeline (_raw_read2) into _recordBuffer.
 *
 * arg is unused. A zero or negative result from raw_stream_read is skipped
 * explicitly rather than being passed on as a length. If the ring buffer has
 * no room, frame_buf_write refuses the chunk and it is dropped.
 */
static void periodic_timer_callback(void* arg)
{
    uint8_t temp[256];
    (void)arg;

    const int ret = raw_stream_read(_raw_read2, (void *)temp, sizeof temp);
    if (ret <= 0) return;

    (void)frame_buf_write(&_recordBuffer, temp, (size_t)ret);
}
/*
 * Creation arguments for the timer that runs periodic_timer_callback.
 * No user argument is passed to the callback.
 */
const esp_timer_create_args_t periodic_timer_args = {
    .callback = periodic_timer_callback,
    .arg = NULL,
    .name = "periodic",
};
int MME_StartAudio(int payloadType, int sendCodecIndex, int recvCodecIndex)
{
i2s_driver_init();
audio_board_handle_t board_handle = (audio_board_handle_t) audio_calloc(1, sizeof(struct audio_board_handle));
audio_hal_codec_config_t audio_codec_cfg = AUDIO_CODEC_DEFAULT_CONFIG();
audio_codec_cfg.i2s_iface.samples = AUDIO_HAL_08K_SAMPLES;
board_handle->audio_hal = audio_hal_init(&audio_codec_cfg, &AUDIO_CODEC_ES8311_DEFAULT_HANDLE);
#if CONFIG_ESP32_S3_KORVO2_V3_BOARD
board_handle->adc_hal = audio_board_adc_init();
#endif
audio_hal_ctrl_codec(board_handle->audio_hal, AUDIO_HAL_CODEC_MODE_BOTH, AUDIO_HAL_CTRL_START);
audio_hal_set_volume(board_handle->audio_hal, 67);
_payloadType = payloadType;
_pktCount = 0;
_sequenceNumber = 0;
audio_pipeline_cfg_t pipeline_cfg2 = DEFAULT_AUDIO_PIPELINE_CONFIG();
_player = audio_pipeline_init(&pipeline_cfg2);
if(!_player) goto clean;
raw_stream_cfg_t raw_cfg2 = RAW_STREAM_CFG_DEFAULT();
raw_cfg2.type = AUDIO_STREAM_WRITER;
raw_cfg2.out_rb_size = 1024;
_raw_write = raw_stream_init(&raw_cfg2);
if(!_raw_write) goto clean;
g711_decoder_cfg_t g711_cfg2 = DEFAULT_G711_DECODER_CONFIG();
audio_element_handle_t g711_decoder = g711_decoder_init(&g711_cfg2);
g711_cfg2.task_core = 1;
if(!g711_decoder) goto clean;
audio_element_set_write_cb(g711_decoder, i2s_stream_write_cb, NULL);
audio_element_set_output_timeout(g711_decoder, portMAX_DELAY);
audio_pipeline_register(_player, _raw_write, "raw");
audio_pipeline_register(_player, g711_decoder, "dec");
const char *link_tag2[2] = {"raw", "dec"};
audio_pipeline_link(_player, &link_tag2[0], 2);
ESP_LOGI(TAG, "[3.0] Create audio pipeline for recording");
audio_pipeline_cfg_t pipeline_cfg1 = DEFAULT_AUDIO_PIPELINE_CONFIG();
_recorder = audio_pipeline_init(&pipeline_cfg1);
if(!_recorder) goto clean;
algorithm_stream_cfg_t algo_config = ALGORITHM_STREAM_CFG_DEFAULT();
#if CONFIG_ESP32_S3_KORVO2_V3_BOARD
algo_config.swap_ch = true;
#endif
algo_config.sample_rate = I2S_SAMPLE_RATE;
algo_config.out_rb_size = ESP_RING_BUFFER_SIZE;
algo_config.task_core = 0;
audio_element_handle_t element_algo = algo_stream_init(&algo_config);
if(!element_algo) goto clean;
audio_element_set_read_cb(element_algo, i2s_stream_read_cb, NULL);
audio_element_set_input_timeout(element_algo, portMAX_DELAY);
g711_encoder_cfg_t g711_cfg1 = DEFAULT_G711_ENCODER_CONFIG();
g711_cfg1.out_rb_size = ESP_RING_BUFFER_SIZE/2;
g711_cfg1.task_core = 1;
audio_element_handle_t g711_encoder = g711_encoder_init(&g711_cfg1);
if(!g711_encoder) goto clean;
raw_stream_cfg_t raw_cfg1 = RAW_STREAM_CFG_DEFAULT();
raw_cfg1.type = AUDIO_STREAM_READER;
raw_cfg1.out_rb_size = ESP_RING_BUFFER_SIZE / 2;
_raw_read2 = raw_stream_init(&raw_cfg1);
audio_element_set_output_timeout(_raw_read2, portMAX_DELAY);
if(!_raw_read2) goto clean;
audio_pipeline_register(_recorder, element_algo, "algo");
audio_pipeline_register(_recorder, g711_encoder, "enc");
audio_pipeline_register(_recorder, _raw_read2, "raw2");
const char *link_tag1[3] = {"algo", "enc", "raw2"};
audio_pipeline_link(_recorder, &link_tag1[0], 3);
AUDIO_MEM_SHOW(TAG);
audio_pipeline_run(_player);
audio_pipeline_run(_recorder);
if (!frame_buf_init(&_recordBuffer, RECORD_FRAME_BUF_SIZE))
{
ESP_LOGE(TAG, "init frame buf failed");
goto clean;
}
ESP_ERROR_CHECK(esp_timer_create(&periodic_timer_args, &_periodic_timer));
ESP_ERROR_CHECK(esp_timer_start_periodic(_periodic_timer, 10000));
_started = 1;
return 0;
clean:
ESP_LOGE(TAG, "MME_StopAudio");
MME_StopAudio();
return -1;
}
/*
 * Build one outgoing RTP packet from the captured audio.
 *
 * Copies up to six whole frames of AUDIO_FRAME_SIZE bytes from the record
 * ring buffer to rtp + rtpHeaderLength, then writes the 12-byte fixed RTP
 * header (version 2, payload type, sequence number, timestamp, SSRC) at the
 * start of rtp.
 *
 * rtp             - destination buffer.
 * rtpBufferSize   - size of rtp in bytes.
 * rtpHeaderLength - offset of the payload inside rtp; must be at least 12 so
 *                   the header written here cannot overlap the payload.
 * timeoutMs       - unused; the call never waits.
 *
 * Returns the packet length (rtpHeaderLength + payload bytes) on success,
 * 0 when rtp is NULL or no whole frame is buffered, and -1 when the
 * arguments leave no room for a frame or no frame could be read.
 */
int MME_ReadAudioRTP(void* rtp, int rtpBufferSize, int rtpHeaderLength, unsigned timeoutMs)
{
    const int rtp_fixed_header_len = 12;
    const unsigned int max_frames_per_packet = 6;
    unsigned int iFrmCnt = 0, i = 0;

    (void)timeoutMs;
    if (!rtp) return 0;

    iFrmCnt = frame_buf_len(&_recordBuffer)/AUDIO_FRAME_SIZE;
    if (iFrmCnt == 0) return 0;

    /* Reject layouts where the header would not fit in front of the payload
     * or where the payload area is empty or negative. A negative difference
     * must never reach the unsigned frame count below. */
    if (rtpHeaderLength < rtp_fixed_header_len || rtpBufferSize <= rtpHeaderLength) return -1;

    i = (unsigned int)(rtpBufferSize - rtpHeaderLength)/AUDIO_FRAME_SIZE;
    if (i > max_frames_per_packet) i = max_frames_per_packet;
    if (iFrmCnt > i) iFrmCnt = i;

    /* Taken before the frames are consumed, so it refers to the first frame
     * of this packet. Unsigned arithmetic lets it wrap without overflow. */
    const uint32_t captureTimeStamp = (uint32_t)_pktCount * RECORD_PTIME_MS * RECORD_SAMPLE_KHZ;

    for (i = 0; i < iFrmCnt; ++i) {
        char* p = (char*)rtp + rtpHeaderLength + i*AUDIO_FRAME_SIZE;
        if (!frame_buf_read(&_recordBuffer, p, AUDIO_FRAME_SIZE)) {
            break;
        }
        _pktCount++;
    }
    /* i is now the number of frames actually copied. */
    if (i == 0) return -1;

    uint8_t* dataBuffer = (uint8_t*)rtp;
    dataBuffer[0] = 0x80;
    dataBuffer[1] = (uint8_t)_payloadType;
    rtp_pack_uint16(dataBuffer+2, _sequenceNumber++);
    rtp_pack_uint32(dataBuffer+4, captureTimeStamp);
    rtp_pack_uint32(dataBuffer+8, 0x6d692aa2);

    return rtpHeaderLength + (int)(i * AUDIO_FRAME_SIZE);
}
/*
 * Hand the payload of a received RTP packet to the player pipeline.
 *
 * rtp             - start of the packet.
 * rtpHeaderLength - offset of the payload inside the packet; must be at
 *                   least 12 and not larger than rtpLen.
 * rtpLen          - total packet length in bytes.
 *
 * Returns 0 when the packet was accepted, ignored or muted: rtp is NULL,
 * rtpLen <= 12, the payload type differs from the one given to
 * MME_StartAudio, playout is muted, or the payload is empty. Returns -1 when
 * rtpHeaderLength is invalid or the player is not running.
 */
int MME_WriteAudioRTP(void* rtp, int rtpHeaderLength, int rtpLen)
{
    const uint8_t* dataBuffer = rtp;

    /* Check the length before looking at any header byte. */
    if (!dataBuffer || rtpLen <= 12) return 0;

    /* Byte 1 holds the marker bit (0x80) and the 7-bit payload type; only
     * the payload type is compared, so packets with the marker bit set are
     * not dropped. */
    if ((dataBuffer[1] & 0x7F) != ((uint8_t)_payloadType & 0x7F)) return 0;

    if (rtpHeaderLength < 12 || rtpHeaderLength > rtpLen) {
        return -1;
    }
    if (_mutePlayout || rtpHeaderLength == rtpLen) return 0;
    if (!_raw_write) return -1;

    /* The result of the write is not reported to the caller, as before. */
    (void)raw_stream_write(_raw_write, (char *)rtp + rtpHeaderLength, rtpLen - rtpHeaderLength);
    return 0;
}
/*
 * Enable or disable playout muting.
 *
 * While the stored flag is non-zero, MME_WriteAudioRTP does not pass
 * received payload on to the player pipeline.
 */
void MME_MutePlayout(char is_mute) {
    _mutePlayout = is_mute;
}
/*
 * Audio-call hook; currently does nothing.
 */
void MME_OnAudioCall(void) {
    /* Intentionally empty. */
}
void MME_StopAudio(void)
{
if(_started)
{
ESP_ERROR_CHECK(esp_timer_stop(_periodic_timer));
ESP_ERROR_CHECK(esp_timer_delete(_periodic_timer));
}
if(_player)
{
audio_pipeline_stop(_player);
audio_pipeline_wait_for_stop(_player);
audio_pipeline_deinit(_player);
_player = NULL;
}
if(_recorder)
{
audio_pipeline_stop(_recorder);
audio_pipeline_wait_for_stop(_recorder);
audio_pipeline_deinit(_recorder);
_recorder = NULL;
}
frame_buf_clear(&_recordBuffer);
_started = 0;
}