#include <Arduino.h>
#include <WiFi.h>
#include <WiFiClientSecure.h>
#include <ArduinoJson.h>
#include "driver/i2s.h"
#include "FS.h"
#include "SD.h"
#include "SPI.h"
#include <base64.h>
// ---- Network and cloud-service configuration ----
// NOTE(review): the Wi-Fi, Coze and Baidu credentials below are compiled into
// the firmware as plain text; consider keeping real values out of shared source.

// Wi-Fi network name and password handed to WiFi.begin() in setup().
const char* WIFI_SSID = "你的 WiFi 名称";
const char* WIFI_PASS = "你的 WiFi 密码";
// Coze chat API: bearer token sent in the Authorization header, plus the bot
// and user identifiers placed in the chat request.
const char* COZE_API_KEY = "pat_DF8e73SOxxxxxxxxxx1VuKKxxxxxxaGwdBqc";
const char* COZE_BOT_ID = "757621xxxxxxx0";
const char* COZE_USER_ID = "123";
// Host and TLS port used for every Coze connection.
const char* COZE_API_DOMAIN = "api.coze.cn";
const int COZE_API_PORT = 443;
// Baidu key pair exchanged for an OAuth access token in getBaiduToken().
const char* BAIDU_API_KEY = "你的百度 API Key";
const char* BAIDU_SECRET_KEY = "你的百度 Secret Key";
// Baidu speech-recognition (ASR) and speech-synthesis (TTS) endpoints.
const char* BAIDU_ASR_URL = "https://vop.baidu.com/pro_api";
const char* BAIDU_TTS_URL = "https://tsn.baidu.com/text2audio";
// GPIO numbers for the I2S input port (I2S_NUM_0, configured in initI2SRecord()):
// bit clock, word select and data-in.
#define I2S_REC_BCLK 26
#define I2S_REC_LRC 25
#define I2S_REC_DIN 34
// GPIO numbers for the I2S output port (I2S_NUM_1, configured in initI2SPlay()):
// bit clock, word select and data-out.
#define I2S_PLAY_BCLK 13
#define I2S_PLAY_LRC 12
#define I2S_PLAY_DOUT 14
// SD card SPI pins, passed to SPI.begin() / SD.begin() in initSDCard().
#define SD_CS 5
#define SD_SCK 18
#define SD_MISO 19
#define SD_MOSI 23
// Recording format: sample rate in Hz and sample width.
#define SAMPLE_RATE 16000
#define BITS_PER_SAMPLE I2S_BITS_PER_SAMPLE_16BIT
// Derived from the enum value above by integer division.
#define BYTES_PER_SAMPLE (BITS_PER_SAMPLE / 8)
// Recording length in milliseconds (compared against millis() differences).
#define RECORD_DURATION 6000
// SD card paths of the captured audio and of the downloaded TTS audio.
#define RECORD_FILE_PATH "/recording.raw"
#define TTS_FILE_PATH "/tts.mp3"
// Stages of one conversation turn. loop() dispatches on the current value.
typedef enum {
STATE_IDLE,      // Nothing in progress; serial commands are accepted.
STATE_RECORDING, // Set by startRecording() while microphone data is captured.
STATE_ASR,       // A recording exists; loop() calls baiduASR().
STATE_COZE,      // Text is ready in asrText; loop() calls callCozeAI().
STATE_TTS,       // A reply is ready in cozeReply; loop() calls baiduTTSAndPlay().
STATE_PLAYING    // Set by baiduTTSAndPlay() while audio is written to I2S.
} DeviceState;
// Current stage of the conversation pipeline (see DeviceState).
DeviceState currentState = STATE_IDLE;
// Single TLS client shared by every HTTPS request in this file.
WiFiClientSecure client;
// Baidu OAuth token and the millis() timestamp at which it is treated as expired.
String accessToken;
unsigned long tokenExpireTime = 0;
// Text handed from one pipeline stage to the next: recognised (or typed) user
// text, and the reply obtained from Coze.
String asrText;
String cozeReply;
// Scratch buffer reserved for the Base64-encoded recording.
String speechBase64;
// Scratch buffers for raw HTTP response text (Baidu / Coze chat creation,
// Coze status polling, Coze message list).
String response;
String retrieveResp;
String msgResp;
// Prints one log line to the serial port, prefixed with the uptime in
// milliseconds inside square brackets.
void logPrintln(String msg) {
    const unsigned long uptimeMs = millis();
    const char* text = msg.c_str();
    Serial.printf("[%lu] %s\n", uptimeMs, text);
}
// Verifies the Wi-Fi link and attempts one reconnect cycle when it is down.
// Waits for up to 10 polls of 500 ms each.
// Returns true when the station is connected on exit, false otherwise.
bool checkWiFi() {
    // Fast path: nothing to do while the link is up.
    if (WiFi.status() == WL_CONNECTED) {
        return true;
    }

    logPrintln("WiFi 断线,正在重连...");
    WiFi.reconnect();

    for (int attempt = 0; WiFi.status() != WL_CONNECTED && attempt < 10; ++attempt) {
        delay(500);
    }

    const bool linkUp = (WiFi.status() == WL_CONNECTED);
    if (!linkUp) {
        logPrintln("WiFi 重连失败");
        return false;
    }
    logPrintln("WiFi 重连成功!IP:" + WiFi.localIP().toString());
    return true;
}
// Percent-encodes a string for use in a URL query.
//
// ASCII letters and digits are copied unchanged, a space becomes '+', and every
// other byte (including each byte of a multi-byte UTF-8 sequence) becomes
// "%XX" with upper-case hex digits.
//
// @param str  Text to encode.
// @return     The encoded text.
String urlEncode(String str) {
    static const char kHexDigits[] = "0123456789ABCDEF";

    String encodedString;
    // Worst case every byte expands to three characters.
    encodedString.reserve(str.length() * 3);

    for (unsigned int i = 0; i < str.length(); i++) {
        // Work on the unsigned byte value: passing a negative char to isalnum()
        // is undefined, and UTF-8 bytes are >= 0x80.
        const unsigned char c = static_cast<unsigned char>(str.charAt(i));
        if (c == ' ') {
            encodedString += '+';
        } else if (isalnum(c)) {
            encodedString += static_cast<char>(c);
        } else {
            encodedString += '%';
            encodedString += kHexDigits[c >> 4];
            encodedString += kHexDigits[c & 0x0F];
        }
    }
    return encodedString;
}
// Returns the size in bytes of a file on the SD card, or 0 when SD.exists()
// reports that the path is absent.
uint64_t getFileSize(String filePath) {
    uint64_t byteCount = 0;
    if (SD.exists(filePath)) {
        File handle = SD.open(filePath, FILE_READ);
        byteCount = handle.size();
        handle.close();
    }
    return byteCount;
}
// Starts the SPI bus, mounts the SD card and logs its type, capacity and free
// space in MB.
// Returns false when mounting fails or no card is reported, true otherwise.
bool initSDCard() {
    SPI.begin(SD_SCK, SD_MISO, SD_MOSI, SD_CS);

    const bool mounted = SD.begin(SD_CS);
    if (!mounted) {
        logPrintln("❌ SD 卡挂载失败!");
        return false;
    }

    const uint8_t cardType = SD.cardType();
    if (cardType == CARD_NONE) {
        logPrintln("❌ 未检测到 SD 卡!");
        return false;
    }

    // Map the numeric card type onto a label for the log.
    const char* typeLabel = "未知";
    if (cardType == CARD_MMC) {
        typeLabel = "MMC";
    } else if (cardType == CARD_SD) {
        typeLabel = "SDSC";
    } else if (cardType == CARD_SDHC) {
        typeLabel = "SDHC";
    }
    logPrintln("✅ SD 卡类型:" + String(typeLabel));

    const uint64_t bytesPerMb = 1024 * 1024;
    const uint64_t totalMb = SD.cardSize() / bytesPerMb;
    const uint64_t freeMb = (SD.totalBytes() - SD.usedBytes()) / bytesPerMb;
    logPrintln("✅ SD 卡总容量:" + String(totalMb) + " MB");
    logPrintln("✅ SD 卡剩余空间:" + String(freeMb) + " MB");
    return true;
}
void initI2SRecord() {
i2s_config_t i2s_config = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX),
.sample_rate = SAMPLE_RATE,
.bits_per_sample = BITS_PER_SAMPLE,
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
.communication_format = I2S_COMM_FORMAT_I2S_MSB,
.intr_alloc_flags = 0,
.dma_buf_count = 2,
.dma_buf_len = 64,
.use_apll = false
};
i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
i2s_pin_config_t pin_config = {
.bck_io_num = I2S_REC_BCLK,
.ws_io_num = I2S_REC_LRC,
.data_out_num = I2S_PIN_NO_CHANGE,
.data_in_num = I2S_REC_DIN
};
i2s_set_pin(I2S_NUM_0, &pin_config);
logPrintln("✅ I2S 录音模块初始化完成");
}
void initI2SPlay() {
i2s_config_t i2s_config = {
.mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_TX),
.sample_rate = 16000,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT,
.communication_format = I2S_COMM_FORMAT_I2S_MSB,
.intr_alloc_flags = 0,
.dma_buf_count = 4,
.dma_buf_len = 1024,
.use_apll = false
};
i2s_driver_install(I2S_NUM_1, &i2s_config, 0, NULL);
i2s_pin_config_t pin_config = {
.bck_io_num = I2S_PLAY_BCLK,
.ws_io_num = I2S_PLAY_LRC,
.data_out_num = I2S_PLAY_DOUT,
.data_in_num = I2S_PIN_NO_CHANGE
};
i2s_set_pin(I2S_NUM_1, &pin_config);
i2s_stop(I2S_NUM_1);
logPrintln("✅ I2S 播放模块初始化完成");
}
void startRecording() {
if (currentState != STATE_IDLE) {
logPrintln("❌ 当前非空闲状态,无法录音!");
return;
}
if (!checkWiFi()) return;
currentState = STATE_RECORDING;
unsigned long recordStartMillis = millis();
if (SD.exists(RECORD_FILE_PATH)) {
SD.remove(RECORD_FILE_PATH);
logPrintln("ℹ️ 删除旧录音文件");
}
initI2SRecord();
File recFile = SD.open(RECORD_FILE_PATH, FILE_WRITE);
if (!recFile) {
logPrintln("❌ 打开录音文件失败!");
i2s_driver_uninstall(I2S_NUM_0);
currentState = STATE_IDLE;
return;
}
logPrintln("📢 开始录音(6 秒后自动结束)...");
int16_t sampleBuffer[64];
while (currentState == STATE_RECORDING && (millis() - recordStartMillis) < RECORD_DURATION) {
size_t bytesRead;
i2s_read(I2S_NUM_0, sampleBuffer, sizeof(sampleBuffer), &bytesRead, portMAX_DELAY);
if (bytesRead > 0) {
recFile.write((uint8_t*)sampleBuffer, bytesRead);
}
delay(1);
}
recFile.close();
i2s_driver_uninstall(I2S_NUM_0);
currentState = STATE_IDLE;
logPrintln("🛑 录音结束");
if (SD.exists(RECORD_FILE_PATH)) {
uint64_t fileSize = getFileSize(RECORD_FILE_PATH);
float duration = (float)fileSize / (SAMPLE_RATE * BYTES_PER_SAMPLE);
logPrintln("✅ 录音文件保存成功!大小:" + String(fileSize) + " 字节,时长:" + String(duration, 2) + "秒");
currentState = STATE_ASR;
} else {
logPrintln("❌ 录音文件保存失败!");
}
}
bool getBaiduToken() {
if (accessToken.length() > 0 && millis() < tokenExpireTime - 600000) {
return true;
}
logPrintln("ℹ️ 获取百度 API Token...");
String tokenUrl = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=" + String(BAIDU_API_KEY) + "&client_secret=" + String(BAIDU_SECRET_KEY);
if (client.connect("aip.baidubce.com", 443)) {
client.print("GET " + tokenUrl + " HTTP/1.1\r\n");
client.print("Host: aip.baidubce.com\r\n");
client.print("Connection: close\r\n\r\n");
while (client.connected() || client.available()) {
if (client.available()) {
response += client.readString();
}
}
client.stop();
int jsonStart = response.indexOf("{");
if (jsonStart != -1) {
String jsonStr = response.substring(jsonStart);
DynamicJsonDocument doc(1024);
DeserializationError error = deserializeJson(doc, jsonStr);
if (!error && doc.containsKey("access_token")) {
accessToken = doc["access_token"].as<String>();
long expireSeconds = doc["expires_in"].as<long>();
tokenExpireTime = millis() + expireSeconds * 1000;
logPrintln("✅ Token 获取成功,有效期:" + String(expireSeconds / 3600) + "小时");
return true;
} else {
logPrintln("❌ Token 解析失败:" + String(error.c_str()));
logPrintln("响应:" + jsonStr);
}
}
}
logPrintln("❌ Token 获取失败");
return false;
}
void baiduASR() {
if (currentState != STATE_ASR) return;
logPrintln("🔊 开始 ASR 识别...");
if (!getBaiduToken() || !SD.exists(RECORD_FILE_PATH)) {
currentState = STATE_IDLE;
return;
}
File recFile = SD.open(RECORD_FILE_PATH, FILE_READ);
if (!recFile) {
logPrintln("❌ 打开录音文件失败!");
currentState = STATE_IDLE;
return;
}
String requestUrl = BAIDU_ASR_URL + String("?access_token=") + accessToken;
String headers = "Host: vop.baidu.com\r\n";
headers += "Content-Type: application/json\r\n";
headers += "Connection: close\r\n";
const size_t chunkSize = 4096;
uint8_t chunk[chunkSize];
while (recFile.available() > 0) {
size_t bytesRead = recFile.read(chunk, chunkSize);
speechBase64 += base64::encode(chunk, bytesRead);
}
recFile.close();
DynamicJsonDocument reqDoc(4096);
reqDoc["format"] = "raw";
reqDoc["rate"] = SAMPLE_RATE;
reqDoc["dev_pid"] = 1537;
reqDoc["speech"] = speechBase64;
reqDoc["cuid"] = WiFi.macAddress();
reqDoc["len"] = getFileSize(RECORD_FILE_PATH);
String postBody;
serializeJson(reqDoc, postBody);
if (client.connect("vop.baidu.com", 443)) {
client.print("POST " + requestUrl + " HTTP/1.1\r\n");
client.print(headers);
client.print("Content-Length: " + String(postBody.length()) + "\r\n\r\n");
client.print(postBody);
while (client.connected() || client.available()) {
if (client.available()) {
response += client.readString();
}
}
client.stop();
int jsonStart = response.indexOf("{");
if (jsonStart != -1) {
String jsonStr = response.substring(jsonStart);
DynamicJsonDocument resDoc(1024);
DeserializationError error = deserializeJson(resDoc, jsonStr);
if (!error && resDoc["err_no"].as<int>() == 0) {
asrText = resDoc["result"][0].as<String>();
logPrintln("✅ ASR 识别成功:" + asrText);
currentState = STATE_COZE;
} else {
logPrintln("❌ ASR 识别失败:err_no=" + String(resDoc["err_no"].as<int>()) + ", err_msg=" + resDoc["err_msg"].as<String>());
currentState = STATE_IDLE;
}
} else {
logPrintln("❌ ASR 响应无 JSON");
currentState = STATE_IDLE;
}
} else {
logPrintln("❌ 连接 ASR 服务器失败");
currentState = STATE_IDLE;
}
}
// Extracts the bot's answer from a parsed Coze message-list response.
//
// @param resDoc  Parsed response document.
// @return  An error string when "code" is non-zero; otherwise the "content" of
//          the first entry in "data" whose "type" is "answer", or "无回复" when
//          there is no such entry.
String processCozeAnswer(DynamicJsonDocument& resDoc) {
    const int code = resDoc["code"].as<int>();
    if (code != 0) {
        return "❌ Coze 错误:" + resDoc["msg"].as<String>();
    }

    String reply = "无回复";
    JsonArray entries = resDoc["data"].as<JsonArray>();
    for (JsonVariant entry : entries) {
        // Skip everything that is not the final answer message.
        if (entry["type"].as<String>() != "answer") {
            continue;
        }
        reply = entry["content"].as<String>();
        break;
    }
    return reply;
}
// Performs one authenticated HTTPS GET against the Coze API and returns the raw
// response (status line, headers and body). Returns an empty string when the
// connection cannot be established.
static String cozeHttpGet(const String& path) {
    String raw;
    if (!client.connect(COZE_API_DOMAIN, COZE_API_PORT)) {
        logPrintln("❌ 连接 Coze 失败");
        return raw;
    }
    client.print("GET " + path + " HTTP/1.1\r\n");
    client.print("Host: " + String(COZE_API_DOMAIN) + "\r\n");
    client.print("Authorization: Bearer " + String(COZE_API_KEY) + "\r\n");
    client.print("Connection: close\r\n\r\n");

    const unsigned long kResponseTimeoutMs = 15000UL;
    const unsigned long waitStart = millis();
    while ((client.connected() || client.available()) && (millis() - waitStart) < kResponseTimeoutMs) {
        if (client.available()) {
            raw += client.readString();
        } else {
            delay(10);
        }
    }
    client.stop();
    return raw;
}

// Polls a Coze chat until it finishes and returns the bot's answer.
//
// The chat status is queried up to 20 times, one second apart. Once the status
// is "completed" the message list is fetched and passed to processCozeAnswer().
//
// @param conversationId  Conversation id returned when the chat was created.
// @param chatId          Chat id returned when the chat was created.
// @return  The answer text, or a string starting with "❌" when the task failed
//          or polling timed out.
String getCozeChatResult(String conversationId, String chatId) {
    const String retrieveUrl = "/v3/chat/retrieve?conversation_id=" + conversationId + "&chat_id=" + chatId;
    const String msgListUrl = "/v3/chat/message/list?chat_id=" + chatId + "&conversation_id=" + conversationId + "&bot_id=" + String(COZE_BOT_ID) + "&task_id=" + chatId;
    const int maxRetries = 20;

    for (int retry = 0; retry < maxRetries; retry++) {
        logPrintln("🤔 Coze 轮询中(" + String(retry+1) + "/" + String(maxRetries) + ")");

        // Assign (not append): each poll must be parsed on its own, otherwise
        // the first "in progress" reply would be re-read on every iteration.
        retrieveResp = cozeHttpGet(retrieveUrl);
        const int jsonStart = retrieveResp.indexOf('{');
        if (jsonStart != -1) {
            DynamicJsonDocument resDoc(2048);
            const DeserializationError error = deserializeJson(resDoc, retrieveResp.substring(jsonStart));
            if (error) {
                logPrintln("❌ Coze 状态解析失败:" + String(error.c_str()));
            } else if (resDoc["code"].as<int>() == 0) {
                const String status = resDoc["data"]["status"].as<String>();
                if (status == "completed") {
                    msgResp = cozeHttpGet(msgListUrl);
                    const int msgJsonStart = msgResp.indexOf('{');
                    if (msgJsonStart != -1) {
                        // Keep only the fields processCozeAnswer() reads, so a
                        // long message list cannot exhaust the document pool.
                        StaticJsonDocument<128> filter;
                        filter["code"] = true;
                        filter["msg"] = true;
                        filter["data"][0]["type"] = true;
                        filter["data"][0]["content"] = true;

                        DynamicJsonDocument msgDoc(8192);
                        const DeserializationError msgError = deserializeJson(
                            msgDoc, msgResp.substring(msgJsonStart), DeserializationOption::Filter(filter));
                        if (!msgError) {
                            return processCozeAnswer(msgDoc);
                        }
                        logPrintln("❌ Coze 消息解析失败:" + String(msgError.c_str()));
                    }
                } else if (status == "failed") {
                    return "❌ Coze 任务失败:" + resDoc["data"]["error_msg"].as<String>();
                }
            }
        }
        delay(1000);
    }
    return "❌ Coze 轮询超时";
}
void callCozeAI() {
if (currentState != STATE_COZE || asrText.length() == 0) return;
logPrintln("🤖 调用 Coze AI:" + asrText);
if (!checkWiFi()) {
currentState = STATE_IDLE;
return;
}
DynamicJsonDocument reqDoc(1024);
reqDoc["bot_id"] = COZE_BOT_ID;
reqDoc["user_id"] = COZE_USER_ID;
reqDoc["stream"] = false;
reqDoc["auto_save_history"] = true;
JsonArray messages = reqDoc.createNestedArray("additional_messages");
JsonObject userMsg = messages.createNestedObject();
userMsg["role"] = "user";
userMsg["content"] = asrText;
userMsg["content_type"] = "text";
String postBody;
serializeJson(reqDoc, postBody);
if (client.connect(COZE_API_DOMAIN, COZE_API_PORT)) {
client.print("POST /v3/chat HTTP/1.1\r\n");
client.print("Host: " + String(COZE_API_DOMAIN) + "\r\n");
client.print("Authorization: Bearer " + String(COZE_API_KEY) + "\r\n");
client.print("Content-Type: application/json\r\n");
client.print("Content-Length: " + String(postBody.length()) + "\r\n");
client.print("Connection: close\r\n\r\n");
client.print(postBody);
while (client.connected() || client.available()) {
if (client.available()) response += client.readString();
}
client.stop();
int jsonStart = response.indexOf("{");
if (jsonStart != -1) {
String jsonStr = response.substring(jsonStart);
DynamicJsonDocument resDoc(1024);
DeserializationError error = deserializeJson(resDoc, jsonStr);
if (!error && resDoc["code"].as<int>() == 0) {
String chatId = resDoc["data"]["id"].as<String>();
String conversationId = resDoc["data"]["conversation_id"].as<String>();
logPrintln("✅ Coze 对话创建成功:" + chatId);
cozeReply = getCozeChatResult(conversationId, chatId);
logPrintln("✅ Coze 回复:" + cozeReply);
currentState = STATE_TTS;
} else {
logPrintln("❌ Coze 响应解析失败:" + String(error.c_str()));
currentState = STATE_IDLE;
}
} else {
logPrintln("❌ Coze 响应无 JSON");
currentState = STATE_IDLE;
}
} else {
logPrintln("❌ 连接 Coze 失败");
currentState = STATE_IDLE;
}
}
void baiduTTSAndPlay() {
if (currentState != STATE_TTS || cozeReply.length() == 0) return;
logPrintln("🎤 开始 TTS 合成:" + cozeReply);
if (!getBaiduToken()) {
currentState = STATE_IDLE;
return;
}
String encodedText = urlEncode(cozeReply);
String ttsParams = "tex=" + encodedText + "&lan=zh&cuid=" + WiFi.macAddress() + "&ctp=1&tok=" + accessToken + "&spd=5&pit=5&vol=15&per=0";
String requestUrl = String(BAIDU_TTS_URL) + "?" + ttsParams;
if (SD.exists(TTS_FILE_PATH)) {
SD.remove(TTS_FILE_PATH);
}
File ttsFile = SD.open(TTS_FILE_PATH, FILE_WRITE);
if (!ttsFile) {
logPrintln("❌ 打开 TTS 文件失败!");
currentState = STATE_IDLE;
return;
}
if (client.connect("tsn.baidu.com", 443)) {
client.print("GET " + requestUrl + " HTTP/1.1\r\n");
client.print("Host: tsn.baidu.com\r\n");
client.print("Connection: close\r\n\r\n");
bool headerEnd = false;
while (client.connected() || client.available()) {
if (client.available()) {
String line;
line = client.readStringUntil('\n');
if (headerEnd) {
ttsFile.write((const uint8_t*)line.c_str(), line.length());
}
if (line == "\r") {
headerEnd = true;
}
}
}
client.stop();
ttsFile.close();
uint64_t ttsFileSize = getFileSize(TTS_FILE_PATH);
if (SD.exists(TTS_FILE_PATH) && ttsFileSize > 100) {
logPrintln("🎵 开始播放 TTS 语音(大小:" + String(ttsFileSize) + "字节)...");
currentState = STATE_PLAYING;
File playFile = SD.open(TTS_FILE_PATH, FILE_READ);
if (playFile) {
i2s_start(I2S_NUM_1);
size_t bytesRead;
uint8_t playBuffer[1024];
while (playFile.available() > 0 && currentState == STATE_PLAYING) {
bytesRead = playFile.read(playBuffer, sizeof(playBuffer));
i2s_write(I2S_NUM_1, playBuffer, bytesRead, &bytesRead, portMAX_DELAY);
}
playFile.close();
i2s_stop(I2S_NUM_1);
}
logPrintln("🎵 TTS 播放完成");
currentState = STATE_IDLE;
} else {
logPrintln("❌ TTS 音频文件无效(大小:" + String(ttsFileSize) + "字节)!");
currentState = STATE_IDLE;
}
} else {
logPrintln("❌ 连接 TTS 服务器失败");
ttsFile.close();
currentState = STATE_IDLE;
}
}
// Reads one line from the serial port, if any, and acts on it:
//   "1"        - start a voice conversation (startRecording()).
//   "3"        - log size and duration of the stored recording.
//   "q"        - log a message and halt in an endless loop.
//   other text - when idle, use the text as the question for Coze.
// Blank lines are ignored.
void parseSerialCommand() {
    if (Serial.available() <= 0) {
        return;
    }

    String input = Serial.readStringUntil('\n');
    input.trim();
    if (input.length() == 0) {
        return;
    }
    logPrintln("🗣️ 串口输入:" + input);

    if (input == "1") {
        startRecording();
        return;
    }

    if (input == "3") {
        if (!SD.exists(RECORD_FILE_PATH)) {
            logPrintln("📋 无录音文件");
            return;
        }
        const uint64_t size = getFileSize(RECORD_FILE_PATH);
        const float seconds = (float)size / (SAMPLE_RATE * BYTES_PER_SAMPLE);
        logPrintln("📋 录音文件信息:大小=" + String(size) + "字节,时长=" + String(seconds, 2) + "秒");
        return;
    }

    if (input == "q") {
        logPrintln("❌ 退出程序");
        while (true) {
        }
    }

    // Anything else is treated as a typed question.
    if (currentState != STATE_IDLE) {
        logPrintln("❌ 当前忙碌中,无法处理文本对话!");
        return;
    }
    asrText = input;
    currentState = STATE_COZE;
}
// One-time initialisation: serial port, banner, SD card, I2S output, Wi-Fi,
// TLS client and a first Baidu token. Never returns from the SD failure loop
// or from the Wi-Fi wait until they succeed.
void setup() {
    Serial.begin(115200);
    delay(1000);

    // Start-up banner and command help.
    static const char* const kBanner[] = {
        "=====================================",
        " ESP32 语音 AI 对话机器人 ",
        "=====================================",
        "📋 支持指令:",
        " 1 - 开始语音对话(录音 6 秒→ASR→AI→TTS)",
        " 3 - 查询录音文件信息",
        " q - 退出程序",
        " 其他文本 - 直接文本对话",
        "=====================================\n",
    };
    for (const char* bannerLine : kBanner) {
        logPrintln(bannerLine);
    }

    // Without the SD card nothing works: report the failure once per second.
    const bool sdReady = initSDCard();
    while (!sdReady) {
        logPrintln("❌ SD 卡初始化失败,程序暂停!");
        delay(1000);
    }

    initI2SPlay();

    WiFi.begin(WIFI_SSID, WIFI_PASS);
    logPrintln("连接 WiFi:" + String(WIFI_SSID));
    for (;;) {
        if (WiFi.status() == WL_CONNECTED) {
            break;
        }
        delay(500);
        Serial.print(".");
    }
    logPrintln("\n✅ WiFi 连接成功!IP:" + WiFi.localIP().toString());

    // Server certificates are not verified.
    client.setInsecure();
    getBaiduToken();

    currentState = STATE_IDLE;
    logPrintln("✅ 系统初始化完成,等待指令...");
}
void loop() {
parseSerialCommand();
switch (currentState) {
case STATE_ASR:
baiduASR();
break;
case STATE_COZE:
callCozeAI();
break;
case STATE_TTS:
baiduTTSAndPlay();
break;
default:
break;
}
delay(100);
}