Chào mọi người, trong bài viết này mình sẽ chia sẻ cách tương tác bằng giọng nói với ChatGPT.
Đây là một chatbot giọng nói đơn giản, cho phép nói chuyện với ChatGPT và nghe phản hồi ở chế độ liên tục. Dự án sử dụng mô hình gpt-4o-mini, API TTS (chuyển văn bản thành giọng nói) và tính năng nhận dạng giọng nói.

Giới thiệu

Bài viết này hướng dẫn cách tạo một trang web cho phép người dùng trò chuyện liên tục với ChatGPT: câu hỏi được nhận dạng từ giọng nói, gửi tới mô hình ngôn ngữ, và câu trả lời được đọc lên bằng mô hình TTS.

Bối cảnh

Sử dụng mã

Dự án được thực hiện bằng HTML/CSS và Vanilla JS thuần túy; phần giao diện được tạo thành từ một cấu trúc đơn giản với:

hai trường nhập cho Khóa API và cho lời nhắc
một biểu tượng để bắt đầu nhận dạng giọng nói
một div nơi câu trả lời được viết sẽ xuất hiện
một trình phát âm thanh (ẩn)

<!-- Page header with the application title. -->
<header>
  <h1>AI Voice Chatbot</h1>
</header>
<div class="container">
  <!-- Input form holding the API key field and the prompt field. -->
  <form action="#" method="get" target="_blank" id="action-form">
    <!-- The API key is a secret: mask it on screen and keep it out of autofill history. -->
    <input type="password" id="apikey" placeholder="Insert your API key here" autocomplete="off" aria-label="OpenAI API key">
    <input id="prompt" type="text" placeholder="Activate Microphone to chat..." autocomplete="off" autofocus aria-label="Prompt">

  </form>
  <!-- Chat transcript; the script looks this element up by its "chathistory" id. -->
  <div id="chathistory"> </div>
  <p class="info"></p>
  <!-- Hidden player; the script sets its src to the generated speech and calls play(). -->
  <audio controls id="audioPlayer" style="display: none;"></audio>
</div>

Tệp JavaScript chủ yếu bao gồm 3 phần:

chức năng/sự kiện nhận dạng giọng nói
gửi yêu cầu tới gpt-4o-mini để nhận câu trả lời
gửi yêu cầu tới TTS để tạo tệp âm thanh

Đầu tiên chúng ta sẽ xem xét phần nhận dạng giọng nói. Phần này dựa trên việc lắng nghe một số sự kiện nhận dạng (start, end, result):

// Shared recognizer instance; it is only assigned inside the feature check
// below, so it stays undefined when the browser exposes no recognition API.
let recognition;

// Use the standard constructor when present, otherwise the webkit-prefixed one.
const SpeechRecognition =
  window.SpeechRecognition || window.webkitSpeechRecognition;

if (SpeechRecognition) {
  console.log("Your Browser supports speech Recognition");

  recognition = new SpeechRecognition();
  // Continuous mode: keep delivering results instead of ending after the first one.
  recognition.continuous = true;

  // Handle of the pause timer that the result handler arms to stop recognition.
  let idleTimer;

  // Append the microphone toggle button to the form.
  // NOTE(review): actionForm and actionFormInput are defined outside this
  // excerpt — presumably the #action-form element and its prompt input; confirm.
  actionForm.insertAdjacentHTML(
    "beforeend",
    '<button type="button"><i class="fas fa-microphone"></i></button>'
  );
  // Presumably leaves room so typed text does not run under the button — confirm.
  actionFormInput.style.paddingRight = "50px";

  const micBtn = actionForm.querySelector("button");
  // The <i> icon; its "fa-microphone" class is what the click handler checks
  // to decide between starting and stopping recognition.
  const micIcon = micBtn.firstElementChild;

  micBtn.addEventListener("click", micBtnClick);
  /**
   * Click handler for the microphone button: toggles speech recognition.
   * The icon class carries the state — "fa-microphone" means recognition is
   * idle, so a click starts it; any other state means a click stops it.
   */
  function micBtnClick() {
    const isIdle = micIcon.classList.contains("fa-microphone");
    if (!isIdle) {
      recognition.stop();
      return;
    }
    recognition.start();
  }

  recognition.addEventListener("start", startSpeechRecognition);
  /**
   * "start" handler: switches the icon to the slashed microphone, focuses
   * actionFormInput, logs the activation and cancels any pending idle timer.
   */
  function startSpeechRecognition() {
    const { classList: iconClasses } = micIcon;
    iconClasses.remove("fa-microphone");
    iconClasses.add("fa-microphone-slash");

    actionFormInput.focus();
    console.log("Voice activated, SPEAK");

    // A timer armed by a previous result must not stop this fresh session.
    clearTimeout(idleTimer);
  }

  recognition.addEventListener("end", endSpeechRecognition);
  /**
   * "end" handler: switches the icon back to the plain microphone, focuses
   * actionFormInput and logs that the recognition service disconnected.
   */
  function endSpeechRecognition() {
    const { classList: iconClasses } = micIcon;
    iconClasses.remove("fa-microphone-slash");
    iconClasses.add("fa-microphone");

    actionFormInput.focus();
    console.log("Speech recognition service disconnected");
  }

  recognition.addEventListener("result", resultOfSpeechRecognition);
  /**
   * "result" handler: forwards the phrase that was just recognized to the chatbot.
   *
   * A phrase equal to "go" (ignoring case and surrounding whitespace) stops
   * recognition immediately. Any other phrase re-arms a 2-second idle timer
   * that stops recognition if no further result arrives. In both cases the
   * transcript is passed to sendMessage().
   *
   * @param {SpeechRecognitionEvent} event - Result event; the entry at
   *   `event.resultIndex` is read and its first alternative is used.
   */
  function resultOfSpeechRecognition(event) {
    const { transcript } = event.results[event.resultIndex][0];

    if (transcript.toLowerCase().trim() === "go") {
      recognition.stop();
    } else {
      // Restart the pause countdown on every new phrase.
      clearTimeout(idleTimer);
      idleTimer = setTimeout(() => {
        recognition.stop();
      }, 2000);
    }
    sendMessage(transcript);
  }

Sau đó, chúng ta tạo bộ nhớ ngữ cảnh cho chatbot dưới dạng một mảng các tin nhắn:

// Conversation memory, seeded with the system message that sets the bot's persona.
let chatMemory = createMemory([
  { role: "system", content: "You are a funny bot." }
]);
console.log(chatMemory);

/**
 * Builds a new memory array from an iterable of chat messages.
 * Each entry is copied into a fresh object that keeps only `role` and `content`.
 *
 * @param {Iterable<{role: string, content: string}>} messages - Source messages.
 * @returns {Array<{role: string, content: string}>} New array of copied messages.
 */
function createMemory(messages) {
  return [...messages].map((entry) => ({
    role: entry.role,
    content: entry.content
  }));
}

Tiếp theo, chúng ta có hai hàm để gửi tin nhắn đến mô hình gpt-4o-mini của OpenAI và hiển thị phản hồi, kèm theo số lượng token đã dùng và chi phí ước tính.

/**
 * Shows the recognized phrase in the chat and asks ChatGPT for a reply.
 *
 * Nothing is sent when the API key field is empty (the user is alerted) or
 * when the transcript is an empty string. The API key is never logged.
 *
 * @param {string} transcript - Text produced by speech recognition.
 * @returns {Promise<void>} Resolves once the reply has been processed.
 */
async function sendMessage(transcript) {
  const apikey = document.getElementById("apikey").value;
  if (apikey === "") {
    alert("No OpenAI API Key found.");
    return;
  }

  const userInput = transcript;
  console.log(userInput);
  if (userInput === "") {
    return;
  }

  showMessage("Guest", userInput, "");
  const updatedMemory = await getChatGPTResponse(userInput, chatMemory);
  // Only adopt a real memory array: a failed request must not replace the
  // conversation history (and its system prompt) with undefined.
  if (Array.isArray(updatedMemory)) {
    chatMemory = updatedMemory;
  }
}

/**
 * Appends one chat entry to the chat container and scrolls it to the bottom.
 *
 * The sender and message are written as plain text: they come from speech
 * recognition or from the model, so they must not be parsed as HTML.
 * `tokens` and `downloadLink` are inserted as HTML and are only used for
 * entries whose sender is not "Guest".
 *
 * @param {string} sender - "Guest" for the user, any other name for the bot.
 * @param {string} message - Message text, shown verbatim.
 * @param {string} [tokens=""] - HTML fragment describing token usage.
 * @param {string} [downloadLink=""] - HTML fragment with a download link.
 */
function showMessage(sender, message, tokens = "", downloadLink = "") {
  const messageElement = document.createElement("div");

  if (sender === "Guest") {
    messageElement.textContent = `${sender}: ${message}`;
    messageElement.classList.add("user-message");
  } else {
    const replyElement = document.createElement("p");
    replyElement.textContent = `${sender}: ${message} `;
    replyElement.classList.add("chatgpt-message");
    messageElement.appendChild(replyElement);

    const usageElement = document.createElement("p");
    usageElement.innerHTML = tokens;
    messageElement.classList.add("chatgpt-message");
    messageElement.appendChild(usageElement);

    const downloadElem = document.createElement("div");
    downloadElem.innerHTML = downloadLink;
    messageElement.appendChild(downloadElem);
  }

  chatContainer.appendChild(messageElement);
  // Keep the newest entry visible.
  chatContainer.scrollTop = chatContainer.scrollHeight;
}

Tiếp theo là hàm gửi truy vấn tới OpenAI:

/**
 * Removes Markdown code fences (with or without a known language tag) and
 * bold markers, leaving plain text. Only fenced language tags are removed;
 * the same words in ordinary prose are left alone.
 *
 * @param {string} text - Raw model output.
 * @returns {string} Text without ``` fences and ** markers.
 */
function stripMarkdownMarkup(text) {
  return text
    .replace(
      /```(?:html|css|javascript|php|python|vb\.net|vb|cpp|java|csharp)\b/g,
      ""
    )
    .replace(/```/g, "")
    .replace(/\*\*(.*?)\*\*/g, "$1");
}

/**
 * Builds the HTML fragment reporting token usage and the estimated cost.
 *
 * @param {{prompt_tokens: number, completion_tokens: number, total_tokens: number}} [usage]
 *   The `usage` object of the API response, if any.
 * @returns {string} HTML summary, or a fallback sentence when usage is missing.
 */
function describeTokenUsage(usage) {
  if (!usage || !usage.completion_tokens) {
    return "Unable to track the number of used tokens.";
  }

  const requestTokens = usage.prompt_tokens;
  const responseTokens = usage.completion_tokens;
  const totalTokens = usage.total_tokens;
  const pricePerPromptToken = 0.15 / 1000000; // input priced at 0.15 USD per million tokens
  const pricePerResponseToken = 0.6 / 1000000; // output priced at 0.60 USD per million tokens
  const requestCost = pricePerPromptToken * requestTokens;
  const responseCost = pricePerResponseToken * responseTokens;
  const totalCost = requestCost + responseCost;

  return `<hr>Your request used ${requestTokens} tokens and costed ${requestCost.toFixed(
    6
  )}USD<br>This response used ${responseTokens} tokens and costed ${responseCost.toFixed(
    6
  )}USD<br>Total Tokens: ${totalTokens}. This interaction costed you: ${totalCost.toFixed(
    6
  )}USD (audio not included).`;
}

/**
 * Sends the user's text plus the conversation memory to the OpenAI chat
 * completions endpoint, displays the cleaned reply, hands it to
 * convertiTestoInAudio() and records the exchange in the memory.
 *
 * The API key is read from the #apikey field and is never logged.
 *
 * @param {string} userInput - Text of the user's message.
 * @param {Array<{role: string, content: string}>} [chatMemory=[]] - Previous
 *   messages; on success the new user and assistant messages are pushed onto
 *   this same array.
 * @returns {Promise<Array<{role: string, content: string}>>} The memory array.
 *   It is returned unchanged when the key is missing or the request fails, so
 *   a caller that stores the result never loses the conversation history.
 */
async function getChatGPTResponse(userInput, chatMemory = []) {
  const apikey = document.getElementById("apikey").value;
  if (apikey === "") {
    alert("No OpenAI API Key found.");
    return chatMemory;
  }

  try {
    const response = await fetch("https://api.openai.com/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apikey}`
      },
      body: JSON.stringify({
        model: "gpt-4o-mini",
        messages: [...chatMemory, { role: "user", content: userInput }]
      })
    });
    if (!response.ok) {
      throw new Error("Error while requesting to the API");
    }
    const data = await response.json();
    if (
      !data.choices ||
      !data.choices.length ||
      !data.choices[0].message ||
      !data.choices[0].message.content
    ) {
      throw new Error("Invalid API response");
    }

    const chatGPTResponse = data.choices[0].message.content.trim();
    console.log(chatGPTResponse);
    const cleanResponse = stripMarkdownMarkup(chatGPTResponse);

    // Offer the cleaned answer as a downloadable file through a blob URL.
    const blob = new Blob([cleanResponse], { type: "text/html" });
    const url = URL.createObjectURL(blob);
    const downloadLink = `<a href="${url}" download="chat.txt">Click here to download the generated answer</a>`;

    showMessage(
      "VivacityGPT",
      cleanResponse,
      describeTokenUsage(data.usage),
      downloadLink
    );

    convertiTestoInAudio(cleanResponse);

    chatMemory.push({ role: "user", content: userInput });
    chatMemory.push({ role: "assistant", content: cleanResponse });

    return chatMemory;
  } catch (error) {
    console.error(error);

    alert(
      "An error occurred during the request. Check your OpenAI account or retry later."
    );
    return chatMemory;
  }
}

Cuối cùng là hàm gọi API TTS để chuyển câu trả lời thành giọng nói:

/**
 * Converts text to speech through the OpenAI TTS endpoint and plays it in the
 * #audioPlayer element. When playback ends, speech recognition is started
 * again so the conversation continues hands-free.
 *
 * The API key is read from the #apikey field and is never logged.
 *
 * @param {string} response - Text to speak; an empty value only triggers an alert.
 * @returns {Promise<void>} Settles after playback has started or the request
 *   has failed. Failures are logged to the console, never thrown.
 */
async function convertiTestoInAudio(response) {
  const prompt = response;
  if (!prompt) {
    alert("Please insert a text prompt before converting.");
    return;
  }

  const apikey = document.getElementById("apikey").value;
  const selectedvoice = "nova";

  try {
    const ttsResponse = await fetch("https://api.openai.com/v1/audio/speech", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apikey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        model: "tts-1",
        input: prompt,
        voice: selectedvoice
      })
    });
    // An error reply carries no audio; do not try to play it.
    if (!ttsResponse.ok) {
      throw new Error(`TTS request failed with HTTP status ${ttsResponse.status}`);
    }

    const audioUrl = URL.createObjectURL(await ttsResponse.blob());
    const audioPlayer = document.getElementById("audioPlayer");

    // `once` keeps listeners from piling up across replies; without it every
    // earlier call would also fire and start recognition again.
    audioPlayer.addEventListener(
      "ended",
      () => {
        URL.revokeObjectURL(audioUrl);
        try {
          recognition.start();
        } catch (error) {
          // start() throws when recognition is already running.
          console.error("Unable to restart speech recognition: ", error);
        }
      },
      { once: true }
    );

    audioPlayer.src = audioUrl;
    await audioPlayer.play();
  } catch (error) {
    console.error("Error while converting TTS: ", error);
  }
}

Điểm quan tâm

Có hai điểm đáng chú ý trong đoạn mã này:
Nhận dạng giọng nói chạy theo vòng lặp, nghĩa là nó dừng lại sau khi nhận dạng hoàn tất và khởi động lại sau khi trình phát âm thanh phát xong âm thanh TTS được tạo ra. Điều này đảm bảo trò chuyện liên tục mà không cần phải nhấp vào biểu tượng micrô cho mọi tương tác. Chỉ cần nhấp vào biểu tượng đó để bắt đầu trò chuyện và nhấp lại vào biểu tượng đó khi bạn muốn kết thúc. Điều này đạt được thông qua điều khiển sự kiện trong cuộc gọi TTS:

audioPlayer.addEventListener("ended", () => {
          recognition.start();

Tất cả các hàm được liên kết với nhau để tạo ra trải nghiệm mượt mà: sự kiện result của recognition gọi hàm sendMessage(), hàm này gọi hàm bất đồng bộ getChatGPTResponse(), hàm này lại gọi convertiTestoInAudio(), và cuối cùng hàm đó gọi recognition.start() khi âm thanh phát xong.

Ngoài ra, cần lưu ý rằng nhận dạng giọng nói chỉ hoạt động khi chạy cục bộ hoặc trên kết nối an toàn (HTTPS với chứng chỉ SSL), và chatbot được thiết lập với tính cách 'chatbot vui nhộn' để cuộc trò chuyện bớt nhàm chán. Bạn có thể tùy chỉnh lời nhắc hệ thống để thay đổi tính cách / tâm trạng của chatbot.

Như vậy là mọi người đã hoàn thành việc tạo chatbot giọng nói cho ChatGPT rồi nhé.
Chúc mọi người thành công.

DOWNLOAD VOICE CHATGPT

PASSWORD UNZIP: HUNG.PRO.VN.

[HTML] Complete Voice Interaction with ChatGPT

Giới thiệu

Bối cảnh

Sử dụng mã

Điểm quan tâm

Comments

Categories

Archive

Short Codes

Catalogics