pebble/src/fw/services/normal/voice_endpoint.c
2025-01-27 11:38:16 -08:00

226 lines
8.6 KiB
C

/*
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "voice_endpoint.h"
#include "kernel/pbl_malloc.h"
#include "services/common/comm_session/session.h"
#include "services/normal/audio_endpoint.h"
#include "services/normal/voice/voice.h"
#include "system/logging.h"
#include "system/passert.h"
#include "util/generic_attribute.h"
#include "util/uuid.h"
#include <sys/types.h>
#include "voice_endpoint_private.h"
#define VOICE_CONTROL_ENDPOINT (11000)
// Validation shared by the dictation and NLP result handlers.
//
// Looks up the app UUID attribute in attr_list, then checks the result code and the attribute
// count. Every rejection path reports the failure through voice_handle_dictation_result() before
// returning false, regardless of which handler called this function.
//
// result          - result code carried by the message
// app_initiated   - whether the message is flagged as app-initiated
// session_id      - session ID carried by the message
// attr_list       - attribute list of the message
// attr_list_size  - size passed to the attribute lookup
// app_uuid_out    - written only on success: points at the UUID attribute payload inside
//                   attr_list, or NULL if the message carried no UUID attribute
//
// Returns true if the caller should keep processing the message, false if it was rejected (and
// already reported).
static bool prv_handle_result_common(VoiceEndpointResult result,
                                     bool app_initiated,
                                     AudioEndpointSessionId session_id,
                                     GenericAttributeList *attr_list,
                                     size_t attr_list_size,
                                     Uuid **app_uuid_out) {
  GenericAttribute *const found =
      generic_attribute_find_attribute(attr_list, VEAttributeIdAppUuid, attr_list_size);
  const bool has_uuid = (found != NULL);

  // An app-initiated session must identify the app; without a UUID there is nothing to pass on.
  if (!has_uuid && app_initiated) {
    PBL_LOG(LOG_LEVEL_WARNING, "No app UUID found for dictation response from app-initiated "
            "session");
    voice_handle_dictation_result(VoiceEndpointResultFailInvalidMessage, session_id, NULL,
                                  app_initiated, NULL);
    return false;
  }

  Uuid *uuid = NULL;
  if (has_uuid) {
    uuid = (Uuid *)found->data;
  }

  // By default forward whatever failure the remote reported.
  VoiceEndpointResult reported = result;
  if (result == VoiceEndpointResultSuccess) {
    if (attr_list->num_attributes != 0) {
      *app_uuid_out = uuid;
      return true;
    }
    // A successful result with no attributes cannot carry any payload.
    PBL_LOG(LOG_LEVEL_WARNING, "No attributes in message");
    reported = VoiceEndpointResultFailInvalidMessage;
  }

  voice_handle_dictation_result(reported, session_id, NULL, app_initiated, uuid);
  return false;
}
// Handles a dictation result message.
//
// Runs the shared validation first, then extracts and validates the transcription attribute.
// Exactly one call to voice_handle_dictation_result() is made per message from this function:
// either with a failure code and no transcription, or with the message's result and the
// transcription.
//
// msg  - the received message; the caller has already checked size >= sizeof(*msg)
// size - total size in bytes of the received message
static void prv_handle_dictation_result(VoiceSessionResultMsg *msg, size_t size) {
  // The message struct already contains the attribute list header, so add it back after
  // subtracting the fixed part to get the size of the whole attribute list.
  const size_t attr_list_size = size - sizeof(VoiceSessionResultMsg) + sizeof(GenericAttributeList);
  const bool app_initiated = (msg->flags.app_initiated == 1);

  Uuid *app_uuid = NULL;
  if (!prv_handle_result_common(msg->result, app_initiated, msg->session_id,
                                &msg->attr_list, attr_list_size, &app_uuid)) {
    // Already reported by the common handler
    return;
  }

  GenericAttribute *transcription_attr = generic_attribute_find_attribute(&msg->attr_list,
      VEAttributeIdTranscription, attr_list_size);
  if (!transcription_attr || transcription_attr->length == 0) {
    PBL_LOG(LOG_LEVEL_WARNING, "No transcription attribute found");
    voice_handle_dictation_result(VoiceEndpointResultFailInvalidMessage, msg->session_id, NULL,
                                  app_initiated, app_uuid);
    return;
  }

  Transcription *transcription = (Transcription *)transcription_attr->data;
  bool valid = transcription_validate(transcription, transcription_attr->length);
  if (!valid) {
    PBL_LOG(LOG_LEVEL_WARNING, "Unrecognized transcription format received");
    voice_handle_dictation_result(VoiceEndpointResultFailInvalidRecognizerResponse,
                                  msg->session_id, NULL, app_initiated, app_uuid);
    // Stop here: the failure has been reported and the transcription must not be passed on.
    return;
  }

  voice_handle_dictation_result(msg->result, msg->session_id, transcription,
                                app_initiated, app_uuid);
}
// Handles an NLP result message.
//
// Runs the shared validation first, then reads the optional timestamp attribute and the
// required reminder attribute. The reminder text is copied into a temporary NUL-terminated
// buffer that lives only for the duration of the voice_handle_nlp_result() call.
//
// msg  - the received message; the caller has already checked size >= sizeof(*msg)
// size - total size in bytes of the received message
static void prv_handle_nlp_result(VoiceSessionResultMsg *msg, size_t size) {
  // The message struct already contains the attribute list header, so add it back after
  // subtracting the fixed part to get the size of the whole attribute list.
  const size_t attr_list_size = size - sizeof(VoiceSessionResultMsg) + sizeof(GenericAttributeList);
  const bool app_initiated = (msg->flags.app_initiated == 1);

  Uuid *app_uuid = NULL;
  if (!prv_handle_result_common(msg->result, app_initiated, msg->session_id,
                                &msg->attr_list, attr_list_size, &app_uuid)) {
    // Already reported by the common handler
    return;
  }

  if (app_uuid) {
    PBL_LOG(LOG_LEVEL_WARNING, "Got an app UUID in a NLP result msg. Ignoring and continuing");
  }

  // The timestamp attribute is optional
  time_t timestamp = 0;
  GenericAttribute *timestamp_attr = generic_attribute_find_attribute(&msg->attr_list,
      VEAttributeIdTimestamp, attr_list_size);
  if (timestamp_attr && timestamp_attr->length == sizeof(uint32_t)) {
    // The payload sits at an arbitrary byte offset inside the received message, so copy it out
    // rather than dereferencing it through a uint32_t pointer (which may be misaligned).
    uint32_t raw_timestamp;
    memcpy(&raw_timestamp, timestamp_attr->data, sizeof(raw_timestamp));
    timestamp = raw_timestamp;
  }

  GenericAttribute *reminder_attr = generic_attribute_find_attribute(&msg->attr_list,
      VEAttributeIdReminder, attr_list_size);
  if (!reminder_attr || reminder_attr->length == 0) {
    PBL_LOG(LOG_LEVEL_WARNING, "No reminder attribute found");
    voice_handle_nlp_result(VoiceEndpointResultFailInvalidMessage, msg->session_id, NULL, 0);
    return;
  }

  // The reminder payload is not NUL-terminated in the message; make a terminated copy.
  char *reminder_str = kernel_zalloc_check(reminder_attr->length + 1);
  memcpy(reminder_str, reminder_attr->data, reminder_attr->length);
  reminder_str[reminder_attr->length] = '\0';

  voice_handle_nlp_result(msg->result, msg->session_id, reminder_str, timestamp);

  kernel_free(reminder_str);
}
#if CAPABILITY_HAS_MICROPHONE
// Entry point for messages received on the voice control endpoint.
//
// The first byte of the payload is the message ID; the remainder is interpreted according to
// that ID. Messages that are empty, too short for their type, or carry an unknown ID are logged
// and dropped.
//
// session - session the message arrived on (unused here)
// data    - raw message payload
// size    - size in bytes of the payload
void voice_endpoint_protocol_msg_callback(CommSession *session, const uint8_t* data, size_t size) {
  // Need at least the message ID byte before looking at the payload
  if (data == NULL || size < sizeof(uint8_t)) {
    PBL_LOG(LOG_LEVEL_WARNING, "Empty voice endpoint message");
    return;
  }

  MsgId msg_id = data[0];
  switch (msg_id) {
    case MsgIdSessionSetup: {
      if (size >= sizeof(SessionSetupResultMsg)) {
        SessionSetupResultMsg *msg = (SessionSetupResultMsg *) data;
        bool app_initiated = (msg->flags.app_initiated == 1);
        voice_handle_session_setup_result(msg->result, msg->session_type, app_initiated);
      } else {
        PBL_LOG(LOG_LEVEL_WARNING, "Invalid size for session setup result message");
      }
      break;
    }
    case MsgIdDictationResult: {
      if (size >= sizeof(VoiceSessionResultMsg)) {
        VoiceSessionResultMsg *msg = (VoiceSessionResultMsg *) data;
        prv_handle_dictation_result(msg, size);
      } else {
        PBL_LOG(LOG_LEVEL_WARNING, "Invalid size for dictation result message %zu", size);
      }
      break;
    }
    case MsgIdNLPResult: {
      if (size >= sizeof(VoiceSessionResultMsg)) {
        VoiceSessionResultMsg *msg = (VoiceSessionResultMsg *) data;
        prv_handle_nlp_result(msg, size);
      } else {
        PBL_LOG(LOG_LEVEL_WARNING, "Invalid size for NLP result message %zu", size);
      }
      break;
    }
    default:
      // Ignore invalid message ID
      PBL_LOG(LOG_LEVEL_WARNING, "Invalid message ID");
      break;
  }
}
#else
// Without a microphone capability, incoming voice endpoint messages are ignored.
void voice_endpoint_protocol_msg_callback(CommSession *session, const uint8_t* data, size_t size) {
}
#endif
// Builds and sends a session setup message on the voice control endpoint.
//
// The message always carries the Speex audio transfer info attribute. When app_uuid is non-NULL
// the message is additionally flagged as app-initiated and the app UUID attribute is placed
// ahead of the transfer info.
//
// session_type - type of voice session being requested
// session_id   - audio endpoint session ID to associate with the voice session
// info         - Speex transfer parameters, copied into the message
// app_uuid     - UUID of the initiating app, or NULL when not app-initiated
void voice_endpoint_setup_session(VoiceEndpointSessionType session_type,
    AudioEndpointSessionId session_id, AudioTransferInfoSpeex *info, Uuid *app_uuid) {
  CommSession *system_session = comm_session_get_system_session();
  comm_session_set_responsiveness(system_session, BtConsumerPpVoiceEndpoint, ResponseTimeMin,
                                  MIN_LATENCY_MODE_TIMEOUT_VOICE_SECS);

  const bool has_app_uuid = (app_uuid != NULL);

  // Fixed header + transfer info attribute, plus the UUID attribute when present
  size_t total_size = sizeof(SessionSetupMsg);
  total_size += sizeof(GenericAttribute) + sizeof(AudioTransferInfoSpeex);
  if (has_app_uuid) {
    total_size += sizeof(GenericAttribute) + sizeof(Uuid);
  }

  SessionSetupMsg *setup_msg = kernel_malloc_check(total_size);
  *setup_msg = (SessionSetupMsg) {
    .msg_id = MsgIdSessionSetup,
    .session_type = session_type,
    .session_id = session_id,
    .attr_list.num_attributes = 1,
  };

  // Write position for the next attribute; each add call returns the position after it
  GenericAttribute *cursor = setup_msg->attr_list.attributes;
  if (has_app_uuid) {
    // The flag is assigned separately, after the struct has been initialized as a whole
    setup_msg->flags.app_initiated = 1;
    setup_msg->attr_list.num_attributes += 1;
    cursor = generic_attribute_add_attribute(cursor, VEAttributeIdAppUuid, app_uuid, sizeof(Uuid));
  }
  cursor = generic_attribute_add_attribute(cursor, VEAttributeIdAudioTransferInfoSpeex, info,
                                           sizeof(AudioTransferInfoSpeex));

  // The attributes written must fill the allocation exactly
  const size_t written = (size_t)((uint8_t *)cursor - (uint8_t *)setup_msg);
  PBL_ASSERTN(written == total_size);

  comm_session_send_data(system_session, VOICE_CONTROL_ENDPOINT, (uint8_t *)setup_msg,
                         total_size, COMM_SESSION_DEFAULT_TIMEOUT);
  kernel_free(setup_msg);
}