/*
 * Copyright 2024 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "voice_endpoint.h"

#include "kernel/pbl_malloc.h"
#include "services/common/comm_session/session.h"
#include "services/normal/audio_endpoint.h"
#include "services/normal/voice/voice.h"
#include "system/logging.h"
#include "system/passert.h"
#include "util/generic_attribute.h"
#include "util/uuid.h"

#include <string.h>

#include "voice_endpoint_private.h"

#define VOICE_CONTROL_ENDPOINT (11000)

// Computes the number of bytes occupied by the attribute list of a VoiceSessionResultMsg that is
// `msg_size` bytes long in total. The attribute list header is counted as part of
// sizeof(VoiceSessionResultMsg), so it is added back after subtracting the message header.
// Callers must have checked that msg_size >= sizeof(VoiceSessionResultMsg); otherwise the
// unsigned subtraction wraps around.
static size_t prv_result_attr_list_size(size_t msg_size) {
  return msg_size - sizeof(VoiceSessionResultMsg) + sizeof(GenericAttributeList);
}

// Performs the validation shared by the dictation and NLP result handlers.
//
// @param result          Result code carried by the message
// @param app_initiated   Whether the message is flagged as belonging to an app-initiated session
// @param session_id      Session the message refers to
// @param attr_list       Attribute list of the message
// @param attr_list_size  Size in bytes of the attribute list
// @param app_uuid_out    Set to the app UUID attribute payload (or NULL if the attribute is
//                        absent), but only when the function returns true
// @return true if the caller should go on to parse the message. false if a failure has already
//         been reported through voice_handle_dictation_result(), in which case the caller must
//         not report anything further.
//
// NOTE(review): failures are always reported through voice_handle_dictation_result(), including
// when this is called from the NLP result handler - confirm that this is intended.
static bool prv_handle_result_common(VoiceEndpointResult result,
                                     bool app_initiated,
                                     AudioEndpointSessionId session_id,
                                     GenericAttributeList *attr_list,
                                     size_t attr_list_size,
                                     Uuid **app_uuid_out) {
  GenericAttribute *uuid_attr = generic_attribute_find_attribute(attr_list, VEAttributeIdAppUuid,
                                                                 attr_list_size);
  // An app-initiated session must carry the UUID of the app that started it
  if (app_initiated && !uuid_attr) {
    PBL_LOG(LOG_LEVEL_WARNING, "No app UUID found for dictation response from app-initiated "
            "session");
    voice_handle_dictation_result(VoiceEndpointResultFailInvalidMessage, session_id, NULL,
                                  app_initiated, NULL);
    return false;
  }

  Uuid *app_uuid = uuid_attr ? (Uuid *)uuid_attr->data : NULL;

  // Forward a failure result as-is; there is nothing further to parse
  if (result != VoiceEndpointResultSuccess) {
    voice_handle_dictation_result(result, session_id, NULL, app_initiated, app_uuid);
    return false;
  }

  if (attr_list->num_attributes == 0) {
    PBL_LOG(LOG_LEVEL_WARNING, "No attributes in message");
    voice_handle_dictation_result(VoiceEndpointResultFailInvalidMessage, session_id, NULL,
                                  app_initiated, app_uuid);
    return false;
  }

  *app_uuid_out = app_uuid;
  return true;
}

// Handles a dictation result message: validates it, extracts the transcription attribute and
// reports exactly one result through voice_handle_dictation_result().
//
// @param msg   The received message
// @param size  Total size of the message in bytes; the caller guarantees that it is at least
//              sizeof(VoiceSessionResultMsg)
static void prv_handle_dictation_result(VoiceSessionResultMsg *msg, size_t size) {
  const size_t attr_list_size = prv_result_attr_list_size(size);
  const bool app_initiated = (msg->flags.app_initiated == 1);

  Uuid *app_uuid = NULL;
  if (!prv_handle_result_common(msg->result, app_initiated, msg->session_id, &msg->attr_list,
                                attr_list_size, &app_uuid)) {
    return;
  }

  GenericAttribute *transcription_attr = generic_attribute_find_attribute(
      &msg->attr_list, VEAttributeIdTranscription, attr_list_size);
  if (!transcription_attr || transcription_attr->length == 0) {
    PBL_LOG(LOG_LEVEL_WARNING, "No transcription attribute found");
    voice_handle_dictation_result(VoiceEndpointResultFailInvalidMessage, msg->session_id, NULL,
                                  app_initiated, app_uuid);
    return;
  }

  Transcription *transcription = (Transcription *)transcription_attr->data;
  if (!transcription_validate(transcription, transcription_attr->length)) {
    PBL_LOG(LOG_LEVEL_WARNING, "Unrecognized transcription format received");
    voice_handle_dictation_result(VoiceEndpointResultFailInvalidRecognizerResponse,
                                  msg->session_id, NULL, app_initiated, app_uuid);
    // Stop here: the failure has been reported, and the transcription did not pass validation so
    // it must not be handed on as a successful result.
    return;
  }

  voice_handle_dictation_result(msg->result, msg->session_id, transcription, app_initiated,
                                app_uuid);
}

// Handles an NLP result message: validates it, extracts the optional timestamp and the reminder
// text and reports the result through voice_handle_nlp_result().
//
// @param msg   The received message
// @param size  Total size of the message in bytes; the caller guarantees that it is at least
//              sizeof(VoiceSessionResultMsg)
static void prv_handle_nlp_result(VoiceSessionResultMsg *msg, size_t size) {
  const size_t attr_list_size = prv_result_attr_list_size(size);
  const bool app_initiated = (msg->flags.app_initiated == 1);

  Uuid *app_uuid = NULL;
  if (!prv_handle_result_common(msg->result, app_initiated, msg->session_id, &msg->attr_list,
                                attr_list_size, &app_uuid)) {
    return;
  }

  if (app_uuid) {
    PBL_LOG(LOG_LEVEL_WARNING, "Got an app UUID in a NLP result msg. Ignoring and continuing");
  }

  // The timestamp attribute is optional
  time_t timestamp = 0;
  GenericAttribute *timestamp_attr = generic_attribute_find_attribute(
      &msg->attr_list, VEAttributeIdTimestamp, attr_list_size);
  if (timestamp_attr && timestamp_attr->length == sizeof(uint32_t)) {
    // Copy the value out instead of dereferencing a casted pointer: the attribute payload sits at
    // an arbitrary offset inside the message, so it is not guaranteed to be suitably aligned.
    uint32_t raw_timestamp;
    memcpy(&raw_timestamp, timestamp_attr->data, sizeof(raw_timestamp));
    timestamp = raw_timestamp;
  }

  GenericAttribute *reminder_attr = generic_attribute_find_attribute(
      &msg->attr_list, VEAttributeIdReminder, attr_list_size);
  if (!reminder_attr || reminder_attr->length == 0) {
    PBL_LOG(LOG_LEVEL_WARNING, "No reminder attribute found");
    voice_handle_nlp_result(VoiceEndpointResultFailInvalidMessage, msg->session_id, NULL, 0);
    return;
  }

  // The reminder payload is copied into a buffer that is one byte longer so that it can be passed
  // on as a NUL-terminated string. The buffer is freed as soon as the handler returns.
  char *reminder_str = kernel_zalloc_check(reminder_attr->length + 1);
  memcpy(reminder_str, reminder_attr->data, reminder_attr->length);
  reminder_str[reminder_attr->length] = '\0';

  voice_handle_nlp_result(msg->result, msg->session_id, reminder_str, timestamp);

  kernel_free(reminder_str);
}

#if CAPABILITY_HAS_MICROPHONE
// Dispatches a message received on the voice control endpoint. The first byte of the message
// selects the message type; messages that are shorter than the struct for their type are logged
// and dropped.
//
// @param session  Session the message was received on (unused)
// @param data     The received message
// @param size     Size of the message in bytes
void voice_endpoint_protocol_msg_callback(CommSession *session, const uint8_t* data, size_t size) {
  // The message ID is read from the first byte, so there has to be at least one
  if (!data || size == 0) {
    PBL_LOG(LOG_LEVEL_WARNING, "Empty voice endpoint message");
    return;
  }

  MsgId msg_id = data[0];

  switch (msg_id) {
    case MsgIdSessionSetup: {
      if (size >= sizeof(SessionSetupResultMsg)) {
        SessionSetupResultMsg *msg = (SessionSetupResultMsg *) data;
        bool app_initiated = (msg->flags.app_initiated == 1);
        voice_handle_session_setup_result(msg->result, msg->session_type, app_initiated);
      } else {
        PBL_LOG(LOG_LEVEL_WARNING, "Invalid size for session setup result message");
      }
      break;
    }
    case MsgIdDictationResult: {
      if (size >= sizeof(VoiceSessionResultMsg)) {
        VoiceSessionResultMsg *msg = (VoiceSessionResultMsg *) data;
        prv_handle_dictation_result(msg, size);
      } else {
        PBL_LOG(LOG_LEVEL_WARNING, "Invalid size for dictation result message %zu", size);
      }
      break;
    }
    case MsgIdNLPResult: {
      if (size >= sizeof(VoiceSessionResultMsg)) {
        VoiceSessionResultMsg *msg = (VoiceSessionResultMsg *) data;
        prv_handle_nlp_result(msg, size);
      } else {
        PBL_LOG(LOG_LEVEL_WARNING, "Invalid size for NLP result message %zu", size);
      }
      break;
    }
    default:
      // Ignore invalid message ID
      PBL_LOG(LOG_LEVEL_WARNING, "Invalid message ID");
      break;
  }
}
#else
// Without CAPABILITY_HAS_MICROPHONE, incoming messages are ignored.
void voice_endpoint_protocol_msg_callback(CommSession *session, const uint8_t* data, size_t size) {
}
#endif

// Builds a session setup message and sends it over the system session on the voice control
// endpoint.
//
// @param session_type  Type of voice session to set up
// @param session_id    ID of the audio endpoint session
// @param info          Speex audio transfer info, always sent as an attribute
// @param app_uuid      UUID of the app that initiated the session, or NULL. When non-NULL the
//                      message is flagged as app-initiated and the UUID is sent as an additional
//                      attribute ahead of the transfer info.
void voice_endpoint_setup_session(VoiceEndpointSessionType session_type,
                                  AudioEndpointSessionId session_id,
                                  AudioTransferInfoSpeex *info,
                                  Uuid *app_uuid) {
  CommSession *comm_session = comm_session_get_system_session();
  comm_session_set_responsiveness(comm_session, BtConsumerPpVoiceEndpoint, ResponseTimeMin,
                                  MIN_LATENCY_MODE_TIMEOUT_VOICE_SECS);

  // The speex audio transfer info attribute is always sent; the app UUID attribute is only sent
  // for app-initiated sessions
  size_t size = sizeof(SessionSetupMsg) + sizeof(GenericAttribute) +
      sizeof(AudioTransferInfoSpeex) +
      (app_uuid ? (sizeof(Uuid) + sizeof(GenericAttribute)) : 0);

  SessionSetupMsg *msg = kernel_malloc_check(size);
  *msg = (SessionSetupMsg) {
    .msg_id = MsgIdSessionSetup,
    .session_type = session_type,
    .session_id = session_id,
    .attr_list.num_attributes = 1,
  };

  GenericAttribute *attr = msg->attr_list.attributes;
  if (app_uuid) {
    // set this after struct initialization because the rest of the fields in the bitfield are left
    // uninitialized if just one is set.
    msg->flags.app_initiated = 1;

    // we're also sending the app UUID
    msg->attr_list.num_attributes += 1;

    // add app UUID attribute
    attr = generic_attribute_add_attribute(attr, VEAttributeIdAppUuid, app_uuid, sizeof(Uuid));
  }

  attr = generic_attribute_add_attribute(attr, VEAttributeIdAudioTransferInfoSpeex, info,
                                         sizeof(AudioTransferInfoSpeex));

  // The pointer returned for the last attribute is expected to land exactly at the end of the
  // allocation; anything else means the size computed above is out of sync with what was written.
  size_t actual_size = (uint8_t *)attr - (uint8_t *)msg;
  PBL_ASSERTN(actual_size == size);

  comm_session_send_data(comm_session, VOICE_CONTROL_ENDPOINT, (uint8_t *)msg, size,
                         COMM_SESSION_DEFAULT_TIMEOUT);

  kernel_free(msg);
}