{"created":"2021-03-01T06:20:02.987334+00:00","id":3409,"links":{},"metadata":{"_buckets":{"deposit":"66f81f78-c272-4e70-8cf8-b4b8a3d9cc18"},"_deposit":{"id":"3409","owners":[],"pid":{"revision_id":0,"type":"depid","value":"3409"},"status":"published"},"_oai":{"id":"oai:repository.dl.itc.u-tokyo.ac.jp:00003409","sets":["34:105:262","9:233:234"]},"item_7_alternative_title_1":{"attribute_name":"その他のタイトル","attribute_value_mlt":[{"subitem_alternative_title":"音声の構造的表象から導出される動的特徴に関する研究"}]},"item_7_biblio_info_7":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2012-03-22","bibliographicIssueDateType":"Issued"},"bibliographic_titles":[{}]}]},"item_7_date_granted_25":{"attribute_name":"学位授与年月日","attribute_value_mlt":[{"subitem_dategranted":"2012-03-22"}]},"item_7_degree_name_20":{"attribute_name":"学位名","attribute_value_mlt":[{"subitem_degreename":"修士(情報理工学)"}]},"item_7_description_5":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"Due to the spread of smartphones, automatic speech recognition (ASR) systems are getting more and more popular as an interface to computers. While their successes have shown that the ASR systems have reached a practical level, the basic algorithm of state-of-the-art ASR systems is still Hidden Markov Model (HMM) based algorithm, which has been the de facto standard algorithm for ASR since 1980s. The HMM-based algorithms assume the frame-by-frame Markov property to decrease the calculation amount to the realistic level. Because of the assumption, long-term features, which cannot be defined for each time frame, such as duration of words, can never be considered. Researchers have developed various methods to improve the performance of ASR systems with the constraint of Markov property. However, the ASR algorithms are undergoing a paradigm shift. 
New paradigm algorithms that don't assume the Markov property have been proposed, and they showed better performance than HMM-based old paradigm algorithms in practical calculation time. Those new paradigm algorithms can consider long-term features, which can never be considered in the old paradigm algorithms. Therefore, effective long-term features are now being investigated by researchers. Speech structure is one of the long-term features, which can potentially be an effective feature for the new paradigm algorithm. Speech structure was proposed as a feature that is invariant to non-linguistic variations, such as the difference of speakers, recording environment, etc. While the speech structure has been applied to several applications, such as pronunciation proficiency assessment, and has shown good performance, it has not been applied to continuous speech recognition, because it is not a frame-by-frame feature but a long-term feature and cannot be used as a feature for the old paradigm algorithms. In contrast, the new paradigm algorithms can leverage the speech structure. A preliminary experiment on combining the speech structure with a new paradigm algorithm was already carried out and showed good performance. However, the current implementation of speech structure is still immature and can be improved in some aspects. Dynamic feature is one of them. Dynamic features are defined as temporal derivatives of static features. They were first proposed in 1986, and are now effectively used in almost all the speech systems including ASR, speech synthesis, speaker identification, etc. However, no algorithm to leverage dynamic features in speech structure has been proposed, and dynamic features are omitted in previous studies on speech structure. To solve the problem, I propose two algorithms to leverage dynamic features derived from speech structure: differential speech structure and trajectory speech structure. 
By using these algorithms, the dynamic features can be effectively used for speech systems based on speech structure. Several experiments were carried out to show the effectiveness of the proposed methods. By using the differential speech structure, an 11.0% relative decrease in word error rate was obtained in an experiment of isolated word recognition. Furthermore, by using the trajectory speech structure, a 28.5% relative decrease in word error rate was obtained in an experiment of N-best rescoring of isolated word recognition. These results show that the proposed method works effectively and contributes to the speech structure as the feature for the new paradigm algorithms.","subitem_description_type":"Abstract"}]},"item_7_full_name_3":{"attribute_name":"著者別名","attribute_value_mlt":[{"nameIdentifiers":[{"nameIdentifier":"8193","nameIdentifierScheme":"WEKO"}],"names":[{"name":"清水, 信哉"}]}]},"item_7_select_21":{"attribute_name":"学位","attribute_value_mlt":[{"subitem_select_item":"master"}]},"item_7_subject_13":{"attribute_name":"日本十進分類法","attribute_value_mlt":[{"subitem_subject":"007","subitem_subject_scheme":"NDC"}]},"item_7_text_24":{"attribute_name":"研究科・専攻","attribute_value_mlt":[{"subitem_text_value":"情報理工学系研究科電子情報学専攻"}]},"item_7_text_4":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"東京大学大学院情報理工学系研究科電子情報学専攻"},{"subitem_text_value":"Department of Information and Communication Engineering, Graduate School of Information Science and Technology, The University of Tokyo"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Shimizu, Shinya"}],"nameIdentifiers":[{"nameIdentifier":"8192","nameIdentifierScheme":"WEKO"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2017-05-31"}],"displaytype":"detail","filename":"48106415.pdf","filesize":[{"value":"5.3 
MB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"48106415.pdf","url":"https://repository.dl.itc.u-tokyo.ac.jp/record/3409/files/48106415.pdf"},"version_id":"1e2dcceb-0617-40a5-b13d-10f6277dd17f"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"thesis","resourceuri":"http://purl.org/coar/resource_type/c_46ec"}]},"item_title":"Research on Dynamic Features Derived From Speech Structure","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Research on Dynamic Features Derived From Speech Structure"}]},"item_type_id":"7","owner":"1","path":["234","262"],"pubdate":{"attribute_name":"公開日","attribute_value":"2012-05-29"},"publish_date":"2012-05-29","publish_status":"0","recid":"3409","relation_version_is_last":true,"title":["Research on Dynamic Features Derived From Speech Structure"],"weko_creator_id":"1","weko_shared_id":null},"updated":"2022-12-19T03:45:04.614367+00:00"}