{"created":"2021-03-01T06:19:02.709377+00:00","id":2436,"links":{},"metadata":{"_buckets":{"deposit":"4669c971-61b9-40d9-96ac-a1a537d64b8e"},"_deposit":{"id":"2436","owners":[],"pid":{"revision_id":0,"type":"depid","value":"2436"},"status":"published"},"_oai":{"id":"oai:repository.dl.itc.u-tokyo.ac.jp:00002436","sets":["34:105:330","9:233:280"]},"item_7_alternative_title_1":{"attribute_name":"その他のタイトル","attribute_value_mlt":[{"subitem_alternative_title":"同期性に基づく音と映像の統合解析"}]},"item_7_biblio_info_7":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2009-09","bibliographicIssueDateType":"Issued"},"bibliographic_titles":[{}]}]},"item_7_date_granted_25":{"attribute_name":"学位授与年月日","attribute_value_mlt":[{"subitem_dategranted":"2009-09-28"}]},"item_7_degree_grantor_23":{"attribute_name":"学位授与機関","attribute_value_mlt":[{"subitem_degreegrantor":[{"subitem_degreegrantor_name":"University of Tokyo (東京大学)"}]}]},"item_7_degree_name_20":{"attribute_name":"学位名","attribute_value_mlt":[{"subitem_degreename":"博士(情報理工学)"}]},"item_7_description_5":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"This thesis presents a computational framework to jointly analyze auditory and visual information. The integration of audiovisual information is realized based on synchrony evaluation, which is motivated by the neuroscience discovery, that synchrony is a key for human beings to perceive across the senses of different modalities. The works in this thesis focus on answering two questions: how to perform and where to apply this audiovisual analysis with synchrony evaluation. To answer the first question, we develop novel effective methods to analyze the audiovisual correlation, and perform a classification and an experimental comparison of the existing techniques, including the ones we developed. Since this is the first work that classifies and experimentally compares the methods of this field, it supplies a basis for designing algorithms to computationally analyze the audiovisual correlation. To answer the second question, we apply audiovisual correlation analysis to solve three different problems. The first problem is the detection of a speaker's face region in a video, whose previous solutions either require special devices like microphone array or supply only highly fragmental results. Assuming that speaker is stationary within an analysis time window, we introduce a novel method to analyze the audiovisual correlation for speaker using newly introduced audiovisual differential feature and quadratic mutual information, and integrate the result of this correlation analysis into graph cut-based image segmentation to compute the speaker face region. This method not only achieves the smoothness of the detected face region, but also is robust against the change of background, view, and scale. The second problem is the localization of sound source. General sound sources are diverse in types and usually non-stationary while emitting sounds. To solve this problem, we develop an audiovisual correlation maximization framework to trace the sound source movement, and introduce audiovisual inconsistency feature to extract audiovisual events for all kinds of sound sources. We also propose an incremental computation of mutual information to significantly speed up the computation. This method can successfully localize different moving sound sources in the experiments. The third problem is the recovery of drifted audio-to-video synchronization, which used to require both special device and dedicated human effort. Considering that the correlation reaches the maximum only when audio is synchronized with video, we develop an automatic recovery method by analyzing the audiovisual correlation for a given speaker in the video clip. The recovery demonstrates high accuracy for both simulation and real data. While the theoretical justification and experimental justification are performed independently, this thesis taken as a whole lays a necessary groundwork for jointly analyzing audiovisual information based on synchrony evaluation.","subitem_description_type":"Abstract"}]},"item_7_dissertation_number_26":{"attribute_name":"学位授与番号","attribute_value_mlt":[{"subitem_dissertationnumber":"甲第25373号"}]},"item_7_full_name_3":{"attribute_name":"著者別名","attribute_value_mlt":[{"nameIdentifiers":[{"nameIdentifier":"6707","nameIdentifierScheme":"WEKO"}],"names":[{"name":"Liu, Yuyu"}]}]},"item_7_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15083/00002430","subitem_identifier_reg_type":"JaLC"}]},"item_7_select_21":{"attribute_name":"学位","attribute_value_mlt":[{"subitem_select_item":"doctoral"}]},"item_7_subject_13":{"attribute_name":"日本十進分類法","attribute_value_mlt":[{"subitem_subject":"548","subitem_subject_scheme":"NDC"}]},"item_7_text_22":{"attribute_name":"学位分野","attribute_value_mlt":[{"subitem_text_value":"Information Science and Technology (情報理工学)"}]},"item_7_text_24":{"attribute_name":"研究科・専攻","attribute_value_mlt":[{"subitem_text_value":"Department of Information and Communication Engineering, Graduate School of Information Science and Technology (情報理工学系研究科電子情報学専攻)"}]},"item_7_text_27":{"attribute_name":"学位記番号","attribute_value_mlt":[{"subitem_text_value":"博情第255号"}]},"item_7_text_4":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_text_value":"大学院情報理工学系研究科電子情報学専攻"},{"subitem_text_value":"Graduate School of Information Science and Technology Department of Information and Communication Engineering The University of Tokyo"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"劉, 玉宇"}],"nameIdentifiers":[{"nameIdentifier":"6706","nameIdentifierScheme":"WEKO"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2017-05-31"}],"displaytype":"detail","filename":"37067413.pdf","filesize":[{"value":"5.0 MB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"37067413.pdf","url":"https://repository.dl.itc.u-tokyo.ac.jp/record/2436/files/37067413.pdf"},"version_id":"b7272e03-4347-413c-b70d-1aac52f2681f"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"Audiovisual analysis","subitem_subject_scheme":"Other"},{"subitem_subject":"Synchrony","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"thesis","resourceuri":"http://purl.org/coar/resource_type/c_46ec"}]},"item_title":"Synchrony-based Audiovisual Analysis","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Synchrony-based Audiovisual Analysis"}]},"item_type_id":"7","owner":"1","path":["280","330"],"pubdate":{"attribute_name":"公開日","attribute_value":"2012-03-01"},"publish_date":"2012-03-01","publish_status":"0","recid":"2436","relation_version_is_last":true,"title":["Synchrony-based Audiovisual Analysis"],"weko_creator_id":"1","weko_shared_id":null},"updated":"2022-12-19T03:44:10.379322+00:00"}